File size: 3,654 Bytes
284038e
 
0ea0f72
45be40a
98fd87f
9c7e834
0ea0f72
a0a875f
241c678
9c7e834
a0ffc1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241c678
 
a0ffc1e
 
241c678
a0ffc1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0f69b2
 
 
 
 
 
 
a0ffc1e
 
 
 
 
 
 
 
 
 
eb1e32a
e0f69b2
a0ffc1e
 
 
eb1e32a
a0ffc1e
 
 
 
241c678
 
6123551
 
 
eb1e32a
 
6123551
 
d5a5afd
 
 
faf381e
d5a5afd
6123551
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# imports
import gradio as gr
import pandas as pd
import tempfile
import itertools
import torch
import numpy as np
from numpy import dot
from numpy.linalg import norm, multi_dot
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer

# compute the cosine similarity between the emotion scores of two input sentences
# summary function - test for a single Gradio function interface
# Hugging Face checkpoint used for emotion classification.
_EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Score columns of the result frame, listed in the index order in which the
# scores are read from the model output (index 0 -> "anger", ... 6 -> "surprise").
# NOTE(review): assumes this matches model.config.id2label - confirm if the
# checkpoint is ever swapped.
_EMOTION_COLUMNS = (
    "anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise",
)

# Filled on first use so the checkpoint is loaded once per process instead of
# once per request.
_EMOTION_PIPELINE = {}


class _SimpleDataset:
    """Minimal indexable dataset over a tokenizer output, passed to Trainer.predict."""

    def __init__(self, tokenized_texts):
        self.tokenized_texts = tokenized_texts

    def __len__(self):
        return len(self.tokenized_texts["input_ids"])

    def __getitem__(self, idx):
        return {k: v[idx] for k, v in self.tokenized_texts.items()}


def _load_emotion_pipeline():
    """Return (tokenizer, model, trainer), creating and caching them on first call."""
    if not _EMOTION_PIPELINE:
        tokenizer = AutoTokenizer.from_pretrained(_EMOTION_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(_EMOTION_MODEL_NAME)
        trainer = Trainer(model=model)
        # Populate in one step so a failed load never leaves a partial cache.
        _EMOTION_PIPELINE.update(tokenizer=tokenizer, model=model, trainer=trainer)
    return (
        _EMOTION_PIPELINE["tokenizer"],
        _EMOTION_PIPELINE["model"],
        _EMOTION_PIPELINE["trainer"],
    )


def _softmax(logits):
    """Return the row-wise softmax of a 2-D array of logits."""
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum(axis=-1, keepdims=True)


def gr_cosine_similarity(sentence1, sentence2):
    """Score two sentences for emotion and compare the two score vectors.

    Both sentences are classified with the emotion model; the per-class
    probabilities of each sentence are then compared with cosine similarity.

    Args:
        sentence1: First text to classify.
        sentence2: Second text to classify.

    Returns:
        A tuple ``(df, cosine_similarity)`` where ``df`` is a DataFrame with one
        row per sentence (columns ``text``, ``max_label`` and one probability
        column per emotion, rounded to 3 decimals) and ``cosine_similarity`` is
        the cosine similarity of the two unrounded probability vectors, rounded
        to 3 decimals.
    """
    tokenizer, model, trainer = _load_emotion_pipeline()

    texts = [sentence1, sentence2]

    # Tokenize both texts together so they are padded to a common length.
    tokenized_texts = tokenizer(texts, truncation=True, padding=True)
    logits = trainer.predict(_SimpleDataset(tokenized_texts)).predictions

    # Most likely class per sentence, translated to its label name.
    labels = pd.Series(logits.argmax(-1)).map(model.config.id2label)

    # Plain Python floats: rounding them yields clean values for display.
    probabilities = _softmax(logits).tolist()

    rows = [
        [text, label, *(round(p, 3) for p in probs)]
        for text, label, probs in zip(texts, labels, probabilities)
    ]
    df = pd.DataFrame(rows, columns=["text", "max_label", *_EMOTION_COLUMNS])

    # Cosine similarity = dot product divided by the product of the norms.
    # Probabilities are strictly positive, so neither norm can be zero.
    v1, v2 = probabilities
    cosine_similarity = round(dot(v1, v2) / (norm(v1) * norm(v2)), 3)

    return df, cosine_similarity
  
 

# One single-line text box per sentence to compare.
first_sentence_box = gr.inputs.Textbox(
    lines=1, placeholder="This tool is awesome!", default="", label="Text 1"
)
second_sentence_box = gr.inputs.Textbox(
    lines=1, placeholder="I am so happy right now.", default="", label="Text 2"
)

# Wire the similarity function to a dataframe output and a text output,
# then start the app with debug mode enabled.
demo = gr.Interface(
    fn=gr_cosine_similarity,
    inputs=[first_sentence_box, second_sentence_box],
    outputs=["dataframe", "text"],
    title="Emotion Similarity",
    description="Input two sentences and the model returns their emotional similarity (between 0 and 1), using this model: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base.",
)
demo.launch(debug=True)