Spaces:
Runtime error
Runtime error
File size: 3,654 Bytes
284038e 0ea0f72 45be40a 98fd87f 9c7e834 0ea0f72 a0a875f 241c678 9c7e834 a0ffc1e 241c678 a0ffc1e 241c678 a0ffc1e e0f69b2 a0ffc1e eb1e32a e0f69b2 a0ffc1e eb1e32a a0ffc1e 241c678 6123551 eb1e32a 6123551 d5a5afd faf381e d5a5afd 6123551 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# imports
import gradio as gr
import pandas as pd
import tempfile
import itertools
import torch
import numpy as np
from numpy import dot
from numpy.linalg import norm, multi_dot
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
# compute dot product of inputs
# summary function - test for single gradio function interface
# Checkpoint used for emotion classification.
_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Score columns of the result table, listed in the class-index order they are
# read in (index 0 -> "anger", ..., index 6 -> "surprise").
# NOTE(review): assumed to match model.config.id2label -- confirm.
_EMOTION_COLUMNS = (
    "anger",
    "disgust",
    "fear",
    "joy",
    "neutral",
    "sadness",
    "surprise",
)

# Filled on first use with (tokenizer, model, trainer) so the checkpoint is
# loaded once per process instead of on every request.
_PIPELINE = None


class SimpleDataset:
    """Index-addressable wrapper around a tokenizer's batch output.

    Exposes ``__len__`` and ``__getitem__`` so the batch can be handed to
    ``Trainer.predict`` as a dataset.
    """

    def __init__(self, tokenized_texts):
        self.tokenized_texts = tokenized_texts

    def __len__(self):
        return len(self.tokenized_texts["input_ids"])

    def __getitem__(self, idx):
        # One example = the idx-th entry of every field the tokenizer produced.
        return {k: v[idx] for k, v in self.tokenized_texts.items()}


def _get_pipeline():
    """Return the cached (tokenizer, model, trainer), loading it on first call."""
    global _PIPELINE
    if _PIPELINE is None:
        tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(_MODEL_NAME)
        _PIPELINE = (tokenizer, model, Trainer(model=model))
    return _PIPELINE


def _softmax(logits):
    """Return the row-wise softmax of a 2-D array of logits."""
    logits = np.asarray(logits)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    exps = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return exps / exps.sum(axis=-1, keepdims=True)


def _cosine(v1, v2):
    """Return the cosine of the angle between two vectors."""
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))


def gr_cosine_similarity(sentence1, sentence2):
    """Classify two sentences by emotion and compare their score vectors.

    Both sentences are tokenized together, run through the emotion
    classifier, and the logits are turned into per-class probabilities with a
    softmax. The cosine similarity is computed between the two full
    (unrounded) probability vectors.

    Args:
        sentence1: First input text.
        sentence2: Second input text.

    Returns:
        A ``(df, cosine_similarity)`` tuple. ``df`` is a two-row DataFrame
        with columns ``text``, ``max_label`` and one score column per emotion
        (rounded to 3 decimals). ``cosine_similarity`` is a float rounded to
        3 decimals.
    """
    tokenizer, model, trainer = _get_pipeline()

    lines_s = [sentence1, sentence2]

    # Tokenize both texts as one padded batch and run the classifier on it.
    tokenized_texts = tokenizer(lines_s, truncation=True, padding=True)
    predictions = trainer.predict(SimpleDataset(tokenized_texts))
    logits = predictions.predictions

    # Highest-scoring class per sentence, mapped to its label name.
    labels = pd.Series(logits.argmax(-1)).map(model.config.id2label)

    # Plain Python floats so that rounding and the DataFrame dtype do not
    # depend on the logits' NumPy dtype.
    probabilities = _softmax(logits).tolist()

    n_scores = len(_EMOTION_COLUMNS)
    rows = [
        [text, label, *(round(p, 3) for p in probs[:n_scores])]
        for text, label, probs in zip(lines_s, labels, probabilities)
    ]
    df = pd.DataFrame(rows, columns=["text", "max_label", *_EMOTION_COLUMNS])

    # Similarity uses the unrounded probability vectors of both sentences.
    similarity = _cosine(probabilities[0], probabilities[1])
    cosine_similarity = round(float(similarity), 3)

    return df, cosine_similarity
# Web UI: two single-line text inputs are passed to gr_cosine_similarity and
# its two return values are shown as a table and a text field.
# gr.Textbox(value=...) is used instead of gr.inputs.Textbox(default=...),
# because the gr.inputs namespace is deprecated in Gradio.
demo = gr.Interface(
    fn=gr_cosine_similarity,
    inputs=[
        gr.Textbox(lines=1, placeholder="This tool is awesome!", value="", label="Text 1"),
        gr.Textbox(lines=1, placeholder="I am so happy right now.", value="", label="Text 2"),
    ],
    outputs=["dataframe", "text"],
    title="Emotion Similarity",
    description="Input two sentences and the model returns their emotional similarity (between 0 and 1), using this model: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base.",
)
demo.launch(debug=True)