Spaces:
Runtime error
Runtime error
File size: 6,479 Bytes
0b0b452 d4a0f6d 0b0b452 d4a0f6d 0b0b452 d4a0f6d 0b0b452 d4a0f6d 0b0b452 d4a0f6d 0b0b452 d4a0f6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from collections import Counter
from scipy.special import softmax
import plotly.express as px
import plotly.io as pio # Add this import
# HTML snippet (author credits and dataset link) handed to the interface
# as its `article`.
article_string = (
    'Authors: <a href="https://huggingface.co/FpOliveira">Felipe Oliveira</a>'
    ' & <a href="https://huggingface.co/victoriadreis">Victoria Reis</a>.'
    ' Read more about our'
    ' <a href="https://github.com/Silly-Machine/TuPi-Portuguese-Hate-Speech-Dataset">'
    'The Portuguese hate speech dataset (TuPI) </a>.'
)
# Bilingual (English - Portuguese) heading handed to the interface as its title.
app_title = (
    "Portuguese hate speech identifier (Multiclass) - "
    "Identificador de discurso de ódio em português (Multiclasse)"
)
# Bilingual (EN / PT) introduction handed to the interface as its description.
# The Portuguese sentence uses "Este aplicativo": "aplicativo" is a masculine
# noun, so the former "Esta aplicativo" was a grammatical error shown to users.
app_description = """
EN: This application employs multiple natural language models to identify different types of hate speech in portuguese. You have the option to enter your own phrases by filling in the "Text" field or choosing one of the examples provided below.
\nPT: Este aplicativo emprega múltiplos modelos de linguagem natural para identificar diferentes tipos de discursos de ódio em português. Você tem a opção de inserir suas próprias frases preenchendo o campo "Text" ou escolhendo um dos exemplos abaixo
"""
# Sample sentences offered as examples in the interface. Each sentence is
# wrapped in its own one-element list, i.e. one value per example row.
app_examples = [
    [sentence]
    for sentence in (
        "bom dia flor do dia!!!",
        "o ódio é muito grande no coração da ex-deputada federal joise hasselmann contra a família bolsonaro",
        "mano deus me livre q nojo da porra!🤮🤮🤮🤮🤮",
        "obrigada princesa, porra, tô muito feliz snrsss 🤩🤩🤩❤️",
        "mds mas o viado vir responder meus status falando q a taylor foi racista foi o auge 😂😂",
        "Pra ser minha inimiga no mínimo tem que ter um rostinho bonito e delicado, não se considere minha rival com essa sua cara de cavalo não, feia, cara de traveco, cabeçuda, queixo quadrado 🤣🤣",
    )
]
# Bilingual (EN / PT) help text for a textbox that would show the result
# averaged over several models.
# NOTE(review): not referenced anywhere in the portion of the file visible
# here (the only output component built below is a plot) - confirm whether it
# is still needed.
output_textbox_component_description = """
EN: This box will display hate speech results based on the average score of multiple models.
PT: Esta caixa exibirá resultados da classificação de discurso de ódio com base na pontuação média de vários modelos.
"""
# Help text for a per-model breakdown view, keyed by language:
# "breakdown" holds the English text, "detalhamento" the Portuguese text.
# NOTE(review): also unreferenced in the visible code - confirm before removing.
output_json_component_description = {
"breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
""",
"detalhamento": """
(Esta caixa apresenta um detalhamento da avaliação para cada modelo.)
"""
}
# Category label for each class index (0-11); predict() looks labels up here
# by the position of each score in the model output.
hate_speech_categories = dict(
    enumerate(
        (
            "ageism",
            "aporophobia",
            "body shame",
            "capacitism",
            "lgbtphobia",
            "political",
            "racism",
            "religious intolerance",
            "misogyny",
            "xenophobia",
            "other",
            "not hate",
        )
    )
)
# Display name for every supported model, keyed by its model identifier.
user_friendly_name = {
    "FpOliveira/tupi-bert-large-portuguese-cased-multiclass-multilabel": "BERTimbau large (TuPi)",
    "FpOliveira/tupi-bert-base-portuguese-cased-multiclass-multilabel": "BERTimbau base (TuPi)",
    "FpOliveira/tupi-gpt2-small-multiclass-multilabel": "GPT2 small (TuPi)",
}
# Model identifiers, in the same order as the mapping above.
model_list = list(user_friendly_name)
# Display name -> model identifier, used to resolve the dropdown selection.
reverse_user_friendly_name = {
    label: identifier for identifier, label in user_friendly_name.items()
}
# Display names in declaration order; the first one is the default choice.
user_friendly_name_list = [*user_friendly_name.values()]
# Load the tokenizer and the classification model for every entry of
# model_list, once, at import time. Each element of model_array is a dict
# with the keys "name", "tokenizer" and "model".
model_array = []
for model_name in model_list:
    model_array.append(
        {
            "name": model_name,
            "tokenizer": AutoTokenizer.from_pretrained(model_name),
            "model": AutoModelForSequenceClassification.from_pretrained(model_name),
        }
    )
# Function to find the most frequent element in an array
def most_frequent(array):
    """Return the element that occurs most often in *array*.

    When several elements share the highest count, the one encountered
    first is returned (``Counter.most_common`` keeps first-seen order for
    equal counts).

    Args:
        array: Iterable of hashable elements.

    Returns:
        The most common element.

    Raises:
        IndexError: If *array* is empty. The exception type is the same as
            before; it now carries an explicit message instead of the bare
            "list index out of range" produced by indexing an empty result.
    """
    ranked = Counter(array).most_common(1)
    if not ranked:
        raise IndexError("most_frequent() requires at least one element")
    return ranked[0][0]
def predict(s1, chosen_model):
    """Classify *s1* with the selected model and plot the class probabilities.

    Args:
        s1: Text to classify. ``None`` is treated as an empty string.
        chosen_model: User-friendly model name (a key of
            ``reverse_user_friendly_name``). A falsy value selects the first
            entry of ``user_friendly_name_list``.

    Returns:
        A Plotly bar chart with one bar per known category, ordered from the
        highest to the lowest probability.

    Raises:
        KeyError: If *chosen_model* is not a known user-friendly name.
        ValueError: If the selected model has no entry in ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    entry = next(
        (row for row in model_array if row["name"] == full_chosen_model_name),
        None,
    )
    if entry is None:
        # Without this guard the code below would fail on an unbound
        # variable, hiding the real cause.
        raise ValueError(f"Model {full_chosen_model_name!r} is not loaded")

    # truncation=True cuts over-long text to the tokenizer's configured
    # maximum length instead of passing it through at full length.
    model_input = entry["tokenizer"](
        [s1 or ""], padding=True, truncation=True, return_tensors="pt"
    )
    with torch.no_grad():
        output = entry["model"](**model_input)

    # First output element, first (and only) sequence of the batch.
    # NOTE(review): the model ids contain "multilabel", while softmax treats
    # the classes as mutually exclusive - confirm this is the intended
    # normalisation.
    probabilities = softmax(output[0][0].detach().numpy()).tolist()

    # Rank every class index from most to least probable.
    ranked_indices = sorted(
        range(len(probabilities)),
        key=probabilities.__getitem__,
        reverse=True,
    )
    # Keep only indices that have a category label.
    valid_indices = [i for i in ranked_indices if i in hate_speech_categories]
    all_categories = [hate_speech_categories[i] for i in valid_indices]
    all_probabilities = [probabilities[i] for i in valid_indices]

    fig = px.bar(
        x=all_categories,
        y=all_probabilities,
        labels={"x": "Categories", "y": "Probabilities"},
        title=" ",
        text=all_probabilities,
        color_discrete_sequence=["#ff7400"],
    )
    fig.update_traces(texttemplate="%{text:.2f}", textposition="outside")
    fig.update_layout(
        # Vertical tick labels so the category names do not overlap.
        xaxis_tickangle=-90,
        # Extra room around the chart.
        margin=dict(l=50, r=50, b=100, t=100),
        # Head-room above 1.0 for the value labels drawn outside the bars.
        yaxis=dict(range=[0, 1.1]),
    )
    return fig
# Form controls: the text to classify (pre-filled with the first example)
# and a model selector (pre-set to the first display name).
inputs = [
    gr.Textbox(value=app_examples[0][0], label="Text"),
    gr.Dropdown(
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
        label="Model",
    ),
]
# Single output: the bar chart returned by predict().
outputs = [gr.Plot(label="Classes Predicted Probabilities")]
# Assemble the interface from the pieces defined above.
interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    article=article_string,
    examples=app_examples,
    live=False,
)
# Start serving the app.
interface.launch()
|