# Hugging Face Space status (from the hosting page): Runtime error
from collections import Counter

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------------------
# Static UI text and label/model lookup tables used by the app.
# ---------------------------------------------------------------------------

app_title = "Portuguese Hate Speech Detection"

app_description = """
This app detects hate speech on Portuguese text using multiple models. You can either introduce your own sentences by filling in "Text" or click on one of the examples provided below.
"""

# One inner list per example; each holds the value for the "Text" input.
app_examples = [
    ["as pessoas tem que perceber que ser 'panasca' não é deixar de ser homem, é deixar de ser humano kkk"],
    ["ontem encontrei-me com um amigo meu e tivemos uma conversa agradável"],
]

output_textbox_component_description = """
This box will display the hate speech detection results based on the average score of multiple models.
"""

output_json_component_description = {
    "breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
""",
}

# Class index -> short label used as the key of the returned score mapping.
short_score_descriptions = {
    0: "Non Hate Speech",
    1: "Hate Speech",
}

# Class index -> full English sentence describing the predicted class.
score_descriptions = {
    0: "This text is not Hate Speech.",
    1: "This text is Hate Speech.",
}

# Class index -> Portuguese counterpart of ``score_descriptions``.
# predict() reads this table, so it must exist with the same keys.
score_descriptions_pt = {
    0: "Este texto não é Discurso de Ódio.",
    1: "Este texto é Discurso de Ódio.",
}

# Hugging Face Hub identifiers of the checkpoints offered in the UI.
model_list = [
    "knowhate/HateBERTimbau",
    "knowhate/HateBERTimbau-youtube",
    "knowhate/HateBERTimbau-twitter",
    "knowhate/HateBERTimbau-yt-tt",
]

# Hub identifier -> name displayed in the model dropdown.
user_friendly_name = {
    "knowhate/HateBERTimbau": "HateBERTimbau (Original)",
    "knowhate/HateBERTimbau-youtube": "HateBERTimbau (YouTube)",
    "knowhate/HateBERTimbau-twitter": "HateBERTimbau (Twitter)",
    "knowhate/HateBERTimbau-yt-tt": "HateBERTimbau (YouTube + Twitter)",
}

# Displayed name -> Hub identifier (inverse of ``user_friendly_name``).
reverse_user_friendly_name = {v: k for k, v in user_friendly_name.items()}

# Displayed names in dropdown order; the first one is the default choice.
user_friendly_name_list = list(user_friendly_name.values())
# Load the tokenizer and classifier of every listed checkpoint once, at import
# time; predict() later looks the selected model up in this list by "name".
model_array = [
    {
        "name": model_name,
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    for model_name in model_list
]
def most_frequent(array):
    """Return the element that occurs most often in *array*.

    When several elements share the highest count, the one encountered
    first in *array* is returned (this is the ordering guaranteed by
    ``Counter.most_common``).

    Args:
        array: Iterable of hashable items.

    Returns:
        The most common item.

    Raises:
        IndexError: If *array* is empty.
    """
    occurrence_count = Counter(array)
    # most_common(1) yields a single (item, count) pair; keep only the item.
    return occurrence_count.most_common(1)[0][0]
def predict(s1, chosen_model):
    """Classify one sentence with the selected hate-speech model.

    Args:
        s1: Text to classify.
        chosen_model: Displayed model name (a key of
            ``reverse_user_friendly_name``). A falsy value selects the first
            entry of ``user_friendly_name_list``.

    Returns:
        A ``(scores, markdown_description)`` tuple. ``scores`` maps each
        short label from ``short_score_descriptions`` to its softmax
        probability; ``markdown_description`` is the description of the
        highest-scoring class.

    Raises:
        KeyError: If ``chosen_model`` is not a known displayed name.
        ValueError: If the selected model is not present in ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    # Find the preloaded tokenizer/model pair for the selected checkpoint.
    row = next(
        (entry for entry in model_array if entry["name"] == full_chosen_model_name),
        None,
    )
    if row is None:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(f"Model {full_chosen_model_name!r} is not loaded.")

    tokenizer = row["tokenizer"]
    model = row["model"]

    # truncation=True keeps over-long inputs within the model's maximum length.
    model_input = tokenizer([s1], padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        output = model(**model_input)

    # output[0][0]: first output field, first (and only) sentence of the batch.
    # torch.softmax replaces the previously undefined `softmax` helper.
    probabilities = torch.softmax(output[0][0], dim=-1).tolist()

    max_pos = probabilities.index(max(probabilities))
    markdown_description = score_descriptions[max_pos]

    # The Portuguese descriptions are appended only when the module defines
    # them; a missing table must not break the prediction.
    description_pt = globals().get("score_descriptions_pt", {}).get(max_pos)
    if description_pt is not None:
        markdown_description = markdown_description + "\n \n" + description_pt

    scores = {short_score_descriptions[k]: v for k, v in enumerate(probabilities)}
    return scores, markdown_description
# Input widgets: the sentence to classify and the model used to classify it.
inputs = [
    gr.Textbox(label="Text", value=app_examples[0][0]),
    gr.Dropdown(
        label="Model",
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
    ),
]

# Output widgets, in the order of the values returned by predict():
# the per-label scores and the markdown description.
outputs = [
    gr.Label(label="Result"),
    gr.Markdown(),
]

# No `article` argument is passed: the file never defines an article string,
# and referencing the undefined name raised NameError before the app started.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
).launch()