# NOTE: "Spaces: Runtime error" -- page-header text captured along with this file; it is not Python code.
from collections import Counter

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Title and introductory text shown at the top of the Gradio page.
app_title = "Portuguese Hate Speech Detection"

# The whole sentence has to live inside the string literal: previously its
# first half sat outside the closing quotes, which is a syntax error.
app_description = """
This app detects hate speech on Portuguese text using multiple models. You can either introduce your own sentences by filling in "Text" or click on one of the examples provided below.
"""
# Example inputs offered to the user. Each row is a one-element list holding
# the example sentence; the first row is also used as the default text.
app_examples = [
    ["as pessoas tem que perceber que ser 'panasca' não é deixar de ser homem, é deixar de ser humano kkk"],
    ["ontem encontrei-me com um amigo meu e tivemos uma conversa agradável"],
]
# Help texts for result components.
# NOTE(review): nothing else in the visible file reads these two names --
# confirm whether they are still needed.
output_textbox_component_description = """
This box will display the hate speech detection results based on the average score of multiple models.
"""

output_json_component_description = {
    "breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
""",
}
# Class index -> short label; used as the keys of the score dictionary that
# predict() returns.
short_score_descriptions = {
    0: "Non Hate Speech",
    1: "Hate Speech",
}

# Class index -> full English sentence describing the verdict.
score_descriptions = {
    0: "This text is not Hate Speech.",
    1: "This text is Hate Speech.",
}
# Model id -> display name shown in the "Model" dropdown. This mapping is the
# single source of truth; the other tables below are derived from it so that
# adding a model only requires one new entry.
user_friendly_name = {
    "knowhate/HateBERTimbau": "HateBERTimbau (Original)",
    "knowhate/HateBERTimbau-youtube": "HateBERTimbau (YouTube)",
    "knowhate/HateBERTimbau-twitter": "HateBERTimbau (Twitter)",
    "knowhate/HateBERTimbau-yt-tt": "HateBERTimbau (YouTube + Twitter)",
}

# Model ids in dropdown order (dicts preserve insertion order).
model_list = list(user_friendly_name)

# Display name -> model id, used to resolve the dropdown selection.
reverse_user_friendly_name = {v: k for k, v in user_friendly_name.items()}

# Display names in dropdown order; the first one is the default choice.
user_friendly_name_list = list(user_friendly_name.values())
# Load every tokenizer/model pair once, at import time, so that predict()
# only has to look up the already-loaded objects. Each entry keeps the model
# id next to the objects loaded for it.
model_array = []
for model_name in model_list:
    row = {
        "name": model_name,
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    model_array.append(row)
def most_frequent(array):
    """Return the element that occurs most often in *array*.

    When several elements share the highest count, the one encountered first
    is returned.

    Raises:
        IndexError: If *array* is empty.
    """
    occurrence_count = Counter(array)
    return occurrence_count.most_common(1)[0][0]
# Portuguese counterparts of the English sentences in `score_descriptions`.
# The previous code read a `score_descriptions_pt` table that is not defined
# anywhere in the visible file, so the table is kept next to its only user.
# NOTE(review): wording is a direct translation -- confirm with the authors.
_score_descriptions_pt = {
    0: "Este texto não é discurso de ódio.",
    1: "Este texto é discurso de ódio.",
}


def _get_description(idx):
    """Return the English and Portuguese verdict sentences for class *idx*."""
    return score_descriptions[idx] + "\n \n" + _score_descriptions_pt[idx]


def predict(s1, chosen_model):
    """Classify *s1* with the model selected in the dropdown.

    Args:
        s1: Text to classify.
        chosen_model: Display name of the model; a falsy value selects the
            first entry of ``user_friendly_name_list``.

    Returns:
        A ``(scores, markdown_description)`` tuple. ``scores`` maps each short
        class label to its softmax probability; ``markdown_description`` is
        the verdict text for the highest-scoring class.

    Raises:
        KeyError: If *chosen_model* is not a known display name.
        ValueError: If the selected model is not present in ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    # Fail with a clear message instead of an unbound-variable error when the
    # requested model was never loaded.
    row = next((r for r in model_array if r["name"] == full_chosen_model_name), None)
    if row is None:
        raise ValueError(f"Model {full_chosen_model_name!r} has not been loaded.")

    # truncation=True keeps over-long inputs within the model's maximum length.
    model_input = row["tokenizer"](
        [s1], padding=True, truncation=True, return_tensors="pt"
    )
    with torch.no_grad():
        output = row["model"](**model_input)

    # output[0] is what the previous code treated as the logits; the second
    # [0] picks the single sentence in the batch. torch.softmax avoids relying
    # on a `softmax` helper that this file never imports.
    probabilities = torch.softmax(output[0][0], dim=-1).tolist()

    max_pos = probabilities.index(max(probabilities))
    scores = {short_score_descriptions[k]: v for k, v in enumerate(probabilities)}
    return scores, _get_description(max_pos)
# Input widgets, in the positional order of predict(s1, chosen_model).
inputs = [
    gr.Textbox(label="Text", value=app_examples[0][0]),
    gr.Dropdown(
        label="Model",
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
    ),
]

# Output widgets, matching predict's (scores, markdown_description) return.
outputs = [
    gr.Label(label="Result"),
    gr.Markdown(),
]

# No `article` is passed: the previous `article=article_string` argument named
# a variable that is never defined in this file, which raised NameError before
# the app could start.
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
)
demo.launch()