Spaces:

knowhate
/

portuguese-hate-speech-detection

Sleeping

App Files Files Community

gilramos committed on May 11, 2024

Commit

8096aaf

verified ·

1 Parent(s): 79c19d4

Create app2.py

Browse files

Files changed (1) hide show

app2.py +111 -0

app2.py ADDED Viewed

	@@ -0,0 +1,111 @@

from collections import Counter

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------------------
# Static UI text and lookup tables.
# ---------------------------------------------------------------------------

# Title passed to the Gradio interface.
app_title = "Portuguese Hate Speech Detection"

# Description passed to the Gradio interface. The sentence had been split
# across the string boundary (its tail was inside the literal and its head
# was outside it, as bare text); it is reassembled here.
app_description = """
This app detects hate speech on Portuguese text using multiple models. You can either introduce your own sentences by filling in "Text" or click on one of the examples provided below.
"""

# Example rows offered by the interface; each row holds one input sentence.
app_examples = [
    ["as pessoas tem que perceber que ser 'panasca' não é deixar de ser homem, é deixar de ser humano kkk"],
    ["ontem encontrei-me com um amigo meu e tivemos uma conversa agradável"],
]

# Help texts for output components. Neither is referenced by the code
# visible in this file; they are kept for backward compatibility.
output_textbox_component_description = """
This box will display the hate speech detection results based on the average score of multiple models.
"""
output_json_component_description = {"breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
"""}

# Class index -> short label, used as keys of the score dictionary.
short_score_descriptions = {
    0: "Non Hate Speech",
    1: "Hate Speech",
}

# Class index -> full English sentence describing the verdict.
score_descriptions = {
    0: "This text is not Hate Speech.",
    1: "This text is Hate Speech.",
}

# Hugging Face Hub identifiers of the models to load.
model_list = [
    "knowhate/HateBERTimbau",
    "knowhate/HateBERTimbau-youtube",
    "knowhate/HateBERTimbau-twitter",
    "knowhate/HateBERTimbau-yt-tt",
]

# Hub identifier -> name displayed in the model dropdown.
user_friendly_name = {
    "knowhate/HateBERTimbau": "HateBERTimbau (Original)",
    "knowhate/HateBERTimbau-youtube": "HateBERTimbau (YouTube)",
    "knowhate/HateBERTimbau-twitter": "HateBERTimbau (Twitter)",
    "knowhate/HateBERTimbau-yt-tt": "HateBERTimbau (YouTube + Twitter)",
}

# Displayed name -> Hub identifier (inverse of ``user_friendly_name``).
reverse_user_friendly_name = {
    display: hub_id for hub_id, display in user_friendly_name.items()
}

# Displayed names in declaration order; first entry is the default choice.
user_friendly_name_list = list(user_friendly_name.values())
# Load every tokenizer/model pair once at import time so that requests do
# not pay the loading cost. Each entry is a dict with the keys "name"
# (Hub identifier), "tokenizer" and "model". Built with a comprehension so
# that no loop temporaries are left behind in the module namespace.
model_array = [
    {
        "name": model_name,
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    for model_name in model_list
]
def most_frequent(array):
    """Return the element that occurs most often in *array*.

    When several elements share the highest count, the one encountered
    first wins (this is the ordering ``Counter.most_common`` provides).

    Args:
        array: Iterable of hashable elements.

    Returns:
        The most common element.

    Raises:
        IndexError: If *array* contains no elements.
    """
    occurrence_count = Counter(array)
    # Check the counter rather than the argument so that generators and
    # array-like objects without a simple truth value are handled too.
    if not occurrence_count:
        raise IndexError("most_frequent() requires at least one element")
    return occurrence_count.most_common(1)[0][0]
# Portuguese counterparts of ``score_descriptions`` (class index -> sentence).
# ``predict`` referenced this mapping but it was not defined anywhere in the
# visible file, so every call failed with a NameError.
# NOTE(review): wording supplied during review - confirm with the authors.
score_descriptions_pt = {
    0: "Este texto não é discurso de ódio.",
    1: "Este texto é discurso de ódio.",
}


def predict(s1, chosen_model):
    """Classify *s1* with the selected model.

    Args:
        s1: The text to classify.
        chosen_model: Display name of the model (a value of
            ``user_friendly_name``). A falsy value selects the first entry
            of ``user_friendly_name_list``.

    Returns:
        A ``(scores, markdown_description)`` tuple. ``scores`` maps each
        short label from ``short_score_descriptions`` to its softmax
        probability; ``markdown_description`` is the English and Portuguese
        sentence for the highest-scoring class, separated by a blank line.

    Raises:
        KeyError: If *chosen_model* is not a known display name.
        ValueError: If the selected model is missing from ``model_array``.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    # Find the preloaded tokenizer/model pair for the requested model.
    selected = next(
        (row for row in model_array if row["name"] == full_chosen_model_name),
        None,
    )
    if selected is None:
        # Previously this fell through and crashed later on an unbound
        # local; fail early with an explicit message instead.
        raise ValueError(
            f"Model {full_chosen_model_name!r} has not been loaded."
        )

    # truncation=True asks the tokenizer to cut over-long inputs instead of
    # handing the model more tokens than it accepts (presumably bounded by
    # the tokenizer's configured maximum length - confirm per model).
    model_input = selected["tokenizer"](
        [s1], padding=True, truncation=True, return_tensors="pt"
    )
    with torch.no_grad():
        output = selected["model"](**model_input)

    # output[0] holds the logits; [0] picks the only sequence in the batch.
    # torch.softmax replaces the previously undefined ``softmax`` helper.
    probabilities = torch.softmax(output[0][0], dim=-1).tolist()

    max_pos = probabilities.index(max(probabilities))
    markdown_description = (
        score_descriptions[max_pos] + "\n \n" + score_descriptions_pt[max_pos]
    )
    scores = {
        short_score_descriptions[idx]: prob
        for idx, prob in enumerate(probabilities)
    }
    return scores, markdown_description
# Input components: free text (pre-filled with the first example) and a
# dropdown for choosing which model scores it.
inputs = [
    gr.Textbox(label="Text", value=app_examples[0][0]),
    gr.Dropdown(
        label="Model",
        choices=user_friendly_name_list,
        value=user_friendly_name_list[0],
    ),
]

# Output components, in the order of the tuple returned by ``predict``:
# the per-class scores, then the markdown description.
outputs = [
    gr.Label(label="Result"),
    gr.Markdown(),
]

# Build and start the app. The former ``article=article_string`` argument
# was removed: ``article_string`` is not defined anywhere in the visible
# file, so constructing the interface raised a NameError.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
).launch()