gilramos committed on
Commit
8096aaf
1 Parent(s): 79c19d4

Create app2.py

Browse files
Files changed (1) hide show
  1. app2.py +111 -0
app2.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from collections import Counter

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+
5
app_title = "Portuguese Hate Speech Detection"

# NOTE(review): the original source had the description lines scrambled (the
# second sentence sat outside the triple-quoted string, a SyntaxError);
# reconstructed in reading order.
app_description = """
This app detects hate speech on Portuguese text using multiple models. You can either introduce your own sentences by filling in "Text" or
click on one of the examples provided below.
"""

# One-element lists: each example fills the single "Text" input component.
app_examples = [
    ["as pessoas tem que perceber que ser 'panasca' não é deixar de ser homem, é deixar de ser humano kkk"],
    ["ontem encontrei-me com um amigo meu e tivemos uma conversa agradável"],
]
15
+
16
# Help text for the textbox that shows the aggregate verdict.
output_textbox_component_description = """
This box will display the hate speech detection results based on the average score of multiple models.
"""

# Help text for the per-model JSON breakdown component.
output_json_component_description = {
    "breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
"""
}

# Short class labels (keys of the score dict shown in the UI).
short_score_descriptions = {0: "Non Hate Speech", 1: "Hate Speech"}

# Full sentences shown as the markdown verdict for the winning class.
score_descriptions = {
    0: "This text is not Hate Speech.",
    1: "This text is Hate Speech.",
}
33
+
34
# Hugging Face model identifiers evaluated by this app.
model_list = [
    "knowhate/HateBERTimbau",
    "knowhate/HateBERTimbau-youtube",
    "knowhate/HateBERTimbau-twitter",
    "knowhate/HateBERTimbau-yt-tt",
]

# Model id -> label shown in the dropdown.
user_friendly_name = {
    "knowhate/HateBERTimbau": "HateBERTimbau (Original)",
    "knowhate/HateBERTimbau-youtube": "HateBERTimbau (YouTube)",
    "knowhate/HateBERTimbau-twitter": "HateBERTimbau (Twitter)",
    "knowhate/HateBERTimbau-yt-tt": "HateBERTimbau (YouTube + Twitter)",
}

# Dropdown label -> model id (inverse of user_friendly_name).
reverse_user_friendly_name = {label: model_id for model_id, label in user_friendly_name.items()}

user_friendly_name_list = list(user_friendly_name.values())

# Eagerly load every tokenizer/model pair once at startup so predict() only
# does inference.
model_array = [
    {
        "name": model_name,
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    for model_name in model_list
]
60
+
61
def most_frequent(array):
    """Return the most common element of *array* (ties: first encountered).

    Fix: the original file used ``Counter`` without ever importing it, so any
    call raised NameError; the import is now at the top of the file.
    """
    occurence_count = Counter(array)
    return occurence_count.most_common(1)[0][0]
64
+
65
+
66
def predict(s1, chosen_model):
    """Classify the sentence *s1* with the model selected in the dropdown.

    Args:
        s1: Input text (Portuguese sentence).
        chosen_model: User-friendly model name; falls back to the first
            available model when empty/None.

    Returns:
        A ``(scores, markdown_description)`` tuple: ``scores`` maps each short
        class label to its softmax probability, ``markdown_description`` is the
        verdict sentence for the highest-scoring class.

    Raises:
        ValueError: if *chosen_model* does not correspond to a loaded model.

    Fixes vs. original: ``softmax`` was never imported (NameError) — replaced
    with ``torch.softmax``; ``get_description`` referenced the undefined
    ``score_descriptions_pt`` (NameError on every call) — dropped; ``logits``
    could be unbound if no model matched — now raises explicitly.
    """
    if not chosen_model:
        chosen_model = user_friendly_name_list[0]
    full_chosen_model_name = reverse_user_friendly_name[chosen_model]

    logits = None
    for row in model_array:
        if row["name"] != full_chosen_model_name:
            continue
        tokenizer = row["tokenizer"]
        model = row["model"]
        # Batch of one sentence; padding is a no-op here but kept for parity.
        model_input = tokenizer([s1], padding=True, return_tensors="pt")
        with torch.no_grad():
            output = model(**model_input)
        # Class probabilities for the single example in the batch.
        logits = torch.softmax(output[0][0], dim=-1).tolist()
        break

    if logits is None:
        raise ValueError(f"Unknown model: {chosen_model}")

    max_pos = logits.index(max(logits))
    markdown_description = score_descriptions[max_pos]
    scores = {short_score_descriptions[k]: v for k, v in enumerate(logits)}

    return scores, markdown_description
95
+
96
+
97
# UI wiring: one text input plus a model selector, defaulting to the first
# example sentence and the first model.
inputs = [
    gr.Textbox(label="Text", value=app_examples[0][0]),
    gr.Dropdown(label="Model", choices=user_friendly_name_list, value=user_friendly_name_list[0]),
]

outputs = [
    gr.Label(label="Result"),
    gr.Markdown(),
]

# Fix: the original passed ``article=article_string`` but ``article_string``
# is never defined anywhere in the file, raising NameError at startup; the
# kwarg is dropped.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=app_title,
    description=app_description,
    examples=app_examples,
).launch()