rmelk committed on
Commit c50ba05
1 Parent(s): 4cc566b

first commit

app.py ADDED
@@ -0,0 +1,83 @@
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+
+
+ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+ device
+
+
+
+ download = False
+ save_model_locally = False
+ if download:
+     tokenizer = AutoTokenizer.from_pretrained("MilaNLProc/feel-it-italian-sentiment", cache_dir="data/")
+     model = AutoModelForSequenceClassification.from_pretrained("MilaNLProc/feel-it-italian-sentiment", cache_dir="data/")
+     model.eval()
+     tokenizer_emo = AutoTokenizer.from_pretrained("MilaNLProc/feel-it-italian-emotion", cache_dir="data/")
+     model_emo = AutoModelForSequenceClassification.from_pretrained("MilaNLProc/feel-it-italian-emotion", cache_dir="data/")
+     model_emo.eval()
+     if save_model_locally:
+         model.save_pretrained('./local_models/sentiment_ITA')
+         tokenizer.save_pretrained('./local_models/sentiment_ITA')
+         model_emo.save_pretrained('./local_models/emotion_ITA')
+         tokenizer_emo.save_pretrained('./local_models/emotion_ITA')
+ else:
+     tokenizer = AutoTokenizer.from_pretrained("./local_models/sentiment_ITA/")
+     model = AutoModelForSequenceClassification.from_pretrained("./local_models/sentiment_ITA/", num_labels=2)
+     model.eval()
+     tokenizer_emo = AutoTokenizer.from_pretrained("./local_models/emotion_ITA/")
+     model_emo = AutoModelForSequenceClassification.from_pretrained("./local_models/emotion_ITA/", num_labels=4)
+     model_emo.eval()
+
+
+ #%%
+
+ from transformers import pipeline
+ import re
+
+ generator = pipeline(task="text-classification", model=model, tokenizer=tokenizer, return_all_scores=True)
+ generator_emo = pipeline(task="text-classification", model=model_emo, tokenizer=tokenizer_emo, return_all_scores=True)
+
+ def sentiment_emoji(input_abs):
+
+     if input_abs == "":
+         return "🤷‍♂️"
+
+     res = generator(input_abs)[0]
+     res = {res[x]["label"]: res[x]["score"] for x in range(len(res))}
+     res["🙂"] = res.pop("positive")
+     res["🙁"] = res.pop("negative")
+     return res
+
+
+ def emotion_emoji(input_abs):
+     if input_abs == "":
+         return "🤷‍♂️"
+
+     res = generator_emo(input_abs)[0]
+     res = {res[x]["label"]: res[x]["score"] for x in range(len(res))}
+     res["😃"] = res.pop("joy")
+     res["😡"] = res.pop("anger")
+     res["😨"] = res.pop("fear")
+     res["😟"] = res.pop("sadness")
+
+     return res
+ #%%
+
+ import gradio as gr
+ demo = gr.Blocks()
+ with demo:
+     gr.Markdown("# Analisi sentimento/emozioni del testo italiano")
+     with gr.Row():
+         with gr.Column():
+             text_input = gr.Textbox(placeholder="Scrivi qui")
+             button_1 = gr.Button("Invia")
+         with gr.Column():
+             label_sem = gr.Label()
+             label_emo = gr.Label()
+     # gr.Interface(fn=emotion_emoji, inputs=text_input, outputs="label")
+     button_1.click(sentiment_emoji, inputs=text_input, outputs=label_sem)
+     button_1.click(emotion_emoji, inputs=text_input, outputs=label_emo)
+
+
+ demo.launch()
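
Note on the two helpers above: gr.Label renders a {label: score} dict as a ranked bar chart, which is why sentiment_emoji and emotion_emoji reshape the pipeline output and swap the label strings for emoji. A minimal sketch of that data flow outside Gradio (the input sentence and printed scores are illustrative only, and it assumes the local_models/ folders added in this commit are present):

    # Illustrative sketch only; assumes ./local_models/sentiment_ITA/ from this commit exists.
    from transformers import pipeline

    sentiment = pipeline(
        task="text-classification",
        model="./local_models/sentiment_ITA/",
        tokenizer="./local_models/sentiment_ITA/",
        return_all_scores=True,
    )

    raw = sentiment("Che bella giornata!")[0]   # hypothetical input sentence
    # raw looks like: [{"label": "negative", "score": ...}, {"label": "positive", "score": ...}]
    scores = {entry["label"]: entry["score"] for entry in raw}
    scores["🙂"] = scores.pop("positive")
    scores["🙁"] = scores.pop("negative")
    print(scores)   # e.g. {"🙂": 0.99, "🙁": 0.01} (values illustrative)
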
local_models/emotion_ITA/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "MilaNLProc/feel-it-italian-emotion",
+   "architectures": [
+     "CamembertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 5,
+   "classifier_dropout": null,
+   "eos_token_id": 6,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "anger",
+     "1": "fear",
+     "2": "joy",
+     "3": "sadness"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "anger": 0,
+     "fear": 1,
+     "joy": 2,
+     "sadness": 3
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "camembert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.11.3",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 32005
+ }
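
The id2label block above is the source of the anger/fear/joy/sadness strings that emotion_emoji in app.py pops and remaps to emoji, so the two must stay in sync. A quick way to inspect the mapping from the local copy (a sketch, assuming the folder added in this commit):

    # Sketch: print the label mapping the classification pipeline will report.
    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained("./local_models/emotion_ITA/")
    print(cfg.id2label)   # e.g. {0: 'anger', 1: 'fear', 2: 'joy', 3: 'sadness'}
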
local_models/emotion_ITA/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aae2a4c143248032471a56a42cf9e2afa9930c08535e02df8cbece685fcf5f9c
+ size 442585813
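
This file is a Git LFS pointer rather than the weights themselves: the repository stores only the sha256 oid and the byte size, and `git lfs pull` fetches the roughly 440 MB binary. A small check that a pulled file matches the pointer (a sketch; the path is assumed relative to the repo root):

    # Sketch: verify a pulled LFS file against the pointer's oid and size.
    import hashlib, os

    path = "local_models/emotion_ITA/pytorch_model.bin"
    print(os.path.getsize(path))   # expected: 442585813, the size recorded above

    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    print(h.hexdigest())   # expected to match the oid recorded above
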
local_models/emotion_ITA/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"]}
local_models/emotion_ITA/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
local_models/emotion_ITA/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"], "special_tokens_map_file": null, "name_or_path": "MilaNLProc/feel-it-italian-emotion", "tokenizer_class": "CamembertTokenizer"}
local_models/sentiment_ITA/config.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "_name_or_path": "MilaNLProc/feel-it-italian-sentiment",
+   "architectures": [
+     "CamembertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 5,
+   "classifier_dropout": null,
+   "eos_token_id": 6,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "negative",
+     "1": "positive"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "negative": 0,
+     "positive": 1
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "camembert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.11.3",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 32005
+ }
local_models/sentiment_ITA/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f779a009987fcd3392dedad71923c8c4b8c88ff74898eeae4b30d5c94601844
+ size 442579735
local_models/sentiment_ITA/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"]}
local_models/sentiment_ITA/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
local_models/sentiment_ITA/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"], "special_tokens_map_file": null, "name_or_path": "MilaNLProc/feel-it-italian-sentiment", "tokenizer_class": "CamembertTokenizer"}