Matteo Mendula committed on
Commit
389f817
1 Parent(s): 5780c85

Add application file

Browse files
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# FEEL-IT Italian classifiers (MilaNLProc): binary sentiment, 4-class emotion.
_HUB_SENTIMENT = "MilaNLProc/feel-it-italian-sentiment"
_HUB_EMOTION = "MilaNLProc/feel-it-italian-emotion"
_LOCAL_SENTIMENT = "./local_models/sentiment_ITA"
_LOCAL_EMOTION = "./local_models/emotion_ITA"

# download=True fetches the models from the Hugging Face Hub (cached under
# data/); otherwise the checked-in copies under ./local_models/ are loaded.
# save_model_locally persists freshly downloaded weights for later offline use.
download = False
save_model_locally = False


def _load_classifier(hub_id, local_dir, num_labels):
    """Return an (tokenizer, model) pair for one classifier.

    Loads from the Hub when the module-level ``download`` flag is set
    (optionally saving to *local_dir*), otherwise from *local_dir*.
    The model is switched to eval mode before being returned.
    """
    if download:
        tok = AutoTokenizer.from_pretrained(hub_id, cache_dir="data/")
        mdl = AutoModelForSequenceClassification.from_pretrained(hub_id, cache_dir="data/")
        if save_model_locally:
            mdl.save_pretrained(local_dir)
            tok.save_pretrained(local_dir)
    else:
        tok = AutoTokenizer.from_pretrained(local_dir + "/")
        mdl = AutoModelForSequenceClassification.from_pretrained(local_dir + "/", num_labels=num_labels)
    mdl.eval()
    return tok, mdl


tokenizer, model = _load_classifier(_HUB_SENTIMENT, _LOCAL_SENTIMENT, 2)
tokenizer_emo, model_emo = _load_classifier(_HUB_EMOTION, _LOCAL_EMOTION, 4)
31
+
32
+
33
# %% Build text-classification pipelines over the loaded models.
from transformers import pipeline
import re  # NOTE(review): appears unused in this file — confirm before removing

# return_all_scores=True makes each call return a score for every label,
# which the gr.Label components render as per-class probability bars.
generator = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,
)
generator_emo = pipeline(
    task="text-classification",
    model=model_emo,
    tokenizer=tokenizer_emo,
    return_all_scores=True,
)
40
+
41
def sentiment_emoji(input_abs):
    """Classify the sentiment of Italian text and emoji-prefix the labels.

    Parameters
    ----------
    input_abs : str
        The text to classify.

    Returns
    -------
    dict | str
        A ``{label: score}`` mapping suitable for ``gr.Label``, or a
        shrug emoji string when the input is empty.
    """
    if input_abs == "":
        return "🤷‍♂️"

    # generator(...) returns one list of {"label", "score"} dicts per input.
    scores = generator(input_abs)[0]
    res = {entry["label"]: entry["score"] for entry in scores}
    # Re-key with emoji-prefixed labels for the UI.
    res["🙂 positive"] = res.pop("positive")
    res["🙁 negative"] = res.pop("negative")
    return res
51
+
52
+
53
def emotion_emoji(input_abs):
    """Classify the emotion of Italian text and emoji-prefix the labels.

    Parameters
    ----------
    input_abs : str
        The text to classify.

    Returns
    -------
    dict | str
        A ``{label: score}`` mapping suitable for ``gr.Label``, or a
        shrug emoji string when the input is empty.
    """
    if input_abs == "":
        return "🤷‍♂️"

    # generator_emo(...) returns one list of {"label", "score"} dicts per input.
    scores = generator_emo(input_abs)[0]
    res = {entry["label"]: entry["score"] for entry in scores}
    # Re-key with emoji-prefixed labels for the UI.
    res["😃 joy"] = res.pop("joy")
    res["😡 anger"] = res.pop("anger")
    res["😨 fear"] = res.pop("fear")
    res["😟 sadness"] = res.pop("sadness")
    return res
65
+ #%%
66
+
67
import gradio as gr

# Two-column UI: text box and submit button on the left, sentiment and
# emotion label panels on the right. A single click fires both classifiers.
with gr.Blocks() as demo:
    gr.Markdown("# Analisi sentimento/emozioni del testo italiano")
    with gr.Row():
        with gr.Column():
            txt_in = gr.Textbox(placeholder="Scrivi qui")
            submit_btn = gr.Button("Invia")
        with gr.Column():
            sentiment_out = gr.Label()
            emotion_out = gr.Label()
    submit_btn.click(sentiment_emoji, inputs=txt_in, outputs=sentiment_out, api_name="sentiment")
    submit_btn.click(emotion_emoji, inputs=txt_in, outputs=emotion_out, api_name="emotion")

demo.launch(share=True)
print("Running is terminated")
local_models/emotion_ITA/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "MilaNLProc/feel-it-italian-emotion",
3
+ "architectures": [
4
+ "CamembertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 5,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 6,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "anger",
16
+ "1": "fear",
17
+ "2": "joy",
18
+ "3": "sadness"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "anger": 0,
24
+ "fear": 1,
25
+ "joy": 2,
26
+ "sadness": 3
27
+ },
28
+ "layer_norm_eps": 1e-05,
29
+ "max_position_embeddings": 514,
30
+ "model_type": "camembert",
31
+ "num_attention_heads": 12,
32
+ "num_hidden_layers": 12,
33
+ "output_past": true,
34
+ "pad_token_id": 1,
35
+ "position_embedding_type": "absolute",
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.27.1",
38
+ "type_vocab_size": 1,
39
+ "use_cache": true,
40
+ "vocab_size": 32005
41
+ }
local_models/emotion_ITA/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b685808afcf75d3a459b98c0a19da2d468633a0960a171f4f6ba5652f097ce8
3
+ size 442570677
local_models/emotion_ITA/special_tokens_map.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "mask_token": {
10
+ "content": "<mask>",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
19
+ }
local_models/emotion_ITA/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
local_models/emotion_ITA/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "mask_token": {
10
+ "__type": "AddedToken",
11
+ "content": "<mask>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ "model_max_length": 1000000000000000019884624838656,
18
+ "pad_token": "<pad>",
19
+ "sep_token": "</s>",
20
+ "special_tokens_map_file": null,
21
+ "tokenizer_class": "CamembertTokenizer",
22
+ "unk_token": "<unk>"
23
+ }
local_models/sentiment_ITA/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "MilaNLProc/feel-it-italian-sentiment",
3
+ "architectures": [
4
+ "CamembertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 5,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 6,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "negative",
16
+ "1": "positive"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "negative": 0,
22
+ "positive": 1
23
+ },
24
+ "layer_norm_eps": 1e-05,
25
+ "max_position_embeddings": 514,
26
+ "model_type": "camembert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "output_past": true,
30
+ "pad_token_id": 1,
31
+ "position_embedding_type": "absolute",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.27.1",
34
+ "type_vocab_size": 1,
35
+ "use_cache": true,
36
+ "vocab_size": 32005
37
+ }
local_models/sentiment_ITA/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0f2cec262c7b31b727b242e786d4743bc09fb26652324e0b7f84e5f520bfc2
3
+ size 442564533
local_models/sentiment_ITA/special_tokens_map.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "mask_token": {
10
+ "content": "<mask>",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
19
+ }
local_models/sentiment_ITA/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
local_models/sentiment_ITA/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "mask_token": {
10
+ "__type": "AddedToken",
11
+ "content": "<mask>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ "model_max_length": 1000000000000000019884624838656,
18
+ "pad_token": "<pad>",
19
+ "sep_token": "</s>",
20
+ "special_tokens_map_file": null,
21
+ "tokenizer_class": "CamembertTokenizer",
22
+ "unk_token": "<unk>"
23
+ }