Spaces:
Runtime error
Runtime error
azaninello
committed on
Commit
•
c3b1412
1
Parent(s):
9703df0
Update app.py
Browse files
app.py
CHANGED
@@ -10,41 +10,94 @@ nltk.download('punkt')
|
|
10 |
|
11 |
file = "text.txt"
|
12 |
|
13 |
-
spacy_model = 'https://huggingface.co/spacy/it_core_news_sm'
|
14 |
-
|
15 |
import spacy
|
16 |
-
nlp_IT = spacy.load(
|
17 |
|
18 |
def get_lists(file):
|
19 |
with open(file, 'r', encoding='utf-8') as f:
|
20 |
text = f.read()
|
21 |
|
22 |
-
word_tokenized_text = word_tokenize(text, language='italian')
|
23 |
-
word_tokenized_text_lower = [word.lower() for word in word_tokenized_text]
|
24 |
-
|
25 |
sent_tokenized_text = sent_tokenize(text, language='italian')
|
26 |
sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]
|
27 |
|
28 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
demo = gr.Interface(
|
35 |
-
|
36 |
[
|
37 |
gr.Textbox(),
|
38 |
-
gr.Radio(["
|
39 |
-
gr.CheckboxGroup(["ran", "swam", "ate", "slept"]),
|
40 |
-
gr.Checkbox(label="Is it the morning?"),
|
41 |
],
|
42 |
"text",
|
43 |
examples=[
|
44 |
-
["
|
45 |
-
["
|
46 |
-
["
|
47 |
-
["cat", "zoo", ["ate"], True],
|
48 |
],
|
49 |
)
|
50 |
|
|
|
10 |
|
11 |
# Path of the text file that get_lists() reads when the module is loaded.
file = "text.txt"
|
12 |
|
|
|
|
|
13 |
import spacy
|
14 |
+
# Load the installed spaCy pipeline "it_core_news_sm" once at import time
# (presumably the small Italian model; the tokenization in this file also uses
# language='italian'). It is the default `nlp` of search_engine_collocations.
nlp_IT = spacy.load("it_core_news_sm")
|
15 |
|
16 |
def get_lists(file):
    """Read a UTF-8 text file and split it into sentences.

    Args:
        file: Path of the text file to read.

    Returns:
        A 2-tuple ``(sentences, sentences_lower)``: the sentences produced by
        ``sent_tokenize(..., language='italian')`` and the same sentences
        lower-cased, in the same order.
    """
    with open(file, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()

    original_case = sent_tokenize(raw_text, language='italian')
    # Lower-cased copy kept alongside the originals (same order, same length).
    lowered = list(map(str.lower, original_case))

    return original_case, lowered
|
24 |
+
|
25 |
+
# Tokenize the text file once at import time; both lists are used as default
# arguments of search_engine_collocations below.
sentences, sentences_lower = get_lists(file)
|
26 |
+
|
27 |
+
def search_engine_collocations(target='scarto', colloc='azioni', nlp=nlp_IT, sentences_lower=sentences_lower, sentences=sentences):
    """Rank the lemmas that co-occur with ``target`` in the corpus sentences.

    Every lower-cased sentence containing ``target`` (case-insensitive
    substring match, surrounding whitespace ignored) is parsed with ``nlp``.
    The lemmas of its tokens are grouped by part of speech, and the group
    selected by ``colloc`` is reported as a frequency ranking.

    Args:
        target: Word (or substring) to look for.
        colloc: Which group to report: ``'azioni'`` (tokens tagged VERB),
            ``'caratteristiche'`` (ADJ) or ``'concetti'`` (NOUN).
        nlp: Callable that turns a sentence into an iterable of tokens
            exposing ``pos_`` and ``lemma_``.
        sentences_lower: Lower-cased sentences to search.
        sentences: Unused; kept so existing callers keep working.

    Returns:
        A user-facing message (in Italian): the ranking for the requested
        group, or a notice that the word, the group, or the requested
        ``colloc`` value was not found/recognized.
    """
    # An empty query would match every sentence, so treat it as "not found".
    needle = (target or '').strip().lower()

    verbs = []
    adjectives = []
    nouns = []
    lemmas_by_pos = {'VERB': verbs, 'ADJ': adjectives, 'NOUN': nouns}
    matching_sentences = 0

    if needle:
        for sent in sentences_lower:
            if needle not in sent:
                continue
            matching_sentences += 1
            for token in nlp(sent):
                bucket = lemmas_by_pos.get(token.pos_)
                if bucket is not None:
                    bucket.append(token.lemma_)

    if matching_sentences == 0:
        return f"Non ho trovato la parola '{target}'.\n"

    # colloc -> (lemmas, message when lemmas were found, message when none were).
    # Each request type is looked up independently, so an empty verb list no
    # longer short-circuits requests for adjectives or nouns.
    reports = {
        'azioni': (
            verbs,
            "Ho trovato {count} azioni legate a '{target}'\n{ranking}",
            "Non ho trovato azioni legate a '{target}'",
        ),
        'caratteristiche': (
            adjectives,
            "Ho trovato {count} caratteristiche legate a '{target}'\n{ranking}",
            "Non ho trovato caratteristiche legate a '{target}'",
        ),
        'concetti': (
            nouns,
            "Ho trovato {count} concetti legati a '{target}'\n{ranking}",
            "Non ho trovato concetti legate a '{target}'",
        ),
    }

    if colloc not in reports:
        # E.g. no radio option selected: answer explicitly instead of returning None.
        return (
            f"Tipo di ricerca non riconosciuto: '{colloc}'. "
            "Scegli tra: azioni, caratteristiche, concetti."
        )

    lemmas, found_template, missing_template = reports[colloc]
    if not lemmas:
        return missing_template.format(target=target)

    # One "<rank>: (lemma, count)" entry per distinct lemma, most frequent first.
    ranking = ''.join(
        f"{rank}: {entry}\n\n"
        for rank, entry in enumerate(FreqDist(lemmas).most_common(), start=1)
    )
    return found_template.format(count=len(lemmas), target=target, ranking=ranking)
|
88 |
+
|
89 |
|
90 |
# Gradio UI: a free-text box for the target word and a radio selector for the
# kind of collocation, wired to search_engine_collocations; output is plain text.
demo = gr.Interface(
    fn=search_engine_collocations,
    inputs=[
        gr.Textbox(),
        gr.Radio(["azioni", "caratteristiche", "concetti"]),
    ],
    outputs="text",
    examples=[
        ["scarto", "azioni"],
        ["rifiuto", "caratteristiche"],
        ["sostenibilità", "concetti"],
    ],
)
|
103 |
|