Spaces:

azaninello
/

azioni_ICC

Runtime error

App Files Files Community

azaninello committed on Aug 26, 2022

Commit

c3b1412

•

1 Parent(s): 9703df0

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -16

app.py CHANGED Viewed

@@ -10,41 +10,94 @@ nltk.download('punkt')
 file = "text.txt"
-spacy_model = 'https://huggingface.co/spacy/it_core_news_sm'
 import spacy
-nlp_IT = spacy.load(spacy_model)
 def get_lists(file):
   with open(file, 'r', encoding='utf-8') as f:
     text = f.read()
-  word_tokenized_text = word_tokenize(text, language='italian')
-  word_tokenized_text_lower = [word.lower() for word in word_tokenized_text]
   sent_tokenized_text = sent_tokenize(text, language='italian')
   sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]
-  return word_tokenized_text, word_tokenized_text_lower, sent_tokenized_text, sent_tokenized_text_lower
-#words, words_lower, sentences, sentences = get_lists(file)
 demo = gr.Interface(
-    sentence_builder,
     [
         gr.Textbox(),
-        gr.Radio(["park", "zoo", "road"]),
-        gr.CheckboxGroup(["ran", "swam", "ate", "slept"]),
-        gr.Checkbox(label="Is it the morning?"),
     ],
     "text",
     examples=[
-        ["cats", "park", ["ran", "swam"], True],
-        ["dog", "zoo", ["ate", "swam"], False],
-        ["bird", "road", ["ran"], False],
-        ["cat", "zoo", ["ate"], True],
     ],
 )

 file = "text.txt"
 import spacy
+nlp_IT = spacy.load("it_core_news_sm")
 def get_lists(file):
     """Read a UTF-8 text file and split it into sentences.

     Args:
         file: Path of the text file to read.

     Returns:
         A 2-tuple ``(sentences, sentences_lower)``: the sentences produced
         by ``sent_tokenize`` with ``language='italian'``, and the same
         sentences lower-cased, in the same order.
     """
     with open(file, 'r', encoding='utf-8') as handle:
         raw_text = handle.read()
     original_sents = sent_tokenize(raw_text, language='italian')
     # Lower-cased copies are kept alongside the originals so callers can do
     # case-insensitive substring matching.
     lowered_sents = list(map(str.lower, original_sents))
     return original_sents, lowered_sents
+sentences, sentences_lower = get_lists(file)
def search_engine_collocations(target='scarto', colloc='azioni', nlp=nlp_IT,
                               sentences_lower=sentences_lower,
                               sentences=sentences):
    """Rank the lemmas that co-occur with ``target`` in the corpus.

    Every lower-cased sentence that contains ``target`` (case-insensitive
    substring match) is parsed with ``nlp``; the lemmas of its verbs,
    adjectives and nouns are collected, and the ones belonging to the
    requested category are returned as a frequency ranking.

    Args:
        target: Word (or substring) to look for.
        colloc: Category to report: ``'azioni'`` (verbs),
            ``'caratteristiche'`` (adjectives) or ``'concetti'`` (nouns).
        nlp: Callable that turns a sentence into an iterable of tokens
            exposing ``pos_`` and ``lemma_``. Defaults to the module-level
            ``nlp_IT`` pipeline.
        sentences_lower: Lower-cased sentences to search.
        sentences: Unused; kept so existing callers keep working.

    Returns:
        A user-facing Italian message: the ranking for the requested
        category, or an explanation of why there is nothing to show (word
        not found, category not recognised, no lemma of that category).
    """
    # Coarse POS tag -> category it feeds.
    category_by_pos = {
        'VERB': 'azioni',
        'ADJ': 'caratteristiche',
        'NOUN': 'concetti',
    }
    lemmas_by_category = {name: [] for name in category_by_pos.values()}

    needle = target.lower()
    matching_sentences = 0
    for sent in sentences_lower:
        if needle not in sent:
            continue
        matching_sentences += 1
        for token in nlp(sent):
            category = category_by_pos.get(token.pos_)
            if category is not None:
                lemmas_by_category[category].append(token.lemma_)

    if matching_sentences == 0:
        return f"Non ho trovato la parola '{target}'.\n"

    # Dispatch on the requested category first, so an empty verb list cannot
    # hide the adjectives or nouns the user actually asked for.
    if colloc not in lemmas_by_category:
        return f"Categoria non riconosciuta: '{colloc}'."

    lemmas = lemmas_by_category[colloc]
    # "concetti" is masculine plural; the other two categories are feminine.
    linked = 'legati' if colloc == 'concetti' else 'legate'
    if not lemmas:
        return f"Non ho trovato {colloc} {linked} a '{target}'"

    # FreqDist is not defined in this block; presumably nltk's, imported at
    # the top of the file.
    ranking = ''.join(
        f"{rank}: {entry}\n\n"
        for rank, entry in enumerate(FreqDist(lemmas).most_common(), start=1)
    )
    return f"Ho trovato {len(lemmas)} {colloc} {linked} a '{target}'\n{ranking}"
 # Gradio UI: a free-text box for the target word and a radio selector for
 # the category, wired to search_engine_collocations; the result is shown
 # as plain text.
 demo = gr.Interface(
     fn=search_engine_collocations,
     inputs=[
         gr.Textbox(),
         gr.Radio(["azioni", "caratteristiche", "concetti"]),
     ],
     outputs="text",
     # Each example row is [target word, category].
     examples=[
         ["scarto", "azioni"],
         ["rifiuto", "caratteristiche"],
         ["sostenibilità", "concetti"],
     ],
 )