# Hugging Face Spaces app — Italian collocation search engine.
# (The original paste captured the Spaces page status banner "Runtime error" here.)
# Third-party NLP stack: Gradio UI, NLTK tokenizers, simplemma and spaCy.
import gradio as gr
import nltk
import simplemma
import spacy
from nltk.probability import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize
from simplemma import text_lemmatizer

# Punkt sentence/word-tokenizer models required by the NLTK tokenizers above.
nltk.download('punkt')

# Corpus file analysed by the app.
file = "text.txt"

# Small Italian spaCy pipeline used for POS tagging and lemmatisation.
nlp_IT = spacy.load("it_core_news_sm")
def get_lists(file):
    """Read *file* and tokenize it into Italian sentences.

    Returns a pair ``(originals, lowered)``: the sentence list as found in
    the text, and the same list lower-cased for case-insensitive matching.
    """
    with open(file, 'r', encoding='utf-8') as src:
        raw_text = src.read()
    originals = sent_tokenize(raw_text, language='italian')
    lowered = [sentence.lower() for sentence in originals]
    return originals, lowered
# Pre-compute the corpus once at import time: original-case sentences for
# display and a lower-cased copy for case-insensitive substring search.
sentences, sentences_lower = get_lists(file)
def search_engine_collocations(target='scarto', colloc='azioni', nlp=None, sentences_lower=None, sentences=None):
    """Find sentences containing *target* and report collocated lemmas.

    Parameters
    ----------
    target : str
        Word to search for (case-insensitive substring match per sentence).
    colloc : str
        Which collocations to report: 'azioni' (verbs), 'caratteristiche'
        (adjectives) or 'concetti' (nouns).
    nlp, sentences_lower, sentences :
        Optional overrides; when ``None`` they fall back to the module-level
        ``nlp_IT`` pipeline and pre-tokenized corpus (late-bound so the
        function can be imported and tested without touching them).
        ``sentences`` is accepted for backward compatibility but unused.

    Returns
    -------
    str or None
        A formatted Italian report, or ``None`` for an unrecognised *colloc*
        (preserving the original fall-through behaviour).
    """
    # FreqDist is a Counter subclass; Counter.most_common gives identical output.
    from collections import Counter

    if nlp is None:
        nlp = nlp_IT
    if sentences_lower is None:
        # Parameter name shadows the module global, hence the globals() lookup.
        sentences_lower = globals()['sentences_lower']

    verbs, adjectives, nouns = [], [], []
    hits = 0
    needle = target.lower()
    for sent in sentences_lower:
        # NOTE: substring match, so 'scarto' also matches e.g. 'scartoffie'.
        if needle not in sent:
            continue
        hits += 1
        for token in nlp(sent):
            if 'VERB' in token.pos_:
                verbs.append(token.lemma_)
            elif 'ADJ' in token.pos_:
                adjectives.append(token.lemma_)
            elif 'NOUN' in token.pos_:
                nouns.append(token.lemma_)

    if hits == 0:
        # BUG FIX: the original string lacked the f-prefix, so the user saw
        # the literal text "{target}" instead of the searched word.
        return f"Non ho trovato la parola '{target}'.\n\n"

    def _report(items, label):
        # Format one category: ranked frequency list, or a not-found message.
        if not items:
            return f"Non ho trovato {label} '{target}'"
        ranking = ''
        for n, r in enumerate(Counter(items).most_common()):
            ranking += str(n + 1) + ': ' + str(r) + '\n\n'
        return f"Ho trovato {len(items)} {label} '{target}'\n{ranking}\n\n"

    # BUG FIX: the original `elif verbs == []` branches fired regardless of
    # `colloc`, e.g. asking for 'caratteristiche' with no verbs in the matched
    # sentences returned the "azioni" not-found message. Dispatch on colloc first.
    if colloc == 'azioni':
        return _report(verbs, 'azioni legate a')
    if colloc == 'caratteristiche':
        return _report(adjectives, 'caratteristiche legate a')
    if colloc == 'concetti':
        return _report(nouns, 'concetti legati a')
    return None  # unrecognised colloc: keep the original silent fall-through
# Gradio UI: a free-text box for the target word and a radio selector for the
# collocation category, rendered to a plain-text output panel.
search_inputs = [
    gr.Textbox(),
    gr.Radio(["azioni", "caratteristiche", "concetti"]),
]
search_examples = [
    ["scarto", "azioni"],
    ["rifiuto", "caratteristiche"],
    ["sostenibilità", "concetti"],
]
demo = gr.Interface(
    search_engine_collocations,
    search_inputs,
    "text",
    examples=search_examples,
)
demo.launch()