Spaces:
Runtime error
Runtime error
File size: 2,948 Bytes
9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import nltk
import simplemma
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.probability import FreqDist
from simplemma import text_lemmatizer
# Download NLTK's 'punkt' resource at import time.
# NOTE(review): presumably this is the data sent_tokenize() relies on in
# get_lists() — confirm against the installed NLTK version.
nltk.download('punkt')
# Path of the text corpus; it is passed to get_lists(), which opens and reads it.
file = "text.txt"
import spacy
# Load the spaCy pipeline named "it_core_news_sm" once, at import time.
nlp_IT = spacy.load("it_core_news_sm")
def get_lists(file):
    """Read *file* as UTF-8 text and split it into Italian sentences.

    Returns a pair ``(sentences, lowered)``: the sentences exactly as
    tokenized, and the same sentences converted to lower case, in the same
    order.
    """
    with open(file, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    original_sentences = sent_tokenize(raw_text, language='italian')
    # Lower-cased copy, index-aligned with the original sentences.
    lowered_sentences = list(map(str.lower, original_sentences))
    return original_sentences, lowered_sentences
# Read and sentence-split the corpus once at import time; both lists are used
# as default arguments of search_engine_collocations().
sentences, sentences_lower = get_lists(file)
def search_engine_collocations(target='scarto', colloc='azioni', nlp=nlp_IT,
                               sentences_lower=sentences_lower, sentences=sentences):
    """Rank the lemmas that co-occur with *target* for one word category.

    Every sentence of ``sentences_lower`` that contains ``target``
    (case-insensitive substring match) is parsed with ``nlp``. The lemmas of
    the tokens whose part of speech matches the requested category are
    counted and listed from most to least frequent.

    Args:
        target: Word (or substring) to look for in the corpus.
        colloc: Category to report: ``'azioni'`` (verbs),
            ``'caratteristiche'`` (adjectives) or ``'concetti'`` (nouns).
        nlp: Callable that turns a sentence into an iterable of tokens
            exposing ``pos_`` and ``lemma_``; defaults to the module-level
            ``nlp_IT`` pipeline.
        sentences_lower: Lower-cased sentences to search.
        sentences: Original-case sentences. Not read by this function; kept
            so that existing callers passing it keep working.

    Returns:
        An Italian message: a numbered ranking of ``(lemma, count)`` pairs,
        or a notice that the word, the category, or any matching lemma was
        not found.
    """
    # Category name -> (spaCy coarse POS tag, wording used in the messages).
    categories = {
        'azioni': ('VERB', 'azioni legate'),
        'caratteristiche': ('ADJ', 'caratteristiche legate'),
        'concetti': ('NOUN', 'concetti legati'),
    }

    # A blank query would be a substring of every sentence and force a parse
    # of the whole corpus, so treat it as "word not found".
    if not target or not target.strip():
        return f"Non ho trovato la parola '{target}'.\n"

    # The UI passes None when no radio option is selected; report it instead
    # of silently returning nothing.
    if colloc not in categories:
        return (
            f"Categoria non riconosciuta: '{colloc}'. "
            "Scegli tra 'azioni', 'caratteristiche' o 'concetti'."
        )
    pos_tag, phrase = categories[colloc]

    needle = target.lower()
    matching_sentences = [sent for sent in sentences_lower if needle in sent]
    if not matching_sentences:
        return f"Non ho trovato la parola '{target}'.\n"

    # Only the lemmas of the requested category are needed for the answer.
    lemmas = []
    for sent in matching_sentences:
        lemmas.extend(token.lemma_ for token in nlp(sent) if token.pos_ == pos_tag)

    if not lemmas:
        return f"Non ho trovato {phrase} a '{target}'"

    ranking = FreqDist(lemmas).most_common()
    stringed_results = ''.join(
        f"{rank}: {entry}\n\n" for rank, entry in enumerate(ranking, start=1)
    )
    return f"Ho trovato {len(lemmas)} {phrase} a '{target}'\n{stringed_results}"
# Web UI: a free-text box for the target word and a radio selector for the
# category, wired to search_engine_collocations; the result is shown as text.
demo = gr.Interface(
    search_engine_collocations,
    [
        gr.Textbox(),
        gr.Radio(["azioni", "caratteristiche", "concetti"]),
    ],
    "text",
    # Each example row is [target word, category].
    examples=[
        ["scarto", "azioni"],
        ["rifiuto", "caratteristiche"],
        ["sostenibilità", "concetti"],
    ],
)
demo.launch()