Spaces:
Runtime error
Runtime error
File size: 2,954 Bytes
9703df0 c3b1412 9703df0 c3b1412 fd2d594 c3b1412 9703df0 c3b1412 fd2d594 c3b1412 fd2d594 c3b1412 9703df0 c3b1412 fd2d594 c3b1412 fd2d594 c3b1412 9703df0 c3b1412 fd2d594 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 c3b1412 9703df0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import nltk
import simplemma
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.probability import FreqDist
from simplemma import text_lemmatizer
# Fetch NLTK's 'punkt' resource at start-up; presumably required by the
# sent_tokenize() call in get_lists() -- confirm against the NLTK version used.
nltk.download('punkt')
# Path of the text file that get_lists() reads to build the sentence lists.
file = "text.txt"
import spacy
# Load the spaCy pipeline named "it_core_news_sm" once at import time.
nlp_IT = spacy.load("it_core_news_sm")
def get_lists(file):
    """Read a UTF-8 text file and split its content into sentences.

    The text is segmented with NLTK's ``sent_tokenize`` using the Italian
    model.

    Args:
        file: Path of the text file to read.

    Returns:
        A ``(sentences, sentences_lower)`` tuple of two lists of equal
        length: the sentences as produced by the tokenizer, and the same
        sentences converted to lowercase, in the same order.
    """
    with open(file, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    original_sentences = sent_tokenize(raw_text, language='italian')
    lowered_sentences = list(map(str.lower, original_sentences))
    return original_sentences, lowered_sentences
# Read and sentence-split the corpus once at import time; both lists are used
# as default argument values by search_engine_collocations() below.
sentences, sentences_lower = get_lists(file)
def search_engine_collocations(target='scarto', colloc='azioni', nlp=nlp_IT, sentences_lower=sentences_lower, sentences=sentences):
    """List the lemmas that co-occur with ``target``, ranked by frequency.

    Every sentence in ``sentences_lower`` that contains ``target`` (lowercased,
    plain substring match) is parsed with ``nlp``. The lemmas of its tokens are
    collected by part of speech, and the lemmas of the category selected by
    ``colloc`` are returned as a numbered frequency ranking.

    Args:
        target: Word (or substring) to look for in the sentences.
        colloc: Category to report: ``'azioni'`` (tokens tagged ``VERB``),
            ``'caratteristiche'`` (``ADJ``) or ``'concetti'`` (``NOUN``).
        nlp: Callable that takes a sentence string and returns an iterable of
            tokens exposing ``pos_`` and ``lemma_``.
        sentences_lower: Lowercased sentences to search.
        sentences: Unused; kept so existing callers keep working.

    Returns:
        A user-facing message (in Italian): the ranking for the requested
        category, or a notice that the word, the category, or any matching
        lemma was not found.
    """
    # colloc -> (POS tag, wording of the "found" message, wording of the
    # "not found" message). The wordings are kept exactly as the app has
    # always displayed them.
    categories = {
        'azioni': ('VERB', 'azioni legate', 'azioni legate'),
        'caratteristiche': ('ADJ', 'caratteristiche legate', 'caratteristiche legate'),
        'concetti': ('NOUN', 'concetti legati', 'concetti legate'),
    }

    needle = target.lower()
    lemmas_by_pos = {'VERB': [], 'ADJ': [], 'NOUN': []}
    matches = 0
    for sent in sentences_lower:
        if needle not in sent:
            continue
        matches += 1
        for token in nlp(sent):
            bucket = lemmas_by_pos.get(token.pos_)
            if bucket is not None:
                bucket.append(token.lemma_)

    if matches == 0:
        return f"Non ho trovato la parola '{target}'.\n"

    if colloc not in categories:
        # Reached e.g. when no radio option is selected in the UI; answer
        # explicitly instead of silently returning None.
        return f"Categoria sconosciuta: '{colloc}'. Scegli tra azioni, caratteristiche, concetti."

    pos_tag, found_label, missing_label = categories[colloc]
    lemmas = lemmas_by_pos[pos_tag]
    if not lemmas:
        return f"Non ho trovato {missing_label} a '{target}'"

    # Each entry is a (lemma, count) tuple, rendered with its default str().
    ranking = FreqDist(lemmas).most_common()
    stringed_results = ''.join(
        f"{rank}: {entry}\n\n" for rank, entry in enumerate(ranking, start=1)
    )
    return f"Ho trovato {len(lemmas)} {found_label} a '{target}'\n{stringed_results}"
# Gradio UI: a free-text box for the target word and a radio selector for the
# collocation category; the function's string result is shown as plain text.
demo = gr.Interface(
    fn=search_engine_collocations,
    inputs=[
        gr.Textbox(),
        gr.Radio(["azioni", "caratteristiche", "concetti"]),
    ],
    outputs="text",
    # Each example row is [target word, category], matching the inputs above.
    examples=[
        ["scarto", "azioni"],
        ["rifiuto", "caratteristiche"],
        ["sostenibilità", "concetti"],
    ],
)
demo.launch()