import gradio as gr

import nltk
import spacy
import simplemma
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
from simplemma import text_lemmatizer

nltk.download('punkt')  # tokenizer models (newer NLTK releases may also need 'punkt_tab')

file = "text.txt"

# spacy.load() expects an installed pipeline name (or a local path), not a URL;
# install the Italian model first, e.g. `python -m spacy download it_core_news_sm`.
spacy_model = 'it_core_news_sm'
nlp_IT = spacy.load(spacy_model)

def get_lists(file):
  """Read a UTF-8 text file and return word- and sentence-token lists,
  both in their original form and lowercased."""
  with open(file, 'r', encoding='utf-8') as f:
    text = f.read()

  word_tokenized_text = word_tokenize(text, language='italian')
  word_tokenized_text_lower = [word.lower() for word in word_tokenized_text]

  sent_tokenized_text = sent_tokenize(text, language='italian')
  sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]

  return word_tokenized_text, word_tokenized_text_lower, sent_tokenized_text, sent_tokenized_text_lower

#words, words_lower, sentences, sentences_lower = get_lists(file)
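# FreqDist, text_lemmatizer and the spaCy pipeline are imported above but not yet
# used. A minimal, commented-out sketch of how they could be applied to the lists
# returned by get_lists (variable names are illustrative assumptions):
#
# words, words_lower, sentences, sentences_lower = get_lists(file)
# freq = FreqDist(words_lower)                  # token frequency distribution
# print(freq.most_common(10))                   # ten most frequent tokens
# lemmas = text_lemmatizer(" ".join(words_lower), lang='it')  # simplemma lemmatization
# doc = nlp_IT(sentences[0])                    # spaCy analysis of the first sentence
# print([(token.text, token.pos_) for token in doc])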



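# NOTE: sentence_builder is passed to gr.Interface below but is not defined in this
# file. A minimal sketch consistent with the declared inputs and example rows
# (parameter names and the sentence template are assumptions, not the original code):
def sentence_builder(animal, place, activities, morning):
  """Build a sentence from a free-text animal, a place, selected activities and a time flag."""
  time_of_day = "morning" if morning else "evening"
  activity_str = " and ".join(activities) if activities else "rested"
  return f"In the {time_of_day}, the {animal} went to the {place} and {activity_str}."
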
demo = gr.Interface(
    sentence_builder,
    [
        gr.Textbox(),
        gr.Radio(["park", "zoo", "road"]),
        gr.CheckboxGroup(["ran", "swam", "ate", "slept"]),
        gr.Checkbox(label="Is it the morning?"),
    ],
    "text",
    examples=[
        ["cats", "park", ["ran", "swam"], True],
        ["dog", "zoo", ["ate", "swam"], False],
        ["bird", "road", ["ran"], False],
        ["cat", "zoo", ["ate"], True],
    ],
)

demo.launch()