gyroing committed on
Commit
11b0444
1 Parent(s): e6e5530

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -0
app.py CHANGED
@@ -1,5 +1,40 @@
1
  import gradio as gr
2
  import hazm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  def greet(name):
4
  return "Hello " + name + "!!"
5
 
 
1
  import typing

  import gradio as gr
  import hazm
3
+
4
# Shared hazm components, constructed once when the module is imported.
normalizer = hazm.Normalizer()
sent_tokenizer = hazm.SentenceTokenizer()
word_tokenizer = hazm.WordTokenizer()

# Part-of-speech tagger; the model is referenced by a relative file name.
tagger = hazm.POSTagger(model="postagger.model")
11
+
12
def preprocess_text(text: str) -> typing.List[typing.List[str]]:
    """Normalize text and split it into sentences of processed words.

    The input is normalized with ``normalizer``, split into sentences with
    ``sent_tokenizer``, and each sentence is tokenized with
    ``word_tokenizer`` and then passed through ``fix_words``.

    Returns:
        One list of words per sentence, in sentence order.
    """
    normalized = normalizer.normalize(text)
    return [
        fix_words(word_tokenizer.tokenize(sentence))
        for sentence in sent_tokenizer.tokenize(normalized)
    ]
23
+
24
def fix_words(words: typing.List[str]) -> typing.List[str]:
    """Append a kasra to tagged words that do not already end with one.

    Each word is tagged with the module-level ``tagger``. When a word's tag
    ends in ``"e"`` (presumably the ezafe marker of the tagger's tag set --
    confirm against the model in use) and the word does not already end in
    a kasra, a kasra is appended. If such a word ends in "ه" that is not
    preceded by "ا", a zero-width non-joiner followed by "ی" is inserted
    before the kasra.

    Args:
        words: Tokens of a single sentence.

    Returns:
        A new list with one entry per (word, tag) pair, in tagger order.
    """
    kasra = "ِ"
    fixed_words = []

    for word, pos in tagger.tag(words):
        # Empty words are passed through unchanged. Using endswith() instead
        # of word[-1] / word[-2] avoids IndexError on empty or one-character
        # words and on empty tags.
        if word and pos.endswith("e") and not word.endswith(kasra):
            if word.endswith("ه") and not word.endswith("اه"):
                # "\u200c" is the (invisible) zero-width non-joiner.
                word += "\u200cی"
            word += kasra

        fixed_words.append(word)

    # The accumulated list; the previous code returned the undefined name
    # `fixed_word`, which raised NameError on every call.
    return fixed_words
37
+
38
  def greet(name):
39
  return "Hello " + name + "!!"
40