Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,9 +12,8 @@ normalizer = hazm.Normalizer()
|
|
12 |
sent_tokenizer = hazm.SentenceTokenizer()
|
13 |
word_tokenizer = hazm.WordTokenizer()
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
)
|
18 |
|
19 |
def preprocess_text(text: str) -> typing.List[typing.List[str]]:
|
20 |
"""Split/normalize text into sentences/words with hazm"""
|
@@ -25,9 +24,7 @@ def preprocess_text(text: str) -> typing.List[typing.List[str]]:
|
|
25 |
words = word_tokenizer.tokenize(sentence)
|
26 |
processed_words = fix_words(words)
|
27 |
processed_sentences.append(" ".join(processed_words))
|
28 |
-
|
29 |
-
return " ".join(processed_sentences)
|
30 |
-
|
31 |
def fix_words(words: typing.List[str]) -> typing.List[str]:
|
32 |
fixed_words = []
|
33 |
|
|
|
12 |
sent_tokenizer = hazm.SentenceTokenizer()
|
13 |
word_tokenizer = hazm.WordTokenizer()
|
14 |
|
15 |
+
tagger_path = hf_hub_download(repo_id="gyroing/HAZM_POS_TAGGER", filename="pos_tagger.model")
|
16 |
+
tagger = hazm.POSTagger(model=tagger_path)
|
|
|
17 |
|
18 |
def preprocess_text(text: str) -> typing.List[typing.List[str]]:
|
19 |
"""Split/normalize text into sentences/words with hazm"""
|
|
|
24 |
words = word_tokenizer.tokenize(sentence)
|
25 |
processed_words = fix_words(words)
|
26 |
processed_sentences.append(" ".join(processed_words))
|
27 |
+
return " ".join(processed_sentences)
|
|
|
|
|
28 |
def fix_words(words: typing.List[str]) -> typing.List[str]:
|
29 |
fixed_words = []
|
30 |
|