HMPhuoc committed on
Commit
924cabe
1 Parent(s): 6844ad4

Word-tokenize input in app.py (judge, judgePlus) instead of inside BERT_predict

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. phoBERT.py +1 -1
app.py CHANGED
@@ -83,6 +83,7 @@ def judge(x):
83
  judge_result = []
84
 
85
  x = ud.normalize('NFKC', x)
 
86
 
87
  lstm_pred = LSTM_predict(x)
88
  gru_pred = GRU_predict(x)
@@ -106,6 +107,7 @@ def judgePlus(x):
106
  judge_result = []
107
 
108
  x = ud.normalize('NFKC', x)
 
109
 
110
  lstm_pred = LSTM_predict(x)
111
  gru_pred = GRU_predict(x)
 
83
  judge_result = []
84
 
85
  x = ud.normalize('NFKC', x)
86
+ x = word_tokenize(x, format="text")
87
 
88
  lstm_pred = LSTM_predict(x)
89
  gru_pred = GRU_predict(x)
 
107
  judge_result = []
108
 
109
  x = ud.normalize('NFKC', x)
110
+ x = word_tokenize(x, format="text")
111
 
112
  lstm_pred = LSTM_predict(x)
113
  gru_pred = GRU_predict(x)
phoBERT.py CHANGED
@@ -69,7 +69,7 @@ def tokenize(data):
69
  return output
70
 
71
  def BERT_predict(text):
72
- text = word_tokenize(text)
73
  text = [text]
74
  token = tokenize(text)
75
 
 
69
  return output
70
 
71
  def BERT_predict(text):
72
+
73
  text = [text]
74
  token = tokenize(text)
75