add tokenize function
app.py CHANGED
@@ -16,98 +16,56 @@ from underthesea import word_tokenize
 
 from phoBERT import BERT_predict
 
-# Load tokenizer
-# fp = Path(__file__).with_name('tokenizer.pkl')
-# with open(fp,mode="rb") as f:
-#     tokenizer = pickle.load(f)
 
-#Load LSTM
-#fp = Path(__file__).with_name('lstm_model.h5')
 LSTM_model = tf.keras.models.load_model('lstm_model.tf')
 
-#Load GRU
-#fp = Path(__file__).with_name('gru_model.h5')
 GRU_model = tf.keras.models.load_model('gru_model.tf')
 
-
-def tokenizer_pad(tokenizer,comment_text,max_length=200):
-
-    comment_text = word_tokenize(comment_text, format="text")
-    comment_text = [comment_text]
-    tokenized_text = tokenizer.texts_to_sequences(comment_text)
-
-    padded_sequences = pad_sequences(sequences=tokenized_text,maxlen=max_length,padding="post",truncating="post")
-
-    return padded_sequences
-
 def LSTM_predict(x):
-    # x = tokenizer_pad(tokenizer=tokenizer,comment_text=x)
-
 
     pred_proba = LSTM_model.predict([x])[0]
 
     pred_proba = [round(i,2) for i in pred_proba]
 
-    #print(pred_proba)
-
     return pred_proba
 
 def GRU_predict(x):
-    # x = tokenizer_pad(tokenizer=tokenizer,comment_text=x)
-
 
     pred_proba = GRU_model.predict([x])[0]
 
     pred_proba = [round(i,2) for i in pred_proba]
 
-    #print(pred_proba)
-
     return pred_proba
 
-def
-
-
-
-    data['Điểm'] = result
-
-    #print(data)
 
-    p = px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1] )
-    return p
-    pass
+def tokenize(x):
+    x = ud.normalize('NFKC', x)
+    x = word_tokenize(x, format="text")
+    return x
 
 def judge(x):
 
-    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
     result = []
-    judge_result = []
 
-    x =
-    x = word_tokenize(x, format="text")
+    x = tokenize(x)
 
     lstm_pred = LSTM_predict(x)
     gru_pred = GRU_predict(x)
-    #bert_pred = BERT_predict(x)
-    #print(result)
 
-    return_result = 'Result'
     result_lstm = np.round(lstm_pred, 2)
     result_gru = np.round(gru_pred, 2)
-    #result_bert = np.round(bert_pred, 2)
 
     for i in range(6):
         result.append((result_lstm[i]+result_gru[i])/2)
 
     return (result)
 
+
 def judgePlus(x):
 
-    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
     result = []
-    judge_result = []
 
-    x =
-    x = word_tokenize(x, format="text")
+    x = tokenize(x)
 
     lstm_pred = LSTM_predict(x)
     gru_pred = GRU_predict(x)
@@ -117,11 +75,10 @@ def judgePlus(x):
     bert_pred = np.average([lstm_pred, gru_pred], axis=0)
 
 
-    return_result = 'Result'
     result_lstm = np.round(lstm_pred, 2)
     result_gru = np.round(gru_pred, 2)
     result_bert = np.round(bert_pred, 2)
-
+
     if((result_lstm[0]+result_gru[0])<(result_bert[0]*2)):
         for i in range(6):
             result.append((result_bert[i])/1)
@@ -131,26 +88,19 @@ def judgePlus(x):
 
     return (result)
 
+
 def judgeBert(x):
 
-    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
     result = []
-    judge_result = []
 
-    x =
-    x = word_tokenize(x, format="text")
+    x = tokenize(x)
 
-
     try:
         bert_pred = BERT_predict(x)
     except:
         bert_pred = np.zeros(6, dtype=float)
 
-
-    return_result = 'Result'
-
     result_bert = np.round(bert_pred, 2)
-    #result_bert = np.round(bert_pred, 2)
 
     for i in range(6):
         result.append((result_bert[i])/1)
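The new tokenize() helper centralizes the preprocessing that judge(), judgePlus() and judgeBert() previously repeated inline: Unicode normalization followed by Vietnamese word segmentation. A minimal standalone sketch of the same steps, assuming ud refers to Python's standard unicodedata module (its import sits outside this hunk) and using underthesea's word_tokenize as in app.py:

import unicodedata as ud  # assumed binding for the 'ud' name used in app.py; the import is not shown in this diff
from underthesea import word_tokenize

def tokenize(x):
    # Fold visually identical Vietnamese characters into one canonical code-point form
    x = ud.normalize('NFKC', x)
    # Segment into Vietnamese words; format="text" returns a single string with
    # multi-syllable words joined by underscores (e.g. "khởi_nghiệp")
    x = word_tokenize(x, format="text")
    return x

The normalized, segmented string is what the predict functions receive, so every model scores the comment in the same form.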
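For a sense of what the judge() entry point returns after this change, here is a self-contained sketch of its per-label averaging, with the Keras model calls replaced by hypothetical stub predictors. The real LSTM_predict and GRU_predict wrap the lstm_model.tf and gru_model.tf models loaded at the top of app.py, and the six positions correspond to the labels 'độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân' (toxic, severely toxic, obscene, threat, insult, identity hate):

import numpy as np

# Hypothetical stand-ins: each returns six per-label probabilities for an
# already-tokenized comment, in place of the real Keras models.
def fake_lstm_predict(x):
    return [0.80, 0.10, 0.65, 0.05, 0.40, 0.02]

def fake_gru_predict(x):
    return [0.70, 0.20, 0.55, 0.15, 0.30, 0.08]

def judge_sketch(x):
    lstm_pred = np.round(fake_lstm_predict(x), 2)
    gru_pred = np.round(fake_gru_predict(x), 2)
    # judge() averages the two models label by label
    return [float((lstm_pred[i] + gru_pred[i]) / 2) for i in range(6)]

scores = judge_sketch("bình luận đã được tách từ")
# scores ≈ [0.75, 0.15, 0.6, 0.1, 0.35, 0.05]

judgeBert() follows the same pattern but scores with BERT_predict alone, falling back to a six-element zero vector when that call raises.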