toxic_test / app.py
HMPhuoc's picture
add tokenize function
f5d4e06
raw
history blame
2.06 kB
# import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import keras
import unicodedata as ud
from underthesea import word_tokenize
from phoBERT import BERT_predict
# Load both saved Keras models once, at import time, so the predict helpers
# below can reuse them. The paths are relative, so they resolve against the
# process's current working directory.
LSTM_model, GRU_model = (
    tf.keras.models.load_model(model_path)
    for model_path in ('lstm_model.tf', 'gru_model.tf')
)
def LSTM_predict(x):
    """Score a single input with the LSTM model.

    ``x`` is wrapped in a one-element list before being handed to
    ``LSTM_model.predict``; only the first row of the prediction is kept.
    Callers in this file pass the string produced by ``tokenize``.

    Returns:
        A plain Python list holding each score of that row rounded to two
        decimal places.
    """
    first_row = LSTM_model.predict([x])[0]
    rounded_scores = []
    for score in first_row:
        rounded_scores.append(round(score, 2))
    return rounded_scores
def GRU_predict(x):
    """Score a single input with the GRU model.

    ``x`` is wrapped in a one-element list before being handed to
    ``GRU_model.predict``; only the first row of the prediction is kept.
    Callers in this file pass the string produced by ``tokenize``.

    Returns:
        A plain Python list holding each score of that row rounded to two
        decimal places.
    """
    first_row = GRU_model.predict([x])[0]
    rounded_scores = []
    for score in first_row:
        rounded_scores.append(round(score, 2))
    return rounded_scores
def tokenize(x):
    """Normalize and word-segment a piece of text.

    The text is first brought to Unicode NFKC form and then passed to
    underthesea's ``word_tokenize`` with ``format="text"``.

    Returns:
        Whatever ``word_tokenize`` yields in ``"text"`` format (a single
        string according to the underthesea documentation).
    """
    normalized = ud.normalize('NFKC', x)
    return word_tokenize(normalized, format="text")
def judge(x):
    """Average the LSTM and GRU scores for a piece of text.

    The text is tokenized, scored by both recurrent models, each score
    vector is rounded to two decimals, and the first six entries are
    averaged pairwise.

    Returns:
        A list of six values, one per score position, each equal to
        ``(lstm + gru) / 2`` for that position.
    """
    text = tokenize(x)
    lstm_raw, gru_raw = LSTM_predict(text), GRU_predict(text)
    lstm_scores, gru_scores = np.round(lstm_raw, 2), np.round(gru_raw, 2)
    # Exactly six positions are read by index, regardless of how long the
    # prediction vectors actually are.
    return [(lstm_scores[pos] + gru_scores[pos]) / 2 for pos in range(6)]
def judgePlus(x):
    """Score text with LSTM, GRU and BERT and pick the stronger verdict.

    The text is tokenized and scored by the LSTM and GRU models. BERT is
    then tried; if it fails, the element-wise average of the LSTM and GRU
    predictions stands in for the BERT prediction. All three vectors are
    rounded to two decimals.

    The BERT vector is returned when its first score is strictly greater
    than the mean of the LSTM and GRU first scores; otherwise the pairwise
    LSTM/GRU average is returned.

    Returns:
        A list of six values read from positions 0-5 of the chosen vector.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    x = tokenize(x)
    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)
    try:
        bert_pred = BERT_predict(x)
    except Exception:
        # Best-effort fallback: catch ordinary errors only, so that
        # KeyboardInterrupt/SystemExit still propagate, and record the
        # failure instead of hiding it.
        logging.getLogger(__name__).warning(
            "BERT_predict failed; falling back to the LSTM/GRU average",
            exc_info=True,
        )
        bert_pred = np.average([lstm_pred, gru_pred], axis=0)

    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)
    result_bert = np.round(bert_pred, 2)

    # lstm[0] + gru[0] < 2 * bert[0]  <=>  mean(lstm[0], gru[0]) < bert[0]
    if (result_lstm[0] + result_gru[0]) < (result_bert[0] * 2):
        # The division by 1 is retained from the original so the element
        # type of the returned values is unchanged.
        return [result_bert[i] / 1 for i in range(6)]
    return [(result_lstm[i] + result_gru[i]) / 2 for i in range(6)]
def judgeBert(x):
    """Score text with the BERT predictor only.

    The text is tokenized and passed to ``BERT_predict``. If that call
    fails, a vector of six zeros is used instead. The vector is rounded to
    two decimals.

    Returns:
        A list of six values read from positions 0-5 of the rounded vector.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    x = tokenize(x)
    try:
        bert_pred = BERT_predict(x)
    except Exception:
        # Best-effort fallback: catch ordinary errors only, so that
        # KeyboardInterrupt/SystemExit still propagate, and record the
        # failure instead of hiding it.
        logging.getLogger(__name__).warning(
            "BERT_predict failed; returning all-zero scores",
            exc_info=True,
        )
        bert_pred = np.zeros(6, dtype=float)

    result_bert = np.round(bert_pred, 2)
    # The division by 1 is retained from the original so the element type
    # of the returned values is unchanged.
    return [result_bert[i] / 1 for i in range(6)]