"""Toxic-comment scoring helpers built on an LSTM, a GRU and a BERT predictor.

Importing this module loads the two Keras models from ``lstm_model.tf`` and
``gru_model.tf`` (paths relative to the current working directory).
"""

# import gradio as gr
import logging
import os
import pickle
import unicodedata as ud
from pathlib import Path

import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import plotly.express as px
import keras
from underthesea import word_tokenize

from phoBERT import BERT_predict

logger = logging.getLogger(__name__)

# Class names used on the chart; they are paired positionally with the scores.
# (Roughly: toxic, severely toxic, obscene, threat, insult, personal hatred.)
LABELS = (
    'độc hại',
    'cực kì độc hại',
    'tục tĩu',
    'đe dọa',
    'xúc phạm',
    'thù ghét cá nhân',
)
NUM_LABELS = len(LABELS)

# Load tokenizer (currently disabled)
# fp = Path(__file__).with_name('tokenizer.pkl')
# with open(fp, mode="rb") as f:
#     tokenizer = pickle.load(f)

# Load LSTM
# fp = Path(__file__).with_name('lstm_model.h5')
LSTM_model = tf.keras.models.load_model('lstm_model.tf')

# Load GRU
# fp = Path(__file__).with_name('gru_model.h5')
GRU_model = tf.keras.models.load_model('gru_model.tf')


def tokenizer_pad(tokenizer, comment_text, max_length=200):
    """Word-segment one comment and turn it into a padded id sequence.

    Args:
        tokenizer: Object exposing ``texts_to_sequences`` (presumably the
            pickled Keras tokenizer from the disabled loading code above).
        comment_text: The raw comment string.
        max_length: Target sequence length passed to ``pad_sequences``.

    Returns:
        The result of ``pad_sequences`` for the single comment, padded and
        truncated at the end ("post").
    """
    segmented = word_tokenize(comment_text, format="text")
    sequences = tokenizer.texts_to_sequences([segmented])
    return pad_sequences(
        sequences=sequences,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )


def _predict_rounded(model, x):
    """Return the first prediction row of ``model`` for ``x``, rounded to 2 places."""
    # NOTE: the tokenizer_pad preprocessing step is disabled; ``x`` is handed
    # to the model as-is, wrapped in a one-element list.
    first_row = model.predict([x])[0]
    return [round(score, 2) for score in first_row]


def LSTM_predict(x):
    """Score ``x`` with the LSTM model.

    Returns:
        A list holding the first row of ``LSTM_model.predict([x])`` with
        every entry rounded to two decimals.
    """
    return _predict_rounded(LSTM_model, x)


def GRU_predict(x):
    """Score ``x`` with the GRU model.

    Returns:
        A list holding the first row of ``GRU_model.predict([x])`` with
        every entry rounded to two decimals.
    """
    return _predict_rounded(GRU_model, x)


def plot(result):
    """Build a bar chart of the scores in ``result`` against ``LABELS``.

    Args:
        result: Sequence of scores, one per label, in label order.

    Returns:
        The Plotly Express bar figure, with the y axis fixed to [0, 1].
    """
    data = pd.DataFrame()
    data['Nhãn'] = list(LABELS)
    data['Điểm'] = result
    return px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])


def _prepare_text(x):
    """NFKC-normalise ``x`` and word-segment it with underthesea."""
    return word_tokenize(ud.normalize('NFKC', x), format="text")


def _average_scores(first, second):
    """Return the element-wise mean of two score vectors as a list."""
    return [(first[i] + second[i]) / 2 for i in range(NUM_LABELS)]


def _as_score_list(scores):
    """Copy the first ``NUM_LABELS`` entries of ``scores`` into a list."""
    # ``/ 1`` is kept from the original so that every returned entry goes
    # through the same true-division step as the averaged scores do.
    return [scores[i] / 1 for i in range(NUM_LABELS)]


def judge(x):
    """Score a comment with the mean of the LSTM and GRU predictions.

    Args:
        x: The raw comment string.

    Returns:
        A list of ``NUM_LABELS`` scores: the per-label mean of the two
        models' predictions, each rounded to two decimals before averaging.
    """
    text = _prepare_text(x)
    lstm_scores = np.round(LSTM_predict(text), 2)
    gru_scores = np.round(GRU_predict(text), 2)
    return _average_scores(lstm_scores, gru_scores)


def judgePlus(x):
    """Score a comment using LSTM, GRU and BERT together.

    The BERT scores are returned when BERT's first score is higher than the
    mean of the LSTM and GRU first scores; otherwise the LSTM/GRU mean is
    returned. If ``BERT_predict`` raises, the LSTM/GRU average stands in for
    the BERT prediction and the failure is logged.

    Args:
        x: The raw comment string.

    Returns:
        A list of ``NUM_LABELS`` scores.
    """
    text = _prepare_text(x)
    lstm_pred = LSTM_predict(text)
    gru_pred = GRU_predict(text)
    try:
        bert_pred = BERT_predict(text)
    except Exception:
        # Best-effort fallback; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        logger.exception("BERT_predict failed; using the LSTM/GRU average instead")
        bert_pred = np.average([lstm_pred, gru_pred], axis=0)

    lstm_scores = np.round(lstm_pred, 2)
    gru_scores = np.round(gru_pred, 2)
    bert_scores = np.round(bert_pred, 2)

    if (lstm_scores[0] + gru_scores[0]) < (bert_scores[0] * 2):
        return _as_score_list(bert_scores)
    return _average_scores(lstm_scores, gru_scores)


def judgeBert(x):
    """Score a comment with the BERT predictor only.

    If ``BERT_predict`` raises, the failure is logged and all-zero scores
    are returned.

    Args:
        x: The raw comment string.

    Returns:
        A list of ``NUM_LABELS`` scores rounded to two decimals.
    """
    text = _prepare_text(x)
    try:
        bert_pred = BERT_predict(text)
    except Exception:
        # Best-effort fallback; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        logger.exception("BERT_predict failed; returning zero scores")
        bert_pred = np.zeros(NUM_LABELS, dtype=float)

    return _as_score_list(np.round(bert_pred, 2))