Spaces:
Running
Running
# import gradio as gr | |
import tensorflow as tf | |
import numpy as np | |
from keras.models import load_model | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
import pickle | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
import os | |
from pathlib import Path | |
import pandas as pd | |
import plotly.express as px | |
import keras | |
import unicodedata as ud | |
from underthesea import word_tokenize | |
from phoBERT import BERT_predict | |
# NOTE: the pickled Keras tokenizer is intentionally not loaded here; the
# predict functions below hand text straight to the models. Presumably the
# saved models carry their own text vectorization -- TODO confirm.

# LSTM classifier, loaded from a path relative to the working directory.
LSTM_model = tf.keras.models.load_model("lstm_model.tf")

# GRU classifier, loaded the same way.
GRU_model = tf.keras.models.load_model("gru_model.tf")
def tokenizer_pad(tokenizer, comment_text, max_length=200):
    """Turn one comment into a padded sequence of token ids.

    The comment is first word-segmented with ``word_tokenize`` (text
    format), then converted to ids by ``tokenizer.texts_to_sequences`` and
    finally padded/truncated at the end to ``max_length``.

    Args:
        tokenizer: Object exposing ``texts_to_sequences`` (presumably a
            fitted Keras ``Tokenizer`` -- confirm against the caller).
        comment_text: The comment to encode.
        max_length: Target sequence length; defaults to 200.

    Returns:
        The output of ``pad_sequences`` for the single-comment batch.
    """
    segmented = word_tokenize(comment_text, format="text")
    # texts_to_sequences expects a batch, so wrap the single comment.
    id_sequences = tokenizer.texts_to_sequences([segmented])
    return pad_sequences(
        sequences=id_sequences,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )
def LSTM_predict(x):
    """Score one input with the LSTM model.

    The input is wrapped in a single-element list before being handed to
    ``LSTM_model.predict``; only the first row of the output is used.

    Args:
        x: Input to classify. It is passed to the model as-is (the
            tokenizer/padding step is disabled), so presumably the saved
            model accepts raw text -- TODO confirm.

    Returns:
        A list holding the first output row's values, each rounded to two
        decimal places.
    """
    first_row = LSTM_model.predict([x])[0]
    return [round(score, 2) for score in first_row]
def GRU_predict(x):
    """Score one input with the GRU model.

    The input is wrapped in a single-element list before being handed to
    ``GRU_model.predict``; only the first row of the output is used.

    Args:
        x: Input to classify. It is passed to the model as-is (the
            tokenizer/padding step is disabled), so presumably the saved
            model accepts raw text -- TODO confirm.

    Returns:
        A list holding the first output row's values, each rounded to two
        decimal places.
    """
    first_row = GRU_model.predict([x])[0]
    return [round(score, 2) for score in first_row]
def plot(result):
    """Build a bar chart with one bar per label.

    Args:
        result: Sequence of six scores; element ``i`` is plotted against
            the ``i``-th entry of the label list below.

    Returns:
        The Plotly Express bar figure, with the y axis fixed to [0, 1] and
        bars colored by label.
    """
    # Display names shown on the x axis (user-facing, kept in Vietnamese).
    labels = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
    # Column names: 'Nhãn' = label, 'Điểm' = score.
    data = pd.DataFrame({'Nhãn': labels, 'Điểm': result})
    return px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])
def judge(x):
    """Average the LSTM and GRU scores for one comment.

    The text is NFKC-normalized and word-segmented with ``word_tokenize``
    before being passed to both models.

    Args:
        x: Comment text to evaluate.

    Returns:
        A list of six scores; element ``i`` is the mean of the two models'
        ``i``-th scores, each rounded to two decimals before averaging.
    """
    num_labels = 6  # one score per toxicity label

    text = ud.normalize('NFKC', x)
    text = word_tokenize(text, format="text")

    lstm_scores = np.round(LSTM_predict(text), 2)
    gru_scores = np.round(GRU_predict(text), 2)

    return [(lstm_scores[i] + gru_scores[i]) / 2 for i in range(num_labels)]
def judgePlus(x):
    """Average the LSTM, GRU and BERT scores for one comment.

    The text is NFKC-normalized and word-segmented with ``word_tokenize``
    before being passed to all three models.

    Args:
        x: Comment text to evaluate.

    Returns:
        A list of six scores; element ``i`` is the mean of the three
        models' ``i``-th scores, each rounded to two decimals before
        averaging.
    """
    num_labels = 6  # one score per toxicity label

    text = ud.normalize('NFKC', x)
    text = word_tokenize(text, format="text")

    lstm_scores = np.round(LSTM_predict(text), 2)
    gru_scores = np.round(GRU_predict(text), 2)
    bert_scores = np.round(BERT_predict(text), 2)

    return [
        (lstm_scores[i] + gru_scores[i] + bert_scores[i]) / 3
        for i in range(num_labels)
    ]