File size: 3,206 Bytes
59faeae 56266ec 039b503 99300a7 039b503 d36a83c 3f656fb 2205ed4 541fd71 374606d 56266ec 0b15904 56266ec 0b15904 56266ec d36a83c 56266ec 374606d 56266ec 8e3c42c 56266ec 374606d 56266ec 8e3c42c 56266ec 6844ad4 56266ec 99300a7 924cabe 99300a7 56266ec 9d6299e 56266ec 9d6299e 6844ad4 924cabe 6844ad4 9627035 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import keras
import unicodedata as ud
from underthesea import word_tokenize
from phoBERT import BERT_predict
# Tokenizer loading is currently disabled; the lines below are kept for
# reference only and no `tokenizer` object exists at module level.
# fp = Path(__file__).with_name('tokenizer.pkl')
# with open(fp,mode="rb") as f:
# tokenizer = pickle.load(f)
# Load the LSTM model once at import time from the relative path
# 'lstm_model.tf'.
# Disabled alternative: fp = Path(__file__).with_name('lstm_model.h5')
LSTM_model = tf.keras.models.load_model('lstm_model.tf')
# Load the GRU model once at import time from the relative path
# 'gru_model.tf'.
# Disabled alternative: fp = Path(__file__).with_name('gru_model.h5')
GRU_model = tf.keras.models.load_model('gru_model.tf')
def tokenizer_pad(tokenizer, comment_text, max_length=200):
    """Word-segment one comment and turn it into a fixed-length id sequence.

    The comment is segmented with ``word_tokenize(..., format="text")``,
    mapped to ids through ``tokenizer.texts_to_sequences`` and finally
    padded/truncated at the end ("post") to ``max_length`` entries.

    Args:
        tokenizer: Object exposing ``texts_to_sequences``.
        comment_text: The comment to encode.
        max_length: Length every sequence is padded or truncated to.

    Returns:
        The output of ``pad_sequences`` for a batch holding this one comment.
    """
    segmented = word_tokenize(comment_text, format="text")
    # The tokenizer works on a batch of texts, so wrap the single comment.
    id_sequences = tokenizer.texts_to_sequences([segmented])
    return pad_sequences(
        sequences=id_sequences,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )
def LSTM_predict(x):
    """Return the LSTM model's scores for ``x``, rounded to two decimals.

    ``x`` is handed to ``LSTM_model.predict`` wrapped in a one-element list
    (no tokenization happens here); only the first row of the prediction is
    used.

    Returns:
        A list holding each score of that row rounded with ``round(score, 2)``.
    """
    first_row = LSTM_model.predict([x])[0]
    rounded_scores = []
    for score in first_row:
        rounded_scores.append(round(score, 2))
    return rounded_scores
def GRU_predict(x):
    """Return the GRU model's scores for ``x``, rounded to two decimals.

    ``x`` is handed to ``GRU_model.predict`` wrapped in a one-element list
    (no tokenization happens here); only the first row of the prediction is
    used.

    Returns:
        A list holding each score of that row rounded with ``round(score, 2)``.
    """
    first_row = GRU_model.predict([x])[0]
    rounded_scores = []
    for score in first_row:
        rounded_scores.append(round(score, 2))
    return rounded_scores
def plot(result):
    """Build a bar chart with one bar per label for the scores in ``result``.

    Args:
        result: Scores to plot, given in the same order as the six labels
            listed below.

    Returns:
        The figure created by ``px.bar``, with labels on the x axis, scores
        on the y axis and the y range fixed to [0, 1].
    """
    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
    # 'Nhãn' holds the label names and 'Điểm' the matching scores; the same
    # column names are used as axis/colour keys in the chart.
    data = pd.DataFrame({'Nhãn': label, 'Điểm': result})
    return px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])
def judge(x):
    """Score a comment by averaging the LSTM and GRU predictions.

    The text is Unicode-normalized (NFKC) and word-segmented with
    ``word_tokenize(..., format="text")`` before being passed to
    ``LSTM_predict`` and ``GRU_predict``. Both predictions are rounded to
    two decimals and averaged position by position.

    Args:
        x: The comment text to score.

    Returns:
        A list of six averaged scores, one per label, in this order:
        'độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm',
        'thù ghét cá nhân'.

    Raises:
        IndexError: If either prediction holds fewer than six scores.
    """
    num_labels = 6  # only the first six scores of each prediction are used

    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")

    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)

    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)

    return [(result_lstm[i] + result_gru[i]) / 2 for i in range(num_labels)]
def judgePlus(x):
    """Score a comment by averaging the LSTM, GRU and BERT predictions.

    The text is Unicode-normalized (NFKC) and word-segmented with
    ``word_tokenize(..., format="text")`` before being passed to
    ``LSTM_predict``, ``GRU_predict`` and ``BERT_predict``. The three
    predictions are rounded to two decimals and averaged position by
    position.

    Args:
        x: The comment text to score.

    Returns:
        A list of six averaged scores, one per label, in this order:
        'độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm',
        'thù ghét cá nhân'.

    Raises:
        IndexError: If any prediction holds fewer than six scores.
    """
    num_labels = 6  # only the first six scores of each prediction are used

    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")

    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)
    bert_pred = BERT_predict(x)

    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)
    result_bert = np.round(bert_pred, 2)

    return [
        (result_lstm[i] + result_gru[i] + result_bert[i]) / 3
        for i in range(num_labels)
    ]
|