File size: 2,494 Bytes
59faeae
56266ec
 
 
 
 
 
 
 
 
 
039b503
99300a7
039b503
d36a83c
 
7163957
2205ed4
541fd71
374606d
 
 
56266ec
 
0b15904
 
56266ec
 
0b15904
 
56266ec
 
 
 
d36a83c
56266ec
 
 
 
 
 
 
 
374606d
56266ec
8e3c42c
 
56266ec
 
 
 
 
 
 
 
374606d
56266ec
 
8e3c42c
56266ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99300a7
 
56266ec
 
7163957
56266ec
 
 
 
 
7163957
56266ec
7163957
9627035
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import keras
import unicodedata as ud

from underthesea import word_tokenize

#from phoBERT import BERT_predict

# Load tokenizer
# NOTE(review): tokenizer loading is disabled below, so no module-level
# `tokenizer` is defined in this part of the file; `tokenizer_pad` needs one
# passed in explicitly by its caller.
# fp = Path(__file__).with_name('tokenizer.pkl')
# with open(fp,mode="rb") as f:
#     tokenizer = pickle.load(f)

# Load the LSTM model once, at import time. 'lstm_model.tf' is a relative
# path (the commented line is an older lookup next to this file, for .h5).
#fp = Path(__file__).with_name('lstm_model.h5')
LSTM_model = tf.keras.models.load_model('lstm_model.tf')

# Load the GRU model once, at import time. 'gru_model.tf' is a relative
# path (the commented line is an older lookup next to this file, for .h5).
#fp = Path(__file__).with_name('gru_model.h5')
GRU_model = tf.keras.models.load_model('gru_model.tf')


def tokenizer_pad(tokenizer, comment_text, max_length=200):
    """Turn one comment into a padded batch of token-id sequences.

    The comment is first passed through underthesea's ``word_tokenize`` in
    ``"text"`` format, then converted with ``tokenizer.texts_to_sequences``
    and finally padded/truncated with ``pad_sequences``.

    Args:
        tokenizer: Object exposing ``texts_to_sequences`` (presumably a fitted
            Keras ``Tokenizer`` -- confirm against the caller).
        comment_text: The raw comment to encode.
        max_length: Target sequence length; both padding and truncation are
            applied at the end of the sequence ("post").

    Returns:
        The output of ``pad_sequences`` for a batch holding this one comment.
    """
    segmented = word_tokenize(comment_text, format="text")

    # The tokenizer works on a batch of texts, so wrap the single comment.
    sequences = tokenizer.texts_to_sequences([segmented])

    return pad_sequences(
        sequences=sequences,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )

def LSTM_predict(x):
    """Return the LSTM model's scores for ``x``, rounded to two decimals.

    NOTE(review): ``x`` is handed to the model as-is (wrapped in a list);
    no tokenization or padding happens here, so presumably the saved model
    does its own text preprocessing -- confirm.

    Args:
        x: The input to score.

    Returns:
        A list with the first prediction row, each value rounded to 2 places.
    """
    # predict() works on a batch; wrap the single input and keep its only row.
    scores = LSTM_model.predict([x])[0]

    rounded_scores = []
    for score in scores:
        rounded_scores.append(round(score, 2))
    return rounded_scores

def GRU_predict(x):
    """Return the GRU model's scores for ``x``, rounded to two decimals.

    NOTE(review): ``x`` is handed to the model as-is (wrapped in a list);
    no tokenization or padding happens here, so presumably the saved model
    does its own text preprocessing -- confirm.

    Args:
        x: The input to score.

    Returns:
        A list with the first prediction row, each value rounded to 2 places.
    """
    # predict() works on a batch; wrap the single input and keep its only row.
    scores = GRU_model.predict([x])[0]

    rounded_scores = []
    for score in scores:
        rounded_scores.append(round(score, 2))
    return rounded_scores

def plot(result):
    """Build a bar chart with one bar per toxicity label.

    Args:
        result: The scores to plot, one per entry of ``label`` below and in
            the same order.

    Returns:
        The Plotly Express bar figure, with the y-axis fixed to ``[0, 1]``.
    """
    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']

    # Column names double as the axis titles shown on the chart.
    data = pd.DataFrame()
    data['Nhãn'] = label
    data['Điểm'] = result

    # Colouring by label gives every bar its own colour and a legend entry.
    return px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])

def judge(x):
    """Score a comment by averaging the LSTM and GRU predictions.

    The text is NFKC-normalized, scored by ``LSTM_predict`` and
    ``GRU_predict``, and the two score vectors are averaged position by
    position.

    Args:
        x: The comment text to score.

    Returns:
        A list of 6 averaged scores, in the order the models emit them.

    Raises:
        IndexError: If either model yields fewer than 6 scores.
    """
    # Number of scores combined; matches the six labels drawn by `plot`.
    num_labels = 6

    x = ud.normalize('NFKC', x)

    # np.round also converts the per-model lists into arrays for indexing.
    result_lstm = np.round(LSTM_predict(x), 2)
    result_gru = np.round(GRU_predict(x), 2)

    return [(result_lstm[i] + result_gru[i]) / 2 for i in range(num_labels)]