File size: 3,238 Bytes
59faeae
56266ec
 
 
 
 
 
 
 
 
 
039b503
99300a7
039b503
d36a83c
 
3f656fb
2205ed4
541fd71
374606d
 
 
56266ec
 
0b15904
 
56266ec
 
0b15904
 
56266ec
 
 
 
d36a83c
56266ec
 
 
 
 
 
 
 
374606d
56266ec
8e3c42c
 
56266ec
 
 
 
 
 
 
 
374606d
56266ec
 
8e3c42c
56266ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6844ad4
56266ec
 
 
 
 
99300a7
924cabe
99300a7
56266ec
 
9d6299e
56266ec
 
 
 
 
9d6299e
6844ad4
 
 
 
 
 
 
 
 
 
 
 
 
924cabe
6844ad4
 
 
518d46d
 
 
4d2949d
 
6844ad4
 
 
 
 
 
 
 
 
9627035
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import keras
import unicodedata as ud

from underthesea import word_tokenize

from phoBERT import BERT_predict

# Load tokenizer
# fp = Path(__file__).with_name('tokenizer.pkl')
# with open(fp,mode="rb") as f:
#     tokenizer = pickle.load(f)

# Load the LSTM model once, at import time, so every call to LSTM_predict
# reuses the same instance. The path is relative, so it is resolved against
# the current working directory.
# Earlier variant kept for reference:
# fp = Path(__file__).with_name('lstm_model.h5')
LSTM_model = tf.keras.models.load_model('lstm_model.tf')

# Load the GRU model the same way; it is used by GRU_predict.
# Earlier variant kept for reference:
# fp = Path(__file__).with_name('gru_model.h5')
GRU_model = tf.keras.models.load_model('gru_model.tf')


def tokenizer_pad(tokenizer, comment_text, max_length=200):
    """Encode a single comment as a fixed-length batch of token ids.

    The comment is word-segmented with ``word_tokenize(..., format="text")``,
    mapped to ids through ``tokenizer.texts_to_sequences`` and finally
    padded / truncated at the end so the sequence has ``max_length`` entries.

    Args:
        tokenizer: Object exposing ``texts_to_sequences`` (presumably a
            fitted Keras ``Tokenizer`` -- confirm against the caller).
        comment_text: The comment to encode.
        max_length: Target length of the padded sequence.

    Returns:
        The output of ``pad_sequences`` for a one-element batch.
    """
    segmented = word_tokenize(comment_text, format="text")

    # texts_to_sequences works on a batch, so wrap the single comment.
    id_sequences = tokenizer.texts_to_sequences([segmented])

    return pad_sequences(
        sequences=id_sequences,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )

def LSTM_predict(x):
    """Score ``x`` with the module-level LSTM model.

    ``x`` is wrapped in a one-element list and handed to
    ``LSTM_model.predict``; only the first row of the output is used.

    Args:
        x: The input for the model (callers in this file pass a
            word-segmented string).

    Returns:
        A list holding each value of that row rounded to two decimals.
    """
    first_row = LSTM_model.predict([x])[0]

    rounded_scores = []
    for score in first_row:
        rounded_scores.append(round(score, 2))
    return rounded_scores

def GRU_predict(x):
    """Score ``x`` with the module-level GRU model.

    ``x`` is wrapped in a one-element list and handed to
    ``GRU_model.predict``; only the first row of the output is used.

    Args:
        x: The input for the model (callers in this file pass a
            word-segmented string).

    Returns:
        A list holding each value of that row rounded to two decimals.
    """
    batch = [x]
    model_output = GRU_model.predict(batch)
    return [round(value, 2) for value in model_output[0]]

def plot(result):
    """Build a bar chart with one bar per label.

    Args:
        result: The scores to plot, one per entry of ``label`` and in the
            same order (six values).

    Returns:
        The figure produced by ``px.bar``, with one colour per label and
        the y axis fixed to the range [0, 1].

    Raises:
        ValueError: Raised by pandas when ``result`` does not have the same
            length as ``label``.
    """
    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']

    # Column names double as the axis titles shown in the chart.
    data = pd.DataFrame({'Nhãn': label, 'Điểm': result})

    return px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])

def judge(x):
    """Score a comment by averaging the LSTM and GRU predictions.

    The text is NFKC-normalised and word-segmented before being passed to
    ``LSTM_predict`` and ``GRU_predict``.

    Args:
        x: The raw comment text.

    Returns:
        A list of six values; entry ``i`` is the mean of the two models'
        ``i``-th scores, each rounded to two decimals before averaging.

    Raises:
        IndexError: If either model returns fewer than six scores.
    """
    num_labels = 6  # the app works with six labels per comment

    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")

    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)

    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)

    return [(result_lstm[i] + result_gru[i]) / 2 for i in range(num_labels)]

def judgePlus(x):
    """Score a comment by averaging LSTM, GRU and BERT predictions.

    The text is NFKC-normalised and word-segmented before being passed to
    the three predictors. If ``BERT_predict`` fails, the mean of the LSTM
    and GRU scores is used in its place, so the final result then equals
    the plain LSTM/GRU average.

    Args:
        x: The raw comment text.

    Returns:
        A list of six values; entry ``i`` is the mean of the three models'
        ``i``-th scores, each rounded to two decimals before averaging.

    Raises:
        IndexError: If any model returns fewer than six scores.
    """
    import logging  # local import keeps this block self-contained

    num_labels = 6  # the app works with six labels per comment

    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")

    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)
    try:
        bert_pred = BERT_predict(x)
    except Exception:
        # Best-effort fallback: keep serving a result when BERT is
        # unavailable, but record the failure instead of hiding it.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        logging.getLogger(__name__).exception(
            "BERT_predict failed; falling back to the LSTM/GRU average"
        )
        bert_pred = np.average([lstm_pred, gru_pred], axis=0)

    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)
    result_bert = np.round(bert_pred, 2)

    return [
        (result_lstm[i] + result_gru[i] + result_bert[i]) / 3
        for i in range(num_labels)
    ]