File size: 2,699 Bytes
59faeae
56266ec
 
 
 
 
 
 
 
 
 
039b503
99300a7
039b503
d36a83c
 
3f656fb
2205ed4
541fd71
374606d
 
 
56266ec
 
0b15904
 
56266ec
 
0b15904
 
56266ec
 
 
 
d36a83c
56266ec
 
 
 
 
 
 
 
374606d
56266ec
8e3c42c
 
56266ec
 
 
 
 
 
 
 
374606d
56266ec
 
8e3c42c
56266ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f656fb
56266ec
 
 
 
 
99300a7
 
56266ec
 
9d6299e
56266ec
 
 
 
 
9d6299e
3f656fb
 
 
 
 
 
 
 
9627035
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import keras
import unicodedata as ud

from underthesea import word_tokenize

from phoBERT import BERT_predict

# Tokenizer loading is currently disabled (kept below for reference);
# tokenizer_pad() receives the tokenizer as an argument instead.
# fp = Path(__file__).with_name('tokenizer.pkl')
# with open(fp,mode="rb") as f:
#     tokenizer = pickle.load(f)

# Load the LSTM model once, when this module is imported. The path is
# relative, so it is resolved against the current working directory.
# (Disabled alternative: locate an .h5 file next to this source file.)
#fp = Path(__file__).with_name('lstm_model.h5')
LSTM_model = tf.keras.models.load_model('lstm_model.tf')

# Load the GRU model the same way (relative path, loaded at import).
#fp = Path(__file__).with_name('gru_model.h5')
GRU_model = tf.keras.models.load_model('gru_model.tf')


def tokenizer_pad(tokenizer,comment_text,max_length=200):
    """Turn one comment into a fixed-length sequence of token ids.

    The comment is first segmented with underthesea's ``word_tokenize``
    (``format="text"``), then converted by ``tokenizer.texts_to_sequences``
    and finally padded/truncated at the end to ``max_length`` entries.

    Args:
        tokenizer: Object exposing ``texts_to_sequences`` (presumably a fitted
            Keras ``Tokenizer`` -- confirm against the caller).
        comment_text: The raw comment string.
        max_length: Target sequence length passed to ``pad_sequences``.

    Returns:
        The result of ``pad_sequences`` for the single-comment batch.
    """
    segmented = word_tokenize(comment_text, format="text")

    # The tokenizer works on a batch of texts, so wrap the single comment.
    id_sequences = tokenizer.texts_to_sequences([segmented])

    return pad_sequences(
        sequences=id_sequences,
        maxlen=max_length,
        padding="post",
        truncating="post",
    )

def LSTM_predict(x):
    """Run the module-level LSTM model on ``x`` and return rounded scores.

    ``x`` is wrapped in a one-element list and handed to
    ``LSTM_model.predict`` unchanged (no tokenisation happens here); only the
    first row of the model output is used.

    Args:
        x: Model input; ``judge`` passes the NFKC-normalised comment string.

    Returns:
        A list with every value of the first output row rounded to two
        decimal places.
    """
    first_row = LSTM_model.predict([x])[0]
    return [round(score, 2) for score in first_row]

def GRU_predict(x):
    """Run the module-level GRU model on ``x`` and return rounded scores.

    ``x`` is wrapped in a one-element list and handed to
    ``GRU_model.predict`` unchanged (no tokenisation happens here); only the
    first row of the model output is used.

    Args:
        x: Model input; ``judge`` passes the NFKC-normalised comment string.

    Returns:
        A list with every value of the first output row rounded to two
        decimal places.
    """
    first_row = GRU_model.predict([x])[0]
    return [round(score, 2) for score in first_row]

def plot(result):
    """Build a Plotly bar chart with one bar per toxicity label.

    Args:
        result: Scores to plot; assigned as the ``'Điểm'`` column next to the
            six fixed labels, so it is expected to hold six values.

    Returns:
        The figure produced by ``plotly.express.bar`` with the y axis fixed
        to the range 0..1 and bars coloured by label.
    """
    labels = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']

    # Column names are user-facing: 'Nhãn' (label) and 'Điểm' (score).
    frame = pd.DataFrame({'Nhãn': labels})
    frame['Điểm'] = result

    return px.bar(frame, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])

def judge(x, bert=False):
    """Average the LSTM and GRU scores (optionally with BERT) for a text.

    The text is NFKC-normalised and passed to ``LSTM_predict`` and
    ``GRU_predict``; when ``bert`` is truthy, ``BERT_predict`` is called as
    well. Each model's scores are rounded to two decimals and the rounded
    scores are then averaged position by position with equal weight.

    Args:
        x: Input text; it is passed to ``unicodedata.normalize``, so it must
            be a ``str``.
        bert: When truthy, include ``BERT_predict`` in the average.

    Returns:
        A list with the mean score for each output position. The length
        follows the model outputs instead of a hard-coded count; ``plot``
        pairs these positions with its six labels.
    """
    text = ud.normalize('NFKC', x)

    # Operands are evaluated left to right, so the LSTM model still runs
    # before the GRU model, and BERT (if requested) runs last.
    summed = np.round(LSTM_predict(text), 2) + np.round(GRU_predict(text), 2)
    model_count = 2

    if bert:
        summed = summed + np.round(BERT_predict(text), 2)
        model_count = 3

    # Callers receive a plain list, as before, rather than an ndarray.
    return list(summed / model_count)