import json
import random
import string

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Download the NLTK resources required by word_tokenize and WordNetLemmatizer.
nltk.download("punkt", quiet=True)
nltk.download("wordnet", quiet=True)
nltk.download("omw-1.4", quiet=True)
nltk.download("stopwords", quiet=True)


class ModeleDeepLearning:
    """Bag-of-words intent classifier: loads an intents JSON file, trains a
    small dense network, and maps user questions to canned responses."""

    def __init__(self, file_path, epochs=200):
        self.file_path = file_path
        self.epochs = epochs
        self.model = None
        self.words = []
        self.classes = []
        self.lemmatizer = WordNetLemmatizer()
        self.run()
    def importer(self):
        with open(self.file_path, encoding="utf-8") as f:
            data = json.load(f)
        return data
    def preprocess_data(self):
        """Build the bag-of-words training matrix and one-hot label matrix."""
        doc_X = []
        doc_y = []

        # Collect every pattern, its tokens, and its intent tag.
        for intent in self.data["intents"]:
            for pattern in intent["patterns"]:
                tokens = word_tokenize(pattern)
                self.words.extend(tokens)
                doc_X.append(pattern)
                doc_y.append(intent["tag"])

            if intent["tag"] not in self.classes:
                self.classes.append(intent["tag"])

        # Lemmatize the vocabulary, drop punctuation, and deduplicate.
        self.words = sorted(
            {
                self.lemmatizer.lemmatize(word.lower())
                for word in self.words
                if word not in string.punctuation
            }
        )
        self.classes = sorted(set(self.classes))

        training = []
        out_empty = [0] * len(self.classes)

        for idx, doc in enumerate(doc_X):
            # Lemmatize the pattern's tokens, then mark which vocabulary words
            # appear in it (token membership rather than substring matching,
            # which would produce false positives such as "hi" in "this").
            tokens = [self.lemmatizer.lemmatize(t.lower()) for t in word_tokenize(doc)]
            bow = [1 if word in tokens else 0 for word in self.words]

            output_row = list(out_empty)
            output_row[self.classes.index(doc_y[idx])] = 1
            training.append([bow, output_row])

        random.shuffle(training)
        training = np.array(training, dtype=object)

        train_X = np.array(list(training[:, 0]))
        train_y = np.array(list(training[:, 1]))

        return train_X, train_y
    def build_model(self, input_shape, output_shape):
        model = Sequential()
        model.add(Dense(128, input_shape=input_shape, activation="relu"))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Dense(output_shape, activation="softmax"))
        adam = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"])
        return model
    def train_model(self):
        input_shape = (len(self.train_X[0]),)
        output_shape = len(self.train_y[0])

        self.model = self.build_model(input_shape, output_shape)
        self.model.fit(x=self.train_X, y=self.train_y, epochs=self.epochs, verbose=1)
    def clean_text(self, text):
        tokens = word_tokenize(text)
        # Lowercase before lemmatizing so tokens line up with the vocabulary
        # built in preprocess_data, which was lowercased the same way.
        tokens = [self.lemmatizer.lemmatize(word.lower()) for word in tokens]
        return tokens
    def bag_of_words(self, text):
        tokens = self.clean_text(text)
        bow = [1 if word in tokens else 0 for word in self.words]
        return np.array(bow)
    def predict_class(self, text):
        """Return the intent tags scoring above the threshold, most confident first."""
        bow = self.bag_of_words(text)
        result = self.model.predict(np.array([bow]), verbose=0)[0]
        thresh = 0.2
        y_pred = [[idx, res] for idx, res in enumerate(result) if res > thresh]
        y_pred.sort(key=lambda x: x[1], reverse=True)
        return [self.classes[r[0]] for r in y_pred]
    def get_response(self, intents_list):
        # Guard against an empty prediction list (nothing beat the threshold)
        # or an unknown tag, so `result` is always defined.
        fallback = "Sorry, I did not understand that."
        if not intents_list:
            return fallback
        tag = intents_list[0]
        result = fallback
        for i in self.data["intents"]:
            if i["tag"] == tag:
                result = random.choice(i["responses"])
                break
        return result
    def predict(self, question):
        intents = self.predict_class(question)
        return self.get_response(intents)
    def run(self):
        self.data = self.importer()
        self.train_X, self.train_y = self.preprocess_data()
        self.train_model()
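
# Minimal usage sketch. Assumptions not in the original code: an
# "intents.json" file in the working directory, and the sample question
# below. The class expects the schema shown in this illustrative example:
#
# {
#   "intents": [
#     {
#       "tag": "greeting",
#       "patterns": ["Hi", "Hello", "Hey there"],
#       "responses": ["Hello!", "Hi, how can I help?"]
#     }
#   ]
# }
if __name__ == "__main__":
    chatbot = ModeleDeepLearning("intents.json", epochs=200)
    print(chatbot.predict("Hello"))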