import json
import random
import string

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# NLTK resources required at runtime; uncomment on first run.
# nltk.download("omw-1.4")
# nltk.download("stopwords", quiet=True)
# nltk.download("punkt", quiet=True)
# nltk.download("wordnet", quiet=True)


class ModeleDeepLearning:
    def __init__(self, file_path, epochs=200):
        self.file_path = file_path
        self.epochs = epochs
        self.model = None
        self.words = []    # sorted vocabulary of lemmatized, lowercased tokens
        self.classes = []  # sorted intent tags
        self.lemmatizer = WordNetLemmatizer()
        self.run()

    def importer(self):
        """Load the intents JSON file."""
        with open(self.file_path, encoding="utf-8") as f:
            data = json.load(f)
        return data

    def preprocess_data(self):
        """Build bag-of-words training vectors and one-hot intent labels."""
        doc_X = []
        doc_y = []
        for intent in self.data["intents"]:
            for pattern in intent["patterns"]:
                tokens = word_tokenize(pattern)
                self.words.extend(tokens)
                doc_X.append(pattern)
                doc_y.append(intent["tag"])
            if intent["tag"] not in self.classes:
                self.classes.append(intent["tag"])
        self.words = [self.lemmatizer.lemmatize(word.lower())
                      for word in self.words if word not in string.punctuation]
        self.words = sorted(set(self.words))
        self.classes = sorted(set(self.classes))

        training = []
        out_empty = [0] * len(self.classes)
        for idx, doc in enumerate(doc_X):
            # Lemmatize each token of the pattern so membership tests match
            # the vocabulary exactly. (Lemmatizing the whole pattern string
            # and testing `word in text` would do substring matching and
            # produce false positives.)
            text = [self.lemmatizer.lemmatize(t.lower()) for t in word_tokenize(doc)]
            bow = [1 if word in text else 0 for word in self.words]
            output_row = list(out_empty)
            output_row[self.classes.index(doc_y[idx])] = 1
            training.append([bow, output_row])
        random.shuffle(training)
        training = np.array(training, dtype=object)
        train_X = np.array(list(training[:, 0]))
        train_y = np.array(list(training[:, 1]))
        return train_X, train_y

    def build_model(self, input_shape, output_shape):
        """Two ReLU hidden layers with dropout, softmax over intent classes."""
        model = Sequential()
        model.add(Dense(128, input_shape=input_shape, activation="relu"))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Dense(output_shape, activation="softmax"))
        adam = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(loss="categorical_crossentropy", optimizer=adam,
                      metrics=["accuracy"])
        return model

    def train_model(self):
        input_shape = (len(self.train_X[0]),)
        output_shape = len(self.train_y[0])
        self.model = self.build_model(input_shape, output_shape)
        self.model.fit(x=self.train_X, y=self.train_y, epochs=self.epochs, verbose=1)

    def clean_text(self, text):
        """Tokenize and lemmatize user input, mirroring the training pipeline."""
        tokens = word_tokenize(text)
        # Lowercase before lemmatizing; the vocabulary was built lowercased.
        return [self.lemmatizer.lemmatize(word.lower()) for word in tokens]

    def bag_of_words(self, text):
        """Encode `text` as a binary vector over the training vocabulary."""
        tokens = self.clean_text(text)
        bow = [0] * len(self.words)
        for w in tokens:
            for idx, word in enumerate(self.words):
                if word == w:
                    bow[idx] = 1
        return np.array(bow)

    def predict_class(self, text):
        """Return intent tags above the probability threshold, most likely first."""
        bow = self.bag_of_words(text)
        result = self.model.predict(np.array([bow]))[0]
        thresh = 0.2
        y_pred = [[idx, res] for idx, res in enumerate(result) if res > thresh]
        y_pred.sort(key=lambda x: x[1], reverse=True)
        return [self.classes[r[0]] for r in y_pred]

    def get_response(self, intents_list):
        """Pick a random response for the top intent; None if nothing matched."""
        if not intents_list:
            return None  # no intent cleared the threshold
        tag = intents_list[0]
        for intent in self.data["intents"]:
            if intent["tag"] == tag:
                return random.choice(intent["responses"])
        return None

    def predict(self, question):
        intents = self.predict_class(question)
        return self.get_response(intents)

    def run(self):
        self.data = self.importer()
        self.train_X, self.train_y = self.preprocess_data()
        self.train_model()
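
# ---------------------------------------------------------------------------
# Minimal usage sketch, assuming an intents file in the conventional layout:
#   {"intents": [{"tag": "...", "patterns": ["..."], "responses": ["..."]}]}
# The path "intents.json" and the sample question are placeholders for
# illustration, not part of the original module.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    bot = ModeleDeepLearning("intents.json", epochs=200)  # trains on construction
    print(bot.predict("Hello, how are you?"))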