Commit
·
3ec53db
1
Parent(s):
18db904
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import atexit
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
from collections.abc import Generator
|
6 |
+
|
7 |
+
import nltk
|
8 |
+
import numpy as np
|
9 |
+
import pyttsx3
|
10 |
+
import spacy
|
11 |
+
import spacy as nlp
|
12 |
+
import speech_recognition as sr
|
13 |
+
import transformers
|
14 |
+
from nltk.corpus import stopwords
|
15 |
+
from nltk.stem import WordNetLemmatizer
|
16 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
17 |
+
from transformers import AutoTokenizer
|
18 |
+
from transformers import GPTNeoForCausalLM
|
19 |
+
from transformers import pipeline
|
20 |
+
|
21 |
+
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
22 |
+
|
23 |
+
import torch

# --- Startup diagnostics -------------------------------------------------
# Print the versions of the heavy dependencies and the install location of
# the spaCy model so environment problems are visible right away.
print(torch.__version__)

model_path = spacy.util.get_package_path('en_core_web_sm')
print(model_path)

print("transformers version:", transformers.__version__)
print("spacy version:", spacy.__version__)
print("nltk version:", nltk.__version__)

# Make the author's local project directory importable (machine-specific).
sys.path.append(r"C:\Users\withe\PycharmProjects\no hope2\Gpt-Neo1")

# Download the NLTK resources used further down: the tokenizer data,
# the stop-word list and the WordNet data for the lemmatizer.
for _resource in ('punkt', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)
del _resource  # do not leave the loop variable behind as a module global
|
44 |
+
|
45 |
+
|
46 |
+
# Load the small English spaCy pipeline. The resulting `nlp` callable is a
# module-level global that MemoryModule.extract_named_entity relies on.
nlp = spacy.load('en_core_web_sm')

# Sample sentence used to exercise the pipeline at start-up
text = "Example text to process"

# Run the pipeline over the sample sentence
doc = nlp(text)

# Keep only the entities labelled as person, organisation or
# geopolitical entity.
named_entities = [
    entity.text
    for entity in doc.ents
    if entity.label_ in ['PERSON', 'ORG', 'GPE']
]

# Show what was found
print(named_entities)
|
64 |
+
|
65 |
+
# Path of the environment file intended to hold the API key.
# NOTE(review): nothing in the visible script actually reads this file; the
# original followed the assignment with a bare `(dotenv_path)` expression,
# which evaluates the name and discards it. Only the path is defined here.
dotenv_path = './API_KEY.env'

# Select the compute device for the language model.
# The original always asked for 'cuda:1' when CUDA was available, which is an
# invalid device ordinal on a machine with a single GPU. Keep preferring the
# second GPU when there is one, otherwise fall back to GPU 0, and use the CPU
# when CUDA is not available at all.
if torch.cuda.is_available():
    _gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device('cuda:{}'.format(_gpu_index))
else:
    device = torch.device('cpu')
|
71 |
+
|
72 |
+
# Initialize the speech engine
speech_engine = pyttsx3.init()

# List the installed voices so the available ids and names are visible.
voices = speech_engine.getProperty('voices')
for voice in voices:
    print(voice.id, voice.name)

# Select the desired voice by its display name.
# The original first tried to set a hard-coded "voice id" that was really the
# printed id and display name pasted together (and written with unescaped
# backslashes), so it could not match any installed voice. Only the
# name-based lookup below is kept.
desired_voice = "Microsoft Hazel Desktop - English (Great Britain)"
voice_id = None

# Find the voice ID based on the desired voice name
for voice in voices:
    if desired_voice in voice.name:
        voice_id = voice.id
        break

if voice_id:
    speech_engine.setProperty('voice', voice_id)
    print("Desired voice set successfully.")
else:
    # The engine keeps its default voice in this case.
    print("Desired voice not found.")
|
103 |
+
|
104 |
+
|
105 |
+
class CommonModule:
    """Voice chat bot combining text generation, memory, sentiment and speech.

    The instance owns the tokenizer and causal language model, a
    ``MemoryModule`` (remembered utterances and named entities), a
    ``SentimentAnalysisModule`` and the module-level ``speech_engine``.
    ``converse`` runs the listen / answer / speak loop.
    """

    # File that ``converse`` writes the memory to when the user says "bye".
    # Same value as the original literal, but with every backslash escaped
    # (the original contained the invalid escape sequence ``\P``).
    DEFAULT_SAVE_PATH = 'C:\\Users\\withe\\PycharmProjects\\no hope\\Chat_Bot_Main\\save_memory.json'

    def __init__(self, model, name, param1, param2):
        """Load the model stack and create the helper modules.

        Args:
            model: Hugging Face model name or path; used for both the
                tokenizer and the causal language model.
            name: Name the bot introduces itself with.
            param1: Stored on the instance; not read by any method here.
            param2: Stored on the instance; not read by any method here.
        """
        self.model = model
        self.name = name
        self.param1 = param1
        self.param2 = param2

        self.tokenizer = AutoTokenizer.from_pretrained(model)
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})

        # Load the checkpoint named by `model` so tokenizer and weights always
        # match (the original hard-coded the checkpoint name here).
        self.gpt3_model = GPTNeoForCausalLM.from_pretrained(model)
        # A token was added to the tokenizer above, so the embedding matrix
        # has to grow with it; otherwise the new id is out of range.
        self.gpt3_model.resize_token_embeddings(len(self.tokenizer))
        self.gpt3_model.to(device)  # Move model to the device (GPU or CPU)

        # Build the generation pipeline once, from the objects loaded above.
        # The original created a fresh pipeline (and therefore re-loaded the
        # whole model) on every single reply.
        self.generator = pipeline(
            'text-generation',
            model=self.gpt3_model,
            tokenizer=self.tokenizer,
            device=device,
        )

        self.memory_module = MemoryModule()
        self.sentiment_module = SentimentAnalysisModule()
        self.speech_engine = speech_engine  # engine initialised at module level

        # Generation settings kept from the original; none of the methods in
        # this class read them.
        self.max_sequence_length = 10
        self.num_beams = 4
        self.no_repeat_ngram_size = 2
        self.temperature = 0.3
        self.response_cache = {}  # maps input text -> generated response

    def reset_conversation(self):
        """Forget everything stored in the memory module."""
        self.memory_module.reset_memory()

    def retrieve_cached_response(self, input_text):
        """Return a cached response for ``input_text``, or ``None``.

        ``cache_response`` stores responses under the exact input text, so
        that key is tried first (the original only looked entries up by
        entity name and therefore never hit the cache). Entries stored under
        the name of a known entity mentioned in the input are still honoured.
        """
        cached = self.response_cache.get(input_text)
        if cached is not None:
            return cached

        lowered_input = input_text.lower()
        for entity in self.memory_module.get_named_entities():
            if entity.lower() in lowered_input and entity in self.response_cache:
                return self.response_cache[entity]
        return None

    def generate_gpt3_response(self, input_text, conversation_history, temperature=0.3):
        """Generate a reply with the language model.

        Args:
            input_text: The latest user utterance.
            conversation_history: Earlier utterances, oldest first; they are
                joined with newlines and prepended to ``input_text``.
            temperature: Sampling temperature passed to the pipeline.

        Returns:
            The generated continuation with surrounding whitespace removed,
            or ``""`` when the pipeline returns nothing.
        """
        prompt = '\n'.join(conversation_history) + '\n' + input_text + '\n'

        output = self.generator(
            prompt,
            do_sample=True,
            min_length=50,
            temperature=temperature,
            num_return_sequences=1,
            # Only return the newly generated text; by default the pipeline
            # echoes the whole prompt, which the bot would then speak back.
            return_full_text=False,
        )

        if output:
            return output[0]['generated_text'].strip()
        return ""

    def process_input(self, input_text, conversation_history):
        """Produce the bot's response to one user utterance.

        Checks are applied in this order: a known named entity mentioned in
        the input, a question mark, the keyword "search" (memory lookup),
        the response cache, and finally text generation. Every response
        except the question and search answers is added to the memory.
        """
        # sorted() gives a stable choice when several entities match and
        # iterates a copy, since add_to_memory may change the entity set.
        for entity in sorted(self.memory_module.get_named_entities()):
            if entity in input_text:
                response = "Nice to meet you again, {}!".format(entity)
                self.memory_module.add_to_memory(response)
                return response

        # Check if the input contains a question
        if '?' in input_text:
            return "You're making me angry, you wouldn't like me when I'm angry."

        # Check if the input contains a keyword for memory search
        if 'search' in input_text.lower():
            keyword = input_text.lower().split('search ')[-1]
            matches = self.memory_module.search_memory(keyword)
            if matches:
                return "I found some related information in the memory:\n" + '\n'.join(matches)
            return "Sorry, I couldn't find any relevant information in the memory."

        response = self.retrieve_cached_response(input_text)
        if response is None:
            response = self.generate_gpt3_response(input_text, conversation_history)
            self.cache_response(input_text, response)

        # The original repeated the named-entity check here. It could never
        # trigger (any match already returned above, and nothing in between
        # changes the entity set) and it indexed a set, so it was removed.
        self.memory_module.add_to_memory(response)
        return response

    def cache_response(self, input_text, response):
        """Remember ``response`` as the answer to ``input_text``."""
        self.response_cache[input_text] = response

    def speak(self, text, conversation_history=None):
        """Print and speak ``text``; append it to the history when one is given."""
        if conversation_history is not None:
            conversation_history.append(text)
        print(text)
        self.speech_engine.say(text)
        self.speech_engine.runAndWait()

    def listen(self):
        """Record one utterance from the microphone and transcribe it.

        Returns:
            The recognised text, or ``""`` when the speech could not be
            understood or the recognition request failed.
        """
        recognizer = sr.Recognizer()

        with sr.Microphone() as source:
            print("Listening...")
            audio = recognizer.listen(source)

        try:
            user_input = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            print("Sorry, I could not understand your speech.")
        except sr.RequestError:
            print("Sorry, an error occurred while processing your request. Please try again.")
        else:
            print("You said:", user_input)
            return user_input

        return ""

    def converse(self, reset_memory=True):
        """Run the interactive loop until the user says "bye".

        Args:
            reset_memory: When true (the default, matching the original
                behaviour) the memory is cleared before the conversation
                starts. Pass ``False`` to keep memory that was loaded with
                ``load_memory_data`` beforehand.
        """
        if reset_memory:
            self.reset_conversation()
        self.speak("Hey, what's up bro? I'm {}".format(self.name))

        conversation_history = []

        while True:
            user_input = self.listen()
            if not user_input:
                continue  # nothing was recognised; listen again

            response = self.process_input(user_input, conversation_history)
            # Record the user's turn before the bot's reply so the history
            # (and therefore the next prompt) stays in chronological order.
            conversation_history.append(user_input)
            self.speak(response, conversation_history)

            # Greet by name when the input mentions a remembered entity.
            # The entities are a set, so pick the matching one instead of
            # indexing (the original `named_entities[0]` raised TypeError).
            mentioned = next(
                (entity
                 for entity in sorted(self.memory_module.get_named_entities())
                 if entity in user_input),
                None,
            )
            if mentioned is not None:
                self.speak(
                    "Nice to meet you, {}! I'm still {}".format(mentioned, self.name),
                    conversation_history,
                )

            # End of conversation: tolerate capitalisation and stray spaces
            # in the transcription.
            if user_input.strip().lower() == "bye":
                self.save_memory(self.DEFAULT_SAVE_PATH)
                break

    def save_memory(self, file_path):
        """Write the memory and the named entities to ``file_path`` as JSON."""
        data = {
            'memory': self.memory_module.memory,
            # sets are not JSON serialisable; sorted() also keeps the file stable
            'named_entities': sorted(self.memory_module.named_entities),
        }
        with open(file_path, 'w') as file:
            json.dump(data, file)

    def load_memory_data(self, memory_data):
        """Restore memory from a dict shaped like the one ``save_memory`` writes.

        Missing keys are treated as empty.
        """
        self.memory_module.memory = list(memory_data.get('memory', []))
        self.memory_module.named_entities = set(memory_data.get('named_entities', []))
|
262 |
+
|
263 |
+
|
264 |
+
class MemoryModule:
    """Stores preprocessed utterances and the named entities seen in them.

    ``memory`` is a list of lower-cased, lemmatized strings;
    ``named_entities`` is a set of entity strings extracted with the
    module-level spaCy pipeline ``nlp``.
    """

    def __init__(self):
        self.memory = []
        self.vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        self.tokenizer = nltk.tokenize.word_tokenize
        self.named_entities = set()  # Set to store named entities like names

    def get_named_entities(self):
        """Return the set of remembered named entities (not a copy)."""
        return self.named_entities

    def preprocess_text(self, text):
        """Lower-case and tokenize ``text``, drop non-alphanumeric tokens, lemmatize.

        Returns:
            The remaining lemmas joined with single spaces.
        """
        tokens = self.tokenizer(text.lower())
        lemmas = [self.lemmatizer.lemmatize(token) for token in tokens if token.isalnum()]
        return ' '.join(lemmas)

    def add_to_memory(self, text):
        """Store the preprocessed ``text`` and remember its first named entity."""
        self.memory.append(self.preprocess_text(text))

        # Entity extraction runs on the raw text, not the preprocessed form.
        named_entity = self.extract_named_entity(text)
        if named_entity:
            self.named_entities.add(named_entity)

    def extract_named_entity(self, text):
        """Return the first PERSON, ORG or GPE entity in ``text``, or ``None``."""
        doc = nlp(text)
        for entity in doc.ents:
            if entity.label_ in ['PERSON', 'ORG', 'GPE']:
                return entity.text
        return None

    def search_memory(self, keyword):
        """Return remembered entries similar to ``keyword``, best match first.

        Entries are scored by the dot product of their TF-IDF vectors with
        the keyword's vector; only scores above 0.5 are returned.

        Returns:
            A list of matching memory strings; empty when the memory is
            empty or nothing usable can be vectorized.
        """
        if not self.memory:
            return []

        preprocessed_keyword = self.preprocess_text(keyword)
        if not preprocessed_keyword:
            return []

        # The vectorizer must be fitted before it can transform anything; the
        # original called transform() on a never-fitted vectorizer, which
        # raises NotFittedError. Refit on the current memory for each search.
        try:
            vectorized_memory = self.vectorizer.fit_transform(self.memory)
        except ValueError:
            # Raised when the memory yields an empty vocabulary (for example
            # only stop words): nothing can match.
            return []
        vectorized_keyword = self.vectorizer.transform([preprocessed_keyword])

        # Both operands are SciPy sparse matrices; np.dot is not sparse-aware,
        # so use the sparse matrix product instead.
        similarity_scores = (vectorized_memory @ vectorized_keyword.T).toarray().ravel()
        sorted_indices = np.argsort(similarity_scores)[::-1]
        return [self.memory[i] for i in sorted_indices if similarity_scores[i] > 0.5]

    def reset_memory(self):
        """Forget all stored entries and named entities."""
        self.memory = []
        self.named_entities = set()
|
309 |
+
|
310 |
+
|
311 |
+
class SentimentAnalysisModule:
    """Small wrapper around ``SentimentIntensityAnalyzer``."""

    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()

    def analyze_sentiment(self, text):
        """Return the analyzer's polarity scores for ``text``."""
        return self.analyzer.polarity_scores(text)

    def get_sentiment_label(self, sentiment_scores):
        """Map the ``'compound'`` score to a label.

        Returns ``'positive'`` for scores of 0.05 and above, ``'negative'``
        for scores of -0.05 and below, and ``'neutral'`` in between.
        """
        compound = sentiment_scores['compound']
        if compound >= 0.05:
            return 'positive'
        if compound <= -0.05:
            return 'negative'
        return 'neutral'
|
327 |
+
|
328 |
+
|
329 |
+
# Default file the exit handler writes the memory snapshot to.
DEFAULT_EXIT_SAVE_PATH = 'C:\\Users\\withe\\PycharmProjects\\pythonProject2\\Chat_bot1\\save_memory.json'


# Define an exit handler function
def exit_handler(common_module, file_path=DEFAULT_EXIT_SAVE_PATH):
    """Save the bot's memory to disk and return the data as a dict.

    Args:
        common_module: Object exposing ``memory_module.memory``,
            ``memory_module.named_entities`` and ``save_memory(path)``.
        file_path: Destination file. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A dict with the keys ``'memory'`` and ``'named_entities'`` (the
        latter converted to a list).
    """
    memory_data = {
        'memory': common_module.memory_module.memory,
        'named_entities': list(common_module.memory_module.named_entities),
    }
    common_module.save_memory(file_path)
    print("Memory data saved successfully.")

    return memory_data
|
339 |
+
|
340 |
+
|
341 |
+
# Helper used before loading: does the memory file exist on disk?
def check_memory_file(file_path):
    """Return True when ``file_path`` names an existing regular file."""
    is_existing_file = os.path.isfile(file_path)
    return is_existing_file
|
344 |
+
|
345 |
+
|
346 |
+
if __name__ == "__main__":
    # Bot configuration
    model = 'EleutherAI/gpt-neo-1.3B'
    name = "Chat bot1"
    param1 = 'value1'
    param2 = 'value2'
    common_module = CommonModule(model, name, param1, param2)

    # Restore memory from a previous session when the file exists.
    # NOTE(review): converse() starts by calling reset_conversation(), which
    # clears the memory loaded here - confirm whether that is intended.
    memory_file_path = 'C:\\Users\\withe\\PycharmProjects\\pythonProject2\\Chat_bot1\\load_memory1.json'
    if check_memory_file(memory_file_path):
        with open(memory_file_path, 'r') as file:
            memory_data = json.load(file)
        common_module.load_memory_data(memory_data)

    # Save the memory on interpreter exit as well.
    atexit.register(exit_handler, common_module)

    # Blocks until the user ends the conversation.
    common_module.converse()

    # Generate one sample completion after the conversation.
    # The original called `Generator(prompt, ...)`, but `Generator` is the
    # abstract class imported from collections.abc, so that call raised
    # TypeError. Use the bot's own generation method instead.
    prompt = "EleutherAI has"
    generated_response = common_module.generate_gpt3_response(prompt, [])

    if generated_response:
        print(generated_response)

    common_module.save_memory(memory_file_path)
|