razerblade072611 committed on
Commit
3ec53db
·
1 Parent(s): 18db904

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +371 -0
README.md ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import atexit
2
+ import json
3
+ import os
4
+ import sys
5
+ from collections.abc import Generator
6
+
7
+ import nltk
8
+ import numpy as np
9
+ import pyttsx3
10
+ import spacy
11
+ import spacy as nlp
12
+ import speech_recognition as sr
13
+ import transformers
14
+ from nltk.corpus import stopwords
15
+ from nltk.stem import WordNetLemmatizer
16
+ from sklearn.feature_extraction.text import TfidfVectorizer
17
+ from transformers import AutoTokenizer
18
+ from transformers import GPTNeoForCausalLM
19
+ from transformers import pipeline
20
+
21
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
22
+
23
+ import torch
24
# ---------------------------------------------------------------------------
# Module-level setup: version diagnostics, NLTK resources, the spaCy model,
# the torch device and the text-to-speech engine shared by the classes below.
# ---------------------------------------------------------------------------
print(torch.__version__)

model_path = spacy.util.get_package_path('en_core_web_sm')
print(model_path)

print("transformers version:", transformers.__version__)
print("spacy version:", spacy.__version__)
print("nltk version:", nltk.__version__)

# NOTE(review): machine-specific path; presumably needed for local imports.
sys.path.append(r"C:\Users\withe\PycharmProjects\no hope2\Gpt-Neo1")

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the spaCy model (this rebinds the name `nlp` from the import section
# to the loaded language pipeline, which MemoryModule uses for NER).
nlp = spacy.load('en_core_web_sm')

# Define a text input
text = "Example text to process"

# Process the text using the nlp object
doc = nlp(text)

# Extract named entities (people, organisations, places) from the text
named_entities = [
    entity.text
    for entity in doc.ents
    if entity.label_ in ['PERSON', 'ORG', 'GPE']
]

# Print the extracted named entities
print(named_entities)

# Location of the environment file that holds the API key.
# NOTE(review): nothing in this file actually loads it; the previous bare
# `(dotenv_path)` expression was a no-op and has been dropped.
dotenv_path = './API_KEY.env'

# Pick the compute device. The second GPU is preferred (as before), but a
# single-GPU host falls back to cuda:0 instead of failing later when the
# model is moved to a device ordinal that does not exist.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

# Initialize the speech engine
speech_engine = pyttsx3.init()

# Get the list of available voices
voices = speech_engine.getProperty('voices')
for voice in voices:
    print(voice.id, voice.name)

# Set the desired voice.
# Raw string: the registry path contains backslashes that are not valid
# escape sequences; the runtime value is unchanged.
# NOTE(review): this id looks like a pasted "<id> <name>" pair - confirm it
# is a real voice id; the name-based lookup below is the reliable path.
voice_id = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0 Microsoft Hazel Desktop - English (Great Britain)"
speech_engine.setProperty('voice', voice_id)

voices = speech_engine.getProperty('voices')
for voice in voices:
    print(voice.id, voice.name)

# Set the desired voice
desired_voice = "Microsoft Hazel Desktop - English (Great Britain)"
voice_id = None

# Find the voice ID based on the desired voice name
for voice in voices:
    if desired_voice in voice.name:
        voice_id = voice.id
        break

if voice_id:
    speech_engine.setProperty('voice', voice_id)
    print("Desired voice set successfully.")
else:
    print("Desired voice not found.")
103
+
104
+
105
class CommonModule:
    """Voice chat bot that ties together generation, memory and speech.

    The instance owns a tokenizer and a GPT-Neo model, a ``MemoryModule``
    for remembered text and named entities, a ``SentimentAnalysisModule``,
    and the module-level text-to-speech engine.
    """

    def __init__(self, model, name, param1, param2):
        """Load the models and initialise conversation state.

        Args:
            model: Model identifier passed to ``AutoTokenizer.from_pretrained``.
            name: Name the bot uses when introducing itself.
            param1: Stored on the instance; not used elsewhere in this class.
            param2: Stored on the instance; not used elsewhere in this class.
        """
        # Initialize the instance variables using the provided arguments
        self.model = model
        self.name = name
        self.param1 = param1
        self.param2 = param2
        self.tokenizer = AutoTokenizer.from_pretrained(model)  # Load the tokenizer
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.gpt3_model = GPTNeoForCausalLM.from_pretrained('EleutherAI/gpt-neo-1.3B')
        self.gpt3_model.to(device)  # Move model to the device (GPU or CPU)
        self.memory_module = MemoryModule()
        self.sentiment_module = SentimentAnalysisModule()
        self.speech_engine = speech_engine  # Assign the initialized speech engine

        self.max_sequence_length = 10  # Decrease the value for faster response
        self.num_beams = 4  # Reduce the value for faster response
        self.no_repeat_ngram_size = 2
        self.temperature = 0.3
        self.response_cache = {}  # Cache for storing frequently occurring responses
        self._generator = None  # Text-generation pipeline, built on first use

    def reset_conversation(self):
        """Forget everything stored in the memory module."""
        self.memory_module.reset_memory()

    def retrieve_cached_response(self, input_text):
        """Return a cached response for ``input_text``, or ``None``.

        Responses are cached under the exact input text (see
        ``cache_response``), so that key is tried first. The older
        entity-keyed lookup is kept as a fallback for compatibility.
        """
        if input_text in self.response_cache:
            return self.response_cache[input_text]

        lowered_input = input_text.lower()
        for entity in self.memory_module.get_named_entities():
            if entity.lower() in lowered_input:
                return self.response_cache.get(entity)
        return None

    def generate_gpt3_response(self, input_text, conversation_history, temperature=0.3):
        """Generate a reply for ``input_text`` given the prior conversation.

        Args:
            input_text: Latest user utterance.
            conversation_history: Earlier utterances, oldest first.
            temperature: Sampling temperature passed to the generator.

        Returns:
            The stripped generated text, or ``""`` when nothing was produced.
        """
        prompt = '\n'.join(conversation_history) + '\n' + input_text + '\n'

        # Building the pipeline loads the whole model, so do it once and
        # reuse it instead of reloading on every single reply.
        generator = getattr(self, '_generator', None)
        if generator is None:
            generator = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')
            self._generator = generator

        output = generator(
            prompt,
            do_sample=True,
            min_length=50,
            temperature=temperature,
            num_return_sequences=1,
        )

        if output:
            return output[0]['generated_text'].strip()
        return ""

    def process_input(self, input_text, conversation_history):
        """Produce the bot's response to one user utterance.

        Order of precedence: greeting for a known named entity, canned reply
        to anything containing ``?``, memory search when the text contains
        ``search``, then a cached or freshly generated response.
        """
        for entity in list(self.memory_module.get_named_entities()):
            if entity in input_text:
                response = "Nice to meet you again, {}!".format(entity)
                self.memory_module.add_to_memory(response)
                return response

        # Check if the input contains a question
        if '?' in input_text:
            return "You're making me angry, you wouldn't like me when I'm angry."

        # Check if the input contains a keyword for memory search
        if 'search' in input_text.lower():
            keyword = input_text.lower().split('search ')[-1]
            matches = self.memory_module.search_memory(keyword)
            if matches:
                return "I found some related information in the memory:\n" + '\n'.join(matches)
            return "Sorry, I couldn't find any relevant information in the memory."

        # Retrieve the cached response, generating (and caching) on a miss
        response = self.retrieve_cached_response(input_text)
        if response is None:
            response = self.generate_gpt3_response(input_text, conversation_history)
            self.cache_response(input_text, response)

        # A second "known entity" check used to live here; it could never
        # fire because the loop at the top already returned for any match
        # (and it indexed a set, which is not subscriptable).
        self.memory_module.add_to_memory(response)
        return response

    def cache_response(self, input_text, response):
        """Remember ``response`` under the exact ``input_text``."""
        self.response_cache[input_text] = response

    def speak(self, text, conversation_history=None):
        """Print and speak ``text``, appending it to ``conversation_history``.

        When no history list is supplied a throwaway one is used, so the
        caller's state is untouched.
        """
        if conversation_history is None:
            conversation_history = []
        conversation_history.append(text)
        print(text)
        self.speech_engine.say(text)
        self.speech_engine.runAndWait()

    def listen(self):
        """Capture one utterance from the microphone.

        Returns:
            The recognised text, or ``""`` if recognition failed.
        """
        recognizer = sr.Recognizer()

        with sr.Microphone() as source:
            print("Listening...")
            audio = recognizer.listen(source)

        try:
            user_input = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            print("Sorry, I could not understand your speech.")
        except sr.RequestError:
            print("Sorry, an error occurred while processing your request. Please try again.")
        else:
            print("You said:", user_input)
            return user_input

        return ""

    def converse(self):
        """Run the listen/respond loop until the user says "bye"."""
        self.reset_conversation()
        self.speak("Hey, what's up bro? I'm {}".format(self.name))

        conversation_history = []

        while True:
            user_input = self.listen()
            if not user_input:
                continue

            response = self.process_input(user_input, conversation_history)
            # Record the user's utterance before the reply so the history
            # stays in chronological order for the next prompt.
            conversation_history.append(user_input)
            self.speak(response, conversation_history)

            # Greet by name if the input mentions a known named entity.
            # Named entities are a set, so pick the matching one rather than
            # indexing (which raised TypeError).
            matched_entity = next(
                (entity for entity in self.memory_module.get_named_entities()
                 if entity in user_input),
                None,
            )
            if matched_entity is not None:
                self.speak(
                    "Nice to meet you, {}! I'm still {}".format(matched_entity, self.name),
                    conversation_history,
                )

            # Check if the conversation is over (you can define your own condition here)
            if user_input.strip().lower() == "bye":
                self.save_memory(r'C:\Users\withe\PycharmProjects\no hope\Chat_Bot_Main\save_memory.json')
                break

    def save_memory(self, file_path):
        """Write the memory and named entities to ``file_path`` as JSON."""
        data = {
            'memory': self.memory_module.memory,
            'named_entities': list(self.memory_module.named_entities)  # Convert set to list
        }
        with open(file_path, 'w') as file:
            json.dump(data, file)

    def load_memory_data(self, memory_data):
        """Restore memory state from a dict produced by ``save_memory``.

        Missing keys fall back to empty containers so a partial file does
        not abort start-up.
        """
        self.memory_module.memory = memory_data.get('memory', [])
        self.memory_module.named_entities = set(memory_data.get('named_entities', []))
262
+
263
+
264
class MemoryModule:
    """Stores preprocessed conversation text and the named entities seen."""

    def __init__(self):
        self.memory = []  # Preprocessed texts, in insertion order
        self.vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        self.tokenizer = nltk.tokenize.word_tokenize
        self.named_entities = set()  # Set to store named entities like names

    def get_named_entities(self):
        """Return the set of named entities collected so far."""
        return self.named_entities

    def preprocess_text(self, text):
        """Lower-case, tokenize and lemmatize ``text``, keeping alphanumeric tokens."""
        tokens = self.tokenizer(text.lower())
        lemmas = [self.lemmatizer.lemmatize(token) for token in tokens if token.isalnum()]
        return ' '.join(lemmas)

    def add_to_memory(self, text):
        """Store the preprocessed ``text`` and record its first named entity, if any."""
        self.memory.append(self.preprocess_text(text))

        # Update named entities if any
        named_entity = self.extract_named_entity(text)
        if named_entity:
            self.named_entities.add(named_entity)

    def extract_named_entity(self, text):
        """Return the first PERSON/ORG/GPE entity spaCy finds in ``text``, else ``None``."""
        doc = nlp(text)
        for entity in doc.ents:
            if entity.label_ in ['PERSON', 'ORG', 'GPE']:
                return entity.text
        return None

    def search_memory(self, keyword):
        """Return remembered texts whose TF-IDF similarity to ``keyword`` exceeds 0.5.

        Results are ordered from most to least similar. An empty memory, or
        a memory with no usable vocabulary, yields an empty list.
        """
        if not self.memory:
            return []

        preprocessed_keyword = self.preprocess_text(keyword)

        # The vectorizer must be fitted before transform() can be used;
        # memory changes between searches, so fit on its current contents.
        try:
            vectorized_memory = self.vectorizer.fit_transform(self.memory)
        except ValueError:
            # Raised when the memory yields an empty vocabulary
            # (e.g. it only contains stop words).
            return []

        vectorized_keyword = self.vectorizer.transform([preprocessed_keyword])
        # `@` is the sparse matrix product; np.dot is not reliable on sparse operands.
        similarity_scores = (vectorized_memory @ vectorized_keyword.T).toarray().ravel()
        sorted_indices = np.argsort(similarity_scores)[::-1]
        return [self.memory[i] for i in sorted_indices if similarity_scores[i] > 0.5]

    def reset_memory(self):
        """Clear both the stored texts and the named entities."""
        self.memory = []
        self.named_entities = set()
309
+
310
+
311
class SentimentAnalysisModule:
    """Small wrapper around VADER's ``SentimentIntensityAnalyzer``."""

    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()

    def analyze_sentiment(self, text):
        """Return the analyzer's polarity scores for ``text``."""
        return self.analyzer.polarity_scores(text)

    def get_sentiment_label(self, sentiment_scores):
        """Map the ``'compound'`` score to 'positive', 'negative' or 'neutral'.

        Scores of at least 0.05 are positive, at most -0.05 negative, and
        anything in between is neutral.
        """
        compound = sentiment_scores['compound']
        if compound <= -0.05:
            return 'negative'
        if compound >= 0.05:
            return 'positive'
        return 'neutral'
327
+
328
+
329
+ # Define an exit handler function
330
def exit_handler(common_module):
    """Persist the bot's memory to disk and return a snapshot of it.

    Args:
        common_module: Object exposing ``memory_module`` (with ``memory`` and
            ``named_entities``) and a ``save_memory(path)`` method.

    Returns:
        A dict with the keys ``'memory'`` and ``'named_entities'``; the
        entities are converted to a list.
    """
    store = common_module.memory_module
    snapshot = {
        'memory': store.memory,
        'named_entities': list(store.named_entities),
    }

    target = 'C:\\Users\\withe\\PycharmProjects\\pythonProject2\\Chat_bot1\\save_memory.json'
    common_module.save_memory(target)
    print("Memory data saved successfully.")

    return snapshot
339
+
340
+
341
+ # Define a method to check if the load_memory.json file exists
342
def check_memory_file(file_path):
    """Report whether ``file_path`` names an existing regular file."""
    is_regular_file = os.path.isfile(file_path)
    return is_regular_file
344
+
345
+
346
if __name__ == "__main__":
    # Script entry point: build the bot, restore any saved memory, run the
    # conversation loop, then produce one sample generation and save memory.
    model = 'EleutherAI/gpt-neo-1.3B'
    name = "Chat bot1"
    param1 = 'value1'
    param2 = 'value2'
    common_module = CommonModule(model, name, param1, param2)

    memory_file_path = 'C:\\Users\\withe\\PycharmProjects\\pythonProject2\\Chat_bot1\\load_memory1.json'
    if check_memory_file(memory_file_path):
        with open(memory_file_path, 'r') as file:
            memory_data = json.load(file)
        common_module.load_memory_data(memory_data)

    # Make sure memory is written out however the process ends
    atexit.register(exit_handler, common_module)

    common_module.converse()

    # Generate response using the generator.
    # The previous code called collections.abc.Generator, an abstract class
    # that cannot be called this way; a text-generation pipeline is what the
    # call signature and result handling expect.
    prompt = "EleutherAI has"
    generator = pipeline('text-generation', model=model)
    generated_text = generator(prompt, do_sample=True, min_length=50)

    if generated_text:
        generated_response = generated_text[0]['generated_text'].strip()
        print(generated_response)

    common_module.save_memory(memory_file_path)