Prathamesh1420 committed
Commit
1145213
1 Parent(s): 123f110

Upload 9 files

Files changed (9)
  1. .DS_Store +0 -0
  2. agenda.xlsx +0 -0
  3. annotation.txt +5 -0
  4. app.py +252 -0
  5. n1.mp3 +0 -0
  6. n2.mp3 +0 -0
  7. n3.mp3 +0 -0
  8. sad.wav +0 -0
  9. testing.py +17 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
agenda.xlsx ADDED
Binary file (8.84 kB).
 
annotation.txt ADDED
@@ -0,0 +1,5 @@
+ go to the supermarket
+ the house
+ go to the pharmacy
+ go to bed
+ wash the car
app.py ADDED
@@ -0,0 +1,252 @@
+ import pyttsx3
+ import speech_recognition as sr
+ from playsound import playsound
+ import random
+ import datetime
+ hour = datetime.datetime.now().strftime('%H:%M')
+ #print(hour)
+ date = datetime.date.today().strftime('%d/%B/%Y')
+ #print(date)
+ date = date.split('/')
+ #print(date)
+ import webbrowser as wb
+ import tensorflow as tf
+ import numpy as np
+ import librosa
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ sns.set()
+ from modules import commands_answers, load_agenda
+ commands = commands_answers.commands
+ answers = commands_answers.answers
+ #print(commands)
+ #print(answers)
+
+ my_name = 'Bob'
+
+ # MacOS
+ chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
+ # Windows
+ #chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'
+ # Linux
+ # chrome_path = '/usr/bin/google-chrome %s'
+
+ def search(sentence):
+     wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
+
+ #search('python programming language')
+
+ MODEL_TYPES = ['EMOTION']
+ def load_model_by_name(model_type):
+     if model_type == MODEL_TYPES[0]:
+         model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
+         model_dict = list(['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad'])
+         SAMPLE_RATE = 48000
+     return model, model_dict, SAMPLE_RATE
+
+ #print(load_model_by_name('EMOTION'))
+ #print(load_model_by_name('EMOTION')[0].summary())
+
+ model_type = 'EMOTION'
+ loaded_model = load_model_by_name(model_type)
+
+ def predict_sound(AUDIO, SAMPLE_RATE, plot = True):
+     results = []
+     wav_data, sample_rate = librosa.load(AUDIO, sr = SAMPLE_RATE)
+     #print(wav_data.shape)
+     #print(sample_rate)
+     #print(wav_data)
+     # ' librosa ' -> 'librosa'
+     # https://librosa.org/doc/main/generated/librosa.effects.trim.html
+     clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
+     splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end = True, pad_value = 0)
+     for i, data in enumerate(splitted_audio_data.numpy()):
+         #print('Audio split: ', i)
+         #print(data.shape)
+         #print(data)
+         # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
+         # PCA
+         mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
+         #print(mfccs_features.shape)
+         #print(mfccs_features)
+         mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
+         mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
+         #print(mfccs_scaled_features.shape)
+         mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
+         # batch
+         #print(mfccs_scaled_features.shape)
+         predictions = loaded_model[0].predict(mfccs_scaled_features)
+         #print(predictions)
+         #print(predictions.sum())
+         if plot:
+             plt.figure(figsize=(len(splitted_audio_data), 5))
+             plt.barh(loaded_model[1], predictions[0])
+             plt.tight_layout()
+             plt.show()
+
+         predictions = predictions.argmax(axis = 1)
+         #print(predictions)
+         predictions = predictions.astype(int).flatten()
+         predictions = loaded_model[1][predictions[0]]
+         results.append(predictions)
+         #print(results)
+
+         result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
+         #print(result_str)
+
+     count_results = [[results.count(x), x] for x in set(results)]
+     #print(count_results)
+
+     #print(max(count_results))
+     return max(count_results)
+
+ #playsound('sad.wav')
+ #predict_sound('sad.wav', loaded_model[2], plot=False)
+
+ def play_music_youtube(emotion):
+     play = False
+     if emotion == 'sad' or emotion == 'fear':
+         wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
+         play = True
+     if emotion == 'nervous' or emotion == 'surprise':
+         wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
+         play = True
+     return play
+
+ #play_music_youtube('sad')
+ #play_music_youtube('surprise')
+ #emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
+ #print(emotion)
+ #play_music_youtube(emotion[1])
+
+ def speak(text):
+     engine = pyttsx3.init()
+     engine.setProperty('rate', 90) # speech rate in words per minute
+     engine.setProperty('volume', 1) # min: 0, max: 1
+     engine.say(text)
+     engine.runAndWait()
+
+ #speak("Testing the Assistant's Speech Synthesizer")
+
+ def listen_microphone():
+     microphone = sr.Recognizer()
+     with sr.Microphone() as source:
+         microphone.adjust_for_ambient_noise(source, duration=0.8)
+         print('Listening: ')
+         audio = microphone.listen(source)
+         with open('recordings/speech.wav', 'wb') as f:
+             f.write(audio.get_wav_data())
+     try:
+         # https://pypi.org/project/SpeechRecognition/
+         sentence = microphone.recognize_google(audio, language='en-US')
+         print('You said: ' + sentence)
+     except sr.UnknownValueError:
+         sentence = ''
+         print('Not understood')
+     return sentence
+
+ #playsound('recordings/speech.wav')
+ #listen_microphone()
+
+ def test_models():
+     audio_source = '/Users/jonesgranatyr/Documents/Ensino/IA Expert/Cursos/Virtual assistent/virtual_assistant/recordings/speech.wav'
+     prediction = predict_sound(audio_source, loaded_model[2], plot = False)
+     return prediction
+
+ #print(test_models())
+
+ playing = False
+ mode_control = False
+ print('[INFO] Ready to start!')
+ playsound('n1.mp3')
+
+ # Main loop: listen, check for the wake word, then dispatch the matching command.
+ while True:
+     result = listen_microphone()
+
+     if my_name in result:
+         result = str(result.split(my_name + ' ')[1])
+         result = result.lower()
+         #print('The assistant has been activated!')
+         #print('After processing: ', result)
+
+         if result in commands[0]:
+             playsound('n2.mp3')
+             speak('I will read my list of functionalities: ' + answers[0])
+
+         if result in commands[3]:
+             playsound('n2.mp3')
+             speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))
+
+         if result in commands[4]:
+             playsound('n2.mp3')
+             speak('Today is ' + date[0] + ' of ' + date[1])
+
+         if result in commands[1]:
+             playsound('n2.mp3')
+             speak('Please, tell me the activity!')
+             result = listen_microphone()
+             annotation = open('annotation.txt', mode='a+', encoding='utf-8')
+             annotation.write(result + '\n')
+             annotation.close()
+             speak(''.join(random.sample(answers[1], k = 1)))
+             speak('Want me to read the notes?')
+             result = listen_microphone()
+             if result == 'yes' or result == 'sure':
+                 with open('annotation.txt') as file_source:
+                     lines = file_source.readlines()
+                     for line in lines:
+                         speak(line)
+             else:
+                 speak('Ok!')
+
+         if result in commands[2]:
+             playsound('n2.mp3')
+             speak(''.join(random.sample(answers[2], k = 1)))
+             result = listen_microphone()
+             search(result)
+
+         if result in commands[6]:
+             playsound('n2.mp3')
+             if load_agenda.load_agenda():
+                 speak('These are the events for today:')
+                 for i in range(len(load_agenda.load_agenda()[1])):
+                     speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' scheduled for ' + str(load_agenda.load_agenda()[2][i]))
+             else:
+                 speak('There are no events for today considering the current time!')
+
+         if result in commands[5]:
+             mode_control = True
+             playsound('n1.mp3')
+             speak('Emotion analysis mode has been activated!')
+
+         if mode_control:
+             analyse = test_models()
+             print(f'I heard {analyse} in your voice!')
+             if not playing:
+                 playing = play_music_youtube(analyse[1])
+
+         if result == 'turn off':
+             playsound('n2.mp3')
+             speak(''.join(random.sample(answers[4], k = 1)))
+             break
+     else:
+         playsound('n3.mp3')
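Note: app.py imports commands_answers and load_agenda from a modules package that is not part of this upload (nor are models/speech_emotion_recognition.hdf5 or the recordings/ directory). The sketch below is a hypothetical reconstruction of what that package is assumed to look like, inferred only from how app.py indexes commands[0..6], answers[0..4] and the value returned by load_agenda.load_agenda(); every phrase, column layout and library choice here is an assumption, not the author's code.

# Hypothetical sketch of the missing 'modules' package (assumptions only).

# modules/commands_answers.py (assumed structure)
commands = [
    ['what can you do', 'list your functionalities'],  # 0: capabilities
    ['make a note', 'write this down'],                # 1: take a note
    ['search on google', 'search the web'],            # 2: web search
    ['what time is it', 'tell me the time'],           # 3: current time
    ['what day is it', 'tell me the date'],            # 4: current date
    ['emotion analysis', 'how do i sound'],            # 5: emotion analysis mode
    ['read my agenda', 'what are my appointments'],    # 6: agenda
]
answers = [
    'take notes, search the web, tell the time and date, read your agenda and analyse the emotion in your voice',  # 0: spoken as one string
    ['Noted!', 'Written down!'],                       # 1: after saving a note
    ['What would you like me to search for?'],         # 2: before a web search
    ['Okay!'],                                         # 3: not used by app.py
    ['Goodbye!', 'See you later!'],                    # 4: before turning off
]

# modules/load_agenda.py (assumed structure; assumes agenda.xlsx has three
# columns per row: task, date (datetime) and time (datetime.time))
import datetime
import openpyxl  # assumption: the spreadsheet is read with openpyxl

def load_agenda():
    dates, tasks, times = [], [], []
    sheet = openpyxl.load_workbook('agenda.xlsx').active
    now = datetime.datetime.now()
    for task, date, time in sheet.iter_rows(min_row=2, values_only=True):
        event_date = date.date() if isinstance(date, datetime.datetime) else date
        # keep only today's events that are still ahead of the current time
        if event_date == now.date() and time >= now.time():
            dates.append(str(event_date))
            tasks.append(str(task))
            times.append(str(time))
    if not tasks:
        return []  # falsy, so app.py falls through to its "no events" branch
    return dates, tasks, times  # app.py indexes these as [0], [1], [2]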
n1.mp3 ADDED
Binary file (60.1 kB).
 
n2.mp3 ADDED
Binary file (30.5 kB).
 
n3.mp3 ADDED
Binary file (17.9 kB).
 
sad.wav ADDED
Binary file (418 kB).
 
testing.py ADDED
@@ -0,0 +1,17 @@
+ #from playsound import playsound
+ #playsound('n3.mp3')
+
+ #import speech_recognition
+ #print('Speech Recognition:', speech_recognition.__version__)
+
+ #import pyttsx3
+ #pyttsx3.speak('We are now testing the library')
+
+ import tensorflow
+ print('TensorFlow: ', tensorflow.__version__)
+ import librosa
+ print('Librosa: ', librosa.__version__)
+ import matplotlib
+ print('Matplotlib:', matplotlib.__version__)
+ import seaborn
+ print('Seaborn: ', seaborn.__version__)
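testing.py only checks the ML and plotting libraries. A small optional sketch (not part of this commit) that reports the versions of app.py's remaining dependencies via importlib.metadata, which also works for packages that do not expose a __version__ attribute; the distribution names are assumptions matching the imports in app.py:

# Optional companion check for the other runtime dependencies of app.py.
from importlib.metadata import version, PackageNotFoundError

for package in ['pyttsx3', 'SpeechRecognition', 'playsound', 'numpy']:
    try:
        print(f'{package}:', version(package))
    except PackageNotFoundError:
        print(f'{package}: not installed')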