Prathamesh1420 committed on
Commit 99b56c0
1 Parent(s): 1145213

Update app.py

Files changed (1)
  1. app.py +49 -139
app.py CHANGED
@@ -1,41 +1,29 @@
 
  import pyttsx3
  import speech_recognition as sr
  from playsound import playsound
  import random
  import datetime
- hour = datetime.datetime.now().strftime('%H:%M')
- #print(hour)
- date = datetime.date.today().strftime('%d/%B/%Y')
- #print(date)
- date = date.split('/')
- #print(date)
  import webbrowser as wb
  import tensorflow as tf
  import numpy as np
  import librosa
  import matplotlib.pyplot as plt
  import seaborn as sns
- sns.set()
  from modules import commands_answers, load_agenda
  commands = commands_answers.commands
  answers = commands_answers.answers
- #print(commands)
- #print(answers)
-
  my_name = 'Bob'

- # MacOS
- chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
- # Windows
- #chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'
- # Linux
- # chrome_path = '/usr/bin/google-chrome %s'
-
- def search(sentence):
-     wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
-
- #search('python programming language')

  MODEL_TYPES = ['EMOTION']
  def load_model_by_name(model_type):
      if model_type == MODEL_TYPES[0]:
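Review note: this hunk drops the module-level hour/date globals and moves the browser path and search() helper out of the import block (both reappear lower down in the new layout). For reference, a minimal standalone sketch of the same webbrowser-based search, assuming a Linux Chrome path (the macOS and Windows command lines from the diff work the same way):

    import webbrowser as wb

    # A '%s' placeholder makes webbrowser.get() treat the string as a literal browser command line.
    chrome_path = '/usr/bin/google-chrome %s'

    def search(sentence):
        wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)

    search('python programming language')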
@@ -44,65 +32,36 @@ def load_model_by_name(model_type):
          SAMPLE_RATE = 48000
      return model, model_dict, SAMPLE_RATE

- #print(load_model_by_name('EMOTION'))
- #print(load_model_by_name('EMOTION')[0].summary())

- model_type = 'EMOTION'
- loaded_model = load_model_by_name(model_type)

- def predict_sound(AUDIO, SAMPLE_RATE, plot = True):
      results = []
-     wav_data, sample_rate = librosa.load(AUDIO, sr = SAMPLE_RATE)
-     #print(wav_data.shape)
-     #print(sample_rate)
-     #print(wav_data)
-     # ' librosa ' -> 'librosa'
-     # https://librosa.org/doc/main/generated/librosa.effects.trim.html
      clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
-     splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end = True, pad_value = 0)
      for i, data in enumerate(splitted_audio_data.numpy()):
-         #print('Audio split: ', i)
-         #print(data.shape)
-         #print(data)
-         # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-         # PCA
-         mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
-         #print(mfccs_features.shape)
-         #print(mfccs_features)
-         mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
-         mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
-         #print(mfccs_scaled_features.shape)
-         mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
-         # batch
-         #print(mfccs_scaled_features.shape)
          predictions = loaded_model[0].predict(mfccs_scaled_features)
-         #print(predictions)
-         #print(predictions.sum())
          if plot:
              plt.figure(figsize=(len(splitted_audio_data), 5))
              plt.barh(loaded_model[1], predictions[0])
              plt.tight_layout()
-             plt.show()

-         predictions = predictions.argmax(axis = 1)
-         #print(predictions)
          predictions = predictions.astype(int).flatten()
          predictions = loaded_model[1][predictions[0]]
          results.append(predictions)
-         #print(results)
-
-         result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
-         #print(result_str)

      count_results = [[results.count(x), x] for x in set(results)]
-     #print(count_results)
-
-     #print(max(count_results))
      return max(count_results)

- #playsound('sad.wav')
- #predict_sound('sad.wav', loaded_model[2], plot=False)
-
  def play_music_youtube(emotion):
      play = False
      if emotion == 'sad' or emotion == 'fear':
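Review note: the kept lines above are the MFCC feature pipeline that feeds the emotion model: load at 48 kHz, trim silence, split into one-second frames, and collapse each frame to a 40-coefficient vector shaped (1, 40, 1). A minimal sketch of just that transformation, assuming a local file named sad.wav:

    import numpy as np
    import librosa
    import tensorflow as tf

    SAMPLE_RATE = 48000
    wav_data, sample_rate = librosa.load('sad.wav', sr=SAMPLE_RATE)

    # Trim leading/trailing silence, then cut into non-overlapping 1-second frames,
    # zero-padding the last one.
    clip, _ = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
    frames = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)

    for data in frames.numpy():
        mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)  # shape (40, n_windows)
        features = np.mean(mfccs.T, axis=0)                              # average over time -> (40,)
        features = features.reshape(1, -1)[:, :, np.newaxis]             # batch of one -> (1, 40, 1)
        print(features.shape)  # this is what loaded_model[0].predict() receives per frame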
@@ -113,82 +72,62 @@ def play_music_youtube(emotion):
          play = True
      return play

- #play_music_youtube('sad')
- #play_music_youtube('surprise')
- #emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
- #print(emotion)
- #play_music_youtube(emotion[1])
-
  def speak(text):
      engine = pyttsx3.init()
-     engine.setProperty('rate', 90) # number of words per second
-     engine.setProperty('volume', 1) # min: 0, max: 1
      engine.say(text)
      engine.runAndWait()

- #speak("Testing the Assistant's Speech Synthesizer")
-
  def listen_microphone():
      microphone = sr.Recognizer()
      with sr.Microphone() as source:
          microphone.adjust_for_ambient_noise(source, duration=0.8)
-         print('Listening: ')
          audio = microphone.listen(source)
      with open('recordings/speech.wav', 'wb') as f:
          f.write(audio.get_wav_data())
      try:
-         # https://pypi.org/project/SpeechRecognition/
          sentence = microphone.recognize_google(audio, language='en-US')
-         print('You said: ' + sentence)
      except sr.UnknownValueError:
          sentence = ''
-         print('Not understood')
      return sentence

- #playsound('recordings/speech.wav')
- #listen_microphone()
-
  def test_models():
-     audio_source = '/Users/jonesgranatyr/Documents/Ensino/IA Expert/Cursos/Virtual assistent/virtual_assistant/recordings/speech.wav'
-     prediction = predict_sound(audio_source, loaded_model[2], plot = False)
      return prediction

- #print(test_models())
-
- playing = False
- mode_control = False
- print('[INFO] Ready to start!')
- playsound('n1.mp3')

- while (1):
      result = listen_microphone()

-     if my_name in result:
          result = str(result.split(my_name + ' ')[1])
          result = result.lower()
-         #print('The assistant has been activacted!')
-         #print('After processing: ', result)

          if result in commands[0]:
-             playsound('n2.mp3')
              speak('I will read my list of functionalities: ' + answers[0])

-         if result in commands[3]:
-             playsound('n2.mp3')
              speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))

-         if result in commands[4]:
-             playsound('n2.mp3')
              speak('Today is ' + date[0] + ' of ' + date[1])

-         if result in commands[1]:
-             playsound('n2.mp3')
              speak('Please, tell me the activity!')
              result = listen_microphone()
              annotation = open('annotation.txt', mode='a+', encoding='utf-8')
              annotation.write(result + '\n')
              annotation.close()
-             speak(''.join(random.sample(answers[1], k = 1)))
              speak('Want me to read the notes?')
              result = listen_microphone()
              if result == 'yes' or result == 'sure':
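Review note: the speak() changes above touch the pyttsx3 rate and volume settings (they come back unchanged in the new layout below). A small standalone check of those two properties; note that pyttsx3's rate is expressed in words per minute, despite the 'per second' wording in the inline comment:

    import pyttsx3

    engine = pyttsx3.init()
    print(engine.getProperty('rate'), engine.getProperty('volume'))  # driver defaults

    engine.setProperty('rate', 90)     # speaking rate in words per minute
    engine.setProperty('volume', 1.0)  # volume, 0.0 to 1.0
    engine.say('Testing the assistant speech synthesizer')
    engine.runAndWait()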
@@ -199,54 +138,25 @@ while (1):
              else:
                  speak('Ok!')

-         if result in commands[2]:
-             playsound('n2.mp3')
-             speak(''.join(random.sample(answers[2], k = 1)))
              result = listen_microphone()
              search(result)

-         if result in commands[6]:
-             playsound('n2.mp3')
              if load_agenda.load_agenda():
                  speak('These are the events for today:')
                  for i in range(len(load_agenda.load_agenda()[1])):
                      speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
              else:
-                 speak('There are not events for today considering the current time!')

-         if result in commands[5]:
-             mode_control = True
-             playsound('n1.mp3')
-             speak('Emotion analysis mode has been activacted!')
-
-         if mode_control:
              analyse = test_models()
-             print(f'I heard {analyse} in your voice!')
-             if not playing:
-                 playing = play_music_youtube(analyse[1])
-
-         if result == 'turn off':
-             playsound('n2.mp3')
-             speak(''.join(random.sample(answers[4], k = 1)))
-             break
-     else:
-         playsound('n3.mp3')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
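Review note: the dispatch branches pick a random reply with ''.join(random.sample(answers[i], k=1)); random.choice does the same thing more directly. A self-contained comparison with made-up replies (the real strings live in modules/commands_answers.py and are not shown in this diff):

    import random

    replies = ['Noted!', 'Saved to your list.', 'Got it.']  # hypothetical examples
    print(''.join(random.sample(replies, k=1)))  # idiom used in app.py
    print(random.choice(replies))                # equivalent, clearer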
 
 
 
 
 
+ import streamlit as st
  import pyttsx3
  import speech_recognition as sr
  from playsound import playsound
  import random
  import datetime
  import webbrowser as wb
  import tensorflow as tf
  import numpy as np
  import librosa
  import matplotlib.pyplot as plt
  import seaborn as sns
  from modules import commands_answers, load_agenda
+
+ # Initial settings
+ sns.set()
  commands = commands_answers.commands
  answers = commands_answers.answers
  my_name = 'Bob'

+ # Paths for browser
+ chrome_path = 'open -a /Applications/Google\ Chrome.app %s' # MacOS
+ # chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s' # Windows
+ # chrome_path = '/usr/bin/google-chrome %s' # Linux

+ # Load model
  MODEL_TYPES = ['EMOTION']
  def load_model_by_name(model_type):
      if model_type == MODEL_TYPES[0]:

          SAMPLE_RATE = 48000
      return model, model_dict, SAMPLE_RATE

+ loaded_model = load_model_by_name('EMOTION')

+ # Functions
+ def search(sentence):
+     wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)

+ def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
      results = []
+     wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
      clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
+     splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
      for i, data in enumerate(splitted_audio_data.numpy()):
+         mfccs_features = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
+         mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
+         mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)[:, :, np.newaxis]
          predictions = loaded_model[0].predict(mfccs_scaled_features)
          if plot:
              plt.figure(figsize=(len(splitted_audio_data), 5))
              plt.barh(loaded_model[1], predictions[0])
              plt.tight_layout()
+             st.pyplot(plt)

+         predictions = predictions.argmax(axis=1)
          predictions = predictions.astype(int).flatten()
          predictions = loaded_model[1][predictions[0]]
          results.append(predictions)

      count_results = [[results.count(x), x] for x in set(results)]
      return max(count_results)

  def play_music_youtube(emotion):
      play = False
      if emotion == 'sad' or emotion == 'fear':

          play = True
      return play

  def speak(text):
      engine = pyttsx3.init()
+     engine.setProperty('rate', 90) # number of words per second
+     engine.setProperty('volume', 1) # min: 0, max: 1
      engine.say(text)
      engine.runAndWait()

  def listen_microphone():
      microphone = sr.Recognizer()
      with sr.Microphone() as source:
          microphone.adjust_for_ambient_noise(source, duration=0.8)
+         st.write('Listening...')
          audio = microphone.listen(source)
      with open('recordings/speech.wav', 'wb') as f:
          f.write(audio.get_wav_data())
      try:
          sentence = microphone.recognize_google(audio, language='en-US')
+         st.write('You said: ' + sentence)
      except sr.UnknownValueError:
          sentence = ''
+         st.write('Not understood')
      return sentence

  def test_models():
+     audio_source = 'recordings/speech.wav'
+     prediction = predict_sound(audio_source, loaded_model[2], plot=False)
      return prediction

+ # Streamlit UI
+ st.title("Virtual Assistant")
+ st.write("This assistant can perform tasks based on your voice commands.")

+ if st.button("Activate Assistant"):
      result = listen_microphone()

+     if my_name.lower() in result.lower():
          result = str(result.split(my_name + ' ')[1])
          result = result.lower()

          if result in commands[0]:
              speak('I will read my list of functionalities: ' + answers[0])

+         elif result in commands[3]:
              speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))

+         elif result in commands[4]:
+             date = datetime.date.today().strftime('%d/%B/%Y').split('/')
              speak('Today is ' + date[0] + ' of ' + date[1])

+         elif result in commands[1]:
              speak('Please, tell me the activity!')
              result = listen_microphone()
              annotation = open('annotation.txt', mode='a+', encoding='utf-8')
              annotation.write(result + '\n')
              annotation.close()
+             speak(''.join(random.sample(answers[1], k=1)))
              speak('Want me to read the notes?')
              result = listen_microphone()
              if result == 'yes' or result == 'sure':

              else:
                  speak('Ok!')

+         elif result in commands[2]:
+             speak(''.join(random.sample(answers[2], k=1)))
              result = listen_microphone()
              search(result)

+         elif result in commands[6]:
              if load_agenda.load_agenda():
                  speak('These are the events for today:')
                  for i in range(len(load_agenda.load_agenda()[1])):
                      speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
              else:
+                 speak('There are no events for today considering the current time!')

+         elif result in commands[5]:
+             st.write('Emotion analysis mode activated!')
              analyse = test_models()
+             st.write(f'I heard {analyse} in your voice!')
+             play_music_youtube(analyse[1])

+         elif result == 'turn off':
+             speak(''.join(random.sample(answers[4], k=1)))
+             st.write("Assistant turned off.")
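Review note: the new version replaces the always-on while loop (and its playsound cues) with a Streamlit page, so the script reruns from top to bottom on every interaction and each press of the "Activate Assistant" button performs exactly one listen-and-dispatch cycle. A minimal sketch of that interaction pattern, with a hard-coded transcript standing in for listen_microphone(); save it as sketch.py and launch it with: streamlit run sketch.py

    import streamlit as st

    st.title('Virtual Assistant')
    st.write('This assistant can perform tasks based on your voice commands.')

    if st.button('Activate Assistant'):
        # The real app blocks on the microphone here; this stub fakes one utterance.
        result = 'Bob what time is it'  # hypothetical transcript
        st.write('You said: ' + result)
        # ...the command dispatch would run here, once per button press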