Prathamesh1420 committed • Commit 99b56c0 • Parent(s): 1145213
Update app.py
app.py
CHANGED
@@ -1,41 +1,29 @@
+import streamlit as st
 import pyttsx3
 import speech_recognition as sr
 from playsound import playsound
 import random
 import datetime
-hour = datetime.datetime.now().strftime('%H:%M')
-#print(hour)
-date = datetime.date.today().strftime('%d/%B/%Y')
-#print(date)
-date = date.split('/')
-#print(date)
 import webbrowser as wb
 import tensorflow as tf
 import numpy as np
 import librosa
 import matplotlib.pyplot as plt
 import seaborn as sns
-sns.set()
 from modules import commands_answers, load_agenda
+
+# Initial settings
+sns.set()
 commands = commands_answers.commands
 answers = commands_answers.answers
-#print(commands)
-#print(answers)
-
 my_name = 'Bob'
 
-#
-chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
-# Windows
-#chrome_path = '
-# Linux
-# chrome_path = '/usr/bin/google-chrome %s'
-
-def search(sentence):
-    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
-
-#search('python programming language')
+# Paths for browser
+chrome_path = 'open -a /Applications/Google\ Chrome.app %s' # MacOS
+# chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s' # Windows
+# chrome_path = '/usr/bin/google-chrome %s' # Linux
 
+# Load model
 MODEL_TYPES = ['EMOTION']
 def load_model_by_name(model_type):
     if model_type == MODEL_TYPES[0]:
@@ -44,65 +32,36 @@ def load_model_by_name(model_type):
         SAMPLE_RATE = 48000
         return model, model_dict, SAMPLE_RATE
 
-
-#print(load_model_by_name('EMOTION')[0].summary())
+loaded_model = load_model_by_name('EMOTION')
 
-
-
+# Functions
+def search(sentence):
+    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
 
-def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
+def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
     results = []
-    wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
-    #print(wav_data.shape)
-    #print(sample_rate)
-    #print(wav_data)
-    # ' librosa ' -> 'librosa'
-    # https://librosa.org/doc/main/generated/librosa.effects.trim.html
+    wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
     clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
-    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
+    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
     for i, data in enumerate(splitted_audio_data.numpy()):
-
-
-
-        # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-        # PCA
-        mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
-        #print(mfccs_features.shape)
-        #print(mfccs_features)
-        mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
-        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
-        #print(mfccs_scaled_features.shape)
-        mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
-        # batch
-        #print(mfccs_scaled_features.shape)
+        mfccs_features = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
+        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
+        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)[:, :, np.newaxis]
         predictions = loaded_model[0].predict(mfccs_scaled_features)
-        #print(predictions)
-        #print(predictions.sum())
         if plot:
             plt.figure(figsize=(len(splitted_audio_data), 5))
             plt.barh(loaded_model[1], predictions[0])
             plt.tight_layout()
-
+            st.pyplot(plt)
 
-        predictions = predictions.argmax(axis=1)
-        #print(predictions)
+        predictions = predictions.argmax(axis=1)
         predictions = predictions.astype(int).flatten()
         predictions = loaded_model[1][predictions[0]]
         results.append(predictions)
-        #print(results)
-
-        result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
-        #print(result_str)
 
     count_results = [[results.count(x), x] for x in set(results)]
-    #print(count_results)
-
-    #print(max(count_results))
     return max(count_results)
 
-#playsound('sad.wav')
-#predict_sound('sad.wav', loaded_model[2], plot=False)
-
 def play_music_youtube(emotion):
     play = False
     if emotion == 'sad' or emotion == 'fear':
@@ -113,82 +72,62 @@ def play_music_youtube(emotion):
         play = True
     return play
 
-#play_music_youtube('sad')
-#play_music_youtube('surprise')
-#emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
-#print(emotion)
-#play_music_youtube(emotion[1])
-
 def speak(text):
     engine = pyttsx3.init()
-    engine.setProperty('rate', 90)
-    engine.setProperty('volume', 1)
+    engine.setProperty('rate', 90) # number of words per second
+    engine.setProperty('volume', 1) # min: 0, max: 1
     engine.say(text)
     engine.runAndWait()
 
-#speak("Testing the Assistant's Speech Synthesizer")
-
 def listen_microphone():
     microphone = sr.Recognizer()
     with sr.Microphone() as source:
         microphone.adjust_for_ambient_noise(source, duration=0.8)
-
+        st.write('Listening...')
         audio = microphone.listen(source)
         with open('recordings/speech.wav', 'wb') as f:
             f.write(audio.get_wav_data())
     try:
-        # https://pypi.org/project/SpeechRecognition/
        sentence = microphone.recognize_google(audio, language='en-US')
-
+        st.write('You said: ' + sentence)
     except sr.UnknownValueError:
         sentence = ''
-
+        st.write('Not understood')
     return sentence
 
-#playsound('recordings/speech.wav')
-#listen_microphone()
-
 def test_models():
-    audio_source = '
-    prediction = predict_sound(audio_source, loaded_model[2], plot=False)
+    audio_source = 'recordings/speech.wav'
+    prediction = predict_sound(audio_source, loaded_model[2], plot=False)
     return prediction
 
-#
-
-
-mode_control = False
-print('[INFO] Ready to start!')
-playsound('n1.mp3')
+# Streamlit UI
+st.title("Virtual Assistant")
+st.write("This assistant can perform tasks based on your voice commands.")
 
-
+if st.button("Activate Assistant"):
     result = listen_microphone()
 
-    if my_name in result:
+    if my_name.lower() in result.lower():
         result = str(result.split(my_name + ' ')[1])
         result = result.lower()
-        #print('The assistant has been activacted!')
-        #print('After processing: ', result)
 
         if result in commands[0]:
-            playsound('n2.mp3')
             speak('I will read my list of functionalities: ' + answers[0])
 
-
-            playsound('n2.mp3')
+        elif result in commands[3]:
             speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))
 
-
-
+        elif result in commands[4]:
+            date = datetime.date.today().strftime('%d/%B/%Y').split('/')
             speak('Today is ' + date[0] + ' of ' + date[1])
 
-
-            playsound('n2.mp3')
+        elif result in commands[1]:
             speak('Please, tell me the activity!')
             result = listen_microphone()
             annotation = open('annotation.txt', mode='a+', encoding='utf-8')
             annotation.write(result + '\n')
             annotation.close()
-            speak(''.join(random.sample(answers[1], k=1)))
+            speak(''.join(random.sample(answers[1], k=1)))
             speak('Want me to read the notes?')
             result = listen_microphone()
             if result == 'yes' or result == 'sure':
@@ -199,54 +138,25 @@ while (1):
             else:
                 speak('Ok!')
 
-
-
-            speak(''.join(random.sample(answers[2], k = 1)))
+        elif result in commands[2]:
+            speak(''.join(random.sample(answers[2], k=1)))
             result = listen_microphone()
             search(result)
 
-
-            playsound('n2.mp3')
+        elif result in commands[6]:
             if load_agenda.load_agenda():
                 speak('These are the events for today:')
                 for i in range(len(load_agenda.load_agenda()[1])):
                     speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
             else:
-                speak('There are no events for today considering the current time!')
+                speak('There are no events for today considering the current time!')
 
-
-
-    playsound('n1.mp3')
-    speak('Emotion analysis mode has been activacted!')
-
-    if mode_control:
+        elif result in commands[5]:
+            st.write('Emotion analysis mode activated!')
             analyse = test_models()
-
-
-        playing = play_music_youtube(analyse[1])
-
-    if result == 'turn off':
-        playsound('n2.mp3')
-        speak(''.join(random.sample(answers[4], k = 1)))
-        break
-else:
-    playsound('n3.mp3')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            st.write(f'I heard {analyse} in your voice!')
+            play_music_youtube(analyse[1])
 
+        elif result == 'turn off':
+            speak(''.join(random.sample(answers[4], k=1)))
+            st.write("Assistant turned off.")