import streamlit as st
import pyttsx3
import speech_recognition as sr
from playsound import playsound
import random
import datetime
import webbrowser as wb
import tensorflow as tf
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from modules import commands_answers, load_agenda
# Initial settings
sns.set()
commands = commands_answers.commands
answers = commands_answers.answers
my_name = 'Bob'
# Paths for browser
chrome_path = r'open -a /Applications/Google\ Chrome.app %s'  # macOS (raw string keeps the escaped space)
# chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s' # Windows
# chrome_path = '/usr/bin/google-chrome %s' # Linux
# Load model
MODEL_TYPES = ['EMOTION']

def load_model_by_name(model_type):
    if model_type == MODEL_TYPES[0]:
        model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
        model_dict = ['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad']
        SAMPLE_RATE = 48000
    return model, model_dict, SAMPLE_RATE
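
# loaded_model is a tuple: (Keras model, list of emotion labels, expected sample rate)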
loaded_model = load_model_by_name('EMOTION')
# Functions
def search(sentence):
    # Open a Google search for the recognized sentence in the configured browser
    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)


def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
    results = []
    wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
    # Trim leading/trailing silence, then split the clip into one-second frames
    clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
    for i, data in enumerate(splitted_audio_data.numpy()):
        # 40 MFCCs per frame, averaged over time and reshaped to (1, 40, 1) for the model
        mfccs_features = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)[:, :, np.newaxis]
        predictions = loaded_model[0].predict(mfccs_scaled_features)
        if plot:
            plt.figure(figsize=(len(splitted_audio_data), 5))
            plt.barh(loaded_model[1], predictions[0])
            plt.tight_layout()
            st.pyplot(plt)
        predictions = predictions.argmax(axis=1)
        predictions = predictions.astype(int).flatten()
        predictions = loaded_model[1][predictions[0]]
        results.append(predictions)
    # Majority vote over all frames; returns [count, emotion_label]
    count_results = [[results.count(x), x] for x in set(results)]
    return max(count_results)
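
# Emotion-based music: open a YouTube playlist matching the detected mood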
def play_music_youtube(emotion):
    play = False
    if emotion == 'sad' or emotion == 'fear':
        wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
        play = True
    if emotion == 'nervous' or emotion == 'surprise':
        wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
        play = True
    return play

def speak(text):
    engine = pyttsx3.init()
    engine.setProperty('rate', 90)   # speech rate in words per minute
    engine.setProperty('volume', 1)  # volume, from 0 (mute) to 1 (max)
    engine.say(text)
    engine.runAndWait()
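
# Record from the microphone, save the clip for later emotion analysis,
# and return the sentence recognized via recognize_google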
def listen_microphone():
    microphone = sr.Recognizer()
    with sr.Microphone() as source:
        microphone.adjust_for_ambient_noise(source, duration=0.8)
        st.write('Listening...')
        audio = microphone.listen(source)
        with open('recordings/speech.wav', 'wb') as f:
            f.write(audio.get_wav_data())
    try:
        sentence = microphone.recognize_google(audio, language='en-US')
        st.write('You said: ' + sentence)
    except sr.UnknownValueError:
        sentence = ''
        st.write('Not understood')
    return sentence
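
# Classify the emotion in the most recent microphone recording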
def test_models():
    audio_source = 'recordings/speech.wav'
    prediction = predict_sound(audio_source, loaded_model[2], plot=False)
    return prediction
# Streamlit UI
st.title("Virtual Assistant")
st.write("This assistant can perform tasks based on your voice commands.")
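# The assistant only reacts when the wake word (my_name) appears in the recognized sentence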
if st.button("Activate Assistant"):
    result = listen_microphone()
    if my_name.lower() in result.lower():
        # Keep only the part of the sentence spoken after the wake word
        result = str(result.split(my_name + ' ')[1])
        result = result.lower()
        if result in commands[0]:    # list of functionalities
            speak('I will read my list of functionalities: ' + answers[0])
        elif result in commands[3]:  # current time
            speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))
        elif result in commands[4]:  # current date
            date = datetime.date.today().strftime('%d/%B/%Y').split('/')
            speak('Today is ' + date[0] + ' of ' + date[1])
        elif result in commands[1]:  # take a note
            speak('Please, tell me the activity!')
            result = listen_microphone()
            annotation = open('annotation.txt', mode='a+', encoding='utf-8')
            annotation.write(result + '\n')
            annotation.close()
            speak(''.join(random.sample(answers[1], k=1)))
            speak('Want me to read the notes?')
            result = listen_microphone()
            if result == 'yes' or result == 'sure':
                with open('annotation.txt') as file_source:
                    lines = file_source.readlines()
                    for line in lines:
                        speak(line)
            else:
                speak('Ok!')
        elif result in commands[2]:  # web search
            speak(''.join(random.sample(answers[2], k=1)))
            result = listen_microphone()
            search(result)
        elif result in commands[6]:  # today's agenda
            if load_agenda.load_agenda():
                speak('These are the events for today:')
                for i in range(len(load_agenda.load_agenda()[1])):
                    speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' scheduled for ' + str(load_agenda.load_agenda()[2][i]))
            else:
                speak('There are no events for today considering the current time!')
        elif result in commands[5]:  # emotion analysis
            st.write('Emotion analysis mode activated!')
            analyse = test_models()
            st.write(f'I heard {analyse[1]} in your voice!')
            play_music_youtube(analyse[1])
        elif result == 'turn off':
            speak(''.join(random.sample(answers[4], k=1)))
            st.write("Assistant turned off.")