Prathamesh1420 committed on
Commit 99b56c0
1 Parent(s): 1145213

Update app.py

Files changed (1)
  1. app.py +49 -139
app.py CHANGED
@@ -1,41 +1,29 @@
 
  import pyttsx3
  import speech_recognition as sr
  from playsound import playsound
  import random
  import datetime
- hour = datetime.datetime.now().strftime('%H:%M')
- #print(hour)
- date = datetime.date.today().strftime('%d/%B/%Y')
- #print(date)
- date = date.split('/')
- #print(date)
  import webbrowser as wb
  import tensorflow as tf
  import numpy as np
  import librosa
  import matplotlib.pyplot as plt
  import seaborn as sns
- sns.set()
  from modules import commands_answers, load_agenda
  commands = commands_answers.commands
  answers = commands_answers.answers
- #print(commands)
- #print(answers)
-
  my_name = 'Bob'

- # MacOS
- chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
- # Windows
- #chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'
- # Linux
- # chrome_path = '/usr/bin/google-chrome %s'
-
- def search(sentence):
-     wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
-
- #search('python programming language')

  MODEL_TYPES = ['EMOTION']
  def load_model_by_name(model_type):
      if model_type == MODEL_TYPES[0]:
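Review note: this hunk drops the module-level hour/date globals and moves the browser path and search() helper out of the import block (both reappear lower down in the new layout). For reference, a minimal standalone sketch of the same webbrowser-based search, assuming a Linux Chrome path (the macOS and Windows command lines from the diff work the same way):

    import webbrowser as wb

    # A '%s' placeholder makes webbrowser.get() treat the string as a literal browser command line.
    chrome_path = '/usr/bin/google-chrome %s'

    def search(sentence):
        wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)

    search('python programming language')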
@@ -44,65 +32,36 @@ def load_model_by_name(model_type):
          SAMPLE_RATE = 48000
      return model, model_dict, SAMPLE_RATE

- #print(load_model_by_name('EMOTION'))
- #print(load_model_by_name('EMOTION')[0].summary())

- model_type = 'EMOTION'
- loaded_model = load_model_by_name(model_type)

- def predict_sound(AUDIO, SAMPLE_RATE, plot = True):
      results = []
-     wav_data, sample_rate = librosa.load(AUDIO, sr = SAMPLE_RATE)
-     #print(wav_data.shape)
-     #print(sample_rate)
-     #print(wav_data)
-     # ' librosa ' -> 'librosa'
-     # https://librosa.org/doc/main/generated/librosa.effects.trim.html
      clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
-     splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end = True, pad_value = 0)
      for i, data in enumerate(splitted_audio_data.numpy()):
-         #print('Audio split: ', i)
-         #print(data.shape)
-         #print(data)
-         # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-         # PCA
-         mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
-         #print(mfccs_features.shape)
-         #print(mfccs_features)
-         mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
-         mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
-         #print(mfccs_scaled_features.shape)
-         mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
-         # batch
-         #print(mfccs_scaled_features.shape)
          predictions = loaded_model[0].predict(mfccs_scaled_features)
-         #print(predictions)
-         #print(predictions.sum())
          if plot:
              plt.figure(figsize=(len(splitted_audio_data), 5))
              plt.barh(loaded_model[1], predictions[0])
              plt.tight_layout()
-             plt.show()

-         predictions = predictions.argmax(axis = 1)
-         #print(predictions)
          predictions = predictions.astype(int).flatten()
          predictions = loaded_model[1][predictions[0]]
          results.append(predictions)
-         #print(results)
-
-         result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
-         #print(result_str)

      count_results = [[results.count(x), x] for x in set(results)]
-     #print(count_results)
-
-     #print(max(count_results))
      return max(count_results)

- #playsound('sad.wav')
- #predict_sound('sad.wav', loaded_model[2], plot=False)
-
  def play_music_youtube(emotion):
      play = False
      if emotion == 'sad' or emotion == 'fear':
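Review note: the kept lines above are the MFCC feature pipeline that feeds the emotion model: load at 48 kHz, trim silence, split into one-second frames, and collapse each frame to a 40-coefficient vector shaped (1, 40, 1). A minimal sketch of just that transformation, assuming a local file named sad.wav:

    import numpy as np
    import librosa
    import tensorflow as tf

    SAMPLE_RATE = 48000
    wav_data, sample_rate = librosa.load('sad.wav', sr=SAMPLE_RATE)

    # Trim leading/trailing silence, then cut into non-overlapping 1-second frames,
    # zero-padding the last one.
    clip, _ = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
    frames = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)

    for data in frames.numpy():
        mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)  # shape (40, n_windows)
        features = np.mean(mfccs.T, axis=0)                              # average over time -> (40,)
        features = features.reshape(1, -1)[:, :, np.newaxis]             # batch of one -> (1, 40, 1)
        print(features.shape)  # this is what loaded_model[0].predict() receives per frame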
@@ -113,82 +72,62 @@ def play_music_youtube(emotion):
          play = True
      return play

- #play_music_youtube('sad')
- #play_music_youtube('surprise')
- #emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
- #print(emotion)
- #play_music_youtube(emotion[1])
-
  def speak(text):
      engine = pyttsx3.init()
-     engine.setProperty('rate', 90) # number of words per second
-     engine.setProperty('volume', 1) # min: 0, max: 1
      engine.say(text)
      engine.runAndWait()

- #speak("Testing the Assistant's Speech Synthesizer")
-
  def listen_microphone():
      microphone = sr.Recognizer()
      with sr.Microphone() as source:
          microphone.adjust_for_ambient_noise(source, duration=0.8)
-         print('Listening: ')
          audio = microphone.listen(source)
      with open('recordings/speech.wav', 'wb') as f:
          f.write(audio.get_wav_data())
      try:
-         # https://pypi.org/project/SpeechRecognition/
          sentence = microphone.recognize_google(audio, language='en-US')
-         print('You said: ' + sentence)
      except sr.UnknownValueError:
          sentence = ''
-         print('Not understood')
      return sentence

- #playsound('recordings/speech.wav')
- #listen_microphone()
-
  def test_models():
-     audio_source = '/Users/jonesgranatyr/Documents/Ensino/IA Expert/Cursos/Virtual assistent/virtual_assistant/recordings/speech.wav'
-     prediction = predict_sound(audio_source, loaded_model[2], plot = False)
      return prediction

- #print(test_models())
-
- playing = False
- mode_control = False
- print('[INFO] Ready to start!')
- playsound('n1.mp3')

- while (1):
      result = listen_microphone()

-     if my_name in result:
          result = str(result.split(my_name + ' ')[1])
          result = result.lower()
-         #print('The assistant has been activacted!')
-         #print('After processing: ', result)

          if result in commands[0]:
-             playsound('n2.mp3')
              speak('I will read my list of functionalities: ' + answers[0])

-         if result in commands[3]:
-             playsound('n2.mp3')
              speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))

-         if result in commands[4]:
-             playsound('n2.mp3')
              speak('Today is ' + date[0] + ' of ' + date[1])

-         if result in commands[1]:
-             playsound('n2.mp3')
              speak('Please, tell me the activity!')
              result = listen_microphone()
              annotation = open('annotation.txt', mode='a+', encoding='utf-8')
              annotation.write(result + '\n')
              annotation.close()
-             speak(''.join(random.sample(answers[1], k = 1)))
              speak('Want me to read the notes?')
              result = listen_microphone()
              if result == 'yes' or result == 'sure':
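Review note: the speak() changes above touch the pyttsx3 rate and volume settings (they come back unchanged in the new layout below). A small standalone check of those two properties; note that pyttsx3's rate is expressed in words per minute, despite the 'per second' wording in the inline comment:

    import pyttsx3

    engine = pyttsx3.init()
    print(engine.getProperty('rate'), engine.getProperty('volume'))  # driver defaults

    engine.setProperty('rate', 90)     # speaking rate in words per minute
    engine.setProperty('volume', 1.0)  # volume, 0.0 to 1.0
    engine.say('Testing the assistant speech synthesizer')
    engine.runAndWait()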
@@ -199,54 +138,25 @@ while (1):
              else:
                  speak('Ok!')

-         if result in commands[2]:
-             playsound('n2.mp3')
-             speak(''.join(random.sample(answers[2], k = 1)))
              result = listen_microphone()
              search(result)

-         if result in commands[6]:
-             playsound('n2.mp3')
              if load_agenda.load_agenda():
                  speak('These are the events for today:')
                  for i in range(len(load_agenda.load_agenda()[1])):
                      speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
              else:
-                 speak('There are not events for today considering the current time!')

-         if result in commands[5]:
-             mode_control = True
-             playsound('n1.mp3')
-             speak('Emotion analysis mode has been activacted!')
-
-         if mode_control:
              analyse = test_models()
-             print(f'I heard {analyse} in your voice!')
-             if not playing:
-                 playing = play_music_youtube(analyse[1])
-
-         if result == 'turn off':
-             playsound('n2.mp3')
-             speak(''.join(random.sample(answers[4], k = 1)))
-             break
-     else:
-         playsound('n3.mp3')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
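Review note: the dispatch branches pick a random reply with ''.join(random.sample(answers[i], k=1)); random.choice does the same thing more directly. A self-contained comparison with made-up replies (the real strings live in modules/commands_answers.py and are not shown in this diff):

    import random

    replies = ['Noted!', 'Saved to your list.', 'Got it.']  # hypothetical examples
    print(''.join(random.sample(replies, k=1)))  # idiom used in app.py
    print(random.choice(replies))                # equivalent, clearer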
 
 
 
 
 
+ import streamlit as st
  import pyttsx3
  import speech_recognition as sr
  from playsound import playsound
  import random
  import datetime
  import webbrowser as wb
  import tensorflow as tf
  import numpy as np
  import librosa
  import matplotlib.pyplot as plt
  import seaborn as sns
  from modules import commands_answers, load_agenda
+
+ # Initial settings
+ sns.set()
  commands = commands_answers.commands
  answers = commands_answers.answers
  my_name = 'Bob'

+ # Paths for browser
+ chrome_path = 'open -a /Applications/Google\ Chrome.app %s' # MacOS
+ # chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s' # Windows
+ # chrome_path = '/usr/bin/google-chrome %s' # Linux

+ # Load model
  MODEL_TYPES = ['EMOTION']
  def load_model_by_name(model_type):
      if model_type == MODEL_TYPES[0]:

          SAMPLE_RATE = 48000
      return model, model_dict, SAMPLE_RATE

+ loaded_model = load_model_by_name('EMOTION')

+ # Functions
+ def search(sentence):
+     wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)

+ def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
      results = []
+     wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
      clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
+     splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
      for i, data in enumerate(splitted_audio_data.numpy()):
+         mfccs_features = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
+         mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
+         mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)[:, :, np.newaxis]
          predictions = loaded_model[0].predict(mfccs_scaled_features)
          if plot:
              plt.figure(figsize=(len(splitted_audio_data), 5))
              plt.barh(loaded_model[1], predictions[0])
              plt.tight_layout()
+             st.pyplot(plt)

+         predictions = predictions.argmax(axis=1)
          predictions = predictions.astype(int).flatten()
          predictions = loaded_model[1][predictions[0]]
          results.append(predictions)

      count_results = [[results.count(x), x] for x in set(results)]
      return max(count_results)

  def play_music_youtube(emotion):
      play = False
      if emotion == 'sad' or emotion == 'fear':

          play = True
      return play

  def speak(text):
      engine = pyttsx3.init()
+     engine.setProperty('rate', 90) # number of words per second
+     engine.setProperty('volume', 1) # min: 0, max: 1
      engine.say(text)
      engine.runAndWait()

  def listen_microphone():
      microphone = sr.Recognizer()
      with sr.Microphone() as source:
          microphone.adjust_for_ambient_noise(source, duration=0.8)
+         st.write('Listening...')
          audio = microphone.listen(source)
      with open('recordings/speech.wav', 'wb') as f:
          f.write(audio.get_wav_data())
      try:
          sentence = microphone.recognize_google(audio, language='en-US')
+         st.write('You said: ' + sentence)
      except sr.UnknownValueError:
          sentence = ''
+         st.write('Not understood')
      return sentence

  def test_models():
+     audio_source = 'recordings/speech.wav'
+     prediction = predict_sound(audio_source, loaded_model[2], plot=False)
      return prediction

+ # Streamlit UI
+ st.title("Virtual Assistant")
+ st.write("This assistant can perform tasks based on your voice commands.")

+ if st.button("Activate Assistant"):
      result = listen_microphone()

+     if my_name.lower() in result.lower():
          result = str(result.split(my_name + ' ')[1])
          result = result.lower()

          if result in commands[0]:
              speak('I will read my list of functionalities: ' + answers[0])

+         elif result in commands[3]:
              speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))

+         elif result in commands[4]:
+             date = datetime.date.today().strftime('%d/%B/%Y').split('/')
              speak('Today is ' + date[0] + ' of ' + date[1])

+         elif result in commands[1]:
              speak('Please, tell me the activity!')
              result = listen_microphone()
              annotation = open('annotation.txt', mode='a+', encoding='utf-8')
              annotation.write(result + '\n')
              annotation.close()
+             speak(''.join(random.sample(answers[1], k=1)))
              speak('Want me to read the notes?')
              result = listen_microphone()
              if result == 'yes' or result == 'sure':

              else:
                  speak('Ok!')

+         elif result in commands[2]:
+             speak(''.join(random.sample(answers[2], k=1)))
              result = listen_microphone()
              search(result)

+         elif result in commands[6]:
              if load_agenda.load_agenda():
                  speak('These are the events for today:')
                  for i in range(len(load_agenda.load_agenda()[1])):
                      speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
              else:
+                 speak('There are no events for today considering the current time!')

+         elif result in commands[5]:
+             st.write('Emotion analysis mode activated!')
              analyse = test_models()
+             st.write(f'I heard {analyse} in your voice!')
+             play_music_youtube(analyse[1])

+         elif result == 'turn off':
+             speak(''.join(random.sample(answers[4], k=1)))
+             st.write("Assistant turned off.")
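Review note: the new version replaces the always-on while loop (and its playsound cues) with a Streamlit page, so the script reruns from top to bottom on every interaction and each press of the "Activate Assistant" button performs exactly one listen-and-dispatch cycle. A minimal sketch of that interaction pattern, with a hard-coded transcript standing in for listen_microphone(); save it as sketch.py and launch it with: streamlit run sketch.py

    import streamlit as st

    st.title('Virtual Assistant')
    st.write('This assistant can perform tasks based on your voice commands.')

    if st.button('Activate Assistant'):
        # The real app blocks on the microphone here; this stub fakes one utterance.
        result = 'Bob what time is it'  # hypothetical transcript
        st.write('You said: ' + result)
        # ...the command dispatch would run here, once per button press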