Spaces:

DSatishchandra
/

AIVoiceOrder

Build error

App Files Files Community

DSatishchandra committed on 2 days ago

Commit

4649eb9

•

1 Parent(s): 13ac391

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -45

app.py CHANGED Viewed

@@ -1,53 +1,35 @@
-import speechbrain as sb
 from speechbrain.pretrained import Tacotron2, HIFIGAN
-import speech_recognition as sr
-# Initialize the TTS and Vocoder models using Speechbrain
-try:
-    tts = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
-    vocoder = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
-except Exception as e:
-    print(f"Error loading the TTS model: {e}")
-    exit(1)
-# Function to speak text using Speechbrain's Tacotron2 and HIFIGAN
-def speak(text):
-    print(f"Speaking: {text}")
-    try:
-        # Generate the mel spectrogram from text
-        mel_output, mel_length, alignment = tts.encode_text(text)
-        # Use the vocoder to convert the mel spectrogram to audio
-        waveform, _ = vocoder.decode_batch(mel_output)
-        # Save the generated waveform to a .wav file
-        waveform.squeeze(1).cpu().numpy().tofile('output.wav')
-        print(f"Audio saved to 'output.wav'. You can play it using a media player.")
-    except Exception as e:
-        print(f"Error generating speech: {e}")
-# Function to listen for user input using SpeechRecognition
-def listen():
-    recognizer = sr.Recognizer()
-    with sr.Microphone() as source:
-        print("Listening for command...")
-        audio = recognizer.listen(source)
-    try:
-        command = recognizer.recognize_google(audio)
-        print(f"Recognized: {command}")
-        return command
-    except sr.UnknownValueError:
-        print("Sorry, I could not understand the audio.")
-    except sr.RequestError:
-        print("Could not request results; check your network connection.")
-    return None
-# Example interaction
-if __name__ == "__main__":
-    while True:
-        command = listen()
-        if command:
-            speak(command)

+import logging
 from speechbrain.pretrained import Tacotron2, HIFIGAN
+import torch
+import os
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+try:
+    # Load TTS model and vocoder
+    logger.info("Loading Tacotron2 model for TTS...")
+    tts_model = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
+    logger.info("Tacotron2 model loaded successfully!")
+    logger.info("Loading HIFIGAN vocoder...")
+    vocoder = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
+    logger.info("HIFIGAN vocoder loaded successfully!")
+    # Define the text to synthesize
+    text = "Hello, I am an AI voice assistant. How can I help you today?"
+    # Run TTS and Vocoder to generate the audio
+    mel_output, mel_length, alignment = tts_model.encode_text(text)
+    waveforms, _ = vocoder.decode_batch(mel_output)
+    # Save the generated waveform as an audio file
+    audio_output_path = "output_audio.wav"
+    logger.info(f"Saving audio to {audio_output_path}...")
+    torch.save(waveforms.squeeze(1), audio_output_path)
+    logger.info(f"Audio saved successfully to {audio_output_path}!")
+except Exception as e:
+    logger.error(f"Error during the TTS process: {str(e)}")