DSatishchandra committed on
Commit
4649eb9
1 Parent(s): 13ac391

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -45
app.py CHANGED
@@ -1,53 +1,35 @@
"""Voice echo loop: listen on the microphone, then speak the recognized text.

Speech recognition uses the SpeechRecognition package (Google Web Speech
backend); synthesis uses SpeechBrain's Tacotron2 + HiFi-GAN LJSpeech models.
"""
import wave

import speechbrain as sb
from speechbrain.pretrained import Tacotron2, HIFIGAN
import speech_recognition as sr

# Sample rate used in SpeechBrain's published usage example for the LJSpeech
# Tacotron2 / HiFi-GAN checkpoints.
SAMPLE_RATE = 22050

# Initialize the TTS and Vocoder models using Speechbrain
try:
    tts = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
    vocoder = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
except Exception as e:
    print(f"Error loading the TTS model: {e}")
    # `exit` is injected by the `site` module and may be absent; SystemExit
    # is always available and yields the same exit status.
    raise SystemExit(1)


def _write_wav(samples, path, sample_rate=SAMPLE_RATE):
    """Write a 1-D float array in [-1, 1] to *path* as 16-bit mono PCM WAV."""
    # Clip before scaling so out-of-range samples saturate instead of wrapping.
    pcm = (samples.clip(-1.0, 1.0) * 32767.0).astype("<i2")
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes == 16-bit samples
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm.tobytes())


# Function to speak text using Speechbrain's Tacotron2 and HIFIGAN
def speak(text):
    """Synthesize *text* and save the audio to ``output.wav``.

    Errors are reported on stdout and swallowed so the interactive loop
    keeps running after a failed synthesis.
    """
    print(f"Speaking: {text}")
    try:
        # Generate the mel spectrogram from text
        mel_output, _mel_length, _alignment = tts.encode_text(text)

        # Use the vocoder to convert the mel spectrogram to audio.
        # decode_batch is documented to return a single waveform tensor
        # (batch, 1, time), so it must not be tuple-unpacked.
        waveform = vocoder.decode_batch(mel_output)

        # Save the generated waveform as a real WAV container; dumping the
        # raw float buffer with ndarray.tofile() produces a headerless file
        # that media players cannot open.
        samples = waveform.squeeze(1)[0].detach().cpu().numpy()
        _write_wav(samples, 'output.wav')

        print(f"Audio saved to 'output.wav'. You can play it using a media player.")
    except Exception as e:
        print(f"Error generating speech: {e}")


# Function to listen for user input using SpeechRecognition
def listen():
    """Record one utterance from the default microphone and transcribe it.

    Returns the recognized text, or ``None`` when the audio could not be
    understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening for command...")
        audio = recognizer.listen(source)

    try:
        command = recognizer.recognize_google(audio)
        print(f"Recognized: {command}")
        return command
    except sr.UnknownValueError:
        print("Sorry, I could not understand the audio.")
    except sr.RequestError:
        print("Could not request results; check your network connection.")

    return None


# Example interaction
if __name__ == "__main__":
    while True:
        command = listen()
        if command:
            speak(command)
 
"""Synthesize a fixed greeting with SpeechBrain and save it as a WAV file.

Loads the Tacotron2 (text -> mel spectrogram) and HiFi-GAN (mel -> waveform)
LJSpeech models, runs them on one sentence, and writes the result to
``output_audio.wav``. Any failure is logged; the script does not re-raise.
"""
import logging
import os
import wave

import torch
from speechbrain.pretrained import Tacotron2, HIFIGAN

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Sample rate used in SpeechBrain's published usage example for the LJSpeech
# Tacotron2 / HiFi-GAN checkpoints.
SAMPLE_RATE = 22050


def _write_wav(samples, path, sample_rate=SAMPLE_RATE):
    """Write a 1-D float tensor in [-1, 1] to *path* as 16-bit mono PCM WAV."""
    # Clamp before scaling so out-of-range samples saturate instead of wrapping.
    pcm = samples.detach().cpu().clamp(-1.0, 1.0).mul(32767.0).to(torch.int16)
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes == 16-bit samples
        wav_file.setframerate(sample_rate)
        # Force little-endian byte order, which the WAV format requires.
        wav_file.writeframes(pcm.numpy().astype("<i2").tobytes())


try:
    # Load TTS model and vocoder
    logger.info("Loading Tacotron2 model for TTS...")
    tts_model = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
    logger.info("Tacotron2 model loaded successfully!")

    logger.info("Loading HIFIGAN vocoder...")
    vocoder = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
    logger.info("HIFIGAN vocoder loaded successfully!")

    # Define the text to synthesize
    text = "Hello, I am an AI voice assistant. How can I help you today?"

    # Run TTS and Vocoder to generate the audio.
    # decode_batch is documented to return a single waveform tensor
    # (batch, 1, time), so it must not be tuple-unpacked.
    mel_output, _mel_length, _alignment = tts_model.encode_text(text)
    waveforms = vocoder.decode_batch(mel_output)

    # Save the generated waveform as an audio file. torch.save() would write
    # a pickled tensor under a .wav name, which no audio player can open.
    audio_output_path = "output_audio.wav"
    logger.info(f"Saving audio to {audio_output_path}...")
    _write_wav(waveforms.squeeze(1)[0], audio_output_path)

    logger.info(f"Audio saved successfully to {audio_output_path}!")

except Exception as e:
    # logger.exception keeps the same message but also records the traceback.
    logger.exception(f"Error during the TTS process: {str(e)}")