File size: 1,297 Bytes
4649eb9
2decfdd
4649eb9
 
26b99f4
4649eb9
 
 
2decfdd
4649eb9
 
 
 
 
f674c7b
4649eb9
 
 
034d725
4649eb9
 
f674c7b
4649eb9
 
 
 
 
 
 
 
034d725
4649eb9
34c39e0
4649eb9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import logging
import os
import struct
import wave

import torch
from speechbrain.pretrained import Tacotron2, HIFIGAN

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Sample rate used when writing the WAV header. 22050 Hz is the rate the
# LJSpeech model cards use in their examples -- confirm if the model sources
# below are ever changed.
SAMPLE_RATE_HZ = 22050


def write_pcm16_wav(path, samples, sample_rate):
    """Write mono float samples to *path* as a 16-bit PCM WAV file.

    Args:
        path: Destination file path.
        samples: Iterable of numbers, nominally in [-1.0, 1.0]. Values outside
            that range are clipped instead of wrapping around.
        sample_rate: Frames per second to record in the WAV header.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    pcm = [round(max(-1.0, min(1.0, float(s))) * 32767) for s in samples]
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
        wav_file.setframerate(sample_rate)
        # WAV sample data is little-endian regardless of host byte order.
        wav_file.writeframes(struct.pack(f"<{len(pcm)}h", *pcm))


try:
    # Load TTS model and vocoder
    logger.info("Loading Tacotron2 model for TTS...")
    tts_model = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts"
    )
    logger.info("Tacotron2 model loaded successfully!")

    logger.info("Loading HIFIGAN vocoder...")
    vocoder = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder"
    )
    logger.info("HIFIGAN vocoder loaded successfully!")

    # Define the text to synthesize
    text = "Hello, I am an AI voice assistant. How can I help you today?"

    # Run TTS and Vocoder to generate the audio
    mel_output, mel_length, alignment = tts_model.encode_text(text)
    # decode_batch returns a single waveform tensor (not a tuple), so it must
    # not be unpacked into two names.
    waveforms = vocoder.decode_batch(mel_output)

    # Save the generated waveform as an audio file
    audio_output_path = "output_audio.wav"
    logger.info("Saving audio to %s...", audio_output_path)
    # torch.save would pickle the tensor rather than produce playable audio,
    # so write a real WAV file instead. Only one sentence is synthesized, so
    # the batch is flattened to a single mono sample stream
    # (assumes batch size 1 -- revisit if several texts are encoded at once).
    samples = waveforms.detach().cpu().reshape(-1).tolist()
    write_pcm16_wav(audio_output_path, samples, SAMPLE_RATE_HZ)

    logger.info("Audio saved successfully to %s!", audio_output_path)

except Exception as e:
    # Top-level boundary of the script: log with the full traceback so that
    # failures (model download, inference, file I/O) can be diagnosed.
    logger.exception("Error during the TTS process: %s", e)