text-to-speech / app.py
pratikshahp's picture
Update app.py
795d45e verified
raw
history blame
996 Bytes
import gradio as gr
import torchaudio
from speechbrain.inference.vocoders import HIFIGAN

try:
    # Tacotron2's inference interface lives next to the vocoders under
    # speechbrain.inference in SpeechBrain 1.x.
    from speechbrain.inference.TTS import Tacotron2
except ImportError:
    # Original import path, kept as a fallback.
    from speechbrain.tts import Tacotron2
# Initialize Tacotron2 TTS model and HIFIGAN vocoder.
# The checkpoints are fetched from the Hugging Face Hub and cached in `savedir`.
# The cache directories are relative to the working directory: an absolute
# path such as "/tmpdir_tacotron2" points at the filesystem root, which an
# unprivileged process cannot create.
tts_model = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tacotron2",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_hifigan",
)
# Sample rate (Hz) passed to Gradio together with the waveform.
# NOTE(review): 22050 is the rate the LJSpeech model cards use when saving
# the vocoder output; update this if different checkpoints are loaded.
_OUTPUT_SAMPLE_RATE = 22050


# Function to generate speech
def generate_speech(text):
    """Convert ``text`` to speech for the Gradio audio output.

    Args:
        text: The string typed into the input text box.

    Returns:
        ``None`` when ``text`` is empty or only whitespace (nothing to
        synthesize); otherwise a ``(sample_rate, samples)`` tuple where
        ``samples`` is the vocoder output converted to a NumPy array.
    """
    # Skip the models entirely for blank input.
    if not text or not text.strip():
        return None

    # Encode text using Tacotron2. Only the first element of the returned
    # tuple (the mel spectrogram) is needed; indexing instead of unpacking
    # avoids a ValueError when the tuple also carries lengths and alignments.
    mel_output = tts_model.encode_text(text)[0]

    # Decode mel spectrogram to waveform using HIFIGAN vocoder
    waveform = hifi_gan.decode_batch(mel_output)

    # gr.Audio expects (sample_rate, numpy array), not a torch tensor.
    # squeeze() drops every size-1 dimension so a single utterance becomes a
    # flat array of samples.
    samples = waveform.squeeze().detach().cpu().numpy()
    return _OUTPUT_SAMPLE_RATE, samples
# Gradio UI: a single text box feeding generate_speech, with the result shown
# in an audio player.
_text_input = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to convert to speech...",
)
_speech_output = gr.Audio(label="Output Speech")

iface = gr.Interface(
    fn=generate_speech,
    inputs=_text_input,
    outputs=_speech_output,
)

# Start serving the interface.
iface.launch()