# Spaces: Running
# Import required libraries
import os

import numpy as np
import streamlit as st
from scipy.io.wavfile import write
from speechbrain.pretrained import Tacotron2, HIFIGAN
# Load TTS and vocoder models.
# Streamlit re-executes this script on every widget interaction, so the loader
# is wrapped in st.cache_resource: the models are built once and the same
# objects are handed back on later reruns instead of being reloaded.
@st.cache_resource
def load_models():
    """Load the Tacotron2 text-to-mel model and the HiFi-GAN vocoder.

    Both models are created with ``from_hparams`` from the
    ``speechbrain/tts-tacotron2-ljspeech`` and ``speechbrain/tts-hifigan-ljspeech``
    sources, using ``tmp_tts`` and ``tmp_vocoder`` as their save directories.

    Returns:
        tuple: ``(tacotron2, hifi_gan)``.
    """
    tacotron2 = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmp_tts",
    )
    hifi_gan = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmp_vocoder",
    )
    return tacotron2, hifi_gan


# Module-level handles used by text_to_speech().
tacotron2, hifi_gan = load_models()
# Text-to-Speech function
def _to_pcm16(waveform):
    """Peak-normalize a float waveform and scale it to int16 PCM samples."""
    peak = np.max(np.abs(waveform))
    # A silent (all-zero) waveform has a peak of 0; dividing by it would turn
    # every sample into NaN, so normalization is skipped in that case.
    if peak > 0:
        waveform = waveform / peak
    return (waveform * 32767).astype(np.int16)


def text_to_speech(text, output_path="output.wav"):
    """Synthesize speech for ``text`` and save it as a 16-bit WAV file.

    The text is turned into a mel spectrogram with the module-level
    ``tacotron2`` model, decoded to a waveform with ``hifi_gan``, normalized
    to the range [-1, 1], and written to ``output_path``.

    Args:
        text: The text to synthesize.
        output_path: Destination of the WAV file. Defaults to ``"output.wav"``.

    Returns:
        ``output_path`` on success, or ``None`` if any step raised; in that
        case the exception is reported to the user through ``st.error``.
    """
    try:
        # Generate mel spectrogram (the other two return values are unused).
        mel_output, _, _ = tacotron2.encode_text(text)
        # Generate waveform from mel spectrogram.
        waveforms = hifi_gan.decode_batch(mel_output)
        # Drop singleton dimensions and move the samples into a NumPy array.
        waveform = waveforms.squeeze().cpu().numpy()
        # 22050 Hz is the sample rate written into the WAV header
        # (presumably the output rate of the LJSpeech models -- confirm).
        write(output_path, 22050, _to_pcm16(waveform))
        return output_path
    except Exception as e:
        # UI boundary: show the failure in the page instead of crashing the app.
        st.error(f"Error during text-to-speech generation: {e}")
        return None
# Streamlit UI: page header, text box, and the button that triggers synthesis.
st.title("Text-to-Speech Application")
st.write("Enter text below and convert it to speech!")

# Input field, pre-filled with a sample sentence.
text_input = st.text_area("Enter Text:", "Hello, welcome to the Text-to-Speech app!")

if st.button("Generate Speech"):
    if not text_input.strip():
        # Nothing but whitespace was entered.
        st.warning("Please enter some text to generate speech.")
    else:
        output_audio = text_to_speech(text_input)
        if not output_audio:
            st.error("Failed to generate audio. Please check the input text.")
        else:
            # Play back the generated WAV file in the page.
            st.audio(output_audio, format="audio/wav")