Spaces:
Running
Running
Create app.py
Browse files
app.py
CHANGED
@@ -1,48 +1,56 @@
|
|
1 |
-
|
|
|
|
|
2 |
from speechbrain.pretrained import Tacotron2, HIFIGAN
|
3 |
from scipy.io.wavfile import write
|
|
|
4 |
|
5 |
-
# Load
|
6 |
-
@st.cache_resource
|
7 |
def load_models():
|
8 |
-
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="
|
9 |
-
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="
|
10 |
return tacotron2, hifi_gan
|
11 |
|
12 |
-
# Load models
|
13 |
-
st.write("Loading models... Please wait ⏳")
|
14 |
tacotron2, hifi_gan = load_models()
|
15 |
-
st.success("Models loaded successfully!")
|
16 |
|
17 |
-
#
|
18 |
def text_to_speech(text):
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
# Streamlit UI
|
36 |
-
st.title("
|
37 |
-
|
|
|
|
|
|
|
38 |
|
39 |
if st.button("Generate Speech"):
|
40 |
-
if
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
st.error(f"Error during TTS generation: {e}")
|
47 |
else:
|
48 |
-
st.warning("Please enter some text.")
|
|
|
1 |
+
# Import required libraries
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
from speechbrain.pretrained import Tacotron2, HIFIGAN
|
5 |
from scipy.io.wavfile import write
|
6 |
+
import streamlit as st
|
7 |
|
8 |
+
# Load TTS and vocoder models
|
9 |
+
@st.cache_resource  # keep the loaded models cached so they are not reloaded
def load_models():
    """Load the pretrained text-to-spectrogram model and the vocoder.

    Both models are fetched with ``from_hparams`` from their SpeechBrain
    sources and stored under the local ``tmp_tts`` / ``tmp_vocoder``
    directories.

    Returns:
        A ``(tacotron2, hifi_gan)`` tuple: the Tacotron2 model followed by
        the HiFi-GAN vocoder.
    """
    acoustic_model = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmp_tts",
    )
    vocoder = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmp_vocoder",
    )
    return acoustic_model, vocoder
|
14 |
|
|
|
|
|
15 |
# Module-level model handles used by text_to_speech below. load_models is
# wrapped in st.cache_resource, so the pair is cached rather than reloaded.
tacotron2, hifi_gan = load_models()
|
|
|
16 |
|
17 |
+
# Text-to-Speech function
|
18 |
def text_to_speech(text):
    """Synthesize *text* to speech and save it as a 16-bit PCM WAV file.

    The text is encoded to a mel spectrogram with the module-level
    ``tacotron2`` model, decoded to a waveform with ``hifi_gan``,
    peak-normalized to ``[-1, 1]`` and written to ``output.wav``.

    Args:
        text: The text to synthesize.

    Returns:
        The path of the written WAV file, or ``None`` if any step raised
        (the error is reported in the UI through ``st.error``).
    """
    # NOTE(review): presumably the native sample rate of the LJSpeech
    # models loaded above — confirm against the model cards.
    sample_rate = 22050
    # NOTE(review): a fixed filename is shared by every caller; concurrent
    # sessions could overwrite each other's audio — consider a per-request path.
    output_path = "output.wav"

    try:
        # Generate mel spectrogram
        mel_output, _, _ = tacotron2.encode_text(text)

        # Generate waveform from mel spectrogram
        waveforms = hifi_gan.decode_batch(mel_output)

        # Convert waveform to numpy format
        waveform = waveforms.squeeze().cpu().numpy()

        # Normalize waveform to range [-1, 1]. An all-zero (silent) waveform
        # has a peak of 0; dividing by it would fill the signal with NaNs,
        # so silence is written out unchanged instead.
        peak = np.max(np.abs(waveform))
        if peak > 0:
            waveform = waveform / peak

        # Save waveform to a .wav file as 16-bit PCM
        write(output_path, sample_rate, (waveform * 32767).astype(np.int16))
        return output_path

    except Exception as e:
        # UI boundary: surface any failure to the user rather than crashing.
        st.error(f"Error during text-to-speech generation: {e}")
        return None
|
40 |
|
41 |
# Streamlit UI
|
42 |
+
st.title("Text-to-Speech Application")
st.write("Enter text below and convert it to speech!")

# Text box for the sentence to synthesize, pre-filled with a sample.
text_input = st.text_area("Enter Text:", "Hello, welcome to the Text-to-Speech app!")

if st.button("Generate Speech"):
    if not text_input.strip():
        # Nothing but whitespace was entered.
        st.warning("Please enter some text to generate speech.")
    else:
        output_audio = text_to_speech(text_input)
        if not output_audio:
            # text_to_speech returns None when synthesis failed.
            st.error("Failed to generate audio. Please check the input text.")
        else:
            st.audio(output_audio, format="audio/wav")
|