Spaces:

Musawir19
/

Taxt_to_speach

Running

App Files Community

Musawir19 committed 9 days ago

Commit

527cad7

•

1 Parent(s): b41b0e5

Create app.py

Browse files

Files changed (1) hide show

app.py +42 -34

app.py CHANGED Viewed

@@ -1,48 +1,56 @@
-import streamlit as st
 from speechbrain.pretrained import Tacotron2, HIFIGAN
 from scipy.io.wavfile import write
-# Load the TTS and vocoder models
-@st.cache_resource
 def load_models():
-    tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
-    hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
     return tacotron2, hifi_gan
-# Load models
-st.write("Loading models... Please wait ⏳")
 tacotron2, hifi_gan = load_models()
-st.success("Models loaded successfully!")
-# TTS function
 def text_to_speech(text):
-    # Generate mel spectrogram
-    mel_output, mel_length, alignment = tacotron2.encode_text(text)
-    # Decode mel spectrogram to waveform
-    waveforms = hifi_gan.decode_batch(mel_output)
-    # Convert waveform to numpy and normalize to int16 range
-    waveform = waveforms.squeeze(1).cpu().numpy()
-    waveform = waveform / max(abs(waveform))  # Normalize to range [-1, 1]
-    waveform = (waveform * 32767).astype("int16")  # Scale to int16 range
-    # Save waveform as audio file
-    audio_path = "output.wav"
-    write(audio_path, 22050, waveform)
-    return audio_path
 # Streamlit UI
-st.title("🗣️ Text-to-Speech App")
-text = st.text_input("Enter text to convert to speech:")
 if st.button("Generate Speech"):
-    if text.strip():
-        st.write("Generating speech...")
-        try:
-            audio_file = text_to_speech(text)
-            st.audio(audio_file, format="audio/wav")
-        except Exception as e:
-            st.error(f"Error during TTS generation: {e}")
     else:
-        st.warning("Please enter some text.")

+# Import necessary libraries
+import os
+import numpy as np
 from speechbrain.pretrained import Tacotron2, HIFIGAN
 from scipy.io.wavfile import write
+import streamlit as st
+# Load TTS and vocoder models
+@st.cache_resource  # Cache the models to avoid reloading
 def load_models():
+    tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmp_tts")
+    hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmp_vocoder")
     return tacotron2, hifi_gan
 tacotron2, hifi_gan = load_models()
+# Text-to-Speech function
 def text_to_speech(text):
+    try:
+        # Generate mel spectrogram
+        mel_output, _, _ = tacotron2.encode_text(text)
+        # Generate waveform from mel spectrogram
+        waveforms = hifi_gan.decode_batch(mel_output)
+        # Convert waveform to numpy format
+        waveform = waveforms.squeeze().cpu().numpy()
+        # Normalize waveform to range [-1, 1]
+        waveform = waveform / np.max(np.abs(waveform))
+        # Save waveform to a .wav file
+        output_path = "output.wav"
+        write(output_path, 22050, (waveform * 32767).astype(np.int16))
+        return output_path
+    except Exception as e:
+        st.error(f"Error during text-to-speech generation: {e}")
+        return None
 # Streamlit UI
+st.title("Text-to-Speech Application")
+st.write("Enter text below and convert it to speech!")
+# Input field
+text_input = st.text_area("Enter Text:", "Hello, welcome to the Text-to-Speech app!")
 if st.button("Generate Speech"):
+    if text_input.strip():
+        output_audio = text_to_speech(text_input)
+        if output_audio:
+            st.audio(output_audio, format="audio/wav")
+        else:
+            st.error("Failed to generate audio. Please check the input text.")
     else:
+        st.warning("Please enter some text to generate speech.")