Musawir19 committed on
Commit
527cad7
1 Parent(s): b41b0e5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -34
app.py CHANGED
@@ -1,48 +1,56 @@
1
- import streamlit as st
 
 
2
  from speechbrain.pretrained import Tacotron2, HIFIGAN
3
  from scipy.io.wavfile import write
 
4
 
5
- # Load the TTS and vocoder models
6
- @st.cache_resource
7
  def load_models():
8
- tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
9
- hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
10
  return tacotron2, hifi_gan
11
 
12
- # Load models
13
- st.write("Loading models... Please wait ⏳")
14
  tacotron2, hifi_gan = load_models()
15
- st.success("Models loaded successfully!")
16
 
17
- # TTS function
18
  def text_to_speech(text):
19
- # Generate mel spectrogram
20
- mel_output, mel_length, alignment = tacotron2.encode_text(text)
21
-
22
- # Decode mel spectrogram to waveform
23
- waveforms = hifi_gan.decode_batch(mel_output)
24
-
25
- # Convert waveform to numpy and normalize to int16 range
26
- waveform = waveforms.squeeze(1).cpu().numpy()
27
- waveform = waveform / max(abs(waveform)) # Normalize to range [-1, 1]
28
- waveform = (waveform * 32767).astype("int16") # Scale to int16 range
29
-
30
- # Save waveform as audio file
31
- audio_path = "output.wav"
32
- write(audio_path, 22050, waveform)
33
- return audio_path
 
 
 
 
 
 
34
 
35
  # Streamlit UI
36
- st.title("🗣️ Text-to-Speech App")
37
- text = st.text_input("Enter text to convert to speech:")
 
 
 
38
 
39
  if st.button("Generate Speech"):
40
- if text.strip():
41
- st.write("Generating speech...")
42
- try:
43
- audio_file = text_to_speech(text)
44
- st.audio(audio_file, format="audio/wav")
45
- except Exception as e:
46
- st.error(f"Error during TTS generation: {e}")
47
  else:
48
- st.warning("Please enter some text.")
 
1
+ # Import necessary libraries
2
+ import os
3
+ import numpy as np
4
  from speechbrain.pretrained import Tacotron2, HIFIGAN
5
  from scipy.io.wavfile import write
6
+ import streamlit as st
7
 
8
+ # Load TTS and vocoder models
9
@st.cache_resource  # keep the loaded models cached so they are not reloaded
def load_models():
    """Build the pretrained LJSpeech text-to-speech pair.

    Returns:
        A ``(tacotron2, hifi_gan)`` tuple: the Tacotron2 model followed by
        the HiFi-GAN vocoder, each created through ``from_hparams``.
    """
    acoustic_model = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmp_tts",
    )
    vocoder = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmp_vocoder",
    )
    return acoustic_model, vocoder
14
 
 
 
15
  # Load both models at module level; text_to_speech reads them as globals.
  tacotron2, hifi_gan = load_models()
 
16
 
17
# Text-to-Speech function
def text_to_speech(text, output_path="output.wav", sample_rate=22050):
    """Synthesize ``text`` into a 16-bit PCM WAV file.

    Args:
        text: The text to speak.
        output_path: Destination WAV file. Defaults to ``"output.wav"``.
        sample_rate: Sample rate written to the WAV file. Defaults to 22050,
            presumably the rate of the LJSpeech checkpoints (confirm before
            changing).

    Returns:
        ``output_path`` on success, or ``None`` after the failure has been
        reported through ``st.error``.
    """
    try:
        # Text -> mel spectrogram; only the first item of the triple is used.
        mel_output, _, _ = tacotron2.encode_text(text)

        # Mel spectrogram -> waveform tensor.
        waveforms = hifi_gan.decode_batch(mel_output)

        # Drop singleton dimensions and move to NumPy for scaling.
        waveform = waveforms.squeeze().cpu().numpy()
        if waveform.size == 0:
            # Fail with a readable message rather than an opaque NumPy
            # "zero-size array" reduction error from np.max below.
            raise ValueError("the vocoder returned no audio samples")

        # Peak-normalize to [-1, 1]. A silent (all-zero) signal is left
        # untouched: dividing by a zero peak would fill the array with NaN
        # and the int16 cast below would then emit garbage samples.
        peak = np.max(np.abs(waveform))
        if peak > 0:
            waveform = waveform / peak

        # Scale to the int16 range and save as a .wav file.
        write(output_path, sample_rate, (waveform * 32767).astype(np.int16))
        return output_path

    except Exception as e:
        # UI boundary: report any synthesis failure instead of crashing the app.
        st.error(f"Error during text-to-speech generation: {e}")
        return None
40
 
41
  # Streamlit UI
42
# Page header and a short instruction line.
st.title("Text-to-Speech Application")
st.write("Enter text below and convert it to speech!")

# Text box for the prompt; the second argument is its initial value.
text_input = st.text_area(
    "Enter Text:",
    "Hello, welcome to the Text-to-Speech app!",
)

generate_clicked = st.button("Generate Speech")
if generate_clicked and not text_input.strip():
    # Whitespace-only input: ask for text instead of calling the model.
    st.warning("Please enter some text to generate speech.")
elif generate_clicked:
    output_audio = text_to_speech(text_input)
    if not output_audio:
        st.error("Failed to generate audio. Please check the input text.")
    else:
        st.audio(output_audio, format="audio/wav")