fargerm committed on
Commit
a51a9d1
1 Parent(s): fc83b17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
2
  from transformers import MarianMTModel, MarianTokenizer, SpeechT5Processor, SpeechT5ForTextToSpeech
3
  from datasets import load_dataset
4
  import torch
 
5
  from io import BytesIO
6
 
7
  # Define the language options
@@ -36,7 +37,7 @@ def translate_text(text, target_lang):
36
  translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
37
  return translated_text
38
 
39
- def synthesize_speech(text, lang):
40
  # Load the TTS model and processor
41
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
42
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
@@ -49,8 +50,14 @@ def synthesize_speech(text, lang):
49
  inputs = processor(text=text, return_tensors="pt")
50
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings)
51
 
52
- audio_bytes = BytesIO(speech.numpy())
53
- return audio_bytes, None
 
 
 
 
 
 
54
 
55
  if st.button("Translate and Synthesize Speech"):
56
  # Perform translation
@@ -58,11 +65,9 @@ if st.button("Translate and Synthesize Speech"):
58
  st.write(f"Translated text ({languages[target_language]}): {translated_text}")
59
 
60
  # Perform text-to-speech
61
- audio_bytes, error = synthesize_speech(translated_text, target_language)
62
- if error:
63
- st.error(f"Error: {error}")
64
- else:
65
- st.audio(audio_bytes, format="audio/wav")
66
 
67
 
68
 
 
2
  from transformers import MarianMTModel, MarianTokenizer, SpeechT5Processor, SpeechT5ForTextToSpeech
3
  from datasets import load_dataset
4
  import torch
5
+ from scipy.io.wavfile import write as write_wav
6
  from io import BytesIO
7
 
8
  # Define the language options
 
37
  translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
38
  return translated_text
39
 
40
+ def synthesize_speech(text):
41
  # Load the TTS model and processor
42
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
43
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
 
50
  inputs = processor(text=text, return_tensors="pt")
51
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings)
52
 
53
+ # Convert speech tensor to numpy array and save as wav
54
+ audio_np = speech.squeeze().cpu().numpy()
55
+ sample_rate = 16000 # Define a sample rate
56
+ audio_buffer = BytesIO()
57
+ write_wav(audio_buffer, sample_rate, audio_np) # Write the numpy array as a WAV file to the buffer
58
+ audio_buffer.seek(0)
59
+
60
+ return audio_buffer
61
 
62
  if st.button("Translate and Synthesize Speech"):
63
  # Perform translation
 
65
  st.write(f"Translated text ({languages[target_language]}): {translated_text}")
66
 
67
  # Perform text-to-speech
68
+ audio_bytes = synthesize_speech(translated_text)
69
+ st.audio(audio_bytes, format="audio/wav")
70
+
 
 
71
 
72
 
73