fargerm committed on
Commit
b8a60ec
1 Parent(s): f855702

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -59
app.py CHANGED
@@ -1,73 +1,56 @@
1
  import streamlit as st
2
- from transformers import MarianMTModel, MarianTokenizer, SpeechT5Processor, SpeechT5ForTextToSpeech
3
- from datasets import load_dataset
 
4
  import torch
5
- from scipy.io.wavfile import write as write_wav
6
- from io import BytesIO
7
 
8
- # Define the language options
9
- languages = {
10
- 'ur': 'Urdu',
11
- 'hi': 'Hindi',
12
- 'bn': 'Bengali'
13
- }
14
 
15
- # Streamlit app
16
- st.title("Real-Time Language Translator and TTS")
17
-
18
- # Text input
19
- text_to_translate = st.text_area("Enter text to translate:", "My Name is Meer Hassan Farhad, I live in Islamabad")
20
-
21
- # Language selection
22
- target_language = st.selectbox("Select target language:", list(languages.keys()))
23
 
 
24
  def translate_text(text, target_lang):
25
- # Use a more specific model for translation
26
- model_name = f"Helsinki-NLP/opus-mt-en-{target_lang}"
27
- tokenizer = MarianTokenizer.from_pretrained(model_name)
28
- model = MarianMTModel.from_pretrained(model_name)
29
-
30
- # Translate text
31
- encoded_text = tokenizer.encode(text, return_tensors="pt")
32
- translated = model.generate(encoded_text)
33
- translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
34
  return translated_text
35
 
36
- def synthesize_speech(text):
37
- # Load the TTS model and processor
38
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
39
- model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
40
-
41
- # Load speaker embeddings
42
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
43
- speaker_embeddings = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
44
-
45
- # Synthesize speech
46
- inputs = processor(text=text, return_tensors="pt")
47
- speech = model.generate_speech(inputs["input_ids"], speaker_embeddings)
48
-
49
- # Convert speech tensor to numpy array and save as wav
50
- audio_np = speech.squeeze().cpu().numpy()
51
- sample_rate = 16000 # Define a sample rate
52
- audio_buffer = BytesIO()
53
- write_wav(audio_buffer, sample_rate, audio_np) # Write the numpy array as a WAV file to the buffer
54
- audio_buffer.seek(0)
55
-
56
- return audio_buffer
57
 
58
- if st.button("Translate and Synthesize Speech"):
59
- # Perform translation
60
- translated_text = translate_text(text_to_translate, target_language)
61
- st.write(f"Translated text ({languages[target_language]}): {translated_text}")
62
-
63
- # Perform text-to-speech
64
- try:
65
- audio_bytes = synthesize_speech(translated_text)
66
- st.audio(audio_bytes, format="audio/wav")
67
- except Exception as e:
68
- st.error(f"Error generating audio: {e}")
69
 
 
 
70
 
 
 
71
 
 
 
 
72
 
 
 
 
73
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import MarianMTModel, MarianTokenizer, pipeline
3
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
4
+ import soundfile as sf
5
  import torch
 
 
6
 
7
# --- Model setup ---
# Streamlit re-executes this module top-to-bottom on every user interaction;
# without caching, both large models would be re-instantiated on each rerun.
translation_model_name = "Helsinki-NLP/opus-mt-en-ur"
tts_model_name = "microsoft/speecht5_tts"


@st.cache_resource
def _load_models():
    """Load and cache the MarianMT (en->ur) and SpeechT5 TTS models once per process.

    Returns:
        tuple: (tokenizer, translation_model, processor, tts_model).
    """
    tok = MarianTokenizer.from_pretrained(translation_model_name)
    mt_model = MarianMTModel.from_pretrained(translation_model_name)
    proc = SpeechT5Processor.from_pretrained(tts_model_name)
    tts = SpeechT5ForTextToSpeech.from_pretrained(tts_model_name)
    return tok, mt_model, proc, tts


# Keep the original module-level names so the rest of the file is unaffected.
tokenizer, translation_model, processor, tts_model = _load_models()
 
 
 
 
16
 
17
# Function to translate text
def translate_text(text, target_lang):
    """Translate English *text* into the selected target language.

    Bug fix: the original always used the module-level en->ur model, so
    selecting "Hindi" or "Bengali" silently returned Urdu. We now load the
    Marian model matching the requested language.

    Args:
        text: English source text.
        target_lang: Display name of the target language ("Urdu", "Hindi",
            or "Bengali").

    Returns:
        The translated string, "" for empty input, or an "Error: ..." string
        for an unsupported language (kept for caller compatibility).
    """
    lang_codes = {"Urdu": "ur", "Hindi": "hi", "Bengali": "bn"}
    if target_lang not in lang_codes:
        return "Error: Target language not supported."
    if not text or not text.strip():
        return ""
    # NOTE(review): loading per call is correct but slow; consider caching
    # per-language models with st.cache_resource.
    model_name = f"Helsinki-NLP/opus-mt-en-{lang_codes[target_lang]}"
    lang_tokenizer = MarianTokenizer.from_pretrained(model_name)
    lang_model = MarianMTModel.from_pretrained(model_name)
    tokens = lang_tokenizer(text, return_tensors="pt", padding=True)
    translated_tokens = lang_model.generate(**tokens)
    translated_text = lang_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return translated_text
25
 
26
# Function to generate speech
def synthesize_speech(text, speaker_embeddings=None):
    """Synthesize *text* to speech with SpeechT5 and write it to ``output.wav``.

    Bug fixes vs. the original:
      * ``generate_speech`` was handed the whole ``BatchEncoding``; it needs
        the ``input_ids`` tensor (as the pre-commit version correctly did).
      * ``speaker_embeddings=None`` crashes SpeechT5; we substitute a zero
        x-vector so synthesis still runs (neutral voice). The embedding dim
        512 matches microsoft/speecht5_tts — TODO confirm against real
        speaker embeddings (e.g. cmu-arctic-xvectors) for better quality.

    Args:
        text: Text to vocalize.
        speaker_embeddings: Optional (1, 512) speaker x-vector tensor.

    Returns:
        Path to the written WAV file ("output.wav", 16 kHz).
    """
    inputs = processor(text=text, return_tensors="pt")
    if speaker_embeddings is None:
        speaker_embeddings = torch.zeros(1, 512)
    with torch.no_grad():
        speech = tts_model.generate_speech(inputs["input_ids"], speaker_embeddings)
    sf.write("output.wav", speech.numpy(), 16000)
    return "output.wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
# --- Streamlit UI ---
st.title("Language Translator with TTS")
st.write("Enter the text you want to translate and hear the translation.")

# Select target language
target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi", "Bengali"])

# Text input
text_to_translate = st.text_input("Enter text here")

if st.button("Translate and Generate Audio"):
    # Removed the dead `st.session_state.text_to_translate = ""` write: no
    # widget is bound to that key, and assigning to a widget key after the
    # widget is instantiated raises StreamlitAPIException.
    if not text_to_translate.strip():
        st.warning("Please enter some text to translate.")
    else:
        # Perform translation
        translated_text = translate_text(text_to_translate, target_language)
        st.write(f"Translated text ({target_language}): {translated_text}")

        # Generate speech only for a successful translation
        if translated_text and "Error" not in translated_text:
            speaker_embeddings = None  # Placeholder, use actual embeddings if needed
            audio_file = synthesize_speech(translated_text, speaker_embeddings)
            st.audio(audio_file)