fargerm committed on
Commit
44a1495
1 Parent(s): 8f009fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -37
app.py CHANGED
@@ -1,56 +1,74 @@
1
  import streamlit as st
2
  from transformers import MarianMTModel, MarianTokenizer, pipeline
3
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
4
- import soundfile as sf
5
  import torch
 
6
 
7
- # Define the language model and tokenizer
8
- translation_model_name = "Helsinki-NLP/opus-mt-en-ur"
9
- tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
10
- translation_model = MarianMTModel.from_pretrained(translation_model_name)
11
 
12
- # Load the text-to-speech model
13
  tts_model_name = "microsoft/speecht5_tts"
14
- processor = SpeechT5Processor.from_pretrained(tts_model_name)
15
- tts_model = SpeechT5ForTextToSpeech.from_pretrained(tts_model_name)
16
 
17
  # Function to translate text
18
  def translate_text(text, target_lang):
19
- if target_lang not in ["Urdu", "Hindi", "Bengali"]:
20
- return "Error: Target language not supported."
21
- tokens = tokenizer(text, return_tensors="pt", padding=True)
22
- translated_tokens = translation_model.generate(**tokens)
23
- translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
24
  return translated_text
25
 
26
- # Function to generate speech
27
- def synthesize_speech(text, speaker_embeddings=None):
28
- inputs = processor(text, return_tensors="pt")
29
- with torch.no_grad():
30
- speech = tts_model.generate_speech(inputs, speaker_embeddings)
31
- sf.write("output.wav", speech.numpy(), 16000)
 
 
 
 
 
 
 
 
 
 
 
32
  return "output.wav"
33
 
34
- # Streamlit app
35
- st.title("Language Translator with TTS")
36
- st.write("Enter the text you want to translate and hear the translation.")
 
 
 
 
 
 
 
 
 
37
 
38
- # Select target language
39
- target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi", "Bengali"])
 
 
 
 
 
 
40
 
41
- # Text input
42
- text_to_translate = st.text_input("Enter text here")
 
 
 
43
 
44
- if st.button("Translate and Generate Audio"):
45
  # Clear input for new text
46
- st.session_state.text_to_translate = ""
47
 
48
- # Perform translation
49
- translated_text = translate_text(text_to_translate, target_language)
50
- st.write(f"Translated text ({target_language}): {translated_text}")
51
 
52
- # Generate speech
53
- if translated_text and "Error" not in translated_text:
54
- speaker_embeddings = None # Placeholder, use actual embeddings if needed
55
- audio_file = synthesize_speech(translated_text, speaker_embeddings)
56
- st.audio(audio_file)
 
1
  import streamlit as st
2
  from transformers import MarianMTModel, MarianTokenizer, pipeline
3
+ from datasets import load_dataset
 
4
  import torch
5
+ import soundfile as sf
6
 
7
+ # Define the translation model and tokenizer
8
+ model_name = "Helsinki-NLP/opus-mt-en-ur"
9
+ model = MarianMTModel.from_pretrained(model_name)
10
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
11
 
12
+ # Define the TTS model
13
  tts_model_name = "microsoft/speecht5_tts"
14
+ tts_model = pipeline("text-to-speech", model=tts_model_name)
15
+ speaker_embeddings = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
16
 
17
  # Function to translate text
18
  def translate_text(text, target_lang):
19
+ inputs = tokenizer(text, return_tensors="pt", padding=True)
20
+ translated = model.generate(**inputs)
21
+ translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
 
 
22
  return translated_text
23
 
24
+ # Function to synthesize speech
25
+ def synthesize_speech(text, target_lang):
26
+ if target_lang == 'ur':
27
+ lang = 'ur_PK'
28
+ elif target_lang == 'hi':
29
+ lang = 'hi_IN'
30
+ elif target_lang == 'bn':
31
+ lang = 'bn_BD'
32
+ else:
33
+ lang = 'en_US'
34
+
35
+ speaker_embedding = torch.tensor(speaker_embeddings[0]["xvector"]).unsqueeze(0)
36
+ audio = tts_model(text, speaker_embeddings=speaker_embedding)
37
+
38
+ with open("output.wav", "wb") as f:
39
+ sf.write(f, audio["waveform"].numpy(), samplerate=16000, format="WAV")
40
+
41
  return "output.wav"
42
 
43
+ # Streamlit UI
44
+ st.title("Language Translator with Speech Synthesis")
45
+
46
+ # Input text
47
+ text_input = st.text_input("Enter text in English:")
48
+ if text_input:
49
+ st.session_state.text_input = text_input
50
+
51
+ # Language selection
52
+ target_lang = st.selectbox("Select Target Language:", [
53
+ "Urdu (ur)", "Hindi (hi)", "Bengali (bn)"
54
+ ])
55
 
56
+ # Translate button
57
+ if st.button("Translate"):
58
+ if target_lang == "Urdu (ur)":
59
+ target_lang = "ur"
60
+ elif target_lang == "Hindi (hi)":
61
+ target_lang = "hi"
62
+ elif target_lang == "Bengali (bn)":
63
+ target_lang = "bn"
64
 
65
+ translated_text = translate_text(st.session_state.text_input, target_lang)
66
+ st.text_area("Translated text:", value=translated_text, height=100)
67
+
68
+ audio_file = synthesize_speech(translated_text, target_lang)
69
+ st.audio(audio_file)
70
 
 
71
  # Clear input for new text
72
+ st.session_state.text_input = ""
73
 
 
 
 
74