# Spaces: Sleeping
import soundfile as sf
import streamlit as st
import torch
from datasets import load_dataset
from transformers import (
    MarianMTModel,
    MarianTokenizer,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)
# Translation: MarianMT checkpoint (English -> Urdu, per its name) and its tokenizer
model_name = "Helsinki-NLP/opus-mt-en-ur"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Text-to-speech: SpeechT5 checkpoint and the processor that prepares its inputs
tts_model_name = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(tts_model_name)
tts_model = SpeechT5ForTextToSpeech.from_pretrained(tts_model_name)

# Speaker embedding: first x-vector of the validation split, given a leading
# axis of size 1 before it is handed to the TTS model
_xvector_rows = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(_xvector_rows["xvector"][0])[None]
# Function to translate text
def translate_text(text, target_lang):
    """Translate ``text`` with the module-level MarianMT model.

    Args:
        text: Source text to translate.
        target_lang: Requested target language code. Currently unused: the
            output language is fixed by whichever checkpoint ``model`` and
            ``tokenizer`` were loaded from.

    Returns:
        The translated string, or ``""`` when ``text`` is empty or only
        whitespace.
    """
    # Nothing to translate; skip the model call instead of decoding whatever
    # the model generates for an empty prompt.
    if not text or not text.strip():
        return ""

    # truncation=True caps the input at the tokenizer's maximum length so an
    # over-long input is shortened instead of failing inside the model.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    # A single input string yields a batch of one; return its only element.
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
# Function to synthesize speech
def synthesize_speech(text, target_lang):
    """Synthesize ``text`` with SpeechT5 and save it as ``output.wav``.

    Reports the outcome in the Streamlit page: a success message when the
    written file is non-empty, an error message when it is empty or cannot
    be read back.

    Args:
        text: Text to speak.
        target_lang: Language code of ``text``. Currently unused.
            NOTE(review): the SpeechT5 checkpoint is presumably trained on
            English only - confirm it can pronounce the translated text.

    Returns:
        Path of the WAV file that was written (always ``"output.wav"``).
    """
    inputs = processor(text=text, return_tensors="pt")

    # generate_speech only returns a waveform when given a vocoder; without
    # one it returns a mel spectrogram, which is not playable audio.
    # Loaded per call, like the other models which are loaded on every
    # script run.
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    speech = tts_model.generate_speech(
        inputs["input_ids"], speaker_embeddings, vocoder=vocoder
    )

    # Save the speech to a file
    output_path = "output.wav"
    sf.write(output_path, speech.numpy(), samplerate=16000)

    # Check if the audio file was generated correctly
    try:
        with open(output_path, "rb") as f:
            audio_data = f.read()
    except OSError as e:
        st.error(f"Error reading the audio file: {e}")
    else:
        if audio_data:
            st.success("Audio generated successfully.")
        else:
            st.error("Error: The audio file is empty.")
    return output_path
# Streamlit UI
st.title("Language Translator with Speech Synthesis")

# Input text; a non-empty value is mirrored into session state
text_input = st.text_input("Enter text in English:")
if text_input:
    st.session_state.text_input = text_input

# Language selection: display label -> language code
language_codes = {
    "Urdu (ur)": "ur",
    "Hindi (hi)": "hi",
    "Bengali (bn)": "bn",
}
target_lang = st.selectbox("Select Target Language:", list(language_codes))

# Translate button
if st.button("Translate"):
    target_lang = language_codes[target_lang]

    # .get() because the key does not exist until some text has been entered;
    # plain attribute access would raise on a first click with an empty box.
    source_text = st.session_state.get("text_input", "")

    if not source_text.strip():
        st.warning("Please enter some text to translate.")
    else:
        # Only one translation checkpoint is loaded, so other selections still
        # produce that checkpoint's language. Tell the user instead of
        # silently mislabelling the output.
        # NOTE(review): assumes the checkpoint name ends in the target
        # language code ("...-en-ur") - confirm if model_name changes.
        loaded_lang = model_name.rsplit("-", 1)[-1]
        if target_lang != loaded_lang:
            st.warning(
                f"The loaded translation model ({model_name}) only produces "
                f"'{loaded_lang}' output; the selected language is ignored."
            )

        translated_text = translate_text(source_text, target_lang)
        st.text_area("Translated text:", value=translated_text, height=100)

        audio_file = synthesize_speech(translated_text, target_lang)
        st.audio(audio_file)

        # Clear input for new text
        st.session_state.text_input = ""