import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
import soundfile as sf
import torch
# Load the English -> Urdu translation model and tokenizer
translation_model_name = "Helsinki-NLP/opus-mt-en-ur"
tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)
# Load the text-to-speech model
tts_model_name = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(tts_model_name)
tts_model = SpeechT5ForTextToSpeech.from_pretrained(tts_model_name)
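# SpeechT5 predicts a mel spectrogram, not audio; without a vocoder the raw
# spectrogram would be written to the WAV file. microsoft/speecht5_hifigan is the
# companion HiFi-GAN vocoder released with the TTS checkpoint.
# Note: SpeechT5 is trained on English speech, so translated Urdu/Hindi/Bengali text
# will not be synthesized intelligibly; treat this pipeline as a demo.
from transformers import SpeechT5HifiGan

vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")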
# Function to translate text
def translate_text(text, target_lang):
    # The loaded checkpoint only covers English -> Urdu; Hindi and Bengali would each
    # need their own checkpoint (see the per-language sketch below).
    if target_lang not in ["Urdu", "Hindi", "Bengali"]:
        return "Error: Target language not supported."
    tokens = tokenizer(text, return_tensors="pt", padding=True)
    translated_tokens = translation_model.generate(**tokens)
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return translated_text
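# --- Sketch (not part of the original app): one MarianMT checkpoint per target language. ---
# MarianMT checkpoints are single language-pair models, so a multi-language setup needs a
# mapping from target language to checkpoint. The Hindi entry below is an assumed
# checkpoint name; verify it exists on the Hugging Face Hub before relying on it, and
# Bengali is omitted because no English -> Bengali Marian checkpoint is assumed here.
ASSUMED_TRANSLATION_CHECKPOINTS = {
    "Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",  # assumed checkpoint name
}

def translate_text_multilang(text, target_lang):
    """Hypothetical helper: translate with the checkpoint registered for target_lang."""
    checkpoint = ASSUMED_TRANSLATION_CHECKPOINTS.get(target_lang)
    if checkpoint is None:
        return f"Error: no translation checkpoint configured for {target_lang}."
    tok = MarianTokenizer.from_pretrained(checkpoint)
    model = MarianMTModel.from_pretrained(checkpoint)
    generated = model.generate(**tok(text, return_tensors="pt", padding=True))
    return tok.decode(generated[0], skip_special_tokens=True)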
# Function to generate speech from text
def synthesize_speech(text, speaker_embeddings=None):
    inputs = processor(text=text, return_tensors="pt")
    with torch.no_grad():
        # Pass input_ids (not the whole BatchEncoding) and let the HiFi-GAN vocoder
        # convert the predicted spectrogram into a 16 kHz waveform. Speaker embeddings
        # may be None, but SpeechT5 was trained with x-vectors, so real embeddings
        # give much more natural output.
        speech = tts_model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
    sf.write("output.wav", speech.numpy(), 16000)
    return "output.wav"
# Streamlit app
st.title("Language Translator with TTS")
st.write("Enter the text you want to translate and hear the translation.")
# Select target language
target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi", "Bengali"])
# Text input
text_to_translate = st.text_input("Enter text here")
if st.button("Translate and Generate Audio"):
    # Perform translation on the entered text
    translated_text = translate_text(text_to_translate, target_language)
    st.write(f"Translated text ({target_language}): {translated_text}")
    # Generate speech for successful translations
    if translated_text and "Error" not in translated_text:
        speaker_embeddings = None  # placeholder; load_default_speaker_embeddings() above yields real x-vectors
        audio_file = synthesize_speech(translated_text, speaker_embeddings)
        st.audio(audio_file)
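# To try the app locally (assuming this script is saved as app.py):
#     streamlit run app.py
# Streamlit reruns the whole script on every interaction, so in practice the
# from_pretrained calls above would usually be wrapped in st.cache_resource to
# avoid reloading the models on each button click.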