# app.py — English → Urdu/Hindi translator with TTS
# (Hugging Face Space by fargerm, revision c4da995)
import io
import wave

import numpy as np
import streamlit as st
import torch
from datasets import load_dataset
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
    TextToSpeechPipeline,
    pipeline,
)
# Initialize translation models.
# Helsinki-NLP OPUS-MT pipelines: English -> Urdu and English -> Hindi.
# NOTE: these download/load at import time, so the app blocks until ready.
translator_urdu = pipeline("translation_en_to_ur", model="Helsinki-NLP/opus-mt-en-ur")
translator_hindi = pipeline("translation_en_to_hi", model="Helsinki-NLP/opus-mt-en-hi")
# Initialize TTS model (SpeechT5 text-to-speech + HiFi-GAN vocoder).
# NOTE(review): SpeechT5 is an English-trained TTS model — confirm it
# produces usable audio for Urdu/Hindi script before relying on it.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
# Speaker x-vectors (CMU Arctic); entry 0 is used as a fixed speaker voice.
xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
def translate_text(text, target_lang):
    """Translate English *text* into the requested target language.

    Returns a ``(translation, error)`` pair; exactly one element is None.
    Supported targets: "Urdu" and "Hindi".
    """
    # Lambdas defer the pipeline lookup until a supported language is chosen.
    dispatch = {
        "Urdu": lambda t: translator_urdu(t),
        "Hindi": lambda t: translator_hindi(t),
    }
    runner = dispatch.get(target_lang)
    if runner is None:
        return None, "Error: Target language not supported."
    output = runner(text)
    return output[0]['translation_text'], None
# Function to synthesize speech
def synthesize_speech(text, target_lang):
    """Synthesize *text* to speech and return it as an in-memory WAV file.

    Returns a ``(audio_io, error)`` pair: a ``io.BytesIO`` holding a playable
    16 kHz mono 16-bit WAV on success, or an error string when *target_lang*
    is unsupported. Exactly one element of the pair is None.

    NOTE(review): SpeechT5 is English-trained; audio quality for Urdu/Hindi
    input text is unverified — confirm before shipping.
    """
    if target_lang not in ["Urdu", "Hindi"]:
        return None, "Error: TTS model not available for the selected language."
    inputs = processor(text, return_tensors="pt")
    # Fixed speaker voice: first x-vector from the CMU Arctic dataset.
    speaker_embedding = torch.tensor(xvectors[0]["xvector"]).unsqueeze(0)
    speech = tts_model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
    # BUG FIX: the old code dumped raw float32 samples into BytesIO with no
    # RIFF/WAV header, so st.audio(..., format='audio/wav') got unplayable
    # bytes.  Wrap the samples in a real WAV container instead:
    # clip to [-1, 1], scale to 16-bit PCM, and write at 16 kHz (SpeechT5's
    # output sample rate).
    samples = np.clip(speech.numpy(), -1.0, 1.0)
    pcm16 = (samples * 32767.0).astype(np.int16)
    audio_io = io.BytesIO()
    with wave.open(audio_io, "wb") as wav:
        wav.setnchannels(1)       # mono
        wav.setsampwidth(2)       # 16-bit
        wav.setframerate(16000)   # SpeechT5 output rate
        wav.writeframes(pcm16.tobytes())
    audio_io.seek(0)
    return audio_io, None
# Streamlit UI
st.title("Language Translator")
# Text input
text_to_translate = st.text_input("Enter text in English", value="", key="input_text")
# Language selection
target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi"])
if st.button("Translate"):
    # BUG FIX: the previous code assigned st.session_state.input_text = ""
    # here to clear the box, but Streamlit raises StreamlitAPIException when
    # a widget-backed session-state key is modified after that widget has
    # been instantiated in the same run — every button press crashed the app.
    # The reset is removed; the input simply keeps its value.
    translated_text, error = translate_text(text_to_translate, target_language)
    if error:
        st.error(error)
    else:
        st.write(f"Translated text ({target_language}): {translated_text}")
        # Text-to-Speech — only runs when translation succeeded, so we never
        # pass None into synthesize_speech.
        audio_file, tts_error = synthesize_speech(translated_text, target_language)
        if tts_error:
            st.error(tts_error)
        else:
            st.audio(audio_file, format='audio/wav')