Spaces:
Sleeping
Sleeping
File size: 2,663 Bytes
d4d81e1 017af4b 44a1495 f4e544b 44a1495 d4d81e1 44a1495 d4d81e1 017af4b b8a60ec 017af4b 58a8314 017af4b 1829ce2 b8a60ec 1829ce2 44a1495 d4d81e1 44a1495 017af4b 44a1495 017af4b 58a8314 44a1495 017af4b d4d81e1 44a1495 a51a9d1 44a1495 1829ce2 44a1495 390d4d0 b8a60ec 44a1495 6f0da1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer, SpeechT5Processor, SpeechT5ForTextToSpeech
from datasets import load_dataset
import torch
import soundfile as sf
# Hugging Face checkpoints used by the app.
model_name = "Helsinki-NLP/opus-mt-en-ur"
tts_model_name = "microsoft/speecht5_tts"


# Streamlit re-executes this script on every widget interaction. Wrapping the
# loaders in st.cache_resource keeps one copy of each heavy object per server
# process instead of reloading it on every rerun.
@st.cache_resource
def _load_translation_model(name):
    """Return the (MarianMTModel, MarianTokenizer) pair for checkpoint *name*."""
    return MarianMTModel.from_pretrained(name), MarianTokenizer.from_pretrained(name)


@st.cache_resource
def _load_tts_model(name):
    """Return the (SpeechT5ForTextToSpeech, SpeechT5Processor) pair for *name*."""
    return (
        SpeechT5ForTextToSpeech.from_pretrained(name),
        SpeechT5Processor.from_pretrained(name),
    )


@st.cache_resource
def _load_speaker_embeddings():
    """Return the first validation x-vector as a tensor with a leading batch axis."""
    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    # Index the row first so only one record is read, rather than pulling the
    # whole "xvector" column into memory just to take its first element.
    return torch.tensor(xvectors[0]["xvector"]).unsqueeze(0)


# Define the translation model and tokenizer
model, tokenizer = _load_translation_model(model_name)
# Define the TTS model and processor
tts_model, processor = _load_tts_model(tts_model_name)
# Load speaker embeddings
speaker_embeddings = _load_speaker_embeddings()
# Function to translate text
def translate_text(text, target_lang):
    """Translate *text* with the module-level Marian model and return the result.

    Args:
        text: Source text to translate.
        target_lang: Requested target language code. It is accepted for
            interface compatibility but is not used here.
            NOTE(review): the module-level model is a single fixed checkpoint,
            so every request is translated by that model regardless of
            ``target_lang`` -- confirm whether per-language models are intended.

    Returns:
        The decoded translation, or an empty string when *text* is empty or
        contains only whitespace.
    """
    # Nothing to translate: skip the model call rather than decoding whatever
    # the model generates for an empty prompt.
    if not text or not text.strip():
        return ""

    # truncation=True clips over-long input to the tokenizer's maximum length
    # instead of handing the model a sequence longer than it supports.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
    return translated_text
@st.cache_resource
def _load_vocoder():
    """Load the HiFi-GAN vocoder that converts SpeechT5 spectrograms to audio."""
    # Local import: SpeechT5HifiGan is not among the names imported from
    # transformers at the top of this file.
    from transformers import SpeechT5HifiGan

    return SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


# Function to synthesize speech
def synthesize_speech(text, target_lang):
    """Synthesize *text* to ``output.wav`` and return that path.

    Args:
        text: Text to speak.
        target_lang: Language code of *text*. It is accepted for interface
            compatibility but is not used by this function.
            NOTE(review): the same TTS checkpoint is used for every language;
            confirm it can actually pronounce the selected target scripts.

    Returns:
        The path of the written WAV file. The path is returned even when the
        post-write check reports a problem, which is shown via ``st.error``.
    """
    inputs = processor(text=text, return_tensors="pt")
    # Without a vocoder generate_speech returns a spectrogram, not a waveform;
    # passing the vocoder makes it return audio samples that can be written
    # directly to a WAV file.
    speech = tts_model.generate_speech(
        inputs["input_ids"], speaker_embeddings, vocoder=_load_vocoder()
    )

    # Save the speech to a file
    output_path = "output.wav"
    sf.write(output_path, speech.numpy(), samplerate=16000)

    # Check if the audio file was generated correctly
    try:
        with open(output_path, 'rb') as f:
            audio_data = f.read()
    except OSError as e:
        # Only file-access failures are expected here; anything else is a bug
        # that should surface rather than be reported as a read error.
        st.error(f"Error reading the audio file: {e}")
    else:
        if not audio_data:
            st.error("Error: The audio file is empty.")
        else:
            st.success("Audio generated successfully.")
    return output_path
# Streamlit UI
st.title("Language Translator with Speech Synthesis")

# Input text
text_input = st.text_input("Enter text in English:")
if text_input:
    # Mirror the latest non-empty input into session state.
    st.session_state.text_input = text_input

# Language selection: menu label -> language code passed to the helpers.
LANGUAGE_CODES = {
    "Urdu (ur)": "ur",
    "Hindi (hi)": "hi",
    "Bengali (bn)": "bn",
}
target_lang = st.selectbox("Select Target Language:", list(LANGUAGE_CODES))

# Translate button
if st.button("Translate"):
    # Translate what is currently in the box. Reading the session-state copy
    # instead raises AttributeError when nothing has been entered yet, and
    # would translate stale text after the box has been cleared.
    source_text = (text_input or "").strip()
    if not source_text:
        st.warning("Please enter some text to translate.")
    else:
        # Fall back to the raw label if it is ever missing from the table.
        target_lang = LANGUAGE_CODES.get(target_lang, target_lang)
        translated_text = translate_text(source_text, target_lang)
        st.text_area("Translated text:", value=translated_text, height=100)
        audio_file = synthesize_speech(translated_text, target_lang)
        st.audio(audio_file)
        # Reset the stored copy for new text. The text box itself keeps its
        # value because the widget is not bound to this session-state key.
        st.session_state.text_input = ""
|