Spaces:

fargerm
/

LangTransAudio

Sleeping

App Files Files Community

LangTransAudio / app.py

fargerm

Update app.py

58a8314 verified 3 months ago

raw

history blame contribute delete

2.66 kB

	import streamlit as st
	from transformers import MarianMTModel, MarianTokenizer, SpeechT5Processor, SpeechT5ForTextToSpeech
	from datasets import load_dataset
	import torch
	import soundfile as sf

	# Checkpoint identifiers for the translation and text-to-speech models.
	model_name = "Helsinki-NLP/opus-mt-en-ur"
	tts_model_name = "microsoft/speecht5_tts"


	# Streamlit re-executes this script on every widget interaction. The loaders
	# below are cached so the heavy from_pretrained / load_dataset calls run once
	# per server process instead of once per rerun.
	@st.cache_resource
	def _load_translation_pipeline(name):
	    """Return the (MarianMTModel, MarianTokenizer) pair for checkpoint *name*."""
	    return MarianMTModel.from_pretrained(name), MarianTokenizer.from_pretrained(name)


	@st.cache_resource
	def _load_tts_pipeline(name):
	    """Return the (SpeechT5ForTextToSpeech, SpeechT5Processor) pair for *name*."""
	    return (
	        SpeechT5ForTextToSpeech.from_pretrained(name),
	        SpeechT5Processor.from_pretrained(name),
	    )


	@st.cache_resource
	def _load_speaker_embeddings():
	    """Return the first validation x-vector with a leading batch dimension."""
	    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
	    # Index the row first so only one embedding is materialised, not the
	    # whole "xvector" column.
	    return torch.tensor(xvectors[0]["xvector"]).unsqueeze(0)


	# Define the translation model and tokenizer
	model, tokenizer = _load_translation_pipeline(model_name)

	# Define the TTS model and processor
	tts_model, processor = _load_tts_pipeline(tts_model_name)

	# Load speaker embeddings
	speaker_embeddings = _load_speaker_embeddings()

	# Function to translate text
	def translate_text(text, target_lang):
	    """Translate English *text* with the loaded Marian checkpoint.

	    Args:
	        text: English source text.
	        target_lang: Requested target language code (for example "ur").

	    Returns:
	        The translated string, or "" when *text* is empty or whitespace.
	    """
	    if not text or not text.strip():
	        return ""

	    # Only one checkpoint (model_name) is loaded, so every request is
	    # translated into that checkpoint's language. Say so instead of silently
	    # returning the wrong language.
	    loaded_lang = model_name.rsplit("-", 1)[-1]
	    if target_lang and target_lang != loaded_lang:
	        st.warning(
	            f"Translation to '{target_lang}' is not available; the loaded model "
	            f"({model_name}) translates to '{loaded_lang}'."
	        )

	    # truncation=True keeps over-long input within the model's maximum length.
	    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
	    translated = model.generate(**inputs)
	    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

	# Function to synthesize speech
	@st.cache_resource
	def _load_vocoder():
	    """Load the HiFi-GAN vocoder for SpeechT5 once per server process."""
	    # Local import: the file's top-level transformers import does not
	    # include the vocoder class.
	    from transformers import SpeechT5HifiGan

	    return SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


	def synthesize_speech(text, target_lang):
	    """Synthesize *text* to speech and write it to "output.wav".

	    Args:
	        text: Text to speak.
	        target_lang: Language code of *text*; currently unused.

	    Returns:
	        Path of the written WAV file ("output.wav").
	    """
	    # NOTE(review): the speecht5_tts checkpoint is presumably English-only;
	    # confirm it can voice translated non-Latin text.
	    inputs = processor(text=text, return_tensors="pt")

	    # Without a vocoder, generate_speech returns a mel spectrogram rather
	    # than a waveform, and the written file would not be playable audio.
	    speech = tts_model.generate_speech(
	        inputs["input_ids"], speaker_embeddings, vocoder=_load_vocoder()
	    )

	    # Save the speech to a file
	    output_path = "output.wav"
	    sf.write(output_path, speech.detach().cpu().numpy(), samplerate=16000)

	    # Check if the audio file was generated correctly
	    try:
	        with open(output_path, "rb") as f:
	            audio_data = f.read()
	    except OSError as e:
	        st.error(f"Error reading the audio file: {e}")
	    else:
	        if audio_data:
	            st.success("Audio generated successfully.")
	        else:
	            st.error("Error: The audio file is empty.")

	    return output_path

	# Streamlit UI
	st.title("Language Translator with Speech Synthesis")

	# Input text
	text_input = st.text_input("Enter text in English:")

	# Language selection: display label -> language code passed to the helpers
	language_codes = {
	    "Urdu (ur)": "ur",
	    "Hindi (hi)": "hi",
	    "Bengali (bn)": "bn",
	}
	target_lang = st.selectbox("Select Target Language:", list(language_codes))

	# Translate button
	if st.button("Translate"):
	    # Read the widget value directly. Mirroring it into st.session_state
	    # raised AttributeError when nothing had been typed yet, and could
	    # translate stale text after the box was cleared.
	    source_text = text_input.strip()
	    if not source_text:
	        st.warning("Please enter some text to translate.")
	    else:
	        lang_code = language_codes[target_lang]

	        translated_text = translate_text(source_text, lang_code)
	        st.text_area("Translated text:", value=translated_text, height=100)

	        audio_file = synthesize_speech(translated_text, lang_code)
	        st.audio(audio_file)