Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,73 +1,56 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import MarianMTModel, MarianTokenizer,
|
3 |
-
from
|
|
|
4 |
import torch
|
5 |
-
from scipy.io.wavfile import write as write_wav
|
6 |
-
from io import BytesIO
|
7 |
|
8 |
-
# Define the language
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
'bn': 'Bengali'
|
13 |
-
}
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
text_to_translate = st.text_area("Enter text to translate:", "My Name is Meer Hassan Farhad, I live in Islamabad")
|
20 |
-
|
21 |
-
# Language selection
|
22 |
-
target_language = st.selectbox("Select target language:", list(languages.keys()))
|
23 |
|
|
|
24 |
def translate_text(text, target_lang):
|
25 |
-
|
26 |
-
|
27 |
-
tokenizer =
|
28 |
-
|
29 |
-
|
30 |
-
# Translate text
|
31 |
-
encoded_text = tokenizer.encode(text, return_tensors="pt")
|
32 |
-
translated = model.generate(encoded_text)
|
33 |
-
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
34 |
return translated_text
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
speaker_embeddings = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
|
44 |
-
|
45 |
-
# Synthesize speech
|
46 |
-
inputs = processor(text=text, return_tensors="pt")
|
47 |
-
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings)
|
48 |
-
|
49 |
-
# Convert speech tensor to numpy array and save as wav
|
50 |
-
audio_np = speech.squeeze().cpu().numpy()
|
51 |
-
sample_rate = 16000 # Define a sample rate
|
52 |
-
audio_buffer = BytesIO()
|
53 |
-
write_wav(audio_buffer, sample_rate, audio_np) # Write the numpy array as a WAV file to the buffer
|
54 |
-
audio_buffer.seek(0)
|
55 |
-
|
56 |
-
return audio_buffer
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
st.write(f"Translated text ({languages[target_language]}): {translated_text}")
|
62 |
-
|
63 |
-
# Perform text-to-speech
|
64 |
-
try:
|
65 |
-
audio_bytes = synthesize_speech(translated_text)
|
66 |
-
st.audio(audio_bytes, format="audio/wav")
|
67 |
-
except Exception as e:
|
68 |
-
st.error(f"Error generating audio: {e}")
|
69 |
|
|
|
|
|
70 |
|
|
|
|
|
71 |
|
|
|
|
|
|
|
72 |
|
|
|
|
|
|
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import MarianMTModel, MarianTokenizer, pipeline
|
3 |
+
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
4 |
+
import soundfile as sf
|
5 |
import torch
|
|
|
|
|
6 |
|
7 |
+
# Define the language model and tokenizer / text-to-speech model.
# Streamlit re-executes this whole script on every widget interaction, so the
# loaders are cached with st.cache_resource — otherwise all four
# from_pretrained() calls would re-run (and potentially re-download) on each
# button click.
@st.cache_resource
def _load_translation_models():
    """Load the English->Urdu MarianMT tokenizer and model once per server process."""
    name = "Helsinki-NLP/opus-mt-en-ur"
    return MarianTokenizer.from_pretrained(name), MarianMTModel.from_pretrained(name)


@st.cache_resource
def _load_tts_models():
    """Load the SpeechT5 processor and text-to-speech model once per server process."""
    name = "microsoft/speecht5_tts"
    return (
        SpeechT5Processor.from_pretrained(name),
        SpeechT5ForTextToSpeech.from_pretrained(name),
    )


# Module-level names kept identical to the original so the rest of the file
# (translate_text, synthesize_speech) continues to work unchanged.
translation_model_name = "Helsinki-NLP/opus-mt-en-ur"
tts_model_name = "microsoft/speecht5_tts"
tokenizer, translation_model = _load_translation_models()
processor, tts_model = _load_tts_models()
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
# Function to translate text
def translate_text(text, target_lang):
    """Translate English *text* to Urdu with the MarianMT en-ur model.

    Parameters:
        text: source text (English).
        target_lang: requested target language name.

    Returns:
        The translated string, "" for blank input, or an "Error: ..." message
        for unsupported target languages.
    """
    # Bug fix: the original accepted "Hindi" and "Bengali" too, but the only
    # loaded model is Helsinki-NLP/opus-mt-en-ur, so those selections silently
    # produced Urdu output labeled as another language. Reject them honestly.
    if target_lang != "Urdu":
        return "Error: Target language not supported."
    # Guard blank input — nothing to translate, and the caller's
    # `if translated_text` check then skips TTS.
    if not text.strip():
        return ""
    tokens = tokenizer(text, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        translated_tokens = translation_model.generate(**tokens)
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
|
25 |
|
26 |
+
# Function to generate speech
def synthesize_speech(text, speaker_embeddings=None):
    """Synthesize *text* to speech with SpeechT5 and write it to output.wav.

    Parameters:
        text: text to speak.
        speaker_embeddings: optional x-vector tensor selecting the voice.

    Returns:
        Path of the written 16 kHz WAV file ("output.wav").

    NOTE(review): SpeechT5 normally requires real speaker embeddings (e.g. an
    x-vector from the cmu-arctic-xvectors dataset, as the pre-edit version of
    this file loaded); passing None may fail or degrade audio — confirm.
    """
    inputs = processor(text=text, return_tensors="pt")
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        # Bug fix: generate_speech expects the input_ids tensor, not the whole
        # BatchEncoding returned by the processor.
        speech = tts_model.generate_speech(inputs["input_ids"], speaker_embeddings)
    # SpeechT5 vocoder output is 16 kHz mono.
    sf.write("output.wav", speech.numpy(), 16000)
    return "output.wav"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
+
# Streamlit app — page header and input widgets (order defines page layout).
st.title("Language Translator with TTS")
st.write("Enter the text you want to translate and hear the translation.")

# Select target language
# NOTE(review): the only translation model loaded above is en->ur; selecting
# Hindi or Bengali will not actually produce those languages — confirm.
target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi", "Bengali"])

# Text input (no `key=` argument, so this widget is not tied to session state)
text_to_translate = st.text_input("Enter text here")
|
43 |
|
44 |
+
if st.button("Translate and Generate Audio"):
    # Bug fix: the original set st.session_state.text_to_translate = "" here
    # to "clear" the input, but the text_input widget above has no key, so the
    # assignment never affects the widget (and with a matching key it would
    # raise a StreamlitAPIException after widget instantiation). Dropped.
    if not text_to_translate.strip():
        # Robustness: don't run the models on empty input.
        st.warning("Please enter some text to translate.")
    else:
        # Perform translation
        translated_text = translate_text(text_to_translate, target_language)
        st.write(f"Translated text ({target_language}): {translated_text}")

        # Generate speech only when translation succeeded
        if translated_text and "Error" not in translated_text:
            speaker_embeddings = None  # Placeholder, use actual embeddings if needed
            audio_file = synthesize_speech(translated_text, speaker_embeddings)
            st.audio(audio_file)
|