Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,69 +1,39 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import pipeline
|
|
|
3 |
import torch
|
4 |
-
from
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
# Model initialization.
#
# Streamlit re-executes this script from top to bottom on every user
# interaction, so the loaders are wrapped in st.cache_resource: the objects are
# built once per server process and reused on subsequent reruns instead of
# being reloaded each time.


@st.cache_resource
def _load_translators():
    """Return the (English->Urdu, English->Hindi) translation pipelines."""
    return (
        pipeline("translation_en_to_ur", model="Helsinki-NLP/opus-mt-en-ur"),
        pipeline("translation_en_to_hi", model="Helsinki-NLP/opus-mt-en-hi"),
    )


@st.cache_resource
def _load_tts_components():
    """Return (processor, TTS model, vocoder, x-vector dataset) for SpeechT5."""
    return (
        SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
        SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
        SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),
        load_dataset("Matthijs/cmu-arctic-xvectors", split="validation"),
    )


# Initialize translation models
translator_urdu, translator_hindi = _load_translators()

# Initialize TTS model
processor, tts_model, vocoder, xvectors = _load_tts_components()
|
17 |
-
|
18 |
-
# Function to translate text
|
19 |
-
def translate_text(text, target_lang):
    """Translate English *text* into *target_lang*.

    Args:
        text: The English text to translate.
        target_lang: Either "Urdu" or "Hindi".

    Returns:
        A ``(translated_text, error)`` tuple. On success ``error`` is ``None``;
        on failure ``translated_text`` is ``None`` and ``error`` is a
        user-facing message.
    """
    if target_lang not in ("Urdu", "Hindi"):
        return None, "Error: Target language not supported."

    # Do not send empty or whitespace-only input to the model.
    if not text or not text.strip():
        return None, "Error: Please enter some text to translate."

    # The module-level pipelines are looked up only after validation succeeded.
    translator = translator_urdu if target_lang == "Urdu" else translator_hindi
    translated = translator(text)
    return translated[0]['translation_text'], None
|
27 |
-
|
28 |
-
# Function to synthesize speech
|
29 |
-
def _waveform_to_wav(waveform, sample_rate=16000):
    """Encode a mono float waveform tensor as 16-bit PCM WAV in a BytesIO."""
    import io
    import wave

    # Scale to the int16 range; clamp first so out-of-range samples do not wrap.
    pcm = (waveform.detach().cpu().clamp(-1.0, 1.0) * 32767.0).to(torch.int16)

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # bytes per sample (16-bit)
        wav_file.setframerate(sample_rate)
        # WAV sample data is little-endian.
        wav_file.writeframes(pcm.numpy().astype("<i2").tobytes())
    buffer.seek(0)
    return buffer


def synthesize_speech(text, target_lang):
    """Synthesize speech for *text* and return it as an in-memory WAV file.

    Args:
        text: The text to speak.
        target_lang: Either "Urdu" or "Hindi".

    Returns:
        An ``(audio_io, error)`` tuple. On success ``audio_io`` is a
        ``BytesIO`` positioned at offset 0 holding a WAV file and ``error`` is
        ``None``; on failure ``audio_io`` is ``None`` and ``error`` is a
        user-facing message.
    """
    if target_lang not in ("Urdu", "Hindi"):
        return None, "Error: TTS model not available for the selected language."
    if not text or not text.strip():
        return None, "Error: No text provided for speech synthesis."

    # The text is passed by keyword, as in the SpeechT5 usage examples.
    inputs = processor(text=text, return_tensors="pt")
    speaker_embedding = torch.tensor(xvectors[0]["xvector"]).unsqueeze(0)
    speech = tts_model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)

    # Previously the raw sample bytes were written with no WAV header, so the
    # buffer was not a playable audio/wav file. 16000 Hz is the rate used in
    # the SpeechT5 examples -- NOTE(review): confirm for this vocoder.
    return _waveform_to_wav(speech, sample_rate=16000), None
|
43 |
-
|
44 |
-
# Streamlit UI
|
45 |
-
def _stash_and_clear_input():
    """Button callback: remember the submitted text, then clear the input box.

    Streamlit raises an exception when a widget's session-state key is assigned
    after that widget has been instantiated in the current run. Callbacks run
    before the script is re-executed, so the input is cleared here.
    """
    st.session_state["submitted_text"] = st.session_state["input_text"]
    st.session_state["input_text"] = ""


st.title("Language Translator")

# Text input
text_to_translate = st.text_input("Enter text in English", value="", key="input_text")

# Language selection
target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi"])

if st.button("Translate", on_click=_stash_and_clear_input):
    # The widget was cleared by the callback, so use the stashed copy.
    submitted_text = st.session_state.get("submitted_text", "")

    # Translation
    translated_text, error = translate_text(submitted_text, target_language)
    if error:
        st.error(error)
    else:
        st.write(f"Translated text ({target_language}): {translated_text}")

        # Text-to-Speech (only attempted when the translation succeeded)
        audio_file, tts_error = synthesize_speech(translated_text, target_language)
        if tts_error:
            st.error(tts_error)
        else:
            st.audio(audio_file, format='audio/wav')
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import pipeline
|
3 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
4 |
import torch
|
5 |
+
from io import BytesIO
|
6 |
+
# Text/Audio translator app: take English text (typed or transcribed from an
# uploaded audio file), translate it, and read the translation aloud.
#
# Each widget is created exactly once; creating the same widget/key twice in a
# single run makes Streamlit raise a duplicate-widget error.


@st.cache_resource
def _load_asr():
    """Return the speech-recognition pipeline (built once, reused across reruns)."""
    return pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-960h")


@st.cache_resource
def _load_translator():
    """Return the translation pipeline (built once, reused across reruns)."""
    return pipeline("translation_en_to_fr")  # Change to the desired language pair


@st.cache_resource
def _load_tts():
    """Return the text-to-speech pipeline (built once, reused across reruns)."""
    return pipeline("text-to-speech", model="microsoft/speecht5_tts")


st.title("Text/Audio Translator")

st.text_area("Enter text in English:", key="text_input")
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])

# Speech-to-Text
# Default to an empty transcription so the name is bound when nothing is uploaded.
transcription = ""
if audio_file is not None:
    # The upload is an encoded audio file, not a sample array, so the raw bytes
    # go to the ASR pipeline, which decodes them itself (needs ffmpeg installed).
    transcription = _load_asr()(audio_file.read())["text"]
    st.write("Transcription:", transcription)

# Translation
translated_text = None
source_text = (st.session_state.text_input or transcription).strip()
if source_text:
    translated_text = _load_translator()(source_text)
    st.write("Translated Text:", translated_text[0]['translation_text'])

# Text-to-Speech
if translated_text:
    # NOTE(review): SpeechT5 may require speaker embeddings passed through
    # forward_params={"speaker_embeddings": ...} -- confirm against the
    # installed transformers version.
    tts_audio = _load_tts()(translated_text[0]['translation_text'])
    # For a single input the pipeline returns a dict with the waveform and its
    # sampling rate, not a list.
    st.audio(tts_audio["audio"], sample_rate=tts_audio["sampling_rate"])
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|