fargerm committed on
Commit
ae15bc4
1 Parent(s): c4da995

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -66
app.py CHANGED
@@ -1,69 +1,39 @@
1
  import streamlit as st
2
- from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 
3
  import torch
4
- from transformers import TextToSpeechPipeline
5
- from datasets import load_dataset
6
- import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # Initialize translation models
9
- translator_urdu = pipeline("translation_en_to_ur", model="Helsinki-NLP/opus-mt-en-ur")
10
- translator_hindi = pipeline("translation_en_to_hi", model="Helsinki-NLP/opus-mt-en-hi")
11
-
12
- # Initialize TTS model
13
- processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
14
- tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
15
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
16
- xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
17
-
18
- # Function to translate text
19
- def translate_text(text, target_lang):
20
- if target_lang == "Urdu":
21
- translated = translator_urdu(text)
22
- elif target_lang == "Hindi":
23
- translated = translator_hindi(text)
24
- else:
25
- return None, "Error: Target language not supported."
26
- return translated[0]['translation_text'], None
27
-
28
- # Function to synthesize speech
29
- def synthesize_speech(text, target_lang):
30
- if target_lang not in ["Urdu", "Hindi"]:
31
- return None, "Error: TTS model not available for the selected language."
32
-
33
- inputs = processor(text, return_tensors="pt")
34
- speaker_embedding = torch.tensor(xvectors[0]["xvector"]).unsqueeze(0)
35
- speech = tts_model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)
36
-
37
- # Convert the speech to an in-memory WAV file using io.BytesIO
38
- audio_io = io.BytesIO()
39
- audio_io.write(speech.numpy().tobytes())
40
- audio_io.seek(0)
41
-
42
- return audio_io, None
43
-
44
- # Streamlit UI
45
- st.title("Language Translator")
46
-
47
- # Text input
48
- text_to_translate = st.text_input("Enter text in English", value="", key="input_text")
49
-
50
- # Language selection
51
- target_language = st.selectbox("Select Target Language", ["Urdu", "Hindi"])
52
-
53
- if st.button("Translate"):
54
- # Clear previous input
55
- st.session_state.input_text = ""
56
-
57
- # Translation
58
- translated_text, error = translate_text(text_to_translate, target_language)
59
- if error:
60
- st.error(error)
61
- else:
62
- st.write(f"Translated text ({target_language}): {translated_text}")
63
-
64
- # Text-to-Speech
65
- audio_file, tts_error = synthesize_speech(translated_text, target_language)
66
- if tts_error:
67
- st.error(tts_error)
68
- else:
69
- st.audio(audio_file, format='audio/wav')
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  import torch
5
+ from io import BytesIO
6
+ st.text_area("Enter text in English:", key="text_input")
7
+ audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
8
+ if audio_file is not None:
9
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
10
+ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
11
+ audio_input = audio_file.read()
12
+ input_values = processor(torch.tensor(audio_input), return_tensors="pt", padding="longest").input_values
13
+ logits = model(input_values).logits
14
+ predicted_ids = torch.argmax(logits, dim=-1)
15
+ transcription = processor.decode(predicted_ids[0])
16
+ st.write("Transcription:", transcription)
17
+ translator = pipeline("translation_en_to_fr") # Change to the desired language pair
18
+ translated_text = translator(st.session_state.text_input or transcription)
19
+ st.write("Translated Text:", translated_text[0]['translation_text'])
20
+ tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
21
+ tts_audio = tts(translated_text[0]['translation_text'])[0]
22
+ st.audio(tts_audio, format="audio/wav")
23
+ st.title("Text/Audio Translator")
24
+
25
+ st.text_area("Enter text in English:", key="text_input")
26
+ audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
27
+
28
+ # Speech-to-Text
29
+ if audio_file is not None:
30
+ # Wav2Vec2 model processing here
31
+
32
+ # Translation
33
+ if st.session_state.text_input or transcription:
34
+ # Translation code here
35
+
36
+ # Text-to-Speech
37
+ if translated_text:
38
+ # TTS code here
39