fargerm committed on
Commit
5abc527
1 Parent(s): f199d8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -16
app.py CHANGED
@@ -1,22 +1,69 @@
1
  import streamlit as st
2
- import whisper
 
 
3
 
4
- # Function to transcribe uploaded audio file
5
def transcribe_audio(uploaded_file):
    """Transcribe an audio source with the Whisper "base" model.

    Args:
        uploaded_file: Either something Whisper accepts directly (a file
            path, or an already-decoded array/tensor) or a binary file-like
            object such as the value returned by ``st.file_uploader``.

    Returns:
        The transcribed text (the ``'text'`` entry of Whisper's result).
    """
    import os
    import tempfile

    model = whisper.load_model("base")

    # Paths and decoded audio are passed through unchanged.
    if not hasattr(uploaded_file, "read"):
        return model.transcribe(uploaded_file)["text"]

    # Whisper cannot read from a file-like object, so spool the upload to a
    # real file first. Keep the original extension when the object exposes a
    # name, so the decoder can still recognise the container format.
    suffix = os.path.splitext(getattr(uploaded_file, "name", "") or "")[1]
    if hasattr(uploaded_file, "getvalue"):
        payload = uploaded_file.getvalue()
    else:
        payload = uploaded_file.read()

    # delete=False + explicit removal: the file must be closed before another
    # reader opens it by name on platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(payload)
        tmp_path = tmp.name
    try:
        return model.transcribe(tmp_path)["text"]
    finally:
        os.remove(tmp_path)
9
 
10
# Streamlit App Interface: page heading followed by a one-line usage hint.
st.title("Speech-to-Text Transcription")
st.write("Upload an audio file to get the transcription.")

# Upload audio file
uploaded_file = st.file_uploader(
    "Upload an audio file",
    type=["wav", "mp3", "ogg"],
)

# Only transcribe once a file has actually been provided.
if uploaded_file is not None:
    st.write("Transcribing audio...")
    transcription = transcribe_audio(uploaded_file)
    for line in ("Transcription:", transcription):
        st.write(line)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+ import soundfile as sf
5
 
6
# Translation checkpoints are published per language pair, so this is a
# template: it must be formatted with a target language code (e.g. "fr")
# before it names a loadable model.
translator_model_name = "Helsinki-NLP/opus-mt-en-{target_lang}"

# The previous eager `pipeline("translation", model=translator_model_name)`
# tried to load the unformatted template as a checkpoint name and failed at
# start-up. There is no language-agnostic pipeline to build here; the name is
# kept bound only so existing references do not raise NameError.
translator = None

# SpeechT5 needs two checkpoints: the text-to-spectrogram model and the
# HiFi-GAN vocoder that converts the spectrogram into a waveform.
tts_model_name = "microsoft/speecht5_tts"
tts_vocoder_name = "microsoft/speecht5_hifigan"


@st.cache_resource
def _load_tts_components():
    """Load the SpeechT5 processor, model and vocoder once and reuse them."""
    # Imported here so the block does not depend on the file-level import
    # line being edited; `transformers` itself is already a dependency.
    from transformers import (
        SpeechT5ForTextToSpeech,
        SpeechT5HifiGan,
        SpeechT5Processor,
    )

    processor = SpeechT5Processor.from_pretrained(tts_model_name)
    model = SpeechT5ForTextToSpeech.from_pretrained(tts_model_name)
    hifigan = SpeechT5HifiGan.from_pretrained(tts_vocoder_name)
    return processor, model, hifigan


# Module-level names are preserved; `tts_tokenizer` is now the SpeechT5
# processor, which is called the same way as a tokenizer.
tts_tokenizer, tts_model, vocoder = _load_tts_components()


@st.cache_resource
def _get_translation_pipeline(target_lang):
    """Build (once per language code) the English -> target_lang pipeline."""
    return pipeline(
        "translation_en_to_" + target_lang,
        model=translator_model_name.format(target_lang=target_lang),
    )


def translate_text(text, target_lang):
    """Translate English text into the given target language.

    Args:
        text: English source text.
        target_lang: Language code substituted into
            ``translator_model_name`` (e.g. ``"fr"``).

    Returns:
        The translated string, or ``""`` when ``text`` is empty or blank.

    Raises:
        OSError: Propagated from ``transformers`` when no checkpoint exists
            for the requested language pair.
    """
    if not text or not text.strip():
        return ""
    translation_pipeline = _get_translation_pipeline(target_lang)
    return translation_pipeline(text)[0]["translation_text"]


def text_to_speech(text, target_lang):
    """Synthesize speech for ``text`` with SpeechT5.

    Args:
        text: Text to speak.
        target_lang: Accepted for interface compatibility; currently unused.
            NOTE(review): the speecht5_tts checkpoint appears to be
            English-only, so non-English text may not be pronounced
            correctly - confirm whether a multilingual TTS model is needed.

    Returns:
        A 1-D ``torch.Tensor`` waveform produced by the vocoder.
    """
    inputs = tts_tokenizer(text=text, return_tensors="pt")
    # SpeechT5 requires a speaker embedding. No x-vector is available in this
    # app, so use the neutral all-zeros embedding shown in the model docs.
    speaker_embeddings = torch.zeros((1, 512))
    return tts_model.generate_speech(
        inputs["input_ids"],
        speaker_embeddings,
        vocoder=vocoder,
    )
28
 
29
+ # Function to save audio to file
30
def save_audio(speech_audio, file_name, sample_rate=16000):
    """Write a waveform to ``file_name`` and return that path.

    Args:
        speech_audio: Waveform as a ``torch.Tensor`` or an array that
            ``soundfile`` can write directly.
        file_name: Destination path; the format is chosen by ``soundfile``.
        sample_rate: Sampling rate in Hz. Defaults to 16000, the value that
            was previously hard-coded.

    Returns:
        ``file_name``, so the result can be passed straight to a player.
    """
    # Tensors that require grad or live on another device cannot be converted
    # with a bare `.numpy()`; arrays have no `.numpy()` at all.
    if hasattr(speech_audio, "detach"):
        speech_audio = speech_audio.detach().cpu().numpy()
    sf.write(file_name, speech_audio, sample_rate)
    return file_name
33
+
34
# Streamlit UI layout
st.title("TextLangAudioGenerator")

# Text input
text_input = st.text_area("Enter your text in English:")

# Language selection dropdown: display name -> language code that is
# substituted into the translation checkpoint name.
# NOTE(review): not every code below necessarily has a published
# "opus-mt-en-<code>" checkpoint - verify each entry.
languages = {
    "French": "fr",
    "Chinese": "zh",
    "Italian": "it",
    "Urdu": "ur",
    "Hindi": "hi",
    "Punjabi": "pa",
    "Pashto": "ps",
}
target_lang = st.selectbox("Select target language:", list(languages))

if st.button("Translate and Generate Audio"):
    if text_input and text_input.strip():
        # The helpers work with language codes, not the display names shown
        # in the dropdown.
        lang_code = languages[target_lang]
        try:
            translated_text = translate_text(text_input, lang_code)
        except OSError as err:
            # Raised when the translation checkpoint cannot be loaded.
            st.error(
                f"Could not load a translation model for {target_lang}: {err}"
            )
        else:
            st.write(f"Translated Text ({target_lang}): {translated_text}")

            # Generate speech from translated text
            speech_audio = text_to_speech(translated_text, lang_code)

            # Save and play audio
            audio_file = save_audio(speech_audio, 'output.wav')
            st.audio(audio_file)
        # The former `text_input = ""` was dropped: rebinding the local name
        # does not clear the text area widget.
    else:
        st.warning("Please enter some text to translate.")

# Footer
st.write("Powered by Hugging Face Transformers and SpeechT5 TTS")