Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,39 +1,77 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import pipeline
|
3 |
-
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
4 |
import torch
|
5 |
from io import BytesIO
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
if audio_file is not None:
|
|
|
|
|
9 |
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
|
10 |
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
|
11 |
-
|
|
|
|
|
12 |
input_values = processor(torch.tensor(audio_input), return_tensors="pt", padding="longest").input_values
|
|
|
|
|
13 |
logits = model(input_values).logits
|
14 |
predicted_ids = torch.argmax(logits, dim=-1)
|
15 |
transcription = processor.decode(predicted_ids[0])
|
16 |
st.write("Transcription:", transcription)
|
17 |
-
translator = pipeline("translation_en_to_fr") # Change to the desired language pair
|
18 |
-
translated_text = translator(st.session_state.text_input or transcription)
|
19 |
-
st.write("Translated Text:", translated_text[0]['translation_text'])
|
20 |
-
tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
|
21 |
-
tts_audio = tts(translated_text[0]['translation_text'])[0]
|
22 |
-
st.audio(tts_audio, format="audio/wav")
|
23 |
-
st.title("Text/Audio Translator")
|
24 |
-
|
25 |
-
st.text_area("Enter text in English:", key="text_input")
|
26 |
-
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
|
27 |
|
28 |
-
#
|
29 |
-
if
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
#
|
33 |
-
if
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
# Text-to-Speech
|
37 |
if translated_text:
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
|
|
import streamlit as st
from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
from io import BytesIO
import soundfile as sf

# Streamlit app: takes English text (or an uploaded audio clip, which is
# transcribed first), translates it into a user-selected language, and
# speaks the translation aloud.

st.title("Text/Audio Translator")

# --- Inputs -----------------------------------------------------------------
text_input = st.text_area("Enter text in English:")
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])

transcription = ""
translated_text = ""  # becomes the pipeline's list-of-dicts once translated

# --- Speech-to-Text ---------------------------------------------------------
if audio_file is not None:
    st.write("Processing audio file...")
    # Wav2Vec2 large model fine-tuned on 960h of LibriSpeech (English ASR).
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

    # NOTE(review): wav2vec2-large-960h expects 16 kHz mono input; uploads at
    # other sample rates should be resampled before inference — TODO confirm.
    audio_input, sample_rate = sf.read(BytesIO(audio_file.read()))
    input_values = processor(
        torch.tensor(audio_input), return_tensors="pt", padding="longest"
    ).input_values

    # Inference only — disable autograd to save memory.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    st.write("Transcription:", transcription)

# --- Translation ------------------------------------------------------------
# Dispatch table replaces the original if/elif ladder: maps each display name
# to its transformers translation task and guarantees `translator` is never
# left undefined for an unmatched selection.
TRANSLATION_TASKS = {
    "French": "translation_en_to_fr",
    "Chinese": "translation_en_to_zh",
    "Italian": "translation_en_to_it",
    "Urdu": "translation_en_to_ur",
    "Hindi": "translation_en_to_hi",
    "Punjabi": "translation_en_to_pa",
    "Saraiki": "translation_en_to_skr",  # custom model required, if available
    "Pashto": "translation_en_to_ps",    # custom model required, if available
}

if text_input or transcription:
    st.write("Translating text...")
    target_language = st.selectbox(
        "Select target language:",
        list(TRANSLATION_TASKS),
    )
    try:
        # Most of these pairs have no default model in transformers; loading
        # can fail, so surface the error instead of crashing the whole app.
        translator = pipeline(TRANSLATION_TASKS[target_language])
        # Typed text takes priority over the audio transcription when both exist.
        translated_text = translator(text_input or transcription)
        st.write("Translated Text:", translated_text[0]['translation_text'])
    except Exception as exc:  # model-loading/inference errors vary by backend
        st.error(f"Translation to {target_language} is unavailable: {exc}")
        translated_text = ""

# --- Text-to-Speech ---------------------------------------------------------
if translated_text:
    st.write("Generating speech...")
    tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
    # Fix: the TTS pipeline returns a dict {"audio": ndarray, "sampling_rate":
    # int}; the original indexed it with [0], which raises KeyError on a dict,
    # and passed no sample rate to st.audio.
    speech = tts(translated_text[0]['translation_text'])
    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])

# Instructions for deployment on Hugging Face Spaces (not part of the app logic)
st.write("Deploy this app on Hugging Face Spaces by pushing this code to your repository.")