fargerm committed on
Commit 9bc5a61
1 Parent(s): 9cd0c5f

Update app.py

Files changed (1)
  1. app.py +50 -44
app.py CHANGED
@@ -1,40 +1,49 @@
  import streamlit as st
- from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor
- import torch
- from io import BytesIO
- import soundfile as sf
-
- # Title
- st.title("Text/Audio Translator")
-
- # Text Input
- text_input = st.text_area("Enter text in English:")
-
- # Audio Input
- audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
-
- # Initialize variables
- transcription = ""
- translated_text = ""
-
- # Speech-to-Text Conversion
- if audio_file is not None:
-     st.write("Processing audio file...")
-     # Load Wav2Vec2 model for speech-to-text
-     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
-     model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
-
-     # Read and process the audio input
-     audio_input, _ = sf.read(BytesIO(audio_file.read()))
-     input_values = processor(torch.tensor(audio_input), return_tensors="pt", padding="longest").input_values
-
-     # Perform speech-to-text
-     logits = model(input_values).logits
-     predicted_ids = torch.argmax(logits, dim=-1)
-     transcription = processor.decode(predicted_ids[0])
      st.write("Transcription:", transcription)

  # Language Translation
  if text_input or transcription:
      st.write("Translating text...")
      # Select the language to translate into
@@ -45,33 +54,30 @@ if text_input or transcription:

      # Choose the correct translation model based on the selected language
      if target_language == "French":
-         translator = pipeline("translation_en_to_fr")
      elif target_language == "Chinese":
-         translator = pipeline("translation_en_to_zh")
      elif target_language == "Italian":
-         translator = pipeline("translation_en_to_it")
      elif target_language == "Urdu":
-         translator = pipeline("translation_en_to_ur")  # Make sure you have the correct model for this
      elif target_language == "Hindi":
-         translator = pipeline("translation_en_to_hi")  # Make sure you have the correct model for this
      elif target_language == "Punjabi":
-         translator = pipeline("translation_en_to_pa")  # Make sure you have the correct model for this
      elif target_language == "Saraiki":
-         translator = pipeline("translation_en_to_skr")  # Custom model for Saraiki, if available
      elif target_language == "Pashto":
-         translator = pipeline("translation_en_to_ps")  # Custom model for Pashto, if available
-
-     # Translate the text
-     translated_text = translator(text_input or transcription)
-     st.write("Translated Text:", translated_text[0]['translation_text'])

  # Text-to-Speech
- if translated_text:
      st.write("Generating speech...")
      tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
      tts_audio = tts(translated_text[0]['translation_text'])[0]
      st.audio(tts_audio, format="audio/wav")
-
- # Instructions for deployment on Hugging Face Spaces (not part of the app code)
- st.write("Deploy this app on Hugging Face Spaces by pushing this code to your repository.")
-
  import streamlit as st
+ import whisper
+ import numpy as np
+ import sounddevice as sd
+ import tempfile
+ import os
+ import io

+ # Load Whisper model
+ model = whisper.load_model("base")  # You can choose a larger model if needed

+ # Real-time audio recording
+ def record_audio(duration=5, samplerate=16000):
+     st.write("Recording...")
+     audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype='int16')
+     sd.wait()
+     return audio.flatten()

+ # Convert recorded audio to WAV format
+ def audio_to_wav(audio, samplerate=16000):
+     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+     with open(temp_file.name, 'wb') as f:
+         f.write(audio)
+     return temp_file.name

+ # Function to transcribe audio using Whisper
+ def transcribe_audio(audio):
+     result = model.transcribe(audio)
+     return result['text']

+ # Streamlit interface
+ st.title("Text/Audio Translator")

+ # Recording button
+ if st.button("Record"):
+     audio = record_audio(duration=5)  # Record for 5 seconds
+     st.write("Processing audio...")
+     wav_path = audio_to_wav(audio)
+     transcription = transcribe_audio(wav_path)
      st.write("Transcription:", transcription)

+ # Text Input
+ text_input = st.text_area("Or enter text in English:")
+
  # Language Translation
+ translator = None
  if text_input or transcription:
      st.write("Translating text...")
      # Select the language to translate into

      # Choose the correct translation model based on the selected language
      if target_language == "French":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
      elif target_language == "Chinese":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh")
      elif target_language == "Italian":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-it")
      elif target_language == "Urdu":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
      elif target_language == "Hindi":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
      elif target_language == "Punjabi":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-pa")
      elif target_language == "Saraiki":
+         st.write("Saraiki model not available.")
      elif target_language == "Pashto":
+         st.write("Pashto model not available.")
+
+     if translator:
+         text = text_input or transcription
+         translated_text = translator(text)
+         st.write("Translated Text:", translated_text[0]['translation_text'])

  # Text-to-Speech
+ if translator:
      st.write("Generating speech...")
      tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
      tts_audio = tts(translated_text[0]['translation_text'])[0]
      st.audio(tts_audio, format="audio/wav")
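Review note: the new version still calls pipeline() for translation and text-to-speech, but the transformers import was dropped, and transcription is only assigned inside the Record branch, so `if text_input or transcription:` raises a NameError on a fresh page load. A minimal sketch of the missing setup, assuming the rest of app.py stays as committed:

import streamlit as st
import whisper
from transformers import pipeline  # still needed for the translation and text-to-speech pipelines

model = whisper.load_model("base")

# Give transcription a default so the later `if text_input or transcription:`
# check works even before the Record button has been pressed.
transcription = ""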
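Review note: audio_to_wav() writes raw int16 samples into a .wav file with no RIFF/WAVE header, so ffmpeg (which Whisper uses to load files) may refuse to read it. A sketch of a header-writing replacement using the standard-library wave module; it keeps the committed function name and the 16 kHz mono int16 format produced by record_audio(), and is an illustration rather than the author's fix:

import tempfile
import wave
import numpy as np

def audio_to_wav(audio: np.ndarray, samplerate: int = 16000) -> str:
    """Write mono int16 samples to a valid WAV file and return its path."""
    path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)        # mono, as recorded by record_audio()
        wav_file.setsampwidth(2)        # int16 -> 2 bytes per sample
        wav_file.setframerate(samplerate)
        wav_file.writeframes(audio.tobytes())
    return path

Alternatively, Whisper's transcribe() also accepts a float32 waveform directly, so the temporary file could be skipped with model.transcribe(audio.astype(np.float32) / 32768.0).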
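Review note: the text-to-speech block is carried over unchanged and indexes the pipeline result with [0] before handing it to st.audio as if it were WAV bytes. In transformers, the text-to-speech pipeline returns a dict containing an audio array and a sampling rate, and microsoft/speecht5_tts additionally expects speaker embeddings. A hedged sketch of that step; the xvector dataset and index follow the SpeechT5 model card and are assumptions here, as is a Streamlit version new enough to accept st.audio(..., sample_rate=...):

import torch
import streamlit as st
from datasets import load_dataset
from transformers import pipeline

tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")

# SpeechT5 needs a speaker embedding; this dataset/index mirrors the model card example (assumption).
xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)

speech = tts(
    "Bonjour le monde",  # in the app this would be translated_text[0]['translation_text']
    forward_params={"speaker_embeddings": speaker_embedding},
)
# The pipeline returns {"audio": np.ndarray, "sampling_rate": int}.
st.audio(speech["audio"], sample_rate=speech["sampling_rate"])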