fargerm's picture
Update app.py
9bc5a61 verified
raw
history blame
2.95 kB
import streamlit as st
import whisper
import numpy as np
import sounddevice as sd
import tempfile
import os
import io
# Load Whisper model
@st.cache_resource
def _load_whisper_model(name="base"):
    """Load a Whisper checkpoint once and share it across Streamlit reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; without caching, the checkpoint would be reloaded each time.

    Args:
        name: Whisper checkpoint name passed to ``whisper.load_model``.

    Returns:
        The loaded Whisper model object.
    """
    return whisper.load_model(name)


model = _load_whisper_model("base")  # You can choose a larger model if needed
# Real-time audio recording
def record_audio(duration=5, samplerate=16000):
    """Record a single-channel, 16-bit clip through ``sounddevice``.

    Args:
        duration: Length of the recording in seconds.
        samplerate: Sampling frequency in Hz.

    Returns:
        The recorded samples flattened to a one-dimensional array.
    """
    st.write("Recording...")
    frame_count = int(duration * samplerate)
    recording = sd.rec(
        frame_count,
        samplerate=samplerate,
        channels=1,
        dtype='int16',
    )
    # sd.rec is started above; wait for it to finish before reading the buffer.
    sd.wait()
    return recording.flatten()
# Convert recorded audio to WAV format
def audio_to_wav(audio, samplerate=16000):
    """Write mono audio samples to a temporary, valid 16-bit PCM WAV file.

    The previous implementation dumped the raw sample bytes into a file named
    ``*.wav`` without a RIFF/WAVE header (and ignored ``samplerate``), so the
    result was not a decodable WAV file. It also left the temporary file
    handle open.

    Args:
        audio: Array-like of samples. Integer input is stored as int16;
            floating-point input is assumed to be normalised to [-1.0, 1.0]
            and is scaled to the int16 range.
        samplerate: Sampling frequency in Hz written into the WAV header.

    Returns:
        Path of the created file. The file is created with ``delete=False``,
        so the caller is responsible for removing it.
    """
    import wave  # local import: only this function needs it

    samples = np.asarray(audio)
    if np.issubdtype(samples.dtype, np.floating):
        # Convert normalised floats to the 16-bit range instead of truncating
        # every sample to 0.
        samples = np.clip(samples, -1.0, 1.0) * 32767
    # WAV PCM data is little-endian; flatten so (n, 1) recordings work too.
    pcm = samples.astype('<i2').reshape(-1)

    # Reserve a unique path, then close the handle right away so the file can
    # be reopened by name on every platform and no descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
        wav_path = temp_file.name

    with wave.open(wav_path, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample == int16
        wav_file.setframerate(int(samplerate))
        wav_file.writeframes(pcm.tobytes())
    return wav_path
# Function to transcribe audio using Whisper
def transcribe_audio(audio):
    """Run the module-level Whisper ``model`` on ``audio`` and return the text.

    Args:
        audio: Input forwarded unchanged to ``model.transcribe``; the caller
            in this file passes a path to a WAV file.

    Returns:
        The value stored under the ``'text'`` key of the transcription result.
    """
    return model.transcribe(audio)['text']
# Streamlit interface
# `pipeline` was used below without ever being imported (NameError at runtime).
from transformers import pipeline

# UI language name -> translation checkpoint. ``None`` marks languages that are
# offered in the selector but have no model wired up. The dict order is the
# order shown in the select box.
# NOTE(review): confirm every checkpoint (notably "opus-mt-en-pa") exists on
# the Hugging Face Hub.
TRANSLATION_MODELS = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Punjabi": "Helsinki-NLP/opus-mt-en-pa",
    "Saraiki": None,
    "Pashto": None,
}


@st.cache_resource
def _load_pipeline(task, model_name):
    """Build a Hugging Face pipeline once per (task, model) and reuse it on reruns."""
    return pipeline(task, model=model_name)


st.title("Text/Audio Translator")

# st.button() is True only on the rerun triggered by the click. Keep the
# transcription in session state so it survives later reruns (e.g. when the
# target language is changed) and is always defined, even before any recording.
if "transcription" not in st.session_state:
    st.session_state["transcription"] = ""

# Recording button
if st.button("Record"):
    audio = record_audio(duration=5)  # Record for 5 seconds
    st.write("Processing audio...")
    wav_path = audio_to_wav(audio)
    try:
        st.session_state["transcription"] = transcribe_audio(wav_path)
    finally:
        # audio_to_wav creates the file with delete=False; remove it here so
        # temporary recordings do not pile up.
        os.remove(wav_path)

transcription = st.session_state["transcription"]
if transcription:
    st.write("Transcription:", transcription)

# Text Input
text_input = st.text_area("Or enter text in English:")

# Language Translation
translator = None
text = text_input or transcription  # typed text takes precedence over speech
if text:
    st.write("Translating text...")

    # Select the language to translate into
    target_language = st.selectbox(
        "Select target language:",
        list(TRANSLATION_MODELS),
    )

    # Choose the correct translation model based on the selected language
    model_name = TRANSLATION_MODELS[target_language]
    if model_name is None:
        st.write(f"{target_language} model not available.")
    else:
        translator = _load_pipeline("translation", model_name)

    if translator:
        translated_text = translator(text)
        translation = translated_text[0]['translation_text']
        st.write("Translated Text:", translation)

        # Text-to-Speech
        st.write("Generating speech...")
        tts = _load_pipeline("text-to-speech", "microsoft/speecht5_tts")
        # The text-to-speech pipeline returns a dict with "audio" (samples) and
        # "sampling_rate", not a list, so indexing the result with [0] fails.
        # NOTE(review): SpeechT5 is normally driven with explicit speaker
        # embeddings and English text; verify output quality for the
        # translated languages.
        speech = tts(translation)
        st.audio(speech["audio"], sample_rate=speech["sampling_rate"])