# Hugging Face Spaces page header (scrape residue): "Spaces: Sleeping"
import io
import os
import tempfile
import wave

import numpy as np
import sounddevice as sd
import streamlit as st
import whisper
from transformers import pipeline
# Whisper speech-recognition model shared by transcribe_audio().
# "base" is the checkpoint chosen here; a larger one can be substituted if needed.
model = whisper.load_model("base")
# Real-time audio recording
def record_audio(duration=5, samplerate=16000):
    """Record mono 16-bit audio through sounddevice and return it flattened.

    Args:
        duration: Length of the recording in seconds.
        samplerate: Sampling rate in Hz passed to ``sd.rec``.

    Returns:
        The recorded samples as a one-dimensional array (``dtype='int16'``
        is requested from ``sd.rec``).
    """
    st.write("Recording...")
    frame_count = int(duration * samplerate)
    recording = sd.rec(
        frame_count,
        samplerate=samplerate,
        channels=1,
        dtype='int16',
    )
    # Wait for the capture to complete before touching the buffer.
    sd.wait()
    return recording.flatten()
# Convert recorded audio to WAV format
def audio_to_wav(audio, samplerate=16000):
    """Write *audio* to a temporary mono 16-bit PCM WAV file and return its path.

    The previous implementation dumped the raw sample bytes into a file that
    merely had a ``.wav`` suffix: no RIFF/WAVE header was written, the
    ``samplerate`` argument was ignored, and the ``NamedTemporaryFile`` handle
    was never closed. This version writes a real WAV container.

    Args:
        audio: Samples to store. Either a bytes-like object that already holds
            little-endian 16-bit PCM, or an array-like of samples. Floating
            point samples are treated as being in ``[-1.0, 1.0]`` and scaled
            to 16-bit; any other dtype is cast to 16-bit integers. Arrays are
            flattened and stored as a single channel.
        samplerate: Sampling rate in Hz recorded in the WAV header.

    Returns:
        Path of the created ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    if isinstance(audio, (bytes, bytearray, memoryview)):
        # Already raw PCM bytes: keep accepting what the old code accepted.
        pcm_bytes = bytes(audio)
    else:
        samples = np.asarray(audio).reshape(-1)
        if np.issubdtype(samples.dtype, np.floating):
            # Clip first so out-of-range floats cannot wrap around in int16.
            samples = np.clip(samples, -1.0, 1.0) * 32767
        # WAV PCM data is little-endian regardless of the host byte order.
        pcm_bytes = samples.astype('<i2').tobytes()

    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
        with wave.open(temp_file, 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
            wav_file.setframerate(int(samplerate))
            wav_file.writeframes(pcm_bytes)
        return temp_file.name
# Function to transcribe audio using Whisper
def transcribe_audio(audio):
    """Transcribe *audio* with the module-level Whisper ``model``.

    Args:
        audio: Input handed straight to ``model.transcribe`` (the script
            passes the path of a WAV file).

    Returns:
        The value stored under the ``'text'`` key of the transcription result.
    """
    return model.transcribe(audio)['text']
# Streamlit interface
#
# Streamlit re-executes this script from the top on every widget interaction,
# so anything that must survive a rerun (the last transcription) is kept in
# st.session_state instead of a plain variable.

# Translation checkpoint for each language offered in the selectbox.
# None marks a language that is listed but has no model configured.
TRANSLATION_MODELS = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Punjabi": "Helsinki-NLP/opus-mt-en-pa",
    "Saraiki": None,
    "Pashto": None,
}

st.title("Text/Audio Translator")

# Recording button
if st.button("Record"):
    audio = record_audio(duration=5)  # Record for 5 seconds
    st.write("Processing audio...")
    wav_path = audio_to_wav(audio)
    try:
        st.session_state["transcription"] = transcribe_audio(wav_path)
    finally:
        # audio_to_wav() creates the file with delete=False; clean it up here.
        os.remove(wav_path)

# Default to "" so the check below never hits an undefined name when the
# Record button has not been pressed (previously a NameError).
transcription = st.session_state.get("transcription", "")
if transcription:
    st.write("Transcription:", transcription)

# Text Input
text_input = st.text_area("Or enter text in English:")

# Language Translation
translator = None
# Typed text takes precedence over the recorded transcription.
text = text_input or transcription
if text:
    st.write("Translating text...")
    # Select the language to translate into
    target_language = st.selectbox(
        "Select target language:",
        list(TRANSLATION_MODELS),
    )
    # Choose the correct translation model based on the selected language
    model_name = TRANSLATION_MODELS[target_language]
    if model_name is None:
        st.write(f"{target_language} model not available.")
    else:
        translator = pipeline("translation", model=model_name)

    if translator:
        translated_text = translator(text)[0]['translation_text']
        st.write("Translated Text:", translated_text)

        # Text-to-Speech
        st.write("Generating speech...")
        tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
        # For a single string the pipeline returns one dict holding the
        # waveform and its sampling rate, not a list, so it must not be
        # indexed with [0].
        # NOTE(review): SpeechT5 normally expects speaker embeddings via
        # forward_params and is an English voice - confirm this call works
        # for the selected target languages.
        speech = tts(translated_text)
        # Raw sample arrays need an explicit sample rate for playback.
        st.audio(speech["audio"], sample_rate=speech["sampling_rate"])