|
import streamlit as st |
|
import speech_recognition as sr |
|
from deep_translator import GoogleTranslator |
|
from pydub import AudioSegment |
|
from io import BytesIO |
|
import tempfile |
|
|
|
|
|
# Page heading.
st.title("Speech-to-Text with Translation to English")

# Single recognizer instance reused by the recognition code further down.
recognizer = sr.Recognizer()

# Display name -> language code passed to the recognizer.
language_options = {"English": "en", "Hindi": "hi"}
input_language = st.selectbox(
    "Select Input Language",
    options=list(language_options),
)
selected_lang_code = language_options[input_language]
|
|
|
|
|
def speech_to_text(audio_data: "sr.AudioData", lang: str = "en") -> "str | None":
    """Transcribe *audio_data* with the Google Web Speech recognizer.

    Args:
        audio_data: Audio captured with ``recognizer.record``.
        lang: Language code forwarded to ``recognize_google``.

    Returns:
        The recognized text, or ``None`` when recognition fails. Failures are
        reported in the Streamlit UI instead of being raised.
    """
    st.info("Converting speech to text...")
    try:
        return recognizer.recognize_google(audio_data, language=lang)
    except sr.UnknownValueError:
        # Raised without a message when the speech is unintelligible (or the
        # segment is silent); formatting it would display an empty error.
        st.warning("Could not understand the audio in this segment.")
    except sr.RequestError as e:
        st.error(f"Speech recognition service request failed: {e}")
    except Exception as e:
        # Keep the original best-effort contract: report and return None.
        st.error(f"Error in speech recognition: {e}")
    return None
|
|
|
|
|
# Upload flow: decode the file, recognize it in 30-second slices, then show
# the transcript and its English translation.
uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])

if uploaded_file:
    with st.spinner("Processing uploaded audio..."):
        try:
            audio = AudioSegment.from_file(BytesIO(uploaded_file.read()))

            # Slice bounds are in milliseconds; each slice is recognized
            # separately and the results are concatenated.
            chunk_duration_ms = 30000
            chunks = [
                audio[start:start + chunk_duration_ms]
                for start in range(0, len(audio), chunk_duration_ms)
            ]

            recognized_parts = []
            for chunk in chunks:
                # Export to an in-memory WAV instead of a
                # NamedTemporaryFile(delete=False), which was never removed
                # and left one file on disk per slice.
                wav_buffer = BytesIO()
                chunk.export(wav_buffer, format="wav")
                wav_buffer.seek(0)

                with sr.AudioFile(wav_buffer) as source:
                    audio_data = recognizer.record(source)

                detected_text = speech_to_text(audio_data, lang=selected_lang_code)
                if detected_text:
                    recognized_parts.append(detected_text)

            text_output = " ".join(recognized_parts)

            if text_output:
                st.write("Detected Speech Text:", text_output)

                translator = GoogleTranslator(source='auto', target='en')
                # deep_translator rejects a single input of 5000 characters
                # or more, so long transcripts are translated slice by slice.
                max_translate_chars = 5000
                if len(text_output) < max_translate_chars:
                    translated_text = translator.translate(text_output)
                else:
                    translated_parts = []
                    for part in recognized_parts:
                        translated_part = translator.translate(part)
                        if translated_part:
                            translated_parts.append(translated_part)
                    translated_text = " ".join(translated_parts)

                st.write("Translated Text (English):", translated_text)
            else:
                st.warning("No speech could be recognized in the uploaded audio.")

        except Exception as e:
            # Top-level boundary of the script: surface any failure in the UI.
            st.error(f"Error processing the audio file: {e}")
|
|