"""Streamlit app: transcribe an uploaded audio file and translate it to English.

The uploaded audio is split into fixed-length chunks, each chunk is sent to
the Google Web Speech recognizer, and the recognized text is translated to
English with deep_translator's GoogleTranslator.
"""
import os
import tempfile
from io import BytesIO

import speech_recognition as sr
import streamlit as st
from deep_translator import GoogleTranslator
from pydub import AudioSegment

# Length of each audio slice handed to the recognizer, in milliseconds.
CHUNK_DURATION_MS = 30000

# Title of the app
st.title("Speech-to-Text with Translation to English")

# Initialize recognizer
recognizer = sr.Recognizer()

# Choice for input language (display name -> language code)
language_options = {"English": "en", "Hindi": "hi"}
input_language = st.selectbox(
    "Select Input Language", options=list(language_options)
)
selected_lang_code = language_options[input_language]


def speech_to_text(audio_data, lang="en"):
    """Convert recorded audio to text with the Google Web Speech recognizer.

    Args:
        audio_data: Audio previously captured with ``recognizer.record``.
        lang: Language code passed to the recognizer (default ``"en"``).

    Returns:
        The recognized text, or ``None`` when recognition fails. Failures are
        reported in the Streamlit UI instead of being raised.
    """
    try:
        st.info("Converting speech to text...")
        return recognizer.recognize_google(audio_data, language=lang)
    except sr.UnknownValueError:
        # This exception carries no message, so the generic handler below
        # would display an empty error; report it explicitly instead.
        st.warning("Could not understand the audio in this segment.")
    except Exception as e:
        st.error(f"Error in speech recognition: {e}")
    return None


def _transcribe_chunk(chunk, lang):
    """Export one pydub audio chunk to a temporary WAV file and transcribe it."""
    # A TemporaryDirectory removes the WAV file when the block exits, so
    # chunks do not accumulate on disk.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "chunk.wav")
        # export() returns the open output file handle; close it so the file
        # can be reopened by AudioFile and deleted afterwards.
        chunk.export(wav_path, format="wav").close()
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
    return speech_to_text(audio_data, lang=lang)


def _translate_segments(segments):
    """Translate each text segment to English and join the results."""
    translator = GoogleTranslator(source='auto', target='en')
    # Translate segment by segment: a single call with the whole transcript
    # can exceed the translator's per-call length limit on long recordings
    # (5000 characters per the deep_translator docs -- confirm for the
    # installed version).
    translated = (translator.translate(segment) for segment in segments)
    return " ".join(piece for piece in translated if piece)


# Process uploaded audio file
uploaded_file = st.file_uploader(
    "Upload an audio file", type=["wav", "mp3", "ogg"]
)

if uploaded_file:
    with st.spinner("Processing uploaded audio..."):
        try:
            # Decode the uploaded bytes with pydub
            audio = AudioSegment.from_file(BytesIO(uploaded_file.read()))

            # Split audio into fixed-length chunks
            chunks = [
                audio[start:start + CHUNK_DURATION_MS]
                for start in range(0, len(audio), CHUNK_DURATION_MS)
            ]

            # Keep only the chunks that produced text
            segments = []
            for chunk in chunks:
                detected_text = _transcribe_chunk(chunk, selected_lang_code)
                if detected_text:
                    segments.append(detected_text)

            # Display detected text and translate
            if segments:
                text_output = " ".join(segments)
                st.write("Detected Speech Text:", text_output)

                translated_text = _translate_segments(segments)
                st.write("Translated Text (English):", translated_text)
            else:
                st.warning("No speech could be recognized in the uploaded audio.")

        except Exception as e:
            # Top-level boundary of the script: surface any failure in the UI.
            st.error(f"Error processing the audio file: {e}")