"""Gradio app: spoken question -> text -> generated explanation -> speech.

The pipeline is: save the uploaded audio to disk, transcribe it, generate an
explanation with a small causal language model, synthesize the explanation to
an audio file, and show a per-character "Braille" rendering alongside a short
learning guide.
"""

import functools
import os
import traceback

import gradio as gr
import numpy as np
import pyttsx3  # For better TTS
import soundfile as sf
import speech_recognition as sr
import torch
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Directory that holds the uploaded audio and the synthesized response.
TEMP_DIR = 'temp'
# Upper bound on the size of the saved input file (10MB).
MAX_AUDIO_BYTES = 10 * 1024 * 1024
# Hugging Face model used for text generation.
MODEL_NAME = "distilgpt2"

# Initialize TTS engine
tts_engine = pyttsx3.init()


class _TranscriptionError(Exception):
    """Audio could not be turned into text; the message is user-facing."""


def save_audio_file(audio_data):
    """
    Save audio data to a temporary file

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array

    Returns:
        str: Path to saved audio file, or None if saving failed
    """
    try:
        os.makedirs(TEMP_DIR, exist_ok=True)
        sample_rate, audio_array = audio_data
        file_path = os.path.join(TEMP_DIR, 'input_audio.wav')
        sf.write(file_path, audio_array, sample_rate)
        return file_path
    except Exception as e:
        # Best-effort: callers treat None as "could not save".
        print(f"Error saving audio file: {e}")
        return None


def _transcribe(audio_data):
    """Return the recognized text for audio_data or raise.

    Raises:
        _TranscriptionError: For expected failures (file could not be saved,
            file too large, both recognizers failed). The exception message
            is the text the public wrapper returns to its caller.
    """
    audio_path = save_audio_file(audio_data)
    if not audio_path:
        raise _TranscriptionError("Error: Could not save audio file")

    if os.path.getsize(audio_path) > MAX_AUDIO_BYTES:  # 10MB limit
        raise _TranscriptionError(
            "Audio file is too large. Please upload a file smaller than 10MB."
        )

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        # No adjust_for_ambient_noise() here: on a file source it reads (and
        # thereby discards) the first part of the recording before record()
        # runs, cutting off the start of the speech.
        audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # Unintelligible to the online recognizer, or the service was not
        # reachable: fall back to the offline Sphinx recognizer.
        try:
            return recognizer.recognize_sphinx(audio)
        except Exception as sphinx_error:
            raise _TranscriptionError(
                f"Speech recognition failed: {sphinx_error}"
            ) from sphinx_error


def safe_speech_to_text(audio_data):
    """
    Safe speech-to-text conversion with comprehensive error handling

    This function never raises; failures are reported in the returned string.
    Code that must tell a transcript apart from a failure message should use
    ``_transcribe`` instead.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array

    Returns:
        str: Recognized text or error message
    """
    try:
        return _transcribe(audio_data)
    except _TranscriptionError as e:
        return str(e)
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Unexpected error during audio processing: {e}\n{error_trace}"


def text_to_speech(text):
    """Convert text to speech with error handling

    Args:
        text (str): Text to synthesize

    Returns:
        str: Path to the synthesized audio file, or None if synthesis failed
    """
    try:
        os.makedirs(TEMP_DIR, exist_ok=True)
        output_path = os.path.join(TEMP_DIR, "response.mp3")
        # NOTE(review): the container pyttsx3 writes depends on its platform
        # driver and may not be MP3 despite the extension - confirm playback.
        tts_engine.save_to_file(text, output_path)
        tts_engine.runAndWait()
        return output_path
    except Exception as e:
        print(f"Text-to-speech conversion error: {e}")
        return None


@functools.lru_cache(maxsize=1)
def _get_text_generator():
    """Build the text-generation pipeline once and reuse it across requests.

    A failed load raises and is not cached, so the next call retries.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)
    return pipeline(
        "text-generation", model=model, tokenizer=tokenizer, device=device
    )


def _generate(question):
    """Return the generated text for question; raises on any failure."""
    prompt = f"Explain in a simple, educational way: {question}"
    response = _get_text_generator()(
        prompt, max_length=200, num_return_sequences=1
    )
    return response[0]['generated_text']


def generate_educational_response(question):
    """Generate educational response with fallback

    Args:
        question (str): The question to explain

    Returns:
        str: Generated text, or an error message with traceback on failure
    """
    try:
        return _generate(question)
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Error generating response: {e}\n{error_trace}"


def _recognition_failure(detail):
    """Build the five-field result shown when no usable transcript exists."""
    return (
        "Audio recognition failed",
        "Could not understand the audio. Please try again.",
        None,
        "No Braille conversion",
        detail,
    )


def process_input(audio):
    """
    Comprehensive input processing with robust error handling

    Args:
        audio (tuple): Gradio audio upload data

    Returns:
        Tuple of (recognized text, response text, response audio path,
        Braille text, learning guide), or error messages in the same slots
    """
    try:
        if audio is None:
            return (
                "No audio file uploaded",
                "Please upload an audio file",
                None,
                "No Braille conversion",
                "Error: No input provided",
            )

        # Use the raising helpers so that a failure message is never mistaken
        # for a transcript (and sent to the model) or for an answer (and
        # spoken aloud).
        try:
            text_input = _transcribe(audio)
        except _TranscriptionError as err:
            return _recognition_failure(str(err))

        if not text_input or len(text_input) < 3:
            return _recognition_failure("Error: Unable to recognize speech")

        response_text = _generate(text_input)
        audio_output_path = text_to_speech(response_text)

        # Placeholder rendering: every character is prefixed with the braille
        # pattern '⠈'; this is not a real Braille transliteration.
        braille_text = ' '.join(f"⠈{char}" for char in response_text)

        learning_guide = (
            "🌟 Learning Guide 🌟\n"
            f"Original Question: {text_input}\n\n"
            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
            "Practice tracing the dots to understand the pattern."
        )

        return (
            text_input,         # Recognized speech
            response_text,      # Educational response
            audio_output_path,  # Audio response path
            braille_text,       # Basic Braille text
            learning_guide,     # Simple learning guide
        )
    except Exception as e:
        error_trace = traceback.format_exc()
        return (
            "Processing Error",
            f"An unexpected error occurred: {e}",
            None,
            "Error in Braille conversion",
            f"Detailed Error:\n{error_trace}",
        )


# Gradio Interface with Error Handling
interface = gr.Interface(
    fn=process_input,
    inputs=gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy"),
    outputs=[
        gr.Textbox(label="Recognized Question"),
        gr.Textbox(label="Educational Response"),
        gr.Audio(label="Response Audio"),
        gr.Textbox(label="Braille Representation"),
        gr.Textbox(label="Learning Guide", lines=8),
    ],
    title="🌈 Accessible Learning Companion",
    description="Upload an audio file to get an educational explanation, audio response, and Braille representation.",
)

# Launch the interface
interface.launch(debug=True)