Spaces:

Anupam251272
/

BrailleBuddy

Build error

File size: 6,019 Bytes

1a65f94

import gradio as gr
import torch
import os
import numpy as np
import soundfile as sf
import speech_recognition as sr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
import traceback
import pyttsx3  # For better TTS

# Initialize TTS engine
# A single pyttsx3 engine is created at import time and shared by every call
# to text_to_speech(), which uses it to render a response to an audio file.
tts_engine = pyttsx3.init()

def save_audio_file(audio_data):
    """
    Write an uploaded recording to ``temp/input_audio.wav``.

    The ``temp`` directory is created on demand. Any failure (bad input
    shape, write error, ...) is printed and reported as ``None`` rather
    than raised.

    Args:
        audio_data (tuple): ``(sample_rate, samples)`` pair

    Returns:
        str: Path of the written WAV file, or None if saving failed
    """
    target_dir = 'temp'
    wav_path = None
    try:
        os.makedirs(target_dir, exist_ok=True)
        rate, samples = audio_data
        candidate = os.path.join(target_dir, 'input_audio.wav')
        sf.write(candidate, samples, rate)
    except Exception as exc:
        print(f"Error saving audio file: {exc}")
    else:
        # Only publish the path once the write has fully succeeded.
        wav_path = candidate
    return wav_path

def safe_speech_to_text(audio_data):
    """
    Transcribe an uploaded recording, never raising on processing errors.

    The audio is written to disk via ``save_audio_file``, rejected if the
    resulting file is larger than 10MB, and then transcribed with the Google
    web recognizer. If Google cannot understand the audio, or the service
    cannot be reached, the offline Sphinx recognizer is tried instead.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array

    Returns:
        str: Recognized text, or a human-readable error message when the
        audio could not be saved, was too large, or could not be transcribed
    """
    recognizer = sr.Recognizer()

    try:
        audio_path = save_audio_file(audio_data)
        if not audio_path:
            return "Error: Could not save audio file"

        file_size = os.path.getsize(audio_path)
        if file_size > 10 * 1024 * 1024:  # 10MB limit
            return "Audio file is too large. Please upload a file smaller than 10MB."

        with sr.AudioFile(audio_path) as source:
            # No adjust_for_ambient_noise() here: on a file source it reads
            # (and therefore discards) the opening audio, clipping the start
            # of the question, and the energy threshold it tunes is only
            # used by listen(), not by record().
            audio = recognizer.record(source)

        try:
            return recognizer.recognize_google(audio)
        except (sr.UnknownValueError, sr.RequestError):
            # Google either could not understand the audio or could not be
            # reached; in both cases the offline recognizer is worth a try.
            try:
                return recognizer.recognize_sphinx(audio)
            except Exception as sphinx_error:
                return f"Speech recognition failed: {sphinx_error}"

    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Unexpected error during audio processing: {e}\n{error_trace}"

def text_to_speech(text):
    """
    Render *text* to ``temp/response.mp3`` with the shared TTS engine.

    Returns the path of the audio file, or None if synthesis failed (the
    error is printed, not raised).
    """
    output_path = None
    try:
        os.makedirs('temp', exist_ok=True)
        target = os.path.join('temp', "response.mp3")
        tts_engine.save_to_file(text, target)
        # save_to_file only queues the job; runAndWait processes the queue.
        tts_engine.runAndWait()
        output_path = target
    except Exception as exc:
        print(f"Text-to-speech conversion error: {exc}")
    return output_path

# Lazily-built text-generation pipeline, shared by all requests so the model
# is loaded once instead of on every question.
_TEXT_GENERATOR = None


def _get_text_generator():
    """Return the shared distilgpt2 text-generation pipeline, building it on first use."""
    global _TEXT_GENERATOR
    if _TEXT_GENERATOR is None:
        model_name = "distilgpt2"
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
        # Assigned only after every step succeeded, so a failed load is
        # retried on the next call rather than cached.
        _TEXT_GENERATOR = pipeline(
            "text-generation", model=model, tokenizer=tokenizer, device=device
        )
    return _TEXT_GENERATOR


def generate_educational_response(question):
    """
    Generate an educational answer to *question* with distilgpt2.

    Args:
        question (str): The question to explain

    Returns:
        str: The generated text, or an error message (including the
        traceback) if the model could not be loaded or generation failed
    """
    try:
        nlp_pipeline = _get_text_generator()
        prompt = f"Explain in a simple, educational way: {question}"
        response = nlp_pipeline(prompt, max_length=200, num_return_sequences=1)
        return response[0]['generated_text']
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Error generating response: {e}\n{error_trace}"

# Raised-dot numbers (1-6) for each supported character in uncontracted
# braille. Digits are written as the number sign followed by the cells of
# the letters a-j.
_BRAILLE_DOTS = {
    'a': '1', 'b': '12', 'c': '14', 'd': '145', 'e': '15',
    'f': '124', 'g': '1245', 'h': '125', 'i': '24', 'j': '245',
    'k': '13', 'l': '123', 'm': '134', 'n': '1345', 'o': '135',
    'p': '1234', 'q': '12345', 'r': '1235', 's': '234', 't': '2345',
    'u': '136', 'v': '1236', 'w': '2456', 'x': '1346', 'y': '13456',
    'z': '1356',
    ',': '2', ';': '23', ':': '25', '.': '256', '!': '235', '?': '236',
    "'": '3', '-': '36',
}
_DIGIT_LETTERS = dict(zip('1234567890', 'abcdefghij'))
_CAPITAL_SIGN_DOTS = '6'
_NUMBER_SIGN_DOTS = '3456'

# Prefixes of the error messages safe_speech_to_text() returns in place of a
# transcript. Recognizer output is plain words, so a real question is not
# expected to start with any of these.
_RECOGNITION_ERROR_PREFIXES = (
    "Error:",
    "Audio file is too large",
    "Speech recognition failed:",
    "Unexpected error during audio processing:",
)


def _braille_cell(dots):
    """Return the Unicode braille character for a string of dot numbers such as '145'."""
    # U+2800 is the blank cell; dot n sets bit n-1 of the offset from it.
    return chr(0x2800 + sum(1 << (int(dot) - 1) for dot in dots))


def _to_braille(text):
    """
    Transliterate *text* into uncontracted Unicode braille, one character at a time.

    Capital letters get a capital sign, each run of digits gets one number
    sign, and characters without a mapping (spaces, newlines, other symbols)
    are passed through unchanged. No contractions or grade-1 indicators are
    produced.
    """
    cells = []
    in_number = False
    for char in text:
        if char in _DIGIT_LETTERS:
            if not in_number:
                cells.append(_braille_cell(_NUMBER_SIGN_DOTS))
                in_number = True
            cells.append(_braille_cell(_BRAILLE_DOTS[_DIGIT_LETTERS[char]]))
            continue
        in_number = False
        lowered = char.lower()
        if lowered in _BRAILLE_DOTS:
            if char != lowered:
                cells.append(_braille_cell(_CAPITAL_SIGN_DOTS))
            cells.append(_braille_cell(_BRAILLE_DOTS[lowered]))
        else:
            cells.append(char)
    return ''.join(cells)


def process_input(audio):
    """
    Run the full pipeline for one upload: speech -> text -> answer -> audio + braille.

    Args:
        audio (tuple): Gradio audio upload data, or None when nothing was uploaded

    Returns:
        tuple: Five values, in the order of the interface outputs:
        recognized question, educational response, path of the spoken
        response (or None), braille rendering of the response, and a
        learning guide. On any failure the same five slots carry
        explanatory messages instead; this function does not raise.
    """
    try:
        if audio is None:
            return (
                "No audio file uploaded",
                "Please upload an audio file",
                None,
                "No Braille conversion",
                "Error: No input provided"
            )

        text_input = safe_speech_to_text(audio)

        if not text_input or len(text_input) < 3:
            return (
                "Audio recognition failed",
                "Could not understand the audio. Please try again.",
                None,
                "No Braille conversion",
                "Error: Unable to recognize speech"
            )

        # safe_speech_to_text reports failures as ordinary strings; without
        # this guard the error text would be answered, spoken and brailled
        # as if it were the user's question.
        if text_input.startswith(_RECOGNITION_ERROR_PREFIXES):
            return (
                "Audio recognition failed",
                text_input,
                None,
                "No Braille conversion",
                "Error: Unable to recognize speech"
            )

        response_text = generate_educational_response(text_input)
        audio_output_path = text_to_speech(response_text)
        braille_text = _to_braille(response_text)
        learning_guide = (
            "🌟 Learning Guide 🌟\n"
            f"Original Question: {text_input}\n\n"
            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
            "Practice tracing the dots to understand the pattern."
        )

        return (
            text_input,          # Recognized speech
            response_text,       # Educational response
            audio_output_path,   # Audio response path
            braille_text,        # Uncontracted Braille text
            learning_guide       # Simple learning guide
        )

    except Exception as e:
        error_trace = traceback.format_exc()
        return (
            "Processing Error",
            f"An unexpected error occurred: {e}",
            None,
            "Error in Braille conversion",
            f"Detailed Error:\n{error_trace}"
        )

# Gradio UI: one audio upload in, five result widgets out. The outputs are
# listed in the same order as the tuple returned by process_input.
_audio_input = gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy")
_result_widgets = [
    gr.Textbox(label="Recognized Question"),
    gr.Textbox(label="Educational Response"),
    gr.Audio(label="Response Audio"),
    gr.Textbox(label="Braille Representation"),
    gr.Textbox(label="Learning Guide", lines=8),
]

interface = gr.Interface(
    fn=process_input,
    inputs=_audio_input,
    outputs=_result_widgets,
    title="🌈 Accessible Learning Companion",
    description="Upload an audio file to get an educational explanation, audio response, and Braille representation.",
)

# Start the web server; debug=True is passed through to Gradio unchanged.
interface.launch(debug=True)