Spaces:

Anupam251272
/

BrailleBuddy

Build error

App Files Files Community

Anupam251272 committed 10 days ago

Commit

1a65f94

•

1 Parent(s): 77a5484

Create app.py

Browse files

Files changed (1) hide show

app.py +175 -0

app.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import gradio as gr
+import torch
+import os
+import numpy as np
+import soundfile as sf
+import speech_recognition as sr
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from gtts import gTTS
+import traceback
+import pyttsx3  # For better TTS
+# Initialize TTS engine
+tts_engine = pyttsx3.init()
+def save_audio_file(audio_data):
+    """
+    Save audio data to a temporary file
+    Args:
+        audio_data (tuple): Tuple containing sample rate and numpy array
+    Returns:
+        str: Path to saved audio file
+    """
+    try:
+        os.makedirs('temp', exist_ok=True)
+        sample_rate, audio_array = audio_data
+        file_path = os.path.join('temp', 'input_audio.wav')
+        sf.write(file_path, audio_array, sample_rate)
+        return file_path
+    except Exception as e:
+        print(f"Error saving audio file: {e}")
+        return None
+def safe_speech_to_text(audio_data):
+    """
+    Safe speech-to-text conversion with comprehensive error handling
+    Args:
+        audio_data (tuple): Tuple containing sample rate and numpy array
+    Returns:
+        str: Recognized text or error message
+    """
+    recognizer = sr.Recognizer()
+    try:
+        audio_path = save_audio_file(audio_data)
+        if not audio_path:
+            return "Error: Could not save audio file"
+        file_size = os.path.getsize(audio_path)
+        if file_size > 10 * 1024 * 1024:  # 10MB limit
+            return "Audio file is too large. Please upload a file smaller than 10MB."
+        with sr.AudioFile(audio_path) as source:
+            recognizer.adjust_for_ambient_noise(source, duration=0.5)
+            audio = recognizer.record(source)
+        try:
+            text = recognizer.recognize_google(audio)
+            return text
+        except sr.UnknownValueError:
+            try:
+                text = recognizer.recognize_sphinx(audio)
+                return text
+            except Exception as sphinx_error:
+                return f"Speech recognition failed: {sphinx_error}"
+    except Exception as e:
+        error_trace = traceback.format_exc()
+        return f"Unexpected error during audio processing: {e}\n{error_trace}"
+def text_to_speech(text):
+    """Convert text to speech with error handling"""
+    try:
+        os.makedirs('temp', exist_ok=True)
+        tts_engine.save_to_file(text, os.path.join('temp', "response.mp3"))
+        tts_engine.runAndWait()
+        return os.path.join('temp', "response.mp3")
+    except Exception as e:
+        print(f"Text-to-speech conversion error: {e}")
+        return None
+def generate_educational_response(question):
+    """Generate educational response with fallback"""
+    try:
+        model_name = "distilgpt2"
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+        nlp_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
+        prompt = f"Explain in a simple, educational way: {question}"
+        response = nlp_pipeline(prompt, max_length=200, num_return_sequences=1)
+        return response[0]['generated_text']
+    except Exception as e:
+        error_trace = traceback.format_exc()
+        return f"Error generating response: {e}\n{error_trace}"
+def process_input(audio):
+    """
+    Comprehensive input processing with robust error handling
+    Args:
+        audio (tuple): Gradio audio upload data
+    Returns:
+        Tuple of processing results or error messages
+    """
+    try:
+        if audio is None:
+            return (
+                "No audio file uploaded",
+                "Please upload an audio file",
+                None,
+                "No Braille conversion",
+                "Error: No input provided"
+            )
+        text_input = safe_speech_to_text(audio)
+        if not text_input or len(text_input) < 3:
+            return (
+                "Audio recognition failed",
+                "Could not understand the audio. Please try again.",
+                None,
+                "No Braille conversion",
+                "Error: Unable to recognize speech"
+            )
+        response_text = generate_educational_response(text_input)
+        audio_output_path = text_to_speech(response_text)
+        braille_text = ' '.join([f"⠈{char}" for char in response_text])
+        learning_guide = (
+            "🌟 Learning Guide 🌟\n"
+            f"Original Question: {text_input}\n\n"
+            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
+            "Practice tracing the dots to understand the pattern."
+        )
+        return (
+            text_input,          # Recognized speech
+            response_text,       # Educational response
+            audio_output_path,   # Audio response path
+            braille_text,        # Basic Braille text
+            learning_guide       # Simple learning guide
+        )
+    except Exception as e:
+        error_trace = traceback.format_exc()
+        return (
+            "Processing Error",
+            f"An unexpected error occurred: {e}",
+            None,
+            "Error in Braille conversion",
+            f"Detailed Error:\n{error_trace}"
+        )
+# Gradio Interface with Error Handling
+interface = gr.Interface(
+    fn=process_input,
+    inputs=gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy"),
+    outputs=[
+        gr.Textbox(label="Recognized Question"),
+        gr.Textbox(label="Educational Response"),
+        gr.Audio(label="Response Audio"),
+        gr.Textbox(label="Braille Representation"),
+        gr.Textbox(label="Learning Guide", lines=8)
+    ],
+    title="🌈 Accessible Learning Companion",
+    description="Upload an audio file to get an educational explanation, audio response, and Braille representation."
+)
+# Launch the interface
+interface.launch(debug=True)