# BrailleBuddy / app.py
# Author: Anupam251272
# Commit: "Create app.py" (1a65f94, verified)
import gradio as gr
import torch
import os
import numpy as np
import soundfile as sf
import speech_recognition as sr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
import traceback
import pyttsx3 # For better TTS
# Initialize the pyttsx3 text-to-speech engine once, at import time.
# text_to_speech() below reuses this single module-level instance on every call.
tts_engine = pyttsx3.init()
def save_audio_file(audio_data):
    """
    Write an in-memory recording to 'temp/input_audio.wav'.

    The target file name is fixed, so each call overwrites the file written
    by the previous call.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array
    Returns:
        str: Path to saved audio file, or None if anything went wrong
            (the error is printed, not raised)
    """
    target_dir = 'temp'
    wav_path = os.path.join(target_dir, 'input_audio.wav')
    try:
        # Create the directory first so a missing folder never fails the write.
        os.makedirs(target_dir, exist_ok=True)
        rate, samples = audio_data
        sf.write(wav_path, samples, rate)
    except Exception as err:
        print(f"Error saving audio file: {err}")
        return None
    else:
        return wav_path
def safe_speech_to_text(audio_data):
    """
    Safe speech-to-text conversion with comprehensive error handling

    The audio is written to disk with save_audio_file(), rejected if the
    resulting file is larger than 10MB, and transcribed with the Google Web
    Speech recognizer. If Google cannot understand the audio, or the service
    cannot be reached, the Sphinx recognizer is tried as a fallback.

    Failures are never raised: they are reported as a message string in
    place of the transcript.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array
    Returns:
        str: Recognized text or error message
    """
    recognizer = sr.Recognizer()
    try:
        audio_path = save_audio_file(audio_data)
        if not audio_path:
            return "Error: Could not save audio file"

        max_bytes = 10 * 1024 * 1024  # 10MB limit
        if os.path.getsize(audio_path) > max_bytes:
            return "Audio file is too large. Please upload a file smaller than 10MB."

        with sr.AudioFile(audio_path) as source:
            # Deliberately no adjust_for_ambient_noise() here: on a file
            # source it reads and discards the start of the recording, so the
            # first words would be lost, and the energy threshold it tunes is
            # not used by record().
            audio = recognizer.record(source)

        try:
            return recognizer.recognize_google(audio)
        except (sr.UnknownValueError, sr.RequestError):
            # Google could not understand the audio or could not be reached
            # (network/API failure); either way, try the Sphinx fallback.
            try:
                return recognizer.recognize_sphinx(audio)
            except Exception as sphinx_error:
                return f"Speech recognition failed: {sphinx_error}"
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Unexpected error during audio processing: {e}\n{error_trace}"
def text_to_speech(text):
    """
    Synthesize *text* with the module-level TTS engine.

    Returns:
        str: Path of the generated file ('temp/response.mp3'), or None if
            any step raised (the error is printed, not raised)
    """
    output_path = os.path.join('temp', "response.mp3")
    try:
        os.makedirs('temp', exist_ok=True)
        # save_to_file only queues the job; runAndWait() actually performs it.
        tts_engine.save_to_file(text, output_path)
        tts_engine.runAndWait()
    except Exception as err:
        print(f"Text-to-speech conversion error: {err}")
        return None
    return output_path
# Loaded text-generation pipelines, keyed by (model name, device), so the
# model weights are read once per process instead of once per request.
_TEXT_GENERATORS = {}


def _get_text_generator(model_name, device):
    """Return the cached text-generation pipeline, loading it on first use."""
    cache_key = (model_name, device)
    generator = _TEXT_GENERATORS.get(cache_key)
    if generator is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
        generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
        _TEXT_GENERATORS[cache_key] = generator
    return generator


def generate_educational_response(question):
    """
    Generate educational response with fallback

    The question is wrapped in a fixed instruction prompt and completed by
    the distilgpt2 text-generation pipeline (on CUDA when available,
    otherwise on CPU). The pipeline is loaded on the first call and reused
    afterwards.

    Args:
        question (str): The question to explain
    Returns:
        str: The generated text as returned by the pipeline, or an error
            message with traceback if loading or generation failed
    """
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Loading happens inside the try so a download/load failure is
        # reported as an error string like any other failure.
        nlp_pipeline = _get_text_generator("distilgpt2", device)
        prompt = f"Explain in a simple, educational way: {question}"
        response = nlp_pipeline(prompt, max_length=200, num_return_sequences=1)
        return response[0]['generated_text']
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Error generating response: {e}\n{error_trace}"
# Prefixes of the failure messages that safe_speech_to_text() returns in
# place of a transcript.
_SPEECH_ERROR_PREFIXES = (
    "Error:",
    "Audio file is too large",
    "Speech recognition failed:",
    "Unexpected error during audio processing:",
)


def _is_speech_error(text):
    """Return True if *text* is a speech-to-text failure message, not a transcript."""
    return text.startswith(_SPEECH_ERROR_PREFIXES)


def process_input(audio):
    """
    Comprehensive input processing with robust error handling

    Pipeline: speech-to-text -> educational response -> spoken response ->
    Braille-style rendering -> learning guide.

    Args:
        audio (tuple): Gradio audio upload data
    Returns:
        Tuple of five values, in the order of the interface outputs:
        recognized question, educational response, path of the response
        audio (or None), Braille representation, learning guide. On failure
        the same five slots carry error messages instead.
    """
    try:
        if audio is None:
            return (
                "No audio file uploaded",
                "Please upload an audio file",
                None,
                "No Braille conversion",
                "Error: No input provided"
            )

        text_input = safe_speech_to_text(audio)

        if not text_input or len(text_input) < 3:
            return (
                "Audio recognition failed",
                "Could not understand the audio. Please try again.",
                None,
                "No Braille conversion",
                "Error: Unable to recognize speech"
            )

        # safe_speech_to_text() reports failures as ordinary strings; stop
        # here so an error message is never sent to the language model as if
        # it were the user's question.
        if _is_speech_error(text_input):
            return (
                "Audio recognition failed",
                text_input,
                None,
                "No Braille conversion",
                "Error: Unable to recognize speech"
            )

        response_text = generate_educational_response(text_input)
        audio_output_path = text_to_speech(response_text)

        # NOTE: placeholder rendering only - every character is prefixed with
        # the "⠈" cell; this is not a real Braille transcription.
        braille_text = ' '.join([f"⠈{char}" for char in response_text])

        learning_guide = (
            "🌟 Learning Guide 🌟\n"
            f"Original Question: {text_input}\n\n"
            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
            "Practice tracing the dots to understand the pattern."
        )

        return (
            text_input,         # Recognized speech
            response_text,      # Educational response
            audio_output_path,  # Audio response path
            braille_text,       # Basic Braille text
            learning_guide      # Simple learning guide
        )
    except Exception as e:
        error_trace = traceback.format_exc()
        return (
            "Processing Error",
            f"An unexpected error occurred: {e}",
            None,
            "Error in Braille conversion",
            f"Detailed Error:\n{error_trace}"
        )
# Gradio UI: a single audio input is passed to process_input, whose five
# return values fill the five output components in order.
audio_input = gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy")
result_components = [
    gr.Textbox(label="Recognized Question"),
    gr.Textbox(label="Educational Response"),
    gr.Audio(label="Response Audio"),
    gr.Textbox(label="Braille Representation"),
    gr.Textbox(label="Learning Guide", lines=8),
]

interface = gr.Interface(
    fn=process_input,
    inputs=audio_input,
    outputs=result_components,
    title="🌈 Accessible Learning Companion",
    description="Upload an audio file to get an educational explanation, audio response, and Braille representation.",
)

# Start the web server (runs whenever this module is executed or imported).
interface.launch(debug=True)