Spaces:

Anupam251272
/

BrailleBuddy

Build error

App Files Files Community

BrailleBuddy / app.py

Anupam251272

Create app.py

1a65f94 verified 16 days ago

raw

history blame

6.02 kB

	import gradio as gr
	import torch
	import os
	import numpy as np
	import soundfile as sf
	import speech_recognition as sr
	from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
	from gtts import gTTS
	import traceback
	import pyttsx3 # For better TTS

	# Initialize the shared TTS engine once at import time.
	# pyttsx3.init() raises when no usable speech driver is present; fall back
	# to None so the rest of the app can still start. text_to_speech() wraps its
	# engine calls in a broad except, so a None engine simply yields no audio.
	try:
	    tts_engine = pyttsx3.init()
	except Exception as e:
	    print(f"Text-to-speech engine unavailable: {e}")
	    tts_engine = None

	def save_audio_file(audio_data):
	    """
	    Write an uploaded recording to ``temp/input_audio.wav``.

	    The ``temp`` directory is created if needed and the file is overwritten
	    on every call.

	    Args:
	        audio_data (tuple): Tuple containing sample rate and numpy array

	    Returns:
	        str: Path to the written file, or None if any step failed (the
	        error is printed, not raised).
	    """
	    target_dir = 'temp'
	    try:
	        os.makedirs(target_dir, exist_ok=True)
	        rate, samples = audio_data
	        wav_path = os.path.join(target_dir, 'input_audio.wav')
	        sf.write(wav_path, samples, rate)
	    except Exception as err:
	        print(f"Error saving audio file: {err}")
	        return None
	    return wav_path

	def safe_speech_to_text(audio_data):
	    """
	    Transcribe uploaded audio, returning the text or an error message.

	    The audio is written to disk with save_audio_file(), loaded through
	    speech_recognition, and sent to the Google recognizer. If Google cannot
	    understand the audio or the request itself fails, the Sphinx recognizer
	    is tried before giving up.

	    Args:
	        audio_data (tuple): Tuple containing sample rate and numpy array

	    Returns:
	        str: Recognized text, or a human-readable error message. Failures
	        inside the try block are reported in the returned string rather
	        than raised, so callers must inspect the text.
	    """
	    recognizer = sr.Recognizer()

	    try:
	        audio_path = save_audio_file(audio_data)
	        if not audio_path:
	            return "Error: Could not save audio file"

	        file_size = os.path.getsize(audio_path)
	        if file_size > 10 * 1024 * 1024:  # 10MB limit
	            return "Audio file is too large. Please upload a file smaller than 10MB."

	        with sr.AudioFile(audio_path) as source:
	            # Deliberately no adjust_for_ambient_noise() here: on a file
	            # source it reads (and discards) the first 0.5 s of the
	            # recording, so the start of the question would never reach
	            # the recognizer.
	            audio = recognizer.record(source)

	        try:
	            return recognizer.recognize_google(audio)
	        except (sr.UnknownValueError, sr.RequestError):
	            # Google either could not understand the audio or could not be
	            # reached; in both cases fall back to the Sphinx recognizer.
	            try:
	                return recognizer.recognize_sphinx(audio)
	            except Exception as sphinx_error:
	                return f"Speech recognition failed: {sphinx_error}"

	    except Exception as e:
	        error_trace = traceback.format_exc()
	        return f"Unexpected error during audio processing: {e}\n{error_trace}"

	def text_to_speech(text):
	    """
	    Render *text* to ``temp/response.mp3`` with the shared TTS engine.

	    Returns:
	        str: Path of the audio file, or None if synthesis raised (the error
	        is printed, not propagated).
	    """
	    out_dir = 'temp'
	    try:
	        os.makedirs(out_dir, exist_ok=True)
	        # NOTE(review): the file is named .mp3, but the actual container
	        # written by pyttsx3 presumably depends on the platform driver -
	        # confirm the extension matches the real format.
	        target = os.path.join(out_dir, "response.mp3")
	        tts_engine.save_to_file(text, target)
	        tts_engine.runAndWait()  # blocks until the queued save has finished
	    except Exception as err:
	        print(f"Text-to-speech conversion error: {err}")
	        return None
	    return target

	# Loaded text-generation pipelines keyed by model name, so the tokenizer and
	# model weights are loaded once per process instead of on every request.
	_GENERATOR_CACHE = {}


	def _get_text_generator(model_name):
	    """Return the cached text-generation pipeline for *model_name*, loading it on first use."""
	    generator = _GENERATOR_CACHE.get(model_name)
	    if generator is None:
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        tokenizer = AutoTokenizer.from_pretrained(model_name)
	        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
	        generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
	        _GENERATOR_CACHE[model_name] = generator
	    return generator


	def generate_educational_response(question):
	    """
	    Generate an educational explanation for *question* with distilgpt2.

	    Args:
	        question (str): The learner's question, inserted into a fixed prompt.

	    Returns:
	        str: The pipeline's ``generated_text`` for the first returned
	        sequence, or a string starting with "Error generating response:"
	        (including the traceback) if loading or generation raised.
	    """
	    try:
	        nlp_pipeline = _get_text_generator("distilgpt2")
	        prompt = f"Explain in a simple, educational way: {question}"
	        response = nlp_pipeline(prompt, max_length=200, num_return_sequences=1)
	        return response[0]['generated_text']
	    except Exception as e:
	        error_trace = traceback.format_exc()
	        return f"Error generating response: {e}\n{error_trace}"

	# Raised-dot numbers (1-6) of the Braille cell for each supported character
	# (uncontracted English Braille letters and basic punctuation).
	_BRAILLE_DOTS = {
	    'a': '1', 'b': '12', 'c': '14', 'd': '145', 'e': '15',
	    'f': '124', 'g': '1245', 'h': '125', 'i': '24', 'j': '245',
	    'k': '13', 'l': '123', 'm': '134', 'n': '1345', 'o': '135',
	    'p': '1234', 'q': '12345', 'r': '1235', 's': '234', 't': '2345',
	    'u': '136', 'v': '1236', 'w': '2456', 'x': '1346', 'y': '13456',
	    'z': '1356',
	    ',': '2', ';': '23', ':': '25', '.': '256', '!': '235',
	    '?': '236', "'": '3', '-': '36',
	}

	# Digits reuse the cells of the letters a-j, preceded by a number indicator.
	_DIGIT_LETTERS = dict(zip('1234567890', 'abcdefghij'))

	# Prefixes of the error strings that safe_speech_to_text() returns in place
	# of recognized text, and that generate_educational_response() returns in
	# place of an answer.
	_SPEECH_ERROR_PREFIXES = (
	    "Error: Could not save audio file",
	    "Audio file is too large",
	    "Speech recognition failed:",
	    "Unexpected error during audio processing:",
	)
	_RESPONSE_ERROR_PREFIX = "Error generating response:"


	def _braille_cell(dots):
	    """Return the Unicode Braille pattern whose raised dots are listed in *dots*."""
	    # U+2800 is the blank cell; dot n sets bit n-1 of the code point offset.
	    return chr(0x2800 + sum(1 << (int(d) - 1) for d in dots))


	def _text_to_braille(text):
	    """Transliterate *text* into uncontracted Unicode Braille, cell by cell."""
	    number_sign = _braille_cell('3456')
	    capital_sign = _braille_cell('6')
	    grade1_sign = _braille_cell('56')

	    cells = []
	    in_number = False
	    for ch in text:
	        if ch in _DIGIT_LETTERS:
	            # One number indicator per run of digits.
	            if not in_number:
	                cells.append(number_sign)
	                in_number = True
	            cells.append(_braille_cell(_BRAILLE_DOTS[_DIGIT_LETTERS[ch]]))
	            continue

	        lowered = ch.lower()
	        dots = _BRAILLE_DOTS.get(lowered)
	        if dots is None:
	            # Spaces, newlines and unsupported symbols pass through unchanged.
	            cells.append(ch)
	            in_number = False
	            continue

	        if ch.isalpha():
	            # A letter a-j right after digits would read as another digit,
	            # so mark it explicitly as a letter.
	            if in_number and lowered in 'abcdefghij':
	                cells.append(grade1_sign)
	            in_number = False
	            if ch.isupper():
	                cells.append(capital_sign)
	        elif ch not in '.,':
	            # Periods and commas may sit inside a number (3.14, 1,000);
	            # any other punctuation ends the digit run.
	            in_number = False
	        cells.append(_braille_cell(dots))
	    return ''.join(cells)


	def process_input(audio):
	    """
	    Run the full pipeline: speech -> text -> answer -> audio + Braille.

	    Args:
	        audio (tuple): Gradio audio upload data, or None when nothing was
	            uploaded.

	    Returns:
	        tuple: Five values in the order (recognized question, educational
	        response, response audio path or None, Braille text, learning
	        guide). On any failure the same five slots carry error messages
	        instead; this function does not raise.
	    """
	    try:
	        if audio is None:
	            return (
	                "No audio file uploaded",
	                "Please upload an audio file",
	                None,
	                "No Braille conversion",
	                "Error: No input provided"
	            )

	        text_input = safe_speech_to_text(audio)

	        if not text_input or len(text_input) < 3:
	            return (
	                "Audio recognition failed",
	                "Could not understand the audio. Please try again.",
	                None,
	                "No Braille conversion",
	                "Error: Unable to recognize speech"
	            )

	        # safe_speech_to_text() reports failures as ordinary strings; do not
	        # feed those to the language model as if they were the question.
	        if text_input.startswith(_SPEECH_ERROR_PREFIXES):
	            return (
	                "Audio recognition failed",
	                text_input,
	                None,
	                "No Braille conversion",
	                "Error: Unable to recognize speech"
	            )

	        response_text = generate_educational_response(text_input)

	        # Likewise, a generation failure comes back as a string; skip TTS and
	        # Braille instead of reading a traceback aloud.
	        if response_text.startswith(_RESPONSE_ERROR_PREFIX):
	            return (
	                text_input,
	                response_text,
	                None,
	                "No Braille conversion",
	                "Error: Unable to generate a response"
	            )

	        audio_output_path = text_to_speech(response_text)
	        braille_text = _text_to_braille(response_text)
	        learning_guide = (
	            "🌟 Learning Guide 🌟\n"
	            f"Original Question: {text_input}\n\n"
	            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
	            "Practice tracing the dots to understand the pattern."
	        )

	        return (
	            text_input,  # Recognized speech
	            response_text,  # Educational response
	            audio_output_path,  # Audio response path
	            braille_text,  # Uncontracted Braille text
	            learning_guide  # Simple learning guide
	        )

	    except Exception as e:
	        error_trace = traceback.format_exc()
	        return (
	            "Processing Error",
	            f"An unexpected error occurred: {e}",
	            None,
	            "Error in Braille conversion",
	            f"Detailed Error:\n{error_trace}"
	        )

	# Gradio UI: a single audio input wired to process_input, with five output
	# widgets listed in the same order as the tuple process_input returns.
	_audio_input = gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy")
	_result_outputs = [
	    gr.Textbox(label="Recognized Question"),
	    gr.Textbox(label="Educational Response"),
	    gr.Audio(label="Response Audio"),
	    gr.Textbox(label="Braille Representation"),
	    gr.Textbox(label="Learning Guide", lines=8),
	]
	interface = gr.Interface(
	    fn=process_input,
	    inputs=_audio_input,
	    outputs=_result_outputs,
	    title="🌈 Accessible Learning Companion",
	    description="Upload an audio file to get an educational explanation, audio response, and Braille representation.",
	)

	# Launch the interface only when this file is run as a script, so that
	# importing the module (e.g. from tests or other tooling) does not start
	# a web server as a side effect.
	if __name__ == "__main__":
	    interface.launch(debug=True)