Spaces: Build error
import gradio as gr
import torch
import os
import numpy as np
import soundfile as sf
import speech_recognition as sr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
import traceback
import pyttsx3  # Offline TTS engine

# Initialize the TTS engine once at startup.
# Note: pyttsx3.init() can raise on headless hosts (e.g. Spaces containers)
# if no speech driver such as espeak is installed.
tts_engine = pyttsx3.init()
def save_audio_file(audio_data):
    """
    Save audio data to a temporary file.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array.

    Returns:
        str: Path to the saved audio file, or None on failure.
    """
    try:
        os.makedirs('temp', exist_ok=True)
        sample_rate, audio_array = audio_data
        file_path = os.path.join('temp', 'input_audio.wav')
        sf.write(file_path, audio_array, sample_rate)
        return file_path
    except Exception as e:
        print(f"Error saving audio file: {e}")
        return None
def safe_speech_to_text(audio_data):
    """
    Safe speech-to-text conversion with comprehensive error handling.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array.

    Returns:
        str: Recognized text or an error message.
    """
    recognizer = sr.Recognizer()
    try:
        audio_path = save_audio_file(audio_data)
        if not audio_path:
            return "Error: Could not save audio file"

        file_size = os.path.getsize(audio_path)
        if file_size > 10 * 1024 * 1024:  # 10 MB limit
            return "Audio file is too large. Please upload a file smaller than 10MB."

        with sr.AudioFile(audio_path) as source:
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            audio = recognizer.record(source)

        try:
            # Online recognition via the Google Web Speech API
            text = recognizer.recognize_google(audio)
            return text
        except (sr.UnknownValueError, sr.RequestError):
            # Offline fallback; requires the pocketsphinx package
            try:
                text = recognizer.recognize_sphinx(audio)
                return text
            except Exception as sphinx_error:
                return f"Speech recognition failed: {sphinx_error}"
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Unexpected error during audio processing: {e}\n{error_trace}"
def text_to_speech(text):
    """Convert text to speech with error handling."""
    try:
        os.makedirs('temp', exist_ok=True)
        # Note: pyttsx3 writes whatever format the underlying engine produces
        # (typically WAV/AIFF); the .mp3 name is kept from the original code.
        output_path = os.path.join('temp', 'response.mp3')
        tts_engine.save_to_file(text, output_path)
        tts_engine.runAndWait()
        return output_path
    except Exception as e:
        print(f"Text-to-speech conversion error: {e}")
        return None
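# gTTS is imported above but not used by the app. A minimal sketch of a
# network-based TTS fallback (assumes outbound internet access from the host);
# illustrative only, not wired into process_input:
def text_to_speech_gtts(text, lang="en"):
    """Fallback TTS using gTTS; returns the path to an MP3 file or None."""
    try:
        os.makedirs('temp', exist_ok=True)
        output_path = os.path.join('temp', 'response_gtts.mp3')
        gTTS(text=text, lang=lang).save(output_path)
        return output_path
    except Exception as e:
        print(f"gTTS fallback failed: {e}")
        return None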
def generate_educational_response(question):
    """Generate an educational response with error handling."""
    try:
        model_name = "distilgpt2"
        # pipeline() handles device placement; 0 = first GPU, -1 = CPU
        device = 0 if torch.cuda.is_available() else -1
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        nlp_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
        prompt = f"Explain in a simple, educational way: {question}"
        response = nlp_pipeline(prompt, max_length=200, num_return_sequences=1)
        return response[0]['generated_text']
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Error generating response: {e}\n{error_trace}"
def process_input(audio):
    """
    Comprehensive input processing with robust error handling.

    Args:
        audio (tuple): Gradio audio upload data (sample rate, numpy array).

    Returns:
        Tuple of processing results or error messages.
    """
    try:
        if audio is None:
            return (
                "No audio file uploaded",
                "Please upload an audio file",
                None,
                "No Braille conversion",
                "Error: No input provided"
            )

        text_input = safe_speech_to_text(audio)
        if not text_input or len(text_input) < 3:
            return (
                "Audio recognition failed",
                "Could not understand the audio. Please try again.",
                None,
                "No Braille conversion",
                "Error: Unable to recognize speech"
            )

        response_text = generate_educational_response(text_input)
        audio_output_path = text_to_speech(response_text)

        # Placeholder "Braille" rendering: prefixes each character with a full
        # Braille cell rather than actually translating it.
        braille_text = ' '.join([f"⠿ {char}" for char in response_text])

        learning_guide = (
            "Learning Guide\n"
            f"Original Question: {text_input}\n\n"
            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
            "Practice tracing the dots to understand the pattern."
        )

        return (
            text_input,         # Recognized speech
            response_text,      # Educational response
            audio_output_path,  # Audio response path
            braille_text,       # Basic Braille text
            learning_guide      # Simple learning guide
        )
    except Exception as e:
        error_trace = traceback.format_exc()
        return (
            "Processing Error",
            f"An unexpected error occurred: {e}",
            None,
            "Error in Braille conversion",
            f"Detailed Error:\n{error_trace}"
        )
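# The braille_text above is decorative rather than a real translation. A
# sketch of letter-for-letter Unicode Braille mapping; the table covers only
# lowercase a-z and is illustrative, not a full Braille standard:
BRAILLE_LETTERS = dict(zip(
    "abcdefghijklmnopqrstuvwxyz",
    "⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵",
))

def to_braille(text):
    """Map a-z to Unicode Braille cells; leave other characters unchanged."""
    return ''.join(BRAILLE_LETTERS.get(ch, ch) for ch in text.lower())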
# Gradio interface with error handling
interface = gr.Interface(
    fn=process_input,
    inputs=gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy"),
    outputs=[
        gr.Textbox(label="Recognized Question"),
        gr.Textbox(label="Educational Response"),
        gr.Audio(label="Response Audio"),
        gr.Textbox(label="Braille Representation"),
        gr.Textbox(label="Learning Guide", lines=8)
    ],
    title="Accessible Learning Companion",
    description="Upload an audio file to get an educational explanation, audio response, and Braille representation."
)

# Launch the interface
interface.launch(debug=True)
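# The Space header above reports a build error; a likely cause is missing
# dependencies. A sketch of requirements.txt / packages.txt entries covering
# the imports used here (usual PyPI/apt names, versions unpinned and assumed):
#
# requirements.txt:
#   gradio
#   torch
#   numpy
#   soundfile
#   SpeechRecognition
#   transformers
#   gTTS
#   pyttsx3
#   pocketsphinx      # only if the recognize_sphinx fallback is kept
#
# packages.txt (system packages):
#   espeak            # speech driver required by pyttsx3 on Linux
#   ffmpeg            # audio decoding, e.g. for the Whisper sketch above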