Anupam251272 committed on
Commit
1a65f94
•
1 Parent(s): 77a5484

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -0
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import os
4
+ import numpy as np
5
+ import soundfile as sf
6
+ import speech_recognition as sr
7
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
8
+ from gtts import gTTS
9
+ import traceback
10
+ import pyttsx3 # For better TTS
11
+
12
# Initialize the shared TTS engine once at import time.
# pyttsx3.init() raises when no speech driver can be loaded (for example on a
# headless server without eSpeak). Fall back to None instead of crashing the
# whole app on startup: text_to_speech() wraps its engine calls in a broad
# try/except and returns None, so the UI simply shows no response audio.
try:
    tts_engine = pyttsx3.init()
except (ImportError, RuntimeError, OSError) as e:
    print(f"Text-to-speech engine unavailable: {e}")
    tts_engine = None
14
+
15
def save_audio_file(audio_data):
    """
    Write an in-memory audio clip to ``temp/input_audio.wav``.

    Args:
        audio_data (tuple): ``(sample_rate, samples)`` pair; it is unpacked
            and handed to ``sf.write``.

    Returns:
        str: Path of the written WAV file, or None if anything failed
        (the error is printed instead of being raised).
    """
    target_dir = 'temp'
    wav_path = os.path.join(target_dir, 'input_audio.wav')
    try:
        # Create the directory first so a bad payload still leaves it in place,
        # exactly as before.
        os.makedirs(target_dir, exist_ok=True)
        rate, samples = audio_data
        sf.write(wav_path, samples, rate)
    except Exception as e:
        print(f"Error saving audio file: {e}")
        return None
    return wav_path
34
+
35
def safe_speech_to_text(audio_data):
    """
    Transcribe an audio clip, returning either the text or an error message.

    The clip is first written to disk with ``save_audio_file``, rejected if
    the resulting file exceeds 10MB, then sent to Google speech recognition
    with an offline Sphinx fallback.

    Args:
        audio_data (tuple): Tuple containing sample rate and numpy array

    Returns:
        str: Recognized text, or a human-readable error message. This
        function never raises; every failure is reported as a string.
    """
    recognizer = sr.Recognizer()

    try:
        audio_path = save_audio_file(audio_data)
        if not audio_path:
            return "Error: Could not save audio file"

        file_size = os.path.getsize(audio_path)
        if file_size > 10 * 1024 * 1024:  # 10MB limit
            return "Audio file is too large. Please upload a file smaller than 10MB."

        with sr.AudioFile(audio_path) as source:
            # Deliberately no adjust_for_ambient_noise() here: on a file
            # source it reads (and discards) the first 0.5 s of the clip, so
            # the start of the question was lost. The calibrated energy
            # threshold is only relevant to listen(), not to record().
            audio = recognizer.record(source)

        try:
            return recognizer.recognize_google(audio)
        except (sr.UnknownValueError, sr.RequestError):
            # Fall back to offline Sphinx both when Google cannot understand
            # the audio and when the Google API cannot be reached at all.
            try:
                return recognizer.recognize_sphinx(audio)
            except Exception as sphinx_error:
                return f"Speech recognition failed: {sphinx_error}"

    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Unexpected error during audio processing: {e}\n{error_trace}"
73
+
74
def text_to_speech(text):
    """
    Synthesize ``text`` with the shared TTS engine into ``temp/response.mp3``.

    Args:
        text (str): Text to speak.

    Returns:
        str: Path of the generated audio file, or None if synthesis failed
        (the error is printed instead of being raised).
    """
    out_dir = 'temp'
    out_path = os.path.join(out_dir, "response.mp3")
    try:
        os.makedirs(out_dir, exist_ok=True)
        # save_to_file only queues the job; runAndWait performs it.
        tts_engine.save_to_file(text, out_path)
        tts_engine.runAndWait()
    except Exception as e:
        print(f"Text-to-speech conversion error: {e}")
        return None
    return out_path
84
+
85
# Lazily built text-generation pipeline, reused by every request so the
# tokenizer and model are not reloaded on each call.
_TEXT_GENERATOR = None


def _get_text_generator():
    """Build the distilgpt2 text-generation pipeline on first use and reuse it."""
    global _TEXT_GENERATOR
    if _TEXT_GENERATOR is None:
        model_name = "distilgpt2"
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
        _TEXT_GENERATOR = pipeline(
            "text-generation", model=model, tokenizer=tokenizer, device=device
        )
    return _TEXT_GENERATOR


def generate_educational_response(question):
    """
    Generate an educational explanation for ``question`` with distilgpt2.

    Args:
        question (str): The learner's question; it is embedded in a fixed
            "Explain in a simple, educational way" prompt.

    Returns:
        str: The pipeline's ``generated_text`` for the prompt, or an error
        message (including the traceback) if loading or generation failed.
        This function never raises.
    """
    try:
        nlp_pipeline = _get_text_generator()
        prompt = f"Explain in a simple, educational way: {question}"
        response = nlp_pipeline(prompt, max_length=200, num_return_sequences=1)
        return response[0]['generated_text']
    except Exception as e:
        error_trace = traceback.format_exc()
        return f"Error generating response: {e}\n{error_trace}"
99
+
100
# Prefixes of the messages safe_speech_to_text() returns in place of a
# transcript when something went wrong. They must not be treated as a question.
_RECOGNITION_ERROR_PREFIXES = (
    "Error: Could not save audio file",
    "Audio file is too large.",
    "Speech recognition failed:",
    "Unexpected error during audio processing:",
)


def _braille_cell(dots):
    """Return the Unicode Braille pattern for a string of dot numbers (1-6)."""
    return chr(0x2800 + sum(1 << (int(dot) - 1) for dot in dots))


# Uncontracted Braille cells for letters and common punctuation.
_BRAILLE_LETTERS = {
    letter: _braille_cell(dots)
    for letter, dots in {
        "a": "1", "b": "12", "c": "14", "d": "145", "e": "15",
        "f": "124", "g": "1245", "h": "125", "i": "24", "j": "245",
        "k": "13", "l": "123", "m": "134", "n": "1345", "o": "135",
        "p": "1234", "q": "12345", "r": "1235", "s": "234", "t": "2345",
        "u": "136", "v": "1236", "w": "2456", "x": "1346", "y": "13456",
        "z": "1356",
    }.items()
}
_BRAILLE_PUNCTUATION = {
    mark: _braille_cell(dots)
    for mark, dots in {
        ",": "2", ";": "23", ":": "25", ".": "256",
        "!": "235", "?": "236", "'": "3", "-": "36",
    }.items()
}
# Digits reuse the cells of the letters a-j, announced by a number sign.
_BRAILLE_DIGITS = {
    digit: _BRAILLE_LETTERS[letter]
    for digit, letter in zip("1234567890", "abcdefghij")
}
_NUMBER_SIGN = _braille_cell("3456")
_CAPITAL_SIGN = _braille_cell("6")
_LETTER_SIGN = _braille_cell("56")


def _text_to_braille(text):
    """Transliterate text to uncontracted Braille, passing unknown characters through."""
    cells = []
    in_number = False
    for index, char in enumerate(text):
        if char in _BRAILLE_DIGITS:
            if not in_number:
                cells.append(_NUMBER_SIGN)
                in_number = True
            cells.append(_BRAILLE_DIGITS[char])
            continue
        # A "." or "," directly between digits keeps the number going.
        if in_number and char in ".," and text[index + 1:index + 2] in _BRAILLE_DIGITS:
            cells.append(_BRAILLE_PUNCTUATION[char])
            continue

        lower = char.lower()
        if lower in _BRAILLE_LETTERS:
            if char != lower:
                cells.append(_CAPITAL_SIGN)
            elif in_number and lower in "abcdefghij":
                # Without this marker a-j right after digits would read as digits.
                cells.append(_LETTER_SIGN)
            cells.append(_BRAILLE_LETTERS[lower])
        elif char in _BRAILLE_PUNCTUATION:
            cells.append(_BRAILLE_PUNCTUATION[char])
        else:
            cells.append(char)
        in_number = False
    return "".join(cells)


def process_input(audio):
    """
    Run the full pipeline: speech-to-text, answer generation, TTS and Braille.

    Args:
        audio (tuple): Gradio audio upload data, or None when nothing was
            uploaded.

    Returns:
        tuple: Five values matching the interface outputs, in order:
        recognized question, educational response, path to the response
        audio (or None), Braille representation, and learning guide. On any
        failure the same five slots carry explanatory messages instead;
        this function never raises.
    """
    try:
        if audio is None:
            return (
                "No audio file uploaded",
                "Please upload an audio file",
                None,
                "No Braille conversion",
                "Error: No input provided"
            )

        text_input = safe_speech_to_text(audio)

        if not text_input or len(text_input) < 3:
            return (
                "Audio recognition failed",
                "Could not understand the audio. Please try again.",
                None,
                "No Braille conversion",
                "Error: Unable to recognize speech"
            )

        # safe_speech_to_text reports failures as strings; stop here rather
        # than sending an error message to the language model as a question.
        if text_input.startswith(_RECOGNITION_ERROR_PREFIXES):
            return (
                "Audio recognition failed",
                text_input.splitlines()[0],
                None,
                "No Braille conversion",
                f"Detailed Error:\n{text_input}"
            )

        response_text = generate_educational_response(text_input)
        audio_output_path = text_to_speech(response_text)
        braille_text = _text_to_braille(response_text)
        learning_guide = (
            "🌟 Learning Guide 🌟\n"
            f"Original Question: {text_input}\n\n"
            "Tip: Each Braille character is formed by a unique combination of raised dots.\n"
            "Practice tracing the dots to understand the pattern."
        )

        return (
            text_input,  # Recognized speech
            response_text,  # Educational response
            audio_output_path,  # Audio response path
            braille_text,  # Uncontracted Braille transliteration
            learning_guide  # Simple learning guide
        )

    except Exception as e:
        error_trace = traceback.format_exc()
        return (
            "Processing Error",
            f"An unexpected error occurred: {e}",
            None,
            "Error in Braille conversion",
            f"Detailed Error:\n{error_trace}"
        )
158
+
159
# Gradio UI: a single audio upload feeds process_input, whose five return
# values fill the five output widgets below in order.
_audio_input = gr.Audio(label="Upload Audio (MP3/WAV)", type="numpy")
_result_widgets = [
    gr.Textbox(label="Recognized Question"),
    gr.Textbox(label="Educational Response"),
    gr.Audio(label="Response Audio"),
    gr.Textbox(label="Braille Representation"),
    gr.Textbox(label="Learning Guide", lines=8),
]

interface = gr.Interface(
    fn=process_input,
    inputs=_audio_input,
    outputs=_result_widgets,
    title="🌈 Accessible Learning Companion",
    description="Upload an audio file to get an educational explanation, audio response, and Braille representation.",
)

# Start serving the app
interface.launch(debug=True)