MediPlusPlus committed on
Commit
7610cf6
1 Parent(s): 29e845b
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -226,26 +226,29 @@ def predict_category(que, input_image):
226
  return preds[0]
227
 
228
 
229
- def combine(audio, input_image):
230
- que = transcribe_audio(audio)
231
- # que = "What is the animal here?"
 
 
232
 
233
  image = Image.fromarray(input_image).convert('RGB')
234
  category = predict_category(que, image)
235
-
236
  answer = predict_answer(0, que, image)
237
 
238
- # print(category)
239
-
240
  tts = gTTS(answer)
241
  tts.save('answer.mp3')
 
242
  return que, answer, 'answer.mp3'
243
 
244
-
245
-
246
- # Define the Gradio interface for recording audio and displaying the transcription
247
- model_interface = gr.Interface(fn=combine, inputs=[gr.Microphone(label="Ask your question"),gr.Image(label="Upload the image")], outputs=[gr.Text(label="Transcribed Question"), gr.Text(label="Answer"), gr.Audio(label="Audio Answer")])
248
- # image_upload_interface = gr.Interface(fn=upload_image, inputs=gr.Image(label="Upload the image"), outputs="text")
 
 
 
249
 
250
  # Launch the Gradio interface
251
  model_interface.launch(debug=True)
 
226
  return preds[0]
227
 
228
 
229
+ def combine(audio, input_image, text_question=""):
230
+ if audio:
231
+ que = transcribe_audio(audio)
232
+ else:
233
+ que = text_question
234
 
235
  image = Image.fromarray(input_image).convert('RGB')
236
  category = predict_category(que, image)
 
237
  answer = predict_answer(0, que, image)
238
 
 
 
239
  tts = gTTS(answer)
240
  tts.save('answer.mp3')
241
+
242
  return que, answer, 'answer.mp3'
243
 
244
+ # Define the Gradio interface for recording audio, text input, and image upload
245
+ model_interface = gr.Interface(fn=combine,
246
+ inputs=[gr.Microphone(label="Ask your question"),
247
+ gr.Image(label="Upload the image"),
248
+ gr.Textbox(label="Text Question")],
249
+ outputs=[gr.Text(label="Transcribed Question"),
250
+ gr.Text(label="Answer"),
251
+ gr.Audio(label="Audio Answer")])
252
 
253
  # Launch the Gradio interface
254
  model_interface.launch(debug=True)