comparing-VQA-models

Runtime error

AnonymousSub committed on Dec 30, 2023

Commit

7518be4

•

1 Parent(s): 627fbe3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -75,13 +75,13 @@ def generate_answers(image, question):
     answer_blip_large = generate_answer_blip(blip_processor_large, blip_model_large, image, question)
-    answer_vilt = generate_answer_vilt(vilt_processor, vilt_model, image, question)
-    return answer_git_base, answer_git_large, answer_blip_base, answer_blip_large, answer_vilt
 examples = [["cats.jpg", "How many cats are there?"], ["stop_sign.png", "What's behind the stop sign?"], ["astronaut.jpg", "What's the astronaut riding on?"]]
-outputs = [gr.outputs.Textbox(label="Answer generated by GIT-base"), gr.outputs.Textbox(label="Answer generated by GIT-large"), gr.outputs.Textbox(label="Answer generated by BLIP-base"), gr.outputs.Textbox(label="Answer generated by BLIP-large"), gr.outputs.Textbox(label="Answer generated by ViLT")]
 title = "Interactive demo: comparing visual question answering (VQA) models"
 description = "Gradio Demo to compare GIT, BLIP and ViLT, 3 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."

     answer_blip_large = generate_answer_blip(blip_processor_large, blip_model_large, image, question)
+    # answer_vilt = generate_answer_vilt(vilt_processor, vilt_model, image, question)
+    return answer_git_base, answer_git_large, answer_blip_base, answer_blip_large#, answer_vilt
 examples = [["cats.jpg", "How many cats are there?"], ["stop_sign.png", "What's behind the stop sign?"], ["astronaut.jpg", "What's the astronaut riding on?"]]
+outputs = [gr.outputs.Textbox(label="Answer generated by GIT-base"), gr.outputs.Textbox(label="Answer generated by GIT-large"), gr.outputs.Textbox(label="Answer generated by BLIP-base"), gr.outputs.Textbox(label="Answer generated by BLIP-large")]#, gr.outputs.Textbox(label="Answer generated by ViLT")]
 title = "Interactive demo: comparing visual question answering (VQA) models"
 description = "Gradio Demo to compare GIT, BLIP and ViLT, 3 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."