import gradio as gr from gradio_client import Client import os import json def generate_answer_pix2struct_base(image_path, question): try: client = Client("https://merve-pix2struct.hf.space/") return client.predict( image_path, question, fn_index=1 ) except Exception: gr.Warning("The Pix2Struct Large Space is currently unavailable. Please try again later.") return "" def generate_answer(image_path, question, model_name, space_id): try: client = Client(f"https://{model_name}.hf.space/") result = client.predict(image_path, question, api_name="/predict") if result.endswith(".json"): with open(result, "rb") as json_file: output = json.loads(json_file.read()) if model_name == "TusharGoel-LayoutLM-DocVQA": return output["label"] else: return output["answer"] else: return result except Exception: gr.Warning(f"The {model_name} Space is currently unavailable. Please try again later.") return "" def generate_answers(image_path, question): answer_p2s_base = generate_answer_pix2struct_base(image_path, question) answer_p2s_large = generate_answer(image_path, question, model_name = "akdeniz27-pix2struct-DocVQA", space_id = "Pix2Struct Large") answer_layoutlm = generate_answer(image_path, question, model_name = "TusharGoel-LayoutLM-DocVQA", space_id = "LayoutLM DocVQA") answer_donut = generate_answer(image_path, question, model_name = "nielsr-donut-docvqa", space_id = "Donut DocVQA") return answer_p2s_base, answer_p2s_large, answer_layoutlm, answer_donut examples = [["docvqa_example.png", "How many items are sold?"], ["document-question-answering-input.png", "What is the objective?"]] title = "# Interactive demo: comparing document question answering (VQA) models" css = """ #mkd { height: 500px; overflow: auto; border: 1px solid #ccc; } """ with gr.Blocks(css=css) as demo: gr.HTML("