Spaces:

merve
/

compare_docvqa_models

Running

App Files Files Community

compare_docvqa_models / app.py

merve HF staff

Create app.py

7e02e28 over 1 year ago

raw

history blame contribute delete

3.78 kB

	import gradio as gr
	from gradio_client import Client
	import os
	import json


	def generate_answer_pix2struct_base(image_path, question):
	    """Ask the Pix2Struct Base Space a question about a document image.

	    Args:
	        image_path: Document image path, forwarded as the first positional
	            argument of the remote ``predict`` call.
	        question: Question text, forwarded as the second positional argument.

	    Returns:
	        Whatever the remote ``predict`` call returns, or ``""`` when the
	        call raises for any reason.
	    """
	    try:
	        client = Client("https://merve-pix2struct.hf.space/")
	        # This Space is addressed by endpoint index rather than by api_name.
	        return client.predict(image_path, question, fn_index=1)
	    except Exception:
	        # Deliberate best-effort: any failure becomes a UI warning and an
	        # empty answer. The message names the Base model, which is the
	        # Space this function actually calls.
	        gr.Warning("The Pix2Struct Base Space is currently unavailable. Please try again later.")
	        return ""


	def generate_answer(image_path, question, model_name, space_id):
	    """Query a document-QA Space through its ``/predict`` endpoint.

	    Args:
	        image_path: Document image path, forwarded to the remote call.
	        question: Question text, forwarded to the remote call.
	        model_name: Space slug; the client connects to
	            ``https://{model_name}.hf.space/``.
	        space_id: Human-readable name of the Space, shown in the warning
	            when the call fails.

	    Returns:
	        If the remote result is a path ending in ``.json``, the file is
	        parsed and its ``"label"`` entry (for the
	        ``TusharGoel-LayoutLM-DocVQA`` Space) or ``"answer"`` entry (all
	        other Spaces) is returned. Otherwise the raw result is returned.
	        Any exception yields ``""``.
	    """
	    try:
	        client = Client(f"https://{model_name}.hf.space/")
	        result = client.predict(image_path, question, api_name="/predict")
	        if not result.endswith(".json"):
	            return result
	        with open(result, "rb") as json_file:
	            output = json.load(json_file)
	        # The LayoutLM Space stores its answer under a different key.
	        answer_key = "label" if model_name == "TusharGoel-LayoutLM-DocVQA" else "answer"
	        return output[answer_key]
	    except Exception:
	        # Deliberate best-effort: any failure becomes a UI warning and an
	        # empty answer. Use the display name the caller supplied instead
	        # of the URL slug.
	        gr.Warning(f"The {space_id} Space is currently unavailable. Please try again later.")
	        return ""


	def generate_answers(image_path, question):
	    """Collect answers to one question from all four compared models.

	    Args:
	        image_path: Document image path passed to every model.
	        question: Question text passed to every model.

	    Returns:
	        A 4-tuple of answers in the order Pix2Struct Base, Pix2Struct
	        Large, LayoutLM, Donut.
	    """
	    # (Space slug, display name) for the Spaces reached via generate_answer,
	    # listed in output order.
	    remote_spaces = (
	        ("akdeniz27-pix2struct-DocVQA", "Pix2Struct Large"),
	        ("TusharGoel-LayoutLM-DocVQA", "LayoutLM DocVQA"),
	        ("nielsr-donut-docvqa", "Donut DocVQA"),
	    )

	    collected = [generate_answer_pix2struct_base(image_path, question)]
	    for slug, display_name in remote_spaces:
	        collected.append(
	            generate_answer(image_path, question, model_name=slug, space_id=display_name)
	        )
	    return tuple(collected)

	# Sample (image file, question) pairs for the demo.
	examples = [
	    ["docvqa_example.png", "How many items are sold?"],
	    ["document-question-answering-input.png", "What is the objective?"],
	]

	# Markdown-style heading text; the visible UI code builds its header with
	# gr.HTML and does not read this value.
	title = "# Interactive demo: comparing document question answering (VQA) models"

	# Extra stylesheet handed to gr.Blocks; styles the element with id "mkd".
	css = """
	#mkd {
	height: 500px;
	overflow: auto;
	border: 1px solid #ccc;
	}
	"""

	with gr.Blocks(css=css) as demo:
	    # Page header. Every <center>/<h1>/<h3> element is now properly closed;
	    # the original markup used opening tags where closing tags belonged.
	    gr.HTML("<h1><center>Compare Document Question Answering Models 📄</center></h1>")
	    gr.HTML("<h3><center>Document question answering is the task of answering questions from documents in visual form. 📔📕</center></h3>")
	    gr.HTML("<h3><center>To try this Space, simply upload documents and questions. </center></h3>")
	    gr.HTML("<h3><center>If prompted to wait and try again, please try again. This Space uses other Spaces as APIs, so it might take time to get those Spaces up and running if they're stopped. </center></h3>")

	    with gr.Row():
	        # Left column: user inputs and the trigger button.
	        with gr.Column():
	            input_image = gr.Image(label="Input Document", type="filepath")
	            question = gr.Textbox(label="question")
	            run_button = gr.Button("Answer")
	        # Right column: one answer box per compared model.
	        with gr.Column():
	            out_p2s_base = gr.Textbox(label="Answer generated by Pix2Struct Base")
	            out_p2s_large = gr.Textbox(label="Answer generated by Pix2Struct Large")
	            out_layoutlm = gr.Textbox(label="Answer generated by LayoutLM")
	            out_donut = gr.Textbox(label="Answer generated by Donut")

	    # Order must match the tuple returned by generate_answers.
	    outputs = [
	        out_p2s_base,
	        out_p2s_large,
	        out_layoutlm,
	        out_donut,
	    ]

	    # Reuse the module-level `examples` list instead of repeating the literal.
	    # NOTE(review): cache_examples=True precomputes the example outputs by
	    # calling generate_answers, which contacts the remote Spaces; confirm
	    # that is acceptable when those Spaces are stopped.
	    gr.Examples(
	        examples=examples,
	        inputs=[input_image, question],
	        outputs=outputs,
	        fn=generate_answers,
	        cache_examples=True,
	    )

	    run_button.click(
	        fn=generate_answers,
	        inputs=[input_image, question],
	        outputs=outputs,
	    )

	# Script entry point: enable request queuing, then start the app in debug mode.
	if __name__ == "__main__":
	    queued_app = demo.queue()
	    queued_app.launch(debug=True)