Spaces:

merve
/

UDOP

Running on Zero

App Files Files Community

UDOP / app.py

merve HF staff

Update app.py

7062e70 verified 10 months ago

raw

history blame

2.88 kB

	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image
	from gradio_image_prompter import ImagePrompter
	from transformers import AutoProcessor, UdopForConditionalGeneration
	import easyocr
	from PIL import Image
	import spaces

	# Hub checkpoint shared by the processor and the model.
	_UDOP_CHECKPOINT = "microsoft/udop-large"

	# apply_ocr=False: words and boxes are supplied by the caller instead of
	# being produced by the processor's built-in OCR.
	processor = AutoProcessor.from_pretrained(_UDOP_CHECKPOINT, apply_ocr=False)
	model = UdopForConditionalGeneration.from_pretrained(_UDOP_CHECKPOINT)

	# Use CUDA when torch reports it as available, otherwise stay on CPU.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	@spaces.GPU
	def udop_box_inference(image, text_prompt, box_coordinates):
	    """Answer ``text_prompt`` about the boxed region of a document image.

	    The region selected by ``box_coordinates`` is cropped out and read with
	    EasyOCR; the recognised words and their boxes are then passed to UDOP
	    together with the full page image and the prompt.

	    Args:
	        image: PIL image of the full page.
	        text_prompt: Prompt text handed to the UDOP processor.
	        box_coordinates: One ImagePrompter prompt entry. Indices 0, 1, 3 and
	            4 are read as the box corners ``x1, y1, x2, y2``; indices 2 and 5
	            are skipped (presumably prompt-type labels -- confirm against
	            gradio_image_prompter).

	    Returns:
	        The decoded text of the first generated sequence.
	    """
	    import io  # local import: only needed for the in-memory PNG buffer

	    # Round exactly as PIL's crop() does internally, so the offset added to
	    # the OCR boxes below matches the real origin of the cropped region.
	    crop_box = [
	        int(round(box_coordinates[0])),
	        int(round(box_coordinates[1])),
	        int(round(box_coordinates[3])),
	        int(round(box_coordinates[4])),
	    ]
	    crop_left, crop_top = crop_box[0], crop_box[1]
	    extracted_image = extract_box(image, crop_box)

	    # Hand EasyOCR the crop as in-memory PNG bytes rather than a fixed-name
	    # file in the working directory: concurrent requests would otherwise
	    # overwrite each other's "cropped_image.png".
	    png_buffer = io.BytesIO()
	    extracted_image.save(png_buffer, format="PNG")

	    # NOTE(review): the Reader is rebuilt on every call; consider caching it
	    # if request latency matters.
	    reader = easyocr.Reader(['en'])
	    result = reader.readtext(png_buffer.getvalue())

	    texts = []
	    bboxs = []
	    for (bbox, text, _prob) in result:
	        texts.append(text)
	        # bbox[0] and bbox[2] are used as opposite corners. OCR coordinates
	        # are relative to the crop, so shift them by the crop origin to
	        # express them in full-image pixels before normalising against the
	        # full image size.
	        bboxs.append([
	            bbox[0][0] + crop_left,
	            bbox[0][1] + crop_top,
	            bbox[2][0] + crop_left,
	            bbox[2][1] + crop_top,
	        ])

	    width, height = image.size
	    image = image.convert("RGB")
	    norm_boxes = [normalize_bbox(box, width, height) for box in bboxs]

	    # Run on the selected device: the module-level `device` was previously
	    # computed but never applied, so generation stayed on CPU.
	    model.to(device)
	    encoding = processor(image, text_prompt, texts, boxes=norm_boxes, return_tensors="pt").to(device)
	    predicted_ids = model.generate(**encoding)
	    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


	def normalize_bbox(bbox, width, height):
	    """Rescale a pixel-space box to integer coordinates on a 0-1000 grid.

	    ``bbox`` is indexed as ``[x0, y0, x1, y1]``: the x entries are divided
	    by ``width`` and the y entries by ``height``, multiplied by 1000 and
	    truncated with ``int``.
	    """
	    x0, y0, x1, y1 = bbox[0], bbox[1], bbox[2], bbox[3]
	    # Pair every coordinate with the image extent along its own axis.
	    paired = ((x0, width), (y0, height), (x1, width), (y1, height))
	    return [int(1000 * (value / extent)) for value, extent in paired]


	def extract_box(image, coordinates):
	    """Return the region of ``image`` bounded by ``coordinates``.

	    ``coordinates`` must unpack into exactly four values, which are passed
	    to ``image.crop`` as a single ``(x, y, x2, y2)`` tuple.
	    """
	    left, upper, right, lower = coordinates
	    return image.crop((left, upper, right, lower))



	def infer_box(prompts, text_prompts):
	    """Validate the ImagePrompter payload and run box-prompted inference.

	    Args:
	        prompts: Value of the ImagePrompter component; read as a mapping
	            with an ``"image"`` entry and a ``"points"`` list.
	        text_prompts: Prompt text forwarded to ``udop_box_inference``.

	    Returns:
	        The result of ``udop_box_inference`` for the first drawn prompt.

	    Raises:
	        gr.Error: If no image was uploaded or no box was drawn.
	    """
	    image = prompts.get("image") if prompts else None
	    if image is None:
	        # gr.Error only reaches the user when it is raised; merely
	        # constructing it lets execution fall through to the code below.
	        raise gr.Error("Please upload an image and draw a box before submitting")

	    points = prompts.get("points")
	    # Check for an empty list before indexing so a missing box produces the
	    # friendly message instead of an IndexError.
	    if not points or points[0] is None:
	        raise gr.Error("Please draw a box before submitting.")

	    return udop_box_inference(image, text_prompts, points[0])


	# Gradio UI: an image prompter and a text prompt on the left, the model
	# output on the right. Grammar errors in the user-facing instructions
	# ("for a document understanding", "upload and image") are corrected here.
	with gr.Blocks(title="UDOP") as demo:
	    gr.Markdown("# UDOP")
	    gr.Markdown("UDOP is a cutting-edge foundation model for document understanding and generation.")
	    gr.Markdown("Try UDOP in this demo.")

	    with gr.Row():
	        with gr.Column(scale=1):
	            # Usage instructions
	            gr.Markdown("To try box prompting, simply upload an image and draw a box on it.")
	            with gr.Row():
	                with gr.Column():
	                    im = ImagePrompter(type="pil")
	                    text_prompt = gr.Textbox()
	                    btn = gr.Button("Submit")
	                with gr.Column():
	                    output = gr.Textbox(label="UDOP Output")

	    # Event listeners are registered while the Blocks context is still open.
	    btn.click(infer_box, inputs=[im, text_prompt], outputs=[output])

	demo.launch(debug=True)