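# Better Florence-2 Playground: a Gradio Space for running Florence-2
# object detection and image captioning tasks.
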
import gradio as gr
import supervision as sv
import torch
import spaces
from utils.annotate import annotate_with_boxes
from utils.models import load_models, run_inference, CHECKPOINTS
from utils.tasks import (
    TASK_NAMES, TASKS, OBJECT_DETECTION_TASK_NAME, CAPTION_TASK_NAMES
)
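
# Markdown header rendered at the top of the demo, with badges linking to the
# Colab notebook, Roboflow blog post, arXiv paper, and YouTube video.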
MARKDOWN = """
# Better Florence-2 Playground 🔥
<div>
<a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-florence-2-on-detection-dataset.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
</a>
<a href="https://blog.roboflow.com/florence-2/">
<img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
</a>
<a href="https://arxiv.org/abs/2311.06242">
<img src="https://img.shields.io/badge/arXiv-2311.06242-b31b1b.svg" alt="arXiv" style="display:inline-block;">
</a>
<a href="https://www.youtube.com/watch?v=i3KjYgxNH6w">
<img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="display:inline-block;">
</a>
</div>
"""
# OBJECT_DETECTION_EXAMPLES = [
# ["microsoft/Florence-2-large-ft", "Object Detection", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
# ]
# CAPTION_EXAMPLES = [
# ["microsoft/Florence-2-large-ft", "Caption", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
# ]
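
# Select the inference device and load every checkpoint (and its processor) once at startup.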
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODELS, PROCESSORS = load_models(DEVICE)
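

# Run the selected checkpoint on the uploaded image: object detection results are
# drawn onto the image, while captioning tasks return the generated text.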
@spaces.GPU
def process(checkpoint_dropdown, task_dropdown, image_input):
model = MODELS[checkpoint_dropdown]
processor = PROCESSORS[checkpoint_dropdown]
task = TASKS[task_dropdown]
if task_dropdown == OBJECT_DETECTION_TASK_NAME:
_, response = run_inference(
model, processor, DEVICE, image_input, task)
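        # Parse the Florence-2 response into supervision Detections and draw bounding boxes.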
detections = sv.Detections.from_lmm(
lmm=sv.LMM.FLORENCE_2, result=response, resolution_wh=image_input.size)
return annotate_with_boxes(image_input, detections)
elif task_dropdown in CAPTION_TASK_NAMES:
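        # Captioning tasks: run inference and return the generated text for the task prompt.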
_, response = run_inference(
model, processor, DEVICE, image_input, task)
return response[task]
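

# Assemble the Gradio UI: model and task dropdowns in the first row, image input
# and a dynamically rendered output component in the second.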
with gr.Blocks() as demo:
gr.Markdown(MARKDOWN)
with gr.Row():
checkpoint_dropdown_component = gr.Dropdown(
choices=CHECKPOINTS,
value=CHECKPOINTS[0],
label="Model", info="Select a Florence 2 model to use.")
task_dropdown_component = gr.Dropdown(
choices=TASK_NAMES,
value=TASK_NAMES[0],
label="Task", info="Select a task to perform with the model.")
with gr.Row():
with gr.Column():
image_input_component = gr.Image(type='pil', label='Image Input')
submit_button_component = gr.Button(value='Submit', variant='primary')
with gr.Column():
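            # Re-render the output component whenever the selected task changes:
            # object detection shows an annotated image, captioning tasks show a textbox.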
@gr.render(inputs=task_dropdown_component)
def show_output(text):
if text == OBJECT_DETECTION_TASK_NAME:
image_output_component = gr.Image(type='pil', label='Image Output')
submit_button_component.click(
fn=process,
inputs=[
checkpoint_dropdown_component,
task_dropdown_component,
image_input_component
],
outputs=image_output_component
)
elif text in CAPTION_TASK_NAMES:
text_output_component = gr.Textbox(label='Caption Output')
submit_button_component.click(
fn=process,
inputs=[
checkpoint_dropdown_component,
task_dropdown_component,
image_input_component
],
outputs=text_output_component
)
# @gr.render(inputs=task_dropdown_component)
# def show_examples(text):
# if text == "Object Detection":
# gr.Examples(
# fn=process,
# examples=OBJECT_DETECTION_EXAMPLES,
# inputs=[
# checkpoint_dropdown_component,
# task_dropdown_component,
# image_input_component
# ],
# outputs=image_output_component
# )
# elif text == "Caption":
# gr.Examples(
# fn=process,
# examples=CAPTION_EXAMPLES,
# inputs=[
# checkpoint_dropdown_component,
# task_dropdown_component,
# image_input_component
# ],
# outputs=text_output_component
# )
demo.launch(debug=False, show_error=True, max_threads=1)