import gradio as gr
import supervision as sv
import torch
import spaces

from utils.annotate import annotate_with_boxes
from utils.models import load_models, run_inference, CHECKPOINTS
from utils.tasks import (
    TASK_NAMES,
    TASKS,
    OBJECT_DETECTION_TASK_NAME,
    CAPTION_TASK_NAMES,
)

# Header text rendered at the top of the page.
MARKDOWN = """ # Better Florence-2 Playground 🔥
"""

# Example inputs for the (currently disabled) example galleries below.
# OBJECT_DETECTION_EXAMPLES = [
#     ["microsoft/Florence-2-large-ft", "Object Detection",
#      "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
# ]
# CAPTION_EXAMPLES = [
#     ["microsoft/Florence-2-large-ft", "Caption",
#      "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
# ]

# Prefer CUDA when available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Both containers are indexed by checkpoint name (see `process`).
MODELS, PROCESSORS = load_models(DEVICE)


@spaces.GPU
def process(checkpoint_dropdown, task_dropdown, image_input):
    """Run the selected Florence-2 checkpoint on an image for the chosen task.

    Args:
        checkpoint_dropdown: Checkpoint name; key into ``MODELS`` and
            ``PROCESSORS``.
        task_dropdown: Task display name; key into ``TASKS``.
        image_input: Image supplied by the ``gr.Image(type='pil')`` input
            (``None`` when the user has not provided one).

    Returns:
        For the object-detection task, the result of ``annotate_with_boxes``
        applied to the input image and the parsed detections. For caption
        tasks, the entry of the inference response stored under the task key.

    Raises:
        gr.Error: If no image was provided, or the task is neither the
            object-detection task nor one of the caption tasks.
        KeyError: If the checkpoint or task name is unknown.
    """
    # Fail early with a readable message instead of an AttributeError on
    # `image_input.size` (or a failure deep inside inference).
    if image_input is None:
        raise gr.Error("Please provide an input image.")

    is_detection = task_dropdown == OBJECT_DETECTION_TASK_NAME
    if not is_detection and task_dropdown not in CAPTION_TASK_NAMES:
        # Previously this case fell through and returned None silently.
        raise gr.Error(f"Unsupported task: {task_dropdown}")

    model = MODELS[checkpoint_dropdown]
    processor = PROCESSORS[checkpoint_dropdown]
    task = TASKS[task_dropdown]

    # Inference is identical for every supported task; only the
    # post-processing of the response differs.
    _, response = run_inference(model, processor, DEVICE, image_input, task)

    if is_detection:
        detections = sv.Detections.from_lmm(
            lmm=sv.LMM.FLORENCE_2,
            result=response,
            resolution_wh=image_input.size)
        return annotate_with_boxes(image_input, detections)

    return response[task]


with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        checkpoint_dropdown_component = gr.Dropdown(
            choices=CHECKPOINTS,
            value=CHECKPOINTS[0],
            label="Model",
            info="Select a Florence 2 model to use.")
        task_dropdown_component = gr.Dropdown(
            choices=TASK_NAMES,
            value=TASK_NAMES[0],
            label="Task",
            info="Select a task to perform with the model.")

    with gr.Row():
        with gr.Column():
            image_input_component = gr.Image(type='pil', label='Image Input')
            submit_button_component = gr.Button(
                value='Submit', variant='primary')

        with gr.Column():
            @gr.render(inputs=task_dropdown_component)
            def show_output(text):
                """Render the output widget matching the selected task.

                An image output is created for object detection and a text
                box for caption tasks; the submit button is then wired to
                `process` with that widget as its output. Nothing is rendered
                for any other task name.
                """
                if text == OBJECT_DETECTION_TASK_NAME:
                    output_component = gr.Image(
                        type='pil', label='Image Output')
                elif text in CAPTION_TASK_NAMES:
                    output_component = gr.Textbox(label='Caption Output')
                else:
                    return

                # The listener is attached inside the render function because
                # its output component only exists within this render.
                submit_button_component.click(
                    fn=process,
                    inputs=[
                        checkpoint_dropdown_component,
                        task_dropdown_component,
                        image_input_component
                    ],
                    outputs=output_component
                )

    # Disabled example galleries, kept for reference.
    # @gr.render(inputs=task_dropdown_component)
    # def show_examples(text):
    #     if text == "Object Detection":
    #         gr.Examples(
    #             fn=process,
    #             examples=OBJECT_DETECTION_EXAMPLES,
    #             inputs=[
    #                 checkpoint_dropdown_component,
    #                 task_dropdown_component,
    #                 image_input_component
    #             ],
    #             outputs=image_output_component
    #         )
    #     elif text == "Caption":
    #         gr.Examples(
    #             fn=process,
    #             examples=CAPTION_EXAMPLES,
    #             inputs=[
    #                 checkpoint_dropdown_component,
    #                 task_dropdown_component,
    #                 image_input_component
    #             ],
    #             outputs=text_output_component
    #         )

demo.launch(debug=False, show_error=True, max_threads=1)