Spaces:
Running
on
Zero
Running
on
Zero
import spaces | |
from transformers import Owlv2Processor, Owlv2ForObjectDetection, AutoProcessor, AutoModelForZeroShotObjectDetection | |
import torch | |
import gradio as gr | |
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The processor and the model are loaded from the same Grounding DINO checkpoint.
dino_processor = AutoProcessor.from_pretrained("IDEA-Research/grounding-dino-base")
# Place the model on `device` (not a hard-coded "cuda") so it lives on the same
# device that `infer` moves its inputs to, and so CPU-only hosts do not crash.
dino_model = AutoModelForZeroShotObjectDetection.from_pretrained("IDEA-Research/grounding-dino-base").to(device)
def infer(img, text_queries, score_threshold, model):
    """Detect the queried objects in an image and return labelled boxes.

    Args:
        img: Image object exposing ``.shape``; its first two entries are passed
            to the processor as the target size (presumably a NumPy image with
            shape ``(height, width, ...)`` -- confirm against the caller).
        text_queries: Iterable of candidate label strings.
        score_threshold: Minimum score a detection needs in order to be kept;
            also forwarded to the processor as ``box_threshold``.
        model: Name of the detector to use. Only ``"dino"`` is supported.

    Returns:
        A list of ``(box, label)`` tuples, where ``box`` is a list of ints taken
        from the post-processed box and ``label`` is a non-empty string.
        Returns an empty list when no usable query is given.

    Raises:
        ValueError: If ``model`` is not ``"dino"``.
    """
    # NOTE(review): the visible part of this file imports `spaces` but never
    # uses `spaces.GPU`; confirm whether the hosting hardware requires it here.
    if model != "dino":
        # Previously any other value fell through to an unbound local variable.
        raise ValueError(f"Unsupported model: {model!r}")

    # Grounding DINO prompts are lower-cased phrases, each terminated by a dot.
    # Blank entries are dropped so they do not become stray "." phrases.
    phrases = [query.strip().lower() for query in text_queries]
    prompt = " ".join(f"{phrase}." for phrase in phrases if phrase)
    if not prompt:
        return []

    # The first two shape entries are forwarded unchanged as the target size.
    height, width = img.shape[:2]
    target_sizes = [(height, width)]

    inputs = dino_processor(text=prompt, images=img, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = dino_model(**inputs)

    # Post-processing is done on CPU copies of the raw predictions.
    outputs.logits = outputs.logits.cpu()
    outputs.pred_boxes = outputs.pred_boxes.cpu()
    results = dino_processor.post_process_grounded_object_detection(
        outputs=outputs,
        input_ids=inputs.input_ids,
        box_threshold=score_threshold,
        target_sizes=target_sizes,
    )
    detections = results[0]

    result_labels = []
    for box, score, label in zip(detections["boxes"], detections["scores"], detections["labels"]):
        # Skip low-confidence detections and detections without a label.
        if score < score_threshold or label == "":
            continue
        result_labels.append(([int(value) for value in box.tolist()], label))
    return result_labels
def query_image(img, text_queries, dino_threshold):
    """Gradio callback: run Grounding DINO on an image for comma-separated labels.

    Args:
        img: The input image handed over by the UI, or ``None`` when no image
            was provided.
        text_queries: Candidate labels as a single comma-separated string.
        dino_threshold: Score threshold forwarded to ``infer``.

    Returns:
        A tuple ``(img, annotations)`` where ``annotations`` is the list
        returned by ``infer`` (empty when no usable label was entered).

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        # Surface a readable message instead of failing later on `img.shape`.
        raise gr.Error("Please provide an input image.")

    # Split on commas, trim surrounding whitespace and drop blank entries.
    labels = [label.strip() for label in text_queries.split(",")]
    labels = [label for label in labels if label]
    if not labels:
        # Nothing to look for: return the image without annotations.
        return (img, [])

    dino_output = infer(img, labels, dino_threshold, "dino")
    return (img, dino_output)
# Shared UI components: the score-threshold slider (an input) and the
# annotated-image view (the single output).
dino_threshold = gr.Slider(
    minimum=0,
    maximum=1,
    value=0.12,
    label="Grounding DINO Threshold",
)
dino_output = gr.AnnotatedImage(label="Grounding DINO Output")

# Wire `query_image` to its three inputs (image, label text, threshold).
# Each example row supplies values in that same order.
demo = gr.Interface(
    fn=query_image,
    inputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Candidate Labels"),
        dino_threshold,
    ],
    outputs=[dino_output],
    title="Grounding DINO DSA2024",
    description="DSA2024 Space to evaluate state-of-the-art [Grounding DINO](https://huggingface.co/IDEA-Research/grounding-dino-base) zero-shot object detection model. Simply upload an image and enter a list of the objects you want to detect with comma, or try one of the examples. Play with the threshold to filter out low confidence predictions in the model.",
    examples=[
        ["./deer.jpg", "zebra, deer, goat", 0.16],
        ["./zebra.jpg", "zebra, lion, deer", 0.16],
    ],
)

# Start the app with debug output enabled.
demo.launch(debug=True)