import tempfile

import cv2
import gradio as gr
from autodistill.core.custom_detection_model import CustomDetectionModel
from autodistill.detection import CaptionOntology
from autodistill.utils import plot
from autodistill_gpt_4v import GPT4V
from autodistill_grounding_dino import GroundingDINO
# Hardcoded configuration. Replace the placeholder with your own OpenAI API key;
# never commit a real key to source control.
api_key = "YOUR_OPENAI_API_KEY"
dino_prompt = "buildings . parks ."
gpt_prompt = "buildings"
MARKDOWN = """
# DINO-GPT4V

Use Grounding DINO and GPT-4V to label specific objects.

Visit the [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)
repository to find more OpenAI Vision API experiments or contribute your own.
"""
def respond(input_image):
    # Gradio delivers images as RGB arrays; OpenCV's imwrite expects BGR.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # Write the frame to disk so autodistill can read it by path.
    # delete=False keeps the file around after the context manager closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        cv2.imwrite(temp_file.name, input_image)

    # Compose a two-stage model: Grounding DINO proposes boxes,
    # then GPT-4V classifies each detected region.
    DINOGPT = CustomDetectionModel(
        detection_model=GroundingDINO(
            CaptionOntology({dino_prompt: dino_prompt})
        ),
        classification_model=GPT4V(
            CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
            api_key=api_key,
        ),
    )

    results = DINOGPT.predict(temp_file.name)
    # Some versions return a tuple; keep only the detections component.
    if isinstance(results, tuple):
        results = results[0]

    return plot(
        image=cv2.imread(temp_file.name),
        detections=results,
        classes=gpt_prompt.split(", "),
        raw=True,
    )
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=respond,
        inputs=[input_image],
        outputs=[output_image],
    )

demo.launch()
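
# A minimal sketch of running this Space locally. The PyPI package names below
# are assumptions based on the imports above; adjust versions as needed:
#   pip install gradio opencv-python autodistill \
#       autodistill-grounding-dino autodistill-gpt-4v
#   python app.py  # or whatever name this file is saved under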