# DINO-GPT4V / app.py
# Last commit: "Update app.py" (ec67201) by GRATITUD3
import os
import tempfile

import cv2
import gradio as gr
from autodistill.core.custom_detection_model import CustomDetectionModel
from autodistill.detection import CaptionOntology
from autodistill.utils import plot
from autodistill_gpt_4v import GPT4V
from autodistill_grounding_dino import GroundingDINO
# Configuration values
# The OpenAI API key is read from the environment instead of being committed
# to source control. It is None when OPENAI_API_KEY is not set.
# NOTE: any key that was previously committed in this file should be treated
# as compromised and revoked.
api_key = os.environ.get("OPENAI_API_KEY")
# Caption used as both key and value of the Grounding DINO ontology.
dino_prompt = "buildings . parks ."
# Class names for GPT-4V; multiple classes are separated by ", ".
gpt_prompt = "buildings"
# Introductory Markdown rendered at the top of the demo page.
MARKDOWN = "\n".join(
    (
        "",
        "# DINO-GPT4V",
        "Use Grounding DINO and GPT-4V to label specific objects.",
        "Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)",
        "repository to find more OpenAI Vision API experiments or contribute your own.",
    )
)
def respond(input_image):
    """Detect and label objects in an uploaded image.

    The image is saved as a temporary JPEG and passed to a
    ``CustomDetectionModel`` that uses Grounding DINO as its detection model
    and GPT-4V as its classification model. The detections are then drawn
    onto the image with ``plot``.

    Args:
        input_image: Image from the ``gr.Image(type="numpy")`` input
            component, or ``None`` when nothing was uploaded.

    Returns:
        The annotated image, converted to RGB for display by Gradio.

    Raises:
        gr.Error: If no image was provided, no API key is configured, or the
            image could not be written to disk.
    """
    if input_image is None:
        raise gr.Error("Please upload an image before submitting.")
    if not api_key:
        raise gr.Error("OpenAI API key is not configured.")

    classes = gpt_prompt.split(", ")

    # Gradio supplies RGB arrays while OpenCV reads and writes BGR.
    bgr_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # NOTE(review): both models are constructed on every request; consider
    # building them once and reusing them if start-up cost matters.
    model = CustomDetectionModel(
        detection_model=GroundingDINO(
            CaptionOntology({dino_prompt: dino_prompt})
        ),
        classification_model=GPT4V(
            CaptionOntology({name: name for name in classes}),
            api_key=api_key,
        ),
    )

    # A temporary directory is removed automatically on exit, so no image
    # file is left behind, and the path is never held open by another handle
    # while OpenCV writes to it.
    with tempfile.TemporaryDirectory() as work_dir:
        image_path = os.path.join(work_dir, "input.jpg")
        if not cv2.imwrite(image_path, bgr_image):
            raise gr.Error("Could not save the uploaded image for processing.")
        results = model.predict(image_path)

    # Some model versions may return a tuple; the detections are assumed to
    # be its first element -- TODO confirm against the installed autodistill.
    if isinstance(results, tuple):
        results = results[0]

    annotated = plot(
        image=bgr_image,
        detections=results,
        classes=classes,
        raw=True,
    )
    # Assumes plot(raw=True) returns the annotated frame in the same channel
    # order it was given (BGR here) -- TODO confirm. Gradio expects RGB.
    return cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
# Build the UI: an intro section, input and output images side by side, and
# a Submit button that runs `respond` on the uploaded image.
# NOTE(review): the nesting below is reconstructed; confirm that the button
# is meant to sit below the image row rather than inside a column.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=respond,
        inputs=[input_image],
        outputs=[output_image],
    )

# Launch only when executed as a script, so importing this module (for
# tests or tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()