import tempfile

import cv2
import gradio as gr
from autodistill.core.custom_detection_model import CustomDetectionModel
from autodistill.detection import CaptionOntology
from autodistill.utils import plot
from autodistill_gpt_4v import GPT4V
from autodistill_grounding_dino import GroundingDINO
# Hardcoded configuration. Replace the placeholder with your own OpenAI API key;
# never commit a real key to source control.
api_key = "YOUR_OPENAI_API_KEY"
dino_prompt = "buildings . parks ."
gpt_prompt = "buildings"
MARKDOWN = """
# DINO-GPT4V

Use Grounding DINO and GPT-4V to label specific objects.

Visit the [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)
repository to find more OpenAI Vision API experiments or contribute your own.
"""
def respond(input_image):
    # Gradio delivers images as RGB arrays; OpenCV's imwrite expects BGR.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    # Write the frame to disk so autodistill can read it by path.
    # delete=False keeps the file around after the context manager closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        cv2.imwrite(temp_file.name, input_image)

    # Compose a two-stage model: Grounding DINO proposes boxes,
    # then GPT-4V classifies each detected region.
    DINOGPT = CustomDetectionModel(
        detection_model=GroundingDINO(
            CaptionOntology({dino_prompt: dino_prompt})
        ),
        classification_model=GPT4V(
            CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
            api_key=api_key,
        ),
    )

    results = DINOGPT.predict(temp_file.name)
    # Some versions return a tuple; keep only the detections component.
    if isinstance(results, tuple):
        results = results[0]

    return plot(
        image=cv2.imread(temp_file.name),
        detections=results,
        classes=gpt_prompt.split(", "),
        raw=True,
    )
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=respond,
        inputs=[input_image],
        outputs=[output_image],
    )

demo.launch()
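
# A minimal sketch of running this Space locally. The PyPI package names below
# are assumptions based on the imports above; adjust versions as needed:
#   pip install gradio opencv-python autodistill \
#       autodistill-grounding-dino autodistill-gpt-4v
#   python app.py  # or whatever name this file is saved under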