jadechoghari
/

Ferret-UI-Gemma2b

Image-Text-to-Text

text-generation

Model card Files Files and versions Community

Ferret-UI-Gemma2b / inference.py

jadechoghari's picture

Update inference.py

744d366 verified about 1 month ago

3.42 kB

	import subprocess
	import os
	import subprocess
	from PIL import Image
	import re
	import json

	def process_inference_results(results):
	    """
	    Return the image and the model text of the first inference result.

	    The model text may embed a bounding box as ``[[x1, y1, x2, y2]]``; the
	    text is returned verbatim. No box is drawn on the image, so the image is
	    returned unmodified apart from the conversion to RGB.

	    Only the first result is loaded: the function returns a single
	    (image, text) pair, so decoding the remaining images would be wasted work.

	    :param results: List of inference result dicts, each providing an
	        'image_path' and a 'text' entry.
	    :return: (image, text) for the first result, or (None, None) when
	        ``results`` is empty.
	    """
	    if not results:
	        # Same "nothing to return" convention that inference_and_run uses,
	        # instead of failing with IndexError on an empty result list.
	        return None, None

	    first = results[0]

	    # The context manager releases the underlying file as soon as the pixel
	    # data has been copied into the converted image.
	    with Image.open(first['image_path']) as source:
	        image = source.convert("RGB")

	    return image, first['text']

	def _read_inference_output(output_path):
	    """
	    Load the JSONL records written by the inference subprocess.

	    ``output_path`` may be a directory containing ``*.jsonl`` files (the first
	    one in sorted order is read) or a single JSONL file.

	    :param output_path: Path handed to the subprocess as ``--answers_file``.
	    :return: List of decoded records, or None when no usable output exists.
	    """
	    if not os.path.exists(output_path):
	        print("Output folder not found.")
	        return None

	    if os.path.isdir(output_path):
	        # os.listdir order is arbitrary; sort so the chosen file is stable.
	        jsonl_names = sorted(
	            name for name in os.listdir(output_path) if name.endswith(".jsonl")
	        )
	        if not jsonl_names:
	            print("No output JSONL files found.")
	            return None
	        output_path = os.path.join(output_path, jsonl_names[0])

	    with open(output_path, "r", encoding="utf-8") as output_file:
	        # Skip blank lines (e.g. a trailing newline) that json.loads rejects.
	        records = [json.loads(line) for line in output_file if line.strip()]

	    if not records:
	        print("No inference results found in the output file.")
	        return None
	    return records


	def inference_and_run(
	    image_path,
	    prompt,
	    conv_mode="ferret_gemma_instruct",
	    model_path="jadechoghari/Ferret-UI-Gemma2b",
	    box=None,
	):
	    """
	    Run the inference and capture the errors for debugging.

	    Writes a one-entry ``eval.json`` describing the image and prompt, runs
	    ``python -m model_UI`` on it, then reads the results from
	    ``eval_output.jsonl`` and passes them to ``process_inference_results``.

	    :param image_path: Path of the input image. Only its basename is written
	        to ``eval.json`` and ``--image_path`` is ".".
	        NOTE(review): presumably the image must therefore be in the current
	        working directory -- confirm against model_UI.
	    :param prompt: Text appended after the ``<image>`` placeholder.
	    :param conv_mode: Value forwarded as ``--conv_mode``.
	    :param model_path: Value forwarded as ``--model_path``.
	    :param box: Optional region stored under 'box_x1y1x2y2'; when truthy the
	        region-feature flags are added to the command.
	        Presumably ``[x1, y1, x2, y2]`` -- TODO confirm.
	    :return: Whatever ``process_inference_results`` returns, or (None, None)
	        when the subprocess fails or produces no readable output.
	    """
	    # Open the image once and release the file handle right after reading
	    # its dimensions.
	    with Image.open(image_path) as img:
	        image_w, image_h = img.size

	    data_input = [{
	        "id": 0,
	        "image": os.path.basename(image_path),
	        "image_h": image_h,
	        "image_w": image_w,
	        "conversations": [{"from": "human", "value": f"<image>\n{prompt}"}],
	    }]

	    if box:
	        data_input[0]["box_x1y1x2y2"] = [[box]]

	    with open("eval.json", "w") as json_file:
	        json.dump(data_input, json_file)

	    print("eval.json file created successfully.")

	    cmd = [
	        "python", "-m", "model_UI",
	        "--model_path", model_path,
	        "--data_path", "eval.json",
	        "--image_path", ".",
	        "--answers_file", "eval_output.jsonl",
	        "--num_beam", "1",
	        "--max_new_tokens", "1024",
	        "--conv_mode", conv_mode,
	    ]

	    if box:
	        cmd.extend(["--region_format", "box", "--add_region_feature"])

	    # Keep the try body to the one call that raises CalledProcessError.
	    try:
	        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
	    except subprocess.CalledProcessError as e:
	        print(f"Error occurred during inference:\n{e}")
	        print(f"Subprocess output:\n{e.output}")
	        # With capture_output=True the traceback of the child lands in
	        # stderr, which e.output (stdout) does not include.
	        print(f"Subprocess error (if any):\n{e.stderr}")
	        return None, None

	    print(f"Subprocess output:\n{result.stdout}")
	    print(f"Subprocess error (if any):\n{result.stderr}")
	    print("Inference completed. Output written to eval_output.jsonl")

	    results = _read_inference_output("eval_output.jsonl")
	    if not results:
	        return None, None

	    return process_inference_results(results)