Spaces:

Vision-CAIR
/

MiniGPT-Med

Runtime error

App Files Files Community

MiniGPT-Med / eval_scripts /clean_json.py

Vision-CAIR

MiniGPT-Med

be10055 verified 4 months ago

raw

history blame contribute delete

2.2 kB

	import json
	import re

	def clean_mimic_json(messy_json, cleaned_output):
	    """Flatten an ``{image_id: captions}`` JSON file into a list of records.

	    The input file must contain a JSON object. For every key one record
	    ``{"image_id": ..., "caption": ...}`` is produced, and the resulting
	    list is written to ``cleaned_output`` with ``indent=2``.

	    Args:
	        messy_json: Path of the JSON file to read.
	        cleaned_output: Path of the JSON file to write; an existing file
	            is overwritten.
	    """
	    # JSON text is UTF-8; do not depend on the platform's default encoding.
	    with open(messy_json, 'r', encoding='utf-8') as f:
	        messy_data = json.load(f)

	    clean_data = []
	    for image_id, captions in messy_data.items():
	        if isinstance(captions, str):
	            # A bare string would otherwise be joined character by
	            # character ("abc" -> "a b c"), so pass it through unchanged.
	            caption_clean = captions
	        else:
	            caption_clean = ' '.join(captions)

	        clean_data.append({
	            # Everything from the first '.' onward is dropped
	            # ("abc.jpg" -> "abc").
	            "image_id": image_id.split('.')[0],
	            "caption": caption_clean,
	        })

	    with open(cleaned_output, 'w', encoding='utf-8') as outfile:
	        json.dump(clean_data, outfile, indent=2)


	def clean_vqa_json(messy_json, cleaned_output):
	    """Strip every entry of a JSON file down to its question/answer pair.

	    The input file must contain a JSON object whose values are lists of
	    objects. The output keeps the same keys; each listed object is reduced
	    to its ``"question"`` and ``"answer"`` fields, and the result is
	    written to ``cleaned_output`` with ``indent=4``.

	    Args:
	        messy_json: Path of the JSON file to read.
	        cleaned_output: Path of the JSON file to write; an existing file
	            is overwritten.

	    Raises:
	        KeyError: If a listed object has no ``"question"`` or ``"answer"``.
	    """
	    # JSON text is UTF-8; do not depend on the platform's default encoding.
	    with open(messy_json, "r", encoding="utf-8") as file:
	        # Bound to its own name so the path parameter is not shadowed.
	        raw_entries = json.load(file)

	    organized_json = {
	        key: [
	            {"question": value["question"], "answer": value["answer"]}
	            for value in values
	        ]
	        for key, values in raw_entries.items()
	    }

	    with open(cleaned_output, "w", encoding="utf-8") as outfile:
	        json.dump(organized_json, outfile, indent=4)



	def clean_detection_json(messy_json, cleaned_output):
	    """Extract an object label and bounding box from tagged caption strings.

	    The input file must contain a JSON object. For each key, only the first
	    element of its list value is inspected. The label is the stripped text
	    between ``<p>`` and ``</p>``; the box is the four ``<digits>`` tokens
	    found in the caption, converted to floats. Each usable entry becomes
	    ``{"key": ..., "objects": [label], "bbox": [[v1, v2, v3, v4]]}`` and
	    the list of entries is written to ``cleaned_output`` with ``indent=4``.

	    An entry is skipped silently when its value is not a non-empty list,
	    its first element is not a string, the caption does not contain exactly
	    one ``<p>``, the label is empty, or the caption does not contain
	    exactly four ``<digits>`` tokens.

	    Args:
	        messy_json: Path of the JSON file to read.
	        cleaned_output: Path of the JSON file to write; an existing file
	            is overwritten.
	    """
	    # Compiled once instead of being looked up on every iteration.
	    bbox_pattern = re.compile(r'<(\d+)>')

	    # JSON text is UTF-8; do not depend on the platform's default encoding.
	    with open(messy_json, "r", encoding="utf-8") as input_file:
	        input_json = json.load(input_file)

	    organized_data = []

	    for key, value in input_json.items():
	        if not (isinstance(value, list) and value):
	            continue

	        caption = value[0]
	        if not isinstance(caption, str):
	            # Skip like any other malformed entry instead of crashing on
	            # the string methods below.
	            continue

	        objects_match = caption.split("<p>")
	        if len(objects_match) != 2:
	            # Either no "<p>" at all or more than one.
	            continue
	        object_part = objects_match[1].split("</p>")[0].strip()

	        bbox_match = bbox_pattern.findall(caption)
	        if not object_part or len(bbox_match) != 4:
	            continue

	        organized_data.append({
	            # Everything from the first ".png" onward is dropped.
	            "key": key.split(".png")[0],
	            "objects": [object_part],
	            "bbox": [[float(val) for val in bbox_match]],
	        })

	    with open(cleaned_output, "w", encoding="utf-8") as output_file:
	        json.dump(organized_data, output_file, indent=4)