# VISOR-GPT: utils/seq2coord.py
"""
Decode sequential VisorGPT output into visual locations (boxes, key points, masks).
author: sierkinhane.github.io
"""
import random
from tqdm import tqdm
import json
import numpy as np
import re
import argparse
import cv2
import math
import os
# COCO keypoints
stickwidth = 4
limbSeq_coco = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
[10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
[1, 16], [16, 18], [3, 17], [6, 18]]
limbSeq_cp = [[14, 2], [14, 1], [2, 4], [4, 6], [1, 3], [3, 5], [14, 8], [8, 10], [10, 12], [14, 7], [7, 9], [9, 11], [13, 14]]
# CrowdPose
# {'0': 'left shoulder', '1': 'right shoulder', '2': 'left elbow', '3': 'right elbow', '4': 'left wrist', '5': 'right wrist', '6': 'left hip', '7': 'right hip', '8': 'left knee', '9': 'right knee', '10': 'left ankle', '11': 'right ankle', '12': 'head', '13': 'neck'}
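# Note: both limb lists use 1-based joint indices (the drawing code below subtracts 1 when
# indexing), whereas the CrowdPose joint-name mapping above is 0-based.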
# for human pose visualization
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
# for box visualization
colors_box = [[217, 221, 116], [137, 165, 171], [230, 126, 175], [63, 157, 5], [107, 51, 75], [217, 147, 152], [129, 132, 8], [232, 85, 249], [254, 98, 33], [89, 108, 230], [253, 34, 161], [91, 150, 30], [255, 147, 26], [209, 154, 205], [134, 57, 11], [143, 181, 122], [241, 176, 87], [104, 73, 26], [122, 147, 59], [235, 230, 229], [119, 18, 125], [185, 61, 138], [237, 115, 90], [13, 209, 111], [219, 172, 212]]
# Plots one bounding box on image
def plot_one_box(x, img, color=None, label=None, line_thickness=None, idx=0):
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line thickness
    # honor an explicitly passed color; otherwise cycle through the box palette by index
    color = color if color is not None else colors_box[idx % len(colors_box)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1) # filled
cv2.putText(img, label, c1, 0, tl / 3, [0, 0, 0], thickness=tf, lineType=cv2.LINE_AA)
return img
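# Example (illustrative values and file name): draw one labelled box on a grey canvas.
#   canvas = np.zeros((512, 512, 3), dtype=np.uint8) + 50
#   canvas = plot_one_box((50, 60, 200, 240), canvas, label='person', idx=0)
#   cv2.imwrite('box_demo.png', canvas)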
# decode one sequence to visual locations
def decode(coordinate_str, type='box'):
# find numbers
locations = np.array([int(i) for i in re.findall(r"\d+", coordinate_str)])
if type == 'box':
locations = locations.reshape(-1, 4)
elif type == 'cocokeypoint':
locations = locations.reshape(-1, 18, 2)
visible = np.ones((locations.shape[0], 18, 1))
eq_0_idx = np.where(locations[:, :, 0] * locations[:, :, 1] == 0)
visible[eq_0_idx] = 0
locations = np.concatenate([locations, visible], axis=-1)
        # hide the neck (joint 1) when either shoulder (joints 2 / 5) is invisible
        for i in range(locations.shape[0]):
            if locations[i, 2, -1] == 0 or locations[i, 5, -1] == 0:
                locations[i, 1, -1] = 0
elif type == 'crowdpose':
locations = locations.reshape(-1, 14, 2)
visible = np.ones((locations.shape[0], 14, 1))
eq_0_idx = np.where(locations[:, :, 0] * locations[:, :, 1] == 0)
visible[eq_0_idx] = 0
locations = np.concatenate([locations, visible], axis=-1)
    elif type == 'mask':
        # split polygons at the 'm0' token and strip the remaining 'm<i>' point markers below
        locations = []
for c_str in coordinate_str.split('m0'):
c_str = ''.join(re.split(r'm\d+', c_str))
mask_coord = np.array([int(i) for i in re.findall(r"\d+ ", c_str)])
if len(mask_coord) != 0:
locations.append(mask_coord.reshape(-1, 1, 2))
else:
raise NotImplementedError
return locations
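# Example (the input string is illustrative; decode only regex-extracts the integers):
#   decode('xmin 10 ymin 20 xmax 110 ymax 220 xmin 30 ymin 40 xmax 90 ymax 180', type='box')
#   -> array of shape (2, 4): [[10, 20, 110, 220], [30, 40, 90, 180]]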
# process raw sequences inferred by VisorGPT
def to_coordinate(file_path, ctn=True):
    # accept either a list of raw sequences or a path to a text file of sequences;
    # the ' ##' subword continuation markers from the tokenizer are stripped
    if isinstance(file_path, list):
        texts = [i.strip().replace(' ##', '') for i in file_path]
    else:
        with open(file_path, 'r') as file:
            texts = [i.strip().replace(' ##', '') for i in file.readlines()]
location_list = []
classname_list = []
type_list = []
valid_sequences = []
cnt = 0
print('to coordinate ...')
for ste in tqdm(texts):
cnt += 1
if 'box' in ste:
type = 'box'
elif 'key point' in ste:
type = 'cocokeypoint' if '; 18 ;' in ste else 'crowdpose'
elif 'mask' in ste:
type = 'mask'
else:
raise NotImplementedError
if '[SEP]' not in ste:
continue
        try:
            if ctn:
                # ctn mode: class names are bracketed together with their coordinates in field 5
                temp = ste[:ste.index('[SEP]')].split(' ; ')[5].split('] ')
                classnames = []
                for t in temp:
                    classnames.append(t.split(' xmin ')[0].split(' m0')[0][2:])
                classnames = classnames[:-1]
                locations = decode(ste[:ste.index('[SEP]')].split(' ; ')[5], type=type)
            else:
                # otherwise class names are in field 5 and coordinates in field 6
                classnames = ste[:ste.index('[SEP]')].split(' ; ')[5].split(' , ')
                locations = decode(ste[:ste.index('[SEP]')].split(' ; ')[6], type=type)
        except Exception:
            # skip sequences that cannot be parsed
            continue
else:
valid_sequences.append(ste[:ste.index('[SEP]')])
location_list.append(locations)
classname_list.append(classnames)
type_list.append(type)
    with open('valid_sequences.txt', 'w') as file:
        for i in valid_sequences:
            file.write(i.split('[CLS] ')[-1] + '\n')
return location_list, classname_list, type_list, valid_sequences
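# Usage sketch (the file name is illustrative; the sequence format itself is model-specific):
#   locs, names, types, seqs = to_coordinate('demo_sequences.txt', ctn=False)
#   # locs[k]  : decoded coordinates of the k-th valid sequence
#   # names[k] : class names parsed from the same sequence
#   # types[k] : one of 'box', 'cocokeypoint', 'crowdpose', 'mask'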
# visualize object locations on a canvas
def visualization(location_list, classname_list, type_list, save_dir='debug/', save_fig=False):
if save_fig:
if not os.path.exists(save_dir):
os.makedirs(save_dir)
print('visualizing ...')
for b, (loc, classnames, type) in tqdm(enumerate(zip(location_list, classname_list, type_list))):
canvas = np.zeros((512, 512, 3), dtype=np.uint8) + 50
if len(loc) != len(classnames):
continue
if type == 'box':
for i in range(loc.shape[0]):
canvas = plot_one_box(loc[i], canvas, label=classnames[i], idx=i)
elif type == 'cocokeypoint':
for i in range(loc.shape[0]):
for j in range(loc.shape[1]):
x, y, v = loc[i, j]
if v != 0:
cv2.circle(canvas, (int(x), int(y)), 4, colors[j], thickness=-1)
for j in range(17):
lim = limbSeq_coco[j]
cur_canvas = canvas.copy()
Y = [loc[i][lim[0] - 1][0], loc[i][lim[1] - 1][0]]
X = [loc[i][lim[0] - 1][1], loc[i][lim[1] - 1][1]]
if loc[i][lim[0] - 1][-1] == 0 or loc[i][lim[1] - 1][-1] == 0:
continue
mX = np.mean(X)
mY = np.mean(Y)
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
cv2.fillConvexPoly(cur_canvas, polygon, colors[j])
canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
elif type == 'crowdpose':
for i in range(loc.shape[0]):
for j in range(loc.shape[1]):
x, y, _ = loc[i, j]
if x != 0 and y != 0:
cv2.circle(canvas, (int(x), int(y)), 4, colors[j], thickness=-1)
for j in range(13):
lim = limbSeq_cp[j]
cur_canvas = canvas.copy()
Y = [loc[i][lim[0] - 1][0], loc[i][lim[1] - 1][0]]
X = [loc[i][lim[0] - 1][1], loc[i][lim[1] - 1][1]]
if (Y[0] == 0 and X[0] == 0) or (Y[1] == 0 and X[1] == 0):
continue
mX = np.mean(X)
mY = np.mean(Y)
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
cv2.fillConvexPoly(cur_canvas, polygon, colors[j])
canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
elif type == 'mask':
for i in range(len(loc)):
color = [random.randint(0, 255) for _ in range(3)]
xmin, ymin, xmax, ymax = loc[i][:, :, 0].min(), loc[i][:, :, 1].min(), loc[i][:, :, 0].max(), loc[i][:, :, 1].max()
cur_canvas = canvas.copy()
cv2.fillPoly(cur_canvas, [loc[i]], color)
cur_canvas = plot_one_box((xmin, ymin, xmax, ymax), cur_canvas, color=color, label=classnames[i])
canvas = cv2.addWeighted(canvas, 0.5, cur_canvas, 0.5, 0)
else:
raise NotImplementedError
        if save_fig:
            cv2.imwrite(f'{save_dir}/test_{b}.png', canvas[..., ::-1])
    # return the last drawn canvas with its channel order reversed
    return canvas[..., ::-1]
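# Usage sketch: render and save every decoded layout (save_dir value is illustrative).
#   last_canvas = visualization(location_list, classname_list, type_list, save_dir='debug/', save_fig=True)
#   # PNGs are written to <save_dir>/test_<index>.png; the return value is the last canvas only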
# to json output
def to_json(location_list, classname_list, type_list, valid_sequences):
ret_json_box = {'bboxes': [], 'sequences': []}
ret_json_mask = {'masks': [], 'sequences': []}
ret_json_keypoint = {'keypoints': [], 'sequences': []}
print('to json ...')
for loc, classnames, type, seq in tqdm(zip(location_list, classname_list, type_list, valid_sequences)):
ins_list = []
kpt_list = []
mask_list = []
seq_list = []
        if len(loc) != len(classnames):  # or: len(classnames) > 8
            continue
if type == 'box':
for i in range(loc.shape[0]):
                # optionally skip very small boxes, which are hard for current AIGC models to synthesize:
                # xmin, ymin, xmax, ymax = loc[i]
                # area = (xmax - xmin) * (ymax - ymin)
                # if area < 32 ** 2:
                #     continue
dic = {classnames[i]: loc[i].tolist()}
ins_list.append(dic)
if len(seq_list) == 0:
seq_list.append(seq)
elif type == 'cocokeypoint' or type == 'crowdpose':
for i in range(loc.shape[0]):
                # optionally skip poses with few visible key points, which are hard for current AIGC models to synthesize:
                # if loc[i, :, -1].sum() <= 4:
                #     continue
                # optionally skip very small poses as well:
                # xmin, ymin, xmax, ymax = loc[i, :, 0].min(), loc[i, :, 1].min(), loc[i, :, 0].max(), loc[i, :, 1].max()
                # area = (xmax - xmin) * (ymax - ymin)
                # if area < 32 ** 2:
                #     continue
                dic = {classnames[i]: loc[i].tolist()}
kpt_list.append(dic)
if len(seq_list) == 0:
seq_list.append(seq)
elif type == 'mask':
for i in range(len(loc)):
                # optionally skip very small masks, which are hard for current AIGC models to synthesize:
                # xmin, ymin, xmax, ymax = loc[i][:, :, 0].min(), loc[i][:, :, 1].min(), loc[i][:, :, 0].max(), loc[i][:, :, 1].max()
                # area = (xmax - xmin) * (ymax - ymin)
                # if area < 32 ** 2:
                #     continue
dic = {classnames[i]: loc[i].tolist()}
mask_list.append(dic)
if len(seq_list) == 0:
seq_list.append(seq)
else:
raise NotImplementedError
if len(ins_list) != 0:
ret_json_box['bboxes'].append(ins_list)
ret_json_box['sequences'].append(seq_list)
if len(kpt_list) != 0:
ret_json_keypoint['keypoints'].append(kpt_list)
ret_json_keypoint['sequences'].append(seq_list)
if len(mask_list) != 0:
ret_json_mask['masks'].append(mask_list)
ret_json_mask['sequences'].append(seq_list)
return [ret_json_box, ret_json_mask, ret_json_keypoint]
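# Shape of the returned JSON objects (box case shown; class names are illustrative,
# the mask and key-point variants are analogous):
#   {
#     "bboxes":    [[{"person": [xmin, ymin, xmax, ymax]}, {"dog": [...]}], ...],  # one list per sequence
#     "sequences": [["<raw sequence up to [SEP]>"], ...]
#   }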
def gen_cond_mask(texts, ctn):
location_list, classname_list, type_list, valid_sequences = to_coordinate(texts, ctn)
ret_mask = visualization(location_list, classname_list, type_list, None, False)
ret_json = to_json(location_list, classname_list, type_list, valid_sequences)
return ret_mask, ret_json
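# Usage sketch (raw_sequences would be a list of raw VisorGPT output strings):
#   canvas_rgb, (box_json, mask_json, kpt_json) = gen_cond_mask(raw_sequences, ctn=True)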
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--file_path', type=str, required=True)
parser.add_argument('--save_dir', type=str, default='debug')
    parser.add_argument('--visualize', action='store_true')
args = parser.parse_args()
location_list, classname_list, type_list, valid_sequences = to_coordinate(args.file_path)
if not os.path.exists(args.save_dir):
os.makedirs(args.save_dir)
# visualization
if args.visualize:
visualization(location_list, classname_list, type_list, args.save_dir)
# to json data
rets = to_json(location_list, classname_list, type_list, valid_sequences)
    os.makedirs('files', exist_ok=True)
    for ret, flag in zip(rets, ['box', 'mask', 'keypoint']):
        save_path = args.file_path.split('/')[-1].split('.')[0] + f'_{flag}.json'
        with open('files/' + save_path, 'w') as file:
            json.dump(ret, file, indent=2)
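# Example invocation (the input path is illustrative; JSON is written under files/):
#   python seq2coord.py --file_path samples/box_sequences.txt --save_dir debug --visualize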