# Spaces: Runtime error  (status banner captured from the hosting page; not part of the module)
"""
Decode sequential output to visual locations.

author: sierkinhane.github.io
"""
import random | |
from tqdm import tqdm | |
import json | |
import numpy as np | |
import re | |
import argparse | |
import cv2 | |
import math | |
import os | |
# Second semi-axis (in pixels) of the ellipse drawn for every skeleton limb.
stickwidth = 4

# COCO keypoints (18 joints per person).
# Each pair holds the 1-based indices of the two joints that a limb connects.
limbSeq_coco = [
    [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
    [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
    [1, 16], [16, 18], [3, 17], [6, 18],
]

# CrowdPose (14 joints per person), same 1-based pair convention as above.
# Joint names by 0-based index:
# {'0': 'left shoulder', '1': 'right shoulder', '2': 'left elbow', '3': 'right elbow', '4': 'left wrist', '5': 'right wrist', '6': 'left hip', '7': 'right hip', '8': 'left knee', '9': 'right knee', '10': 'left ankle', '11': 'right ankle', '12': 'head', '13': 'neck'}
limbSeq_cp = [
    [14, 2], [14, 1], [2, 4], [4, 6], [1, 3], [3, 5], [14, 8], [8, 10],
    [10, 12], [14, 7], [7, 9], [9, 11], [13, 14],
]

# Palette for human pose visualization, indexed by joint or limb number.
colors = [
    [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0],
    [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255],
    [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85],
]

# Palette for box visualization, indexed by box number.
colors_box = [
    [217, 221, 116], [137, 165, 171], [230, 126, 175], [63, 157, 5], [107, 51, 75],
    [217, 147, 152], [129, 132, 8], [232, 85, 249], [254, 98, 33], [89, 108, 230],
    [253, 34, 161], [91, 150, 30], [255, 147, 26], [209, 154, 205], [134, 57, 11],
    [143, 181, 122], [241, 176, 87], [104, 73, 26], [122, 147, 59], [235, 230, 229],
    [119, 18, 125], [185, 61, 138], [237, 115, 90], [13, 209, 111], [219, 172, 212],
]
# Plots one bounding box on image
def plot_one_box(x, img, color=None, label=None, line_thickness=None, idx=0):
    """Draw one rectangle, and optionally a filled label tag, onto ``img``.

    Args:
        x: Sequence ``(x1, y1, x2, y2)`` holding two opposite corners of the
            box; each value is converted with ``int()``.
        img: Image array to draw on. It is modified in place.
        color: Optional colour for the outline and the label background.
            When omitted, the ``colors_box`` palette entry selected by
            ``idx`` is used.
        label: Optional text; when truthy it is drawn in black on a filled
            tag anchored at the first corner.
        line_thickness: Optional outline thickness. When falsy it is derived
            from the image height and width.
        idx: Palette index used when ``color`` is omitted. It wraps around
            the palette length, so any number of boxes can be drawn.

    Returns:
        The same ``img`` object that was passed in.
    """
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line thickness
    if color is None:
        # Deterministic per-index colour; the modulo keeps indices beyond the
        # palette size from raising IndexError.
        color = colors_box[idx % len(colors_box)]
    c1 = (int(x[0]), int(x[1]))
    c2 = (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # The tag extends right of and above the first corner, sized to the text.
        tag_corner = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
        cv2.rectangle(img, c1, tag_corner, color, -1)  # filled
        cv2.putText(img, label, c1, 0, tl / 3, [0, 0, 0], thickness=tf, lineType=cv2.LINE_AA)
    return img
# decode one sequence to visual locations
def decode(coordinate_str, type='box'):
    """Convert the numbers found in one coordinate string into location arrays.

    Args:
        coordinate_str: Text containing the integer coordinates.
        type: One of ``'box'``, ``'cocokeypoint'``, ``'crowdpose'`` or
            ``'mask'``.

    Returns:
        * ``'box'``: array of shape ``(n, 4)``.
        * ``'cocokeypoint'``: array of shape ``(n, 18, 3)``; the last channel
          is a visibility flag that is 0 when either coordinate is 0. Joint 1
          is additionally flagged invisible when joint 2 or joint 5 is.
        * ``'crowdpose'``: array of shape ``(n, 14, 3)`` with the same flag.
        * ``'mask'``: list of ``(k, 1, 2)`` arrays, one per non-empty chunk
          obtained by splitting the string on ``'m0'``.

    Raises:
        NotImplementedError: For any other ``type``.
        ValueError: When the amount of numbers does not fit the layout.
    """
    numbers = np.array([int(tok) for tok in re.findall(r"\d+", coordinate_str)])

    def keypoints_with_flags(num_joints):
        # Append a visibility channel: 0 wherever x * y == 0, otherwise 1.
        pts = numbers.reshape(-1, num_joints, 2)
        flags = np.ones((pts.shape[0], num_joints, 1))
        flags[(pts[:, :, 0] * pts[:, :, 1]) == 0] = 0
        return np.concatenate([pts, flags], axis=-1)

    if type == 'box':
        return numbers.reshape(-1, 4)

    if type == 'cocokeypoint':
        poses = keypoints_with_flags(18)
        # Joint 1 is hidden as soon as joint 2 or joint 5 is hidden.
        hide_joint_1 = (poses[:, 2, -1] == 0) | (poses[:, 5, -1] == 0)
        poses[hide_joint_1, 1, -1] = 0
        return poses

    if type == 'crowdpose':
        return keypoints_with_flags(14)

    if type == 'mask':
        polygons = []
        for chunk in coordinate_str.split('m0'):
            # Remove the remaining 'm<digits>' markers before reading numbers.
            cleaned = ''.join(re.split(r'm\d+', chunk))
            coords = np.array([int(tok) for tok in re.findall(r"\d+ ", cleaned)])
            if len(coords) != 0:
                polygons.append(coords.reshape(-1, 1, 2))
        return polygons

    raise NotImplementedError
# process raw sequences inferred by VisorGPT
def to_coordinate(file_path, ctn=True):
    """Parse raw sequences into locations, class names and annotation types.

    Every input line is stripped and has ``' ##'`` removed (presumably
    sub-word continuation markers -- confirm against the tokenizer). Lines
    without ``'[SEP]'`` and lines that cannot be parsed are dropped. As a
    side effect, the accepted sequences are written to
    ``valid_sequences.txt`` in the current working directory, with
    everything up to and including ``'[CLS] '`` removed.

    Args:
        file_path: Either a list of sequence strings or the path of a text
            file holding one sequence per line.
        ctn: Selects the sequence layout. When true, field 5 (fields are
            separated by ``' ; '``) holds class names interleaved with
            coordinates. Otherwise field 5 holds the class names separated
            by ``' , '`` and field 6 holds the coordinates.

    Returns:
        Tuple ``(location_list, classname_list, type_list, valid_sequences)``
        of equally long lists, one entry per accepted sequence.

    Raises:
        NotImplementedError: When a line mentions none of ``'box'``,
            ``'key point'`` or ``'mask'``.
    """
    if isinstance(file_path, list):
        raw_lines = file_path
    else:
        with open(file_path, 'r') as file:
            raw_lines = file.readlines()
    texts = [line.strip().replace(' ##', '') for line in raw_lines]

    location_list = []
    classname_list = []
    type_list = []
    valid_sequences = []
    print('to coordinate ...')
    for ste in tqdm(texts):
        if 'box' in ste:
            seq_type = 'box'
        elif 'key point' in ste:
            seq_type = 'cocokeypoint' if '; 18 ;' in ste else 'crowdpose'
        elif 'mask' in ste:
            seq_type = 'mask'
        else:
            raise NotImplementedError(f'unrecognised annotation type in sequence: {ste!r}')
        if '[SEP]' not in ste:
            continue

        head = ste[:ste.index('[SEP]')]
        fields = head.split(' ; ')
        try:
            if ctn:
                # Each '] '-separated chunk starts with a class name, after two
                # leading characters (presumably '[ ') and before the first
                # ' xmin ' / ' m0' marker. The final chunk holds no class name.
                classnames = [
                    chunk.split(' xmin ')[0].split(' m0')[0][2:]
                    for chunk in fields[5].split('] ')
                ][:-1]
                locations = decode(fields[5], type=seq_type)
            else:
                classnames = fields[5].split(' , ')
                locations = decode(fields[6], type=seq_type)
        except Exception:
            # Malformed sequences are skipped on purpose. Catching Exception
            # rather than everything keeps Ctrl-C and SystemExit working.
            continue

        valid_sequences.append(head)
        location_list.append(locations)
        classname_list.append(classnames)
        type_list.append(seq_type)

    with open('valid_sequences.txt', 'w') as file:
        file.writelines(seq.split('[CLS] ')[-1] + '\n' for seq in valid_sequences)
    return location_list, classname_list, type_list, valid_sequences
def _draw_limb(canvas, joint_a, joint_b, color):
    """Return ``canvas`` blended with one limb drawn as a filled ellipse between two joints."""
    xs = [joint_a[0], joint_b[0]]
    ys = [joint_a[1], joint_b[1]]
    center = (int(np.mean(xs)), int(np.mean(ys)))
    length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
    angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
    polygon = cv2.ellipse2Poly(center, (int(length / 2), stickwidth), int(angle), 0, 360, 1)
    overlay = canvas.copy()
    cv2.fillConvexPoly(overlay, polygon, color)
    return cv2.addWeighted(canvas, 0.4, overlay, 0.6, 0)


# visualize object locations on a canvas
def visualization(location_list, classname_list, type_list, save_dir='debug/', save_fig=False):
    """Render every decoded sequence on its own 512x512 canvas.

    Each canvas starts filled with the value 50. A sequence whose number of
    locations differs from its number of class names is skipped: nothing is
    drawn or saved for it.

    Args:
        location_list: Decoded locations, one entry per sequence.
        classname_list: Class names, one list per sequence.
        type_list: Annotation type per sequence: ``'box'``,
            ``'cocokeypoint'``, ``'crowdpose'`` or ``'mask'``.
        save_dir: Directory for the images; only used when ``save_fig`` is
            true.
        save_fig: When true, each canvas is written to
            ``{save_dir}/test_{index}.png``.

    Returns:
        The canvas of the last sequence with its channel axis reversed, or
        an untouched blank canvas when there are no sequences.
        NOTE(review): the return is taken to sit after the loop -- confirm
        against the original layout.

    Raises:
        NotImplementedError: For an unknown annotation type.
    """
    if save_fig:
        os.makedirs(save_dir, exist_ok=True)
    print('visualizing ...')

    # Bound before the loop so an empty input returns a blank canvas instead
    # of failing on an unbound name.
    canvas = np.zeros((512, 512, 3), dtype=np.uint8) + 50
    for b, (loc, classnames, ann_type) in tqdm(enumerate(zip(location_list, classname_list, type_list))):
        canvas = np.zeros((512, 512, 3), dtype=np.uint8) + 50
        if len(loc) != len(classnames):
            continue

        if ann_type == 'box':
            for i in range(loc.shape[0]):
                canvas = plot_one_box(loc[i], canvas, label=classnames[i], idx=i)
        elif ann_type == 'cocokeypoint':
            for person in loc:
                for j, (x, y, v) in enumerate(person):
                    if v != 0:
                        cv2.circle(canvas, (int(x), int(y)), 4, colors[j], thickness=-1)
                # Only the first 17 limb pairs are drawn; the last two entries
                # of limbSeq_coco are left out.
                for j, (src, dst) in enumerate(limbSeq_coco[:17]):
                    start, end = person[src - 1], person[dst - 1]
                    # A limb needs both of its joints flagged visible.
                    if start[-1] == 0 or end[-1] == 0:
                        continue
                    canvas = _draw_limb(canvas, start, end, colors[j])
        elif ann_type == 'crowdpose':
            for person in loc:
                for j, (x, y, _) in enumerate(person):
                    if x != 0 and y != 0:
                        cv2.circle(canvas, (int(x), int(y)), 4, colors[j], thickness=-1)
                for j, (src, dst) in enumerate(limbSeq_cp[:13]):
                    start, end = person[src - 1], person[dst - 1]
                    # A joint located exactly at (0, 0) is treated as missing.
                    if (start[0] == 0 and start[1] == 0) or (end[0] == 0 and end[1] == 0):
                        continue
                    canvas = _draw_limb(canvas, start, end, colors[j])
        elif ann_type == 'mask':
            for i in range(len(loc)):
                color = [random.randint(0, 255) for _ in range(3)]
                # OpenCV polygon filling expects 32-bit integer points, while
                # the decoded arrays use NumPy's default integer type.
                polygon = np.asarray(loc[i], dtype=np.int32)
                xmin, ymin = polygon[:, :, 0].min(), polygon[:, :, 1].min()
                xmax, ymax = polygon[:, :, 0].max(), polygon[:, :, 1].max()
                cur_canvas = canvas.copy()
                cv2.fillPoly(cur_canvas, [polygon], color)
                cur_canvas = plot_one_box((xmin, ymin, xmax, ymax), cur_canvas, color=color, label=classnames[i])
                canvas = cv2.addWeighted(canvas, 0.5, cur_canvas, 0.5, 0)
        else:
            raise NotImplementedError

        if save_fig:
            cv2.imwrite(f'{save_dir}/test_{b}.png', canvas[..., ::-1])
    return canvas[..., ::-1]
# to json output
def to_json(location_list, classname_list, type_list, valid_sequences):
    """Group decoded annotations into three JSON-serialisable dictionaries.

    A sequence whose number of locations differs from its number of class
    names is skipped. Every kept sequence contributes one list of
    ``{class_name: coordinates}`` dictionaries plus a one-element list
    holding its source sequence. No size or visibility filtering is applied.

    Args:
        location_list: Decoded locations, one entry per sequence.
        classname_list: Class names, one list per sequence.
        type_list: Annotation type per sequence: ``'box'``,
            ``'cocokeypoint'``, ``'crowdpose'`` or ``'mask'``.
        valid_sequences: Source text of each sequence.

    Returns:
        ``[boxes, masks, keypoints]``: dictionaries keyed ``'bboxes'``,
        ``'masks'`` and ``'keypoints'`` respectively, each with a parallel
        ``'sequences'`` list.

    Raises:
        NotImplementedError: For an unknown annotation type.
    """
    boxes_out = {'bboxes': [], 'sequences': []}
    masks_out = {'masks': [], 'sequences': []}
    keypoints_out = {'keypoints': [], 'sequences': []}

    print('to json ...')
    records = zip(location_list, classname_list, type_list, valid_sequences)
    for loc, classnames, type, seq in tqdm(records):
        if len(loc) != len(classnames):
            continue

        # Pick the output bucket and count the instances for this type.
        if type == 'box':
            target, key, count = boxes_out, 'bboxes', loc.shape[0]
        elif type in ('cocokeypoint', 'crowdpose'):
            target, key, count = keypoints_out, 'keypoints', loc.shape[0]
        elif type == 'mask':
            target, key, count = masks_out, 'masks', len(loc)
        else:
            raise NotImplementedError

        instances = [{classnames[k]: loc[k].tolist()} for k in range(count)]
        if instances:
            target[key].append(instances)
            target['sequences'].append([seq])

    return [boxes_out, masks_out, keypoints_out]
def gen_cond_mask(texts, ctn):
    """Parse raw sequences, then render them and convert them to JSON.

    Args:
        texts: Passed to ``to_coordinate`` as its input (a list of sequence
            strings or a file path).
        ctn: Passed to ``to_coordinate`` as the layout switch.

    Returns:
        Tuple ``(ret_mask, ret_json)``: the canvas returned by
        ``visualization`` (no image is saved) and the list returned by
        ``to_json``.
    """
    parsed = to_coordinate(texts, ctn)
    locations, names, kinds, sequences = parsed
    rendered = visualization(locations, names, kinds, save_dir=None, save_fig=False)
    as_json = to_json(locations, names, kinds, sequences)
    return rendered, as_json
if __name__ == '__main__':
    # Command-line entry point: decode a file of sequences, optionally save
    # the rendered canvases, and dump the annotations as three JSON files
    # under 'files/'.

    def _str2bool(value):
        """Interpret a command-line string as a boolean."""
        return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y', 'on')

    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=str, required=True)
    parser.add_argument('--save_dir', type=str, default='debug')
    # type=bool would turn every non-empty string, including "False", into
    # True. Accept either a bare '--visualize' or an explicit true/false value.
    parser.add_argument('--visualize', type=_str2bool, nargs='?', const=True, default=False)
    args = parser.parse_args()

    location_list, classname_list, type_list, valid_sequences = to_coordinate(args.file_path)

    os.makedirs(args.save_dir, exist_ok=True)

    # visualization
    if args.visualize:
        # save_fig defaults to False, so it must be enabled for the images to
        # actually be written to save_dir.
        visualization(location_list, classname_list, type_list, args.save_dir, save_fig=True)

    # to json data
    rets = to_json(location_list, classname_list, type_list, valid_sequences)
    # The JSON files go to 'files/'; create it so the dump cannot fail on a
    # missing directory.
    os.makedirs('files', exist_ok=True)
    stem = args.file_path.split('/')[-1].split('.')[0]
    for ret, flag in zip(rets, ['box', 'mask', 'keypoint']):
        save_path = stem + f'_{flag}.json'
        with open('files/' + save_path, 'w') as file:
            json.dump(ret, file, indent=2)