Spaces:
Running
Running
import time | |
import numpy as np | |
from PIL import Image | |
from scipy.spatial.distance import cdist | |
from scipy.optimize import linear_sum_assignment | |
class SimpleAffineTransform:
    """Affine transform restricted to a translation and a uniform scale.

    Applying the transform scales the points about their own centroid and
    then shifts them by ``translation``; there is no rotation or shear.
    """

    def __init__(self, translation=(0, 0), scale=1.0):
        """Store the translation vector (as an ndarray) and the scale factor."""
        self.translation = np.array(translation)
        self.scale = scale

    def estimate(self, src, dst):
        """Fit the transform so that it maps ``src`` onto ``dst``.

        The translation is the difference between the two centroids and the
        scale is the ratio of the mean point-to-centroid distances.

        Args:
            src: sequence/array of source points, one point per row.
            dst: sequence/array of destination points, one point per row.
        """
        src = np.asarray(src)
        dst = np.asarray(dst)
        src_center = np.mean(src, axis=0)
        dst_center = np.mean(dst, axis=0)
        self.translation = dst_center - src_center

        src_dists = np.linalg.norm(src - src_center, axis=1)
        dst_dists = np.linalg.norm(dst - dst_center, axis=1)
        # The epsilon keeps the ratio finite when all source points coincide.
        self.scale = np.mean(dst_dists) / (np.mean(src_dists) + 1e-10)

    def inverse(self):
        """Return a new transform that undoes this one.

        Because scaling happens about the centroid of the points being
        transformed, negating the translation and inverting the scale is an
        exact inverse: ``t.inverse()(t(points))`` recovers ``points``.
        """
        # The original referenced an undefined ``AffineTransform`` here,
        # which raised NameError whenever inverse() was called.
        return SimpleAffineTransform(-self.translation, 1.0 / self.scale)

    def __call__(self, coords):
        """Apply the transform to ``coords`` (one point per row)."""
        coords = np.asarray(coords)
        center = np.mean(coords, axis=0)
        return self.scale * (coords - center) + center + self.translation

    def residuals(self, src, dst):
        """Return the Euclidean distance between each transformed ``src`` point and ``dst``."""
        return np.sqrt(np.sum((self(src) - dst) ** 2, axis=1))
def norm_coords(x, left, right):
    """Clamp ``x`` to the range bounded by ``left`` and ``right``.

    Returns ``left`` when ``x`` is below it, ``right`` when ``x`` is above
    it, and ``x`` itself otherwise. The lower bound is checked first.
    """
    clamped = x
    if x < left:
        clamped = left
    elif x > right:
        clamped = right
    return clamped
def norm_same_token(token): | |
special_map = { | |
"\\cdot": ".", | |
"\\mid": "|", | |
"\\to": "\\rightarrow", | |
"\\top": "T", | |
"\\Tilde": "\\tilde", | |
"\\cdots": "\\dots", | |
"\\prime": "'", | |
"\\ast": "*", | |
"\\left<": "\\langle", | |
"\\right>": "\\rangle" | |
} | |
if token in special_map.keys(): | |
token = special_map[token] | |
if token.startswith('\\left') or token.startswith('\\right'): | |
token = token.replace("\\left", "").replace("\\right", "") | |
if token.startswith('\\big') or token.startswith('\\Big'): | |
if "\\" in token[4:]: | |
token = "\\"+token[4:].split("\\")[-1] | |
else: | |
token = token[-1] | |
if token in ['\\leq', '\\geq']: | |
return token[0:-1] | |
if token in ['\\lVert', '\\rVert', '\\Vert']: | |
return '\\|' | |
if token in ['\\lvert', '\\rvert', '\\vert']: | |
return '|' | |
if token.endswith("rightarrow"): | |
return "\\rightarrow" | |
if token.endswith("leftarrow"): | |
return "\\leftarrow" | |
if token.startswith('\\wide'): | |
return token.replace("wide", "") | |
if token.startswith('\\var'): | |
return token.replace("\\var", "") | |
return token | |
class HungarianMatcher:
    """Pair ground-truth boxes with predicted boxes via the Hungarian algorithm.

    The pairwise cost is a weighted sum of three terms: token mismatch,
    L1 distance between size-normalised bounding boxes, and L1 distance
    between relative positions in the sequence.
    """

    def __init__(
        self,
        cost_token: float = 1,
        cost_position: float = 0.05,
        cost_order: float = 0.15,
    ):
        """Store the weight of each cost term.

        Args:
            cost_token: weight of the token-mismatch cost.
            cost_position: weight of the bounding-box L1 cost.
            cost_order: weight of the sequence-order L1 cost.
        """
        self.cost_token = cost_token
        self.cost_position = cost_position
        self.cost_order = cost_order
        # Filled by __call__ with the last "token" / "position" / "order"
        # cost matrices, each of shape (len(box_gt), len(box_pred)).
        self.cost = {}

    def calculate_token_cost_old(self, box_gt, box_pred):
        """Loop-based token cost, kept alongside ``calculate_token_cost``.

        Returns a ``(len(box_gt), len(box_pred))`` array holding 0 for equal
        tokens, 0.05 for tokens equal only after ``norm_same_token``, else 1.
        """
        token_cost = np.ones((len(box_gt), len(box_pred)))
        for i, gt in enumerate(box_gt):
            for j, pred in enumerate(box_pred):
                if gt['token'] == pred['token']:
                    token_cost[i, j] = 0
                elif norm_same_token(gt['token']) == norm_same_token(pred['token']):
                    token_cost[i, j] = 0.05
        return token_cost

    def calculate_token_cost(self, box_gt, box_pred):
        """Vectorised token cost between every gt/pred pair.

        Args:
            box_gt: list of dicts with a ``'token'`` key.
            box_pred: list of dicts with a ``'token'`` key.

        Returns:
            Float array of shape ``(len(box_gt), len(box_pred))``: 0 where the
            tokens are identical, 0.05 where they differ but agree after
            ``norm_same_token``, and 1 otherwise.
        """
        if len(box_gt) == 0 or len(box_pred) == 0:
            # Nothing to compare; keep the (gt, pred) shape contract.
            return np.ones((len(box_gt), len(box_pred)))

        raw_ids = {}
        norm_ids = {}

        def encode(boxes):
            # Give every distinct raw / normalised token a small integer id so
            # that the pairwise comparison can be done with broadcasting.
            raw = [raw_ids.setdefault(b['token'], len(raw_ids)) for b in boxes]
            norm = [
                norm_ids.setdefault(norm_same_token(b['token']), len(norm_ids))
                for b in boxes
            ]
            return np.array(raw), np.array(norm)

        gt_raw, gt_norm = encode(box_gt)
        pred_raw, pred_norm = encode(box_pred)

        raw_differs = gt_raw[:, None] != pred_raw[None, :]
        norm_matches = gt_norm[:, None] == pred_norm[None, :]

        token_cost = raw_differs.astype(float)
        token_cost[raw_differs & norm_matches] = 0.05
        return token_cost

    def box2array(self, box_list, size):
        """Return an ``(n, 4)`` array of bboxes normalised by the image size.

        Args:
            box_list: list of dicts whose ``'bbox'`` is
                ``(x_min, y_min, x_max, y_max)``.
            size: ``(W, H)``; x values are divided by W and y values by H.
        """
        W, H = size
        rows = [
            [x_min / W, y_min / H, x_max / W, y_max / H]
            for x_min, y_min, x_max, y_max in (box['bbox'] for box in box_list)
        ]
        # reshape keeps the result 2-D even when box_list is empty.
        return np.asarray(rows, dtype=float).reshape(-1, 4)

    def order2array(self, box_list):
        """Return an ``(n, 1)`` array with each box's relative index ``idx / n``."""
        total = len(box_list)
        rows = [[idx / total] for idx in range(total)]
        return np.asarray(rows, dtype=float).reshape(-1, 1)

    def calculate_l1_cost(self, gt_array, pred_array):
        """Pairwise L1 distance between rows, averaged over the feature dimension."""
        scale = gt_array.shape[-1]
        l1_cost = cdist(gt_array, pred_array, 'minkowski', p=1)
        return l1_cost / scale

    def __call__(self, box_gt, box_pred, gt_size, pred_size):
        """Compute the minimum-cost one-to-one matching.

        Args:
            box_gt: list of ground-truth boxes (dicts with ``'token'`` and ``'bbox'``).
            box_pred: list of predicted boxes, same schema.
            gt_size: ``(W, H)`` used to normalise ground-truth bboxes.
            pred_size: ``(W, H)`` used to normalise predicted bboxes.

        Returns:
            List of ``(gt_index, pred_index)`` pairs. Empty when either input
            list is empty. The individual cost matrices are stored in
            ``self.cost``.
        """
        n_gt, n_pred = len(box_gt), len(box_pred)
        if n_gt == 0 or n_pred == 0:
            # No possible pairs: avoid feeding degenerate arrays to cdist.
            for key in ("token", "position", "order"):
                self.cost[key] = np.zeros((n_gt, n_pred))
            return []

        token_cost = self.calculate_token_cost(box_gt, box_pred)
        position_cost = self.calculate_l1_cost(
            self.box2array(box_gt, gt_size),
            self.box2array(box_pred, pred_size),
        )
        order_cost = self.calculate_l1_cost(
            self.order2array(box_gt),
            self.order2array(box_pred),
        )

        self.cost["token"] = token_cost
        self.cost["position"] = position_cost
        self.cost["order"] = order_cost

        cost = (
            self.cost_token * token_cost
            + self.cost_position * position_cost
            + self.cost_order * order_cost
        )
        # linear_sum_assignment rejects NaN entries; replace NaN/inf with a
        # fixed large penalty of 100 so those pairs are simply disfavoured.
        cost[~np.isfinite(cost)] = 100

        gt_idx, pred_idx = linear_sum_assignment(cost)
        return list(zip(gt_idx, pred_idx))