import numpy as np
import numpy.linalg as npla
import cv2

# Canonical 2D template positions (in [0, 1] space) for indices 17-48 and 54
# of the 68-point facial landmark scheme.
landmarks_2D_new = np.array([
    [0.000213256, 0.106454],   # 17
    [0.0752622, 0.038915],     # 18
    [0.18113, 0.0187482],      # 19
    [0.29077, 0.0344891],      # 20
    [0.393397, 0.0773906],     # 21
    [0.586856, 0.0773906],     # 22
    [0.689483, 0.0344891],     # 23
    [0.799124, 0.0187482],     # 24
    [0.904991, 0.038915],      # 25
    [0.98004, 0.106454],       # 26
    [0.490127, 0.203352],      # 27
    [0.490127, 0.307009],      # 28
    [0.490127, 0.409805],      # 29
    [0.490127, 0.515625],      # 30
    [0.36688, 0.587326],       # 31
    [0.426036, 0.609345],      # 32
    [0.490127, 0.628106],      # 33
    [0.554217, 0.609345],      # 34
    [0.613373, 0.587326],      # 35
    [0.121737, 0.216423],      # 36
    [0.187122, 0.178758],      # 37
    [0.265825, 0.179852],      # 38
    [0.334606, 0.231733],      # 39
    [0.260918, 0.245099],      # 40
    [0.182743, 0.244077],      # 41
    [0.645647, 0.231733],      # 42
    [0.714428, 0.179852],      # 43
    [0.793132, 0.178758],      # 44
    [0.858516, 0.216423],      # 45
    [0.79751, 0.244077],       # 46
    [0.719335, 0.245099],      # 47
    [0.254149, 0.780233],      # 48
    [0.726104, 0.780233],      # 54
], dtype=np.float32)

# Shrink the template toward its center by a factor of 0.8 to leave padding
# around the face in the aligned crop.
landmarks_2D_new = (landmarks_2D_new - 0.5) * 0.8 + 0.5
def get_transform_mat(landmarks, output_size=128):
    if not isinstance(landmarks, np.ndarray):
        landmarks = np.array(landmarks)

    # estimate landmarks transform from global space to local aligned space with bounds [0..1]
    mat = umeyama(np.concatenate([landmarks[17:49], landmarks[54:55]]), landmarks_2D_new, True)[0:2]

    # get corner points in global space
    g_p = transform_points(np.float32([(0, 0), (1, 0), (1, 1), (0, 1), (0.5, 0.5)]), mat, True)
    g_c = g_p[4]

    # calc diagonal vectors between corners in global space
    tb_diag_vec = (g_p[2] - g_p[0]).astype(np.float32)
    tb_diag_vec /= npla.norm(tb_diag_vec)
    bt_diag_vec = (g_p[1] - g_p[3]).astype(np.float32)
    bt_diag_vec /= npla.norm(bt_diag_vec)

    # calc modifier of diagonal vectors for scale and padding value
    mod = npla.norm(g_p[0] - g_p[2]) * (0.4 * np.sqrt(2.0) + 0.5)

    # adjust vertical offset for WHOLE_FACE, 20% below in order to cover more forehead
    vec = (g_p[0] - g_p[3]).astype(np.float32)
    vec_len = npla.norm(vec)
    vec /= vec_len
    g_c += vec * vec_len * 0.2

    # calc 3 points in global space to estimate 2d affine transform
    l_t = np.array([g_c - tb_diag_vec * mod,
                    g_c + bt_diag_vec * mod,
                    g_c + tb_diag_vec * mod])

    # calc affine transform from 3 global space points to 3 local space points size of 'output_size'
    pts2 = np.float32(((0, 0), (output_size, 0), (output_size, output_size)))
    mat = cv2.getAffineTransform(l_t, pts2)
    return mat
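
# Illustrative usage sketch (an addition, not part of the original code):
# assuming `image` is a BGR frame and `landmarks` is a (68, 2) array of
# 68-point facial landmarks, the matrix from get_transform_mat can be passed
# straight to cv2.warpAffine to produce an aligned square face crop.
def _example_align_face(image, landmarks, output_size=128):
    mat = get_transform_mat(landmarks, output_size)
    return cv2.warpAffine(image, mat, (output_size, output_size))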
def transform_points(points, mat, invert=False):
    if invert:
        mat = cv2.invertAffineTransform(mat)
    points = np.expand_dims(points, axis=1)  # (N, 2) -> (N, 1, 2) as cv2.transform expects
    points = cv2.transform(points, mat)
    points = np.squeeze(points)
    return points
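
# Round-trip sketch (illustrative): with a matrix from get_transform_mat,
# invert=False maps image-space points into the aligned crop and invert=True
# maps them back, so the composition is (numerically) the identity.
def _example_point_round_trip(points, mat):
    local_pts = transform_points(np.float32(points), mat)   # image -> crop
    return transform_points(local_pts, mat, invert=True)    # crop -> image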
def get_image_hull_mask(image_shape, landmarks):
    hull_mask = np.zeros(image_shape[0:2] + (1,), dtype=np.float32)
    lmrks = expand_eyebrows(landmarks, 1.0)

    r_jaw = (lmrks[0:9], lmrks[17:18])
    l_jaw = (lmrks[8:17], lmrks[26:27])
    r_cheek = (lmrks[17:20], lmrks[8:9])
    l_cheek = (lmrks[24:27], lmrks[8:9])
    nose_ridge = (lmrks[19:25], lmrks[8:9])
    r_eye = (lmrks[17:22], lmrks[27:28], lmrks[31:36], lmrks[8:9])
    l_eye = (lmrks[22:27], lmrks[27:28], lmrks[31:36], lmrks[8:9])
    nose = (lmrks[27:31], lmrks[31:36])
    parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]

    # fill the convex hull of each face region, then return the union as a mask
    for item in parts:
        merged = np.concatenate(item)
        cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), (1,))
    return hull_mask
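
# Illustrative sketch: the hull mask is float32 in {0, 1} with a trailing
# channel axis, so it can be multiplied onto a float image directly
# (assumes `image` has shape (H, W, 3) and `landmarks` is a (68, 2) array).
def _example_apply_hull_mask(image, landmarks):
    mask = get_image_hull_mask(image.shape, landmarks)  # (H, W, 1)
    return image * mask  # zero out everything outside the face hull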
def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0):
    lmrks = np.array(lmrks.copy(), dtype=np.int32)

    # mid points between the jaw corners and the outer eye corners
    ml_pnt = (lmrks[36] + lmrks[0]) // 2
    mr_pnt = (lmrks[16] + lmrks[45]) // 2

    # mid points between the mid points and eye
    ql_pnt = (lmrks[36] + ml_pnt) // 2
    qr_pnt = (lmrks[45] + mr_pnt) // 2

    # Top of the eye arrays
    bot_l = np.array((ql_pnt, lmrks[36], lmrks[37], lmrks[38], lmrks[39]))
    bot_r = np.array((lmrks[42], lmrks[43], lmrks[44], lmrks[45], qr_pnt))

    # Eyebrow arrays
    top_l = lmrks[17:22]
    top_r = lmrks[22:27]

    # Adjust eyebrow arrays: push the eyebrow landmarks away from the eyes
    lmrks[17:22] = top_l + eyebrows_expand_mod * 0.5 * (top_l - bot_l)
    lmrks[22:27] = top_r + eyebrows_expand_mod * 0.5 * (top_r - bot_r)
    return lmrks
def process_face_det_results(face_det_results):
    """Process face detection results and return a list of bounding boxes.

    :param face_det_results: detections in (top, right, bottom, left) format
    :return: a list of dicts with boxes in (left, top, right, bottom) format
    """
    person_results = []
    for bbox in face_det_results:
        bbox = bbox[0]
        person = {}
        # reorder (top, right, bottom, left) -> (left, top, right, bottom)
        person['bbox'] = [bbox[3], bbox[0], bbox[1], bbox[2]]
        person_results.append(person)
    return person_results
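
# Hypothetical example (the detector output layout is an assumption here):
# with face_recognition-style (top, right, bottom, left) tuples wrapped in
# one-element lists, the result is a list of (left, top, right, bottom) dicts.
def _example_process_dets():
    dets = [[(30, 200, 150, 80)]]  # one face: (top, right, bottom, left)
    return process_face_det_results(dets)  # [{'bbox': [80, 30, 200, 150]}]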
def area_of(left_top, right_bottom):
    """Compute the areas of rectangles given two corners.

    Args:
        left_top (N, 2): left top corner.
        right_bottom (N, 2): right bottom corner.
    Returns:
        area (N): return the area.
    """
    hw = np.clip(right_bottom - left_top, 0.0, None)
    return hw[..., 0] * hw[..., 1]
def iou_of(boxes0, boxes1, eps=1e-5):
    """Return intersection-over-union (Jaccard index) of boxes.

    Args:
        boxes0 (N, 4): ground truth boxes.
        boxes1 (N or 1, 4): predicted boxes.
        eps: a small number to avoid 0 as denominator.
    Returns:
        iou (N): IoU values.
    """
    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])

    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Perform hard non-maximum suppression.

    Args:
        box_scores (N, 5): boxes in corner-form plus probabilities.
        iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If k <= 0, keep all the results.
        candidate_size: only consider the candidates with the highest scores.
    Returns:
        the kept rows of box_scores, shaped (K, 5).
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    picked = []
    # _, indexes = scores.sort(descending=True)
    indexes = np.argsort(scores)
    # indexes = indexes[:candidate_size]
    indexes = indexes[-candidate_size:]
    while len(indexes) > 0:
        # current = indexes[0]
        current = indexes[-1]
        picked.append(current)
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        # indexes = indexes[1:]
        indexes = indexes[:-1]
        rest_boxes = boxes[indexes, :]
        iou = iou_of(
            rest_boxes,
            np.expand_dims(current_box, axis=0),
        )
        indexes = indexes[iou <= iou_threshold]
    return box_scores[picked, :]
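
# Toy example (illustrative): two heavily overlapping boxes and one separate
# box, each row in (x1, y1, x2, y2, score) form. With iou_threshold=0.5 the
# weaker of the two overlapping boxes is suppressed.
def _example_hard_nms():
    box_scores = np.array([
        [10, 10, 50, 50, 0.9],
        [12, 12, 52, 52, 0.6],      # IoU with the first box ~0.82 -> dropped
        [100, 100, 140, 140, 0.8],
    ], dtype=np.float32)
    return hard_nms(box_scores, iou_threshold=0.5)  # keeps rows 0 and 2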
def predict_box(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Filter detector outputs by score, run per-class NMS, and scale the
    surviving boxes from normalized [0, 1] coordinates to pixel coordinates."""
    boxes = boxes[0]
    confidences = confidences[0]
    picked_box_probs = []
    picked_labels = []
    for class_index in range(1, confidences.shape[1]):  # skip the background class
        probs = confidences[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            continue
        subset_boxes = boxes[mask, :]
        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
        box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=top_k)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])
    if not picked_box_probs:
        return np.array([]), np.array([]), np.array([])
    picked_box_probs = np.concatenate(picked_box_probs)
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]
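
# Illustrative sketch (the input layout is an assumption): `confidences` of
# shape (1, N, num_classes) and `boxes` of shape (1, N, 4) with coordinates
# normalized to [0, 1], as produced by SSD-style face detectors.
def _example_predict_box():
    boxes = np.array([[[0.1, 0.1, 0.4, 0.4],
                       [0.5, 0.5, 0.9, 0.9]]], dtype=np.float32)
    confidences = np.array([[[0.2, 0.8],
                             [0.9, 0.1]]], dtype=np.float32)  # (background, face)
    # only the first anchor clears the 0.5 threshold for the face class;
    # returns pixel boxes, class labels and scores
    return predict_box(640, 480, confidences, boxes, prob_threshold=0.5)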
class BBox(object):
    # bbox is a list of [left, right, top, bottom]
    def __init__(self, bbox):
        self.left = bbox[0]
        self.right = bbox[1]
        self.top = bbox[2]
        self.bottom = bbox[3]
        self.x = bbox[0]
        self.y = bbox[2]
        self.w = bbox[1] - bbox[0]
        self.h = bbox[3] - bbox[2]

    # scale landmark coordinates into the box-local [0, 1] range
    def projectLandmark(self, landmark):
        landmark_ = np.zeros(landmark.shape)
        for i, point in enumerate(landmark):
            landmark_[i] = ((point[0] - self.x) / self.w, (point[1] - self.y) / self.h)
        return landmark_

    # map landmarks of shape (5, 2) from [0, 1] back to image coordinates
    def reprojectLandmark(self, landmark):
        landmark_ = np.zeros(landmark.shape)
        for i, point in enumerate(landmark):
            x = point[0] * self.w + self.x
            y = point[1] * self.h + self.y
            landmark_[i] = (x, y)
        return landmark_
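
# Round-trip sketch (illustrative): projecting landmarks into box-local
# [0, 1] space and reprojecting recovers the original coordinates.
def _example_bbox_round_trip():
    box = BBox([100, 200, 50, 150])  # left, right, top, bottom
    pts = np.array([[120.0, 80.0], [180.0, 140.0]])
    local = box.projectLandmark(pts)      # coordinates in [0, 1]
    return box.reprojectLandmark(local)   # equals pts again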
def umeyama(src, dst, estimate_scale):
    """Estimate N-D similarity transformation with or without scaling.

    Parameters
    ----------
    src : (M, N) array
        Source coordinates.
    dst : (M, N) array
        Destination coordinates.
    estimate_scale : bool
        Whether to estimate scaling factor.

    Returns
    -------
    T : (N + 1, N + 1)
        The homogeneous similarity transformation matrix. The matrix contains
        NaN values only if the problem is not well-conditioned.

    References
    ----------
    .. [1] "Least-squares estimation of transformation parameters between two
           point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
    """
    num = src.shape[0]
    dim = src.shape[1]

    # Compute mean of src and dst.
    src_mean = src.mean(axis=0)
    dst_mean = dst.mean(axis=0)

    # Subtract mean from src and dst.
    src_demean = src - src_mean
    dst_demean = dst - dst_mean

    # Eq. (38).
    A = np.dot(dst_demean.T, src_demean) / num

    # Eq. (39).
    d = np.ones((dim,), dtype=np.double)
    if np.linalg.det(A) < 0:
        d[dim - 1] = -1

    T = np.eye(dim + 1, dtype=np.double)

    U, S, V = np.linalg.svd(A)

    # Eq. (40) and (43).
    rank = np.linalg.matrix_rank(A)
    if rank == 0:
        return np.nan * T
    elif rank == dim - 1:
        if np.linalg.det(U) * np.linalg.det(V) > 0:
            T[:dim, :dim] = np.dot(U, V)
        else:
            s = d[dim - 1]
            d[dim - 1] = -1
            T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))
            d[dim - 1] = s
    else:
        T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))

    if estimate_scale:
        # Eq. (41) and (42).
        scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d)
    else:
        scale = 1.0

    T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T)
    T[:dim, :dim] *= scale
    return T
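
# Minimal sanity check (illustrative): recover a known similarity transform
# (30 degree rotation, uniform scale 2, translation) from four point pairs.
def _example_umeyama():
    src = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    angle = np.deg2rad(30.0)
    R = np.array([[np.cos(angle), -np.sin(angle)],
                  [np.sin(angle),  np.cos(angle)]])
    dst = 2.0 * src.dot(R.T) + np.array([5.0, -3.0])
    T = umeyama(src, dst, estimate_scale=True)  # 3x3 homogeneous matrix
    return T  # T[:2, :2] ~ 2 * R and T[:2, 2] ~ [5, -3]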
def xyxy2xywh(bbox_xyxy):
    """Transform the bbox format from x1y1x2y2 to xywh.

    Args:
        bbox_xyxy (np.ndarray): Bounding boxes (with scores), shaped (n, 4) or
            (n, 5). (left, top, right, bottom, [score])
    Returns:
        np.ndarray: Bounding boxes (with scores),
            shaped (n, 4) or (n, 5). (left, top, width, height, [score])
    """
    bbox_xywh = bbox_xyxy.copy()
    bbox_xywh[:, 2] = bbox_xywh[:, 2] - bbox_xywh[:, 0] + 1
    bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1] + 1
    return bbox_xywh
def xywh2xyxy(bbox_xywh):
    """Transform the bbox format from xywh to x1y1x2y2.

    Args:
        bbox_xywh (ndarray): Bounding boxes (with scores),
            shaped (n, 4) or (n, 5). (left, top, width, height, [score])
    Returns:
        np.ndarray: Bounding boxes (with scores), shaped (n, 4) or
            (n, 5). (left, top, right, bottom, [score])
    """
    bbox_xyxy = bbox_xywh.copy()
    bbox_xyxy[:, 2] = bbox_xyxy[:, 2] + bbox_xyxy[:, 0] - 1
    bbox_xyxy[:, 3] = bbox_xyxy[:, 3] + bbox_xyxy[:, 1] - 1
    return bbox_xyxy
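
# The two conversions are inverses of each other; note the +/-1 convention,
# which treats boxes as inclusive pixel ranges (illustrative check below).
def _example_xyxy_xywh_round_trip():
    xyxy = np.array([[10, 20, 49, 59, 0.9]], dtype=np.float32)
    xywh = xyxy2xywh(xyxy)   # [[10, 20, 40, 40, 0.9]]
    return xywh2xyxy(xywh)   # back to [[10, 20, 49, 59, 0.9]]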
def box2cs(cfg, box):
    """Encode a bbox (x, y, w, h) into (center, scale).

    Args:
        cfg: model config; only cfg.data_cfg['image_size'] is used.
        box: bbox in (x, y, w, h) format.
    Returns:
        tuple: A tuple containing center and scale.
        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """
    x, y, w, h = box[:4]

    input_size = cfg.data_cfg['image_size']
    aspect_ratio = input_size[0] / input_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    # pad the box to match the model input aspect ratio
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
    scale = scale * 1.25
    return center, scale
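
# Illustrative sketch: box2cs only reads cfg.data_cfg['image_size'], so a
# tiny stand-in (hypothetical, in place of a full mmpose-style config) is
# enough to call it.
class _CfgStub:
    data_cfg = {'image_size': (192, 256)}  # (width, height) of the model input

def _example_box2cs():
    center, scale = box2cs(_CfgStub(), [50, 60, 100, 200])
    # center == [100., 160.]; scale == [0.9375, 1.25] after the 1.25 padding
    return center, scale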