|
import numpy as np |
|
import torch |
|
import torchvision |
|
from itertools import product as product |
|
from math import ceil |
|
|
|
|
|
class PriorBox(object):
    """Generate anchor (prior) boxes for a set of feature-map levels.

    The configuration dict ``cfg`` must provide:
        'min_sizes': one sequence of anchor sizes per level,
        'steps':     the stride of each level,
        'clip':      whether generated values are clamped to [0, 1].

    ``image_size`` is indexed as ``(height, width)``. ``phase`` is accepted
    but not used by this class.
    """

    def __init__(self, cfg, image_size=None, phase='train'):
        super(PriorBox, self).__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        # One [rows, cols] entry per stride, rounded up so partial cells count.
        self.feature_maps = []
        for step in self.steps:
            rows = ceil(self.image_size[0] / step)
            cols = ceil(self.image_size[1] / step)
            self.feature_maps.append([rows, cols])
        self.name = 's'

    def forward(self):
        """Return all anchors as a tensor of shape [num_anchors, 4].

        Each row is (cx, cy, w, h), every value divided by the matching
        image dimension. Anchors are ordered by level, then row, then
        column, then anchor size.
        """
        flat = []
        for level, (rows, cols) in enumerate(self.feature_maps):
            level_sizes = self.min_sizes[level]
            for row in range(rows):
                for col in range(cols):
                    for size in level_sizes:
                        box_w = size / self.image_size[1]
                        box_h = size / self.image_size[0]
                        # Anchor centre sits in the middle of the cell.
                        centre_x = (col + 0.5) * self.steps[level] / self.image_size[1]
                        centre_y = (row + 0.5) * self.steps[level] / self.image_size[0]
                        flat.extend((centre_x, centre_y, box_w, box_h))

        output = torch.Tensor(flat).view(-1, 4)
        if self.clip:
            output.clamp_(min=0, max=1)
        return output
|
|
|
|
|
def py_cpu_nms(dets, thresh):
    """Run non-maximum suppression on an array of detections.

    Despite the name, the work is delegated to ``torchvision.ops.nms``.

    Args:
        dets: 2-D array indexed as ``dets[:, :4]`` for the boxes and
            ``dets[:, 4]`` for the scores.
        thresh: IoU threshold forwarded to ``torchvision.ops.nms``.
    Return:
        A Python list holding the elements of the index tensor returned by
        ``torchvision.ops.nms``.
    """
    box_tensor = torch.Tensor(dets[:, :4])
    score_tensor = torch.Tensor(dets[:, 4])
    kept = torchvision.ops.nms(box_tensor, score_tensor, thresh)
    return [index for index in kept]
|
|
|
|
|
def point_form(boxes):
    """Convert centre-size boxes to corner form.

    Args:
        boxes: (tensor) boxes laid out as (cx, cy, w, h) along dim 1.
    Return:
        (tensor) the same boxes laid out as (xmin, ymin, xmax, ymax).
    """
    centres = boxes[:, :2]
    sizes = boxes[:, 2:]
    top_left = centres - sizes / 2
    bottom_right = centres + sizes / 2
    return torch.cat((top_left, bottom_right), 1)
|
|
|
|
|
def center_size(boxes):
    """Convert corner-form boxes to centre-size form.

    Args:
        boxes: (tensor) boxes laid out as (xmin, ymin, xmax, ymax) along dim 1.
    Return:
        (tensor) the same boxes laid out as (cx, cy, w, h).
    """
    top_left = boxes[:, :2]
    bottom_right = boxes[:, 2:]
    # torch.cat takes a single sequence of tensors followed by the dim;
    # passing the tensors as separate positional arguments raises TypeError.
    return torch.cat(
        (
            (bottom_right + top_left) / 2,  # cx, cy
            bottom_right - top_left,        # w, h
        ),
        1)
|
|
|
|
|
def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of corner-form boxes.

    Both inputs are expanded (without copying) to a common [A, B, 2] shape
    so every box of ``box_a`` is compared with every box of ``box_b``.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    pair_shape = (box_a.size(0), box_b.size(0), 2)

    a_top_left = box_a[:, :2].unsqueeze(1).expand(*pair_shape)
    a_bottom_right = box_a[:, 2:].unsqueeze(1).expand(*pair_shape)
    b_top_left = box_b[:, :2].unsqueeze(0).expand(*pair_shape)
    b_bottom_right = box_b[:, 2:].unsqueeze(0).expand(*pair_shape)

    upper = torch.min(a_bottom_right, b_bottom_right)
    lower = torch.max(a_top_left, b_top_left)
    # Non-overlapping pairs give a negative extent; clamp it to zero.
    extent = torch.clamp(upper - lower, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
|
|
|
|
|
def jaccard(box_a, box_b):
    """Compute the pairwise jaccard overlap (intersection over union).

    For each pair: IoU = inter / (area(A) + area(B) - inter), with both
    box sets given in corner form (xmin, ymin, xmax, ymax).

    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    overlap = intersect(box_a, box_b)

    width_a = box_a[:, 2] - box_a[:, 0]
    height_a = box_a[:, 3] - box_a[:, 1]
    width_b = box_b[:, 2] - box_b[:, 0]
    height_b = box_b[:, 3] - box_b[:, 1]

    # Broadcast per-box areas over the [A, B] pair grid.
    area_a = (width_a * height_a).unsqueeze(1).expand_as(overlap)
    area_b = (width_b * height_b).unsqueeze(0).expand_as(overlap)

    return overlap / (area_a + area_b - overlap)
|
|
|
|
|
def matrix_iou(a, b):
    """
    Return the pairwise IoU of boxes ``a`` and ``b`` (NumPy version, used
    for data augmentation). Boxes are in (xmin, ymin, xmax, ymax) form and
    the result has one row per box in ``a`` and one column per box in ``b``.
    """
    a_pairs = a[:, np.newaxis, :]
    top_left = np.maximum(a_pairs[:, :, :2], b[:, :2])
    bottom_right = np.minimum(a_pairs[:, :, 2:], b[:, 2:])

    # Zero out pairs that do not overlap on both axes.
    overlaps = (top_left < bottom_right).all(axis=2)
    inter_area = np.prod(bottom_right - top_left, axis=2) * overlaps

    a_area = np.prod(a[:, 2:] - a[:, :2], axis=1)
    b_area = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union_area = a_area[:, np.newaxis] + b_area - inter_area
    return inter_area / union_area
|
|
|
|
|
def matrix_iof(a, b):
    """
    Return the pairwise intersection-over-foreground of ``a`` and ``b``
    (NumPy version, used for data augmentation): the intersection area
    divided by the area of the box from ``a``, where that divisor is
    floored at 1.
    """
    a_pairs = a[:, np.newaxis, :]
    top_left = np.maximum(a_pairs[:, :, :2], b[:, :2])
    bottom_right = np.minimum(a_pairs[:, :, 2:], b[:, 2:])

    # Zero out pairs that do not overlap on both axes.
    overlaps = (top_left < bottom_right).all(axis=2)
    inter_area = np.prod(bottom_right - top_left, axis=2) * overlaps

    a_area = np.prod(a[:, 2:] - a[:, :2], axis=1)
    denominator = np.maximum(a_area[:, np.newaxis], 1)
    return inter_area / denominator
|
|
|
|
|
def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
    """Assign a ground-truth box to every prior and write the encoded targets.

    Each prior is paired with the ground truth it overlaps most; in addition
    every ground truth claims its own best-overlapping prior. Results are
    written in place into row ``idx`` of the output tensors.

    Args:
        threshold: (float) The overlap threshold used when matching boxes.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
        variances: Variances passed through to encode / encode_landm.
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        landms: (tensor) Ground truth landms, Shape [num_obj, 10].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
        landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
        idx: (int) current batch index
    Return:
        None. ``loc_t[idx]``, ``conf_t[idx]`` and ``landm_t[idx]`` are filled
        in place. When no ground truth has a prior with overlap >= 0.2, only
        ``loc_t[idx]`` and ``conf_t[idx]`` are zeroed and ``landm_t`` is left
        untouched.
    """
    # IoU of every ground truth against every prior (priors in corner form).
    overlaps = jaccard(truths, point_form(priors))

    # Best prior for each ground truth, Shape: [num_obj].
    best_prior_overlap, best_prior_idx = overlaps.max(1)

    # Ground truths whose best prior overlaps by less than 0.2 are ignored
    # when forcing matches below.
    reliable_gt = best_prior_overlap >= 0.2
    reliable_prior_idx = best_prior_idx[reliable_gt]
    if reliable_prior_idx.shape[0] <= 0:
        loc_t[idx] = 0
        conf_t[idx] = 0
        return

    # Best ground truth for each prior, Shape: [num_priors].
    best_truth_overlap, best_truth_idx = overlaps.max(0)

    # Give the forced priors an overlap of 2 so they always pass the threshold.
    best_truth_overlap.index_fill_(0, reliable_prior_idx, 2)

    # Sequential on purpose: if two ground truths share a best prior, the
    # later one wins.
    for gt_index, prior_index in enumerate(best_prior_idx):
        best_truth_idx[prior_index] = gt_index

    matches = truths[best_truth_idx]
    conf = labels[best_truth_idx]
    conf[best_truth_overlap < threshold] = 0  # below threshold -> label 0
    loc = encode(matches, priors, variances)

    matches_landm = landms[best_truth_idx]
    landm = encode_landm(matches_landm, priors, variances)

    loc_t[idx] = loc
    conf_t[idx] = conf
    landm_t[idx] = landm
|
|
|
|
|
def encode(matched, priors, variances):
    """Encode matched ground-truth boxes as offsets relative to their priors.

    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    prior_centres = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Centre distance, scaled by variance[0] * prior size.
    gt_centres = (matched[:, :2] + matched[:, 2:]) / 2
    centre_offsets = (gt_centres - prior_centres) / (variances[0] * prior_sizes)

    # Log of the size ratio, scaled by variance[1].
    size_ratios = (matched[:, 2:] - matched[:, :2]) / prior_sizes
    log_sizes = torch.log(size_ratios) / variances[1]

    return torch.cat([centre_offsets, log_sizes], 1)
|
|
|
|
|
def encode_landm(matched, priors, variances):
    """Encode matched landmark points as offsets relative to their priors.

    Args:
        matched: (tensor) Landmark coords of ground truth for each prior
            Shape: [num_priors, 10].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded landm (tensor), Shape: [num_priors, 10]
    """
    count = matched.size(0)

    # View the 10 values as 5 (x, y) points.
    points = torch.reshape(matched, (count, 5, 2))

    # Repeat each prior column for the 5 points -> [num_priors, 5, 4].
    tiled_priors = torch.stack(
        [priors[:, column].unsqueeze(1).expand(count, 5) for column in range(4)],
        dim=2,
    )

    # Distance from the prior centre, scaled by variance[0] * prior size.
    offsets = points - tiled_priors[:, :, :2]
    offsets = offsets / (variances[0] * tiled_priors[:, :, 2:])

    return offsets.reshape(offsets.size(0), -1)
|
|
|
|
|
|
|
def decode(loc, priors, variances):
    """Turn location predictions back into corner-form boxes.

    This reverses the offset encoding applied at train time.

    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions as (xmin, ymin, xmax, ymax)
    """
    prior_centres = priors[:, :2]
    prior_sizes = priors[:, 2:]

    centres = prior_centres + loc[:, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * torch.exp(loc[:, 2:] * variances[1])

    # Centre-size -> corner form.
    top_left = centres - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), 1)
|
|
|
|
|
def decode_landm(pre, priors, variances):
    """Turn landmark predictions back into point coordinates.

    This reverses the offset encoding applied at train time.

    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_priors,10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    prior_centres = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Columns (0,1), (2,3), ..., (8,9) hold the five (x, y) points.
    points = [
        prior_centres + pre[:, start:start + 2] * variances[0] * prior_sizes
        for start in range(0, 10, 2)
    ]
    return torch.cat(points, dim=1)
|
|
|
|
|
def batched_decode(b_loc, priors, variances):
    """Turn a batch of location predictions back into corner-form boxes.

    This reverses the offset encoding applied at train time.

    Args:
        b_loc (tensor): location predictions for loc layers,
            Shape: [num_batches,num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [1,num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions as (xmin, ymin, xmax, ymax)
    """
    prior_centres = priors[:, :, :2]
    prior_sizes = priors[:, :, 2:]

    centres = prior_centres + b_loc[:, :, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * torch.exp(b_loc[:, :, 2:] * variances[1])

    # Centre-size -> corner form.
    top_left = centres - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), dim=2)
|
|
|
|
|
def batched_decode_landm(pre, priors, variances):
    """Turn a batch of landmark predictions back into point coordinates.

    This reverses the offset encoding applied at train time.

    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_batches,num_priors,10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [1,num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    prior_centres = priors[:, :, :2]
    prior_sizes = priors[:, :, 2:]

    # Columns (0,1), (2,3), ..., (8,9) hold the five (x, y) points.
    points = [
        prior_centres + pre[:, :, start:start + 2] * variances[0] * prior_sizes
        for start in range(0, 10, 2)
    ]
    return torch.cat(points, dim=2)
|
|
|
|
|
def log_sum_exp(x): |
|
"""Utility function for computing log_sum_exp while determining |
|
This will be used to determine unaveraged confidence loss across |
|
all examples in a batch. |
|
Args: |
|
x (Variable(tensor)): conf_preds from conf layers |
|
""" |
|
x_max = x.data.max() |
|
return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max |
|
|
|
|
|
|
|
|
|
|
|
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.

    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4],
            in (x1, y1, x2, y2) form.
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        A tuple ``(keep, count)``. ``keep`` is a long tensor of length
        ``scores.size(0)`` whose first ``count`` entries are the indices of
        the kept boxes, highest score first; the remaining entries are 0.
        The same tuple shape is returned for empty input (``count == 0``).
    """
    keep = torch.zeros(scores.size(0), dtype=torch.long)
    count = 0
    if boxes.numel() == 0:
        # Return the same (keep, count) pair as the normal path so callers
        # that unpack two values do not fail on empty input.
        return keep, count

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    # Ascending sort: the best remaining candidate is always idx[-1].
    _, idx = scores.sort(0)
    idx = idx[-top_k:]

    while idx.numel() > 0:
        i = idx[-1]  # index of the current highest-scoring box
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # drop the box just kept

        # Corners of the intersection between box i and each remaining box.
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])

        # Disjoint boxes give negative extents; clamp them to zero.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        iou = inter / union

        # Keep only candidates that do not overlap box i too much.
        idx = idx[iou.le(overlap)]
    return keep, count
|
|