zhangbo2008
/

codeformer

Model card Files Files and versions Community

codeformer / facelib /detection /retinaface /retinaface_utils.py

zhangbo2008

Upload folder using huggingface_hub

6c60ccc about 1 year ago

raw

history blame contribute delete

16.5 kB

	import numpy as np
	import torch
	import torchvision
	from itertools import product as product
	from math import ceil


	class PriorBox(object):

	def __init__(self, cfg, image_size=None, phase='train'):
	super(PriorBox, self).__init__()
	self.min_sizes = cfg['min_sizes']
	self.steps = cfg['steps']
	self.clip = cfg['clip']
	self.image_size = image_size
	self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
	self.name = 's'

	def forward(self):
	anchors = []
	for k, f in enumerate(self.feature_maps):
	min_sizes = self.min_sizes[k]
	for i, j in product(range(f[0]), range(f[1])):
	for min_size in min_sizes:
	s_kx = min_size / self.image_size[1]
	s_ky = min_size / self.image_size[0]
	dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
	dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
	for cy, cx in product(dense_cy, dense_cx):
	anchors += [cx, cy, s_kx, s_ky]

	# back to torch land
	output = torch.Tensor(anchors).view(-1, 4)
	if self.clip:
	output.clamp_(max=1, min=0)
	return output


	def py_cpu_nms(dets, thresh):
	"""Pure Python NMS baseline."""
	keep = torchvision.ops.nms(
	boxes=torch.Tensor(dets[:, :4]),
	scores=torch.Tensor(dets[:, 4]),
	iou_threshold=thresh,
	)

	return list(keep)


	def point_form(boxes):
	""" Convert prior_boxes to (xmin, ymin, xmax, ymax)
	representation for comparison to point form ground truth data.
	Args:
	boxes: (tensor) center-size default boxes from priorbox layers.
	Return:
	boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
	"""
	return torch.cat(
	(
	boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin
	boxes[:, :2] + boxes[:, 2:] / 2),
	1) # xmax, ymax


	def center_size(boxes):
	""" Convert prior_boxes to (cx, cy, w, h)
	representation for comparison to center-size form ground truth data.
	Args:
	boxes: (tensor) point_form boxes
	Return:
	boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
	"""
	return torch.cat(
	(boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy
	boxes[:, 2:] - boxes[:, :2],
	1) # w, h


	def intersect(box_a, box_b):
	""" We resize both tensors to [A,B,2] without new malloc:
	[A,2] -> [A,1,2] -> [A,B,2]
	[B,2] -> [1,B,2] -> [A,B,2]
	Then we compute the area of intersect between box_a and box_b.
	Args:
	box_a: (tensor) bounding boxes, Shape: [A,4].
	box_b: (tensor) bounding boxes, Shape: [B,4].
	Return:
	(tensor) intersection area, Shape: [A,B].
	"""
	A = box_a.size(0)
	B = box_b.size(0)
	max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
	min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
	inter = torch.clamp((max_xy - min_xy), min=0)
	return inter[:, :, 0] * inter[:, :, 1]


	def jaccard(box_a, box_b):
	"""Compute the jaccard overlap of two sets of boxes. The jaccard overlap
	is simply the intersection over union of two boxes. Here we operate on
	ground truth boxes and default boxes.
	E.g.:
	A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
	Args:
	box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
	box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
	Return:
	jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
	"""
	inter = intersect(box_a, box_b)
	area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
	area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
	union = area_a + area_b - inter
	return inter / union # [A,B]


	def matrix_iou(a, b):
	"""
	return iou of a and b, numpy version for data augenmentation
	"""
	lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
	rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

	area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
	area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
	area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
	return area_i / (area_a[:, np.newaxis] + area_b - area_i)


	def matrix_iof(a, b):
	"""
	return iof of a and b, numpy version for data augenmentation
	"""
	lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
	rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

	area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
	area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
	return area_i / np.maximum(area_a[:, np.newaxis], 1)


	def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
	"""Match each prior box with the ground truth box of the highest jaccard
	overlap, encode the bounding boxes, then return the matched indices
	corresponding to both confidence and location preds.
	Args:
	threshold: (float) The overlap threshold used when matching boxes.
	truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
	priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
	variances: (tensor) Variances corresponding to each prior coord,
	Shape: [num_priors, 4].
	labels: (tensor) All the class labels for the image, Shape: [num_obj].
	landms: (tensor) Ground truth landms, Shape [num_obj, 10].
	loc_t: (tensor) Tensor to be filled w/ encoded location targets.
	conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
	landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
	idx: (int) current batch index
	Return:
	The matched indices corresponding to 1)location 2)confidence
	3)landm preds.
	"""
	# jaccard index
	overlaps = jaccard(truths, point_form(priors))
	# (Bipartite Matching)
	# [1,num_objects] best prior for each ground truth
	best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)

	# ignore hard gt
	valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
	best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
	if best_prior_idx_filter.shape[0] <= 0:
	loc_t[idx] = 0
	conf_t[idx] = 0
	return

	# [1,num_priors] best ground truth for each prior
	best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
	best_truth_idx.squeeze_(0)
	best_truth_overlap.squeeze_(0)
	best_prior_idx.squeeze_(1)
	best_prior_idx_filter.squeeze_(1)
	best_prior_overlap.squeeze_(1)
	best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
	# TODO refactor: index best_prior_idx with long tensor
	# ensure every gt matches with its prior of max overlap
	for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes
	best_truth_idx[best_prior_idx[j]] = j
	matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来
	conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来
	conf[best_truth_overlap < threshold] = 0 # label as background overlap<0.35的全部作为负样本
	loc = encode(matches, priors, variances)

	matches_landm = landms[best_truth_idx]
	landm = encode_landm(matches_landm, priors, variances)
	loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
	conf_t[idx] = conf # [num_priors] top class label for each prior
	landm_t[idx] = landm


	def encode(matched, priors, variances):
	"""Encode the variances from the priorbox layers into the ground truth boxes
	we have matched (based on jaccard overlap) with the prior boxes.
	Args:
	matched: (tensor) Coords of ground truth for each prior in point-form
	Shape: [num_priors, 4].
	priors: (tensor) Prior boxes in center-offset form
	Shape: [num_priors,4].
	variances: (list[float]) Variances of priorboxes
	Return:
	encoded boxes (tensor), Shape: [num_priors, 4]
	"""

	# dist b/t match center and prior's center
	g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
	# encode variance
	g_cxcy /= (variances[0] * priors[:, 2:])
	# match wh / prior wh
	g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
	g_wh = torch.log(g_wh) / variances[1]
	# return target for smooth_l1_loss
	return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]


	def encode_landm(matched, priors, variances):
	"""Encode the variances from the priorbox layers into the ground truth boxes
	we have matched (based on jaccard overlap) with the prior boxes.
	Args:
	matched: (tensor) Coords of ground truth for each prior in point-form
	Shape: [num_priors, 10].
	priors: (tensor) Prior boxes in center-offset form
	Shape: [num_priors,4].
	variances: (list[float]) Variances of priorboxes
	Return:
	encoded landm (tensor), Shape: [num_priors, 10]
	"""

	# dist b/t match center and prior's center
	matched = torch.reshape(matched, (matched.size(0), 5, 2))
	priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
	priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
	priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
	priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
	priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
	g_cxcy = matched[:, :, :2] - priors[:, :, :2]
	# encode variance
	g_cxcy /= (variances[0] * priors[:, :, 2:])
	# g_cxcy /= priors[:, :, 2:]
	g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
	# return target for smooth_l1_loss
	return g_cxcy


	# Adapted from https://github.com/Hakuyume/chainer-ssd
	def decode(loc, priors, variances):
	"""Decode locations from predictions using priors to undo
	the encoding we did for offset regression at train time.
	Args:
	loc (tensor): location predictions for loc layers,
	Shape: [num_priors,4]
	priors (tensor): Prior boxes in center-offset form.
	Shape: [num_priors,4].
	variances: (list[float]) Variances of priorboxes
	Return:
	decoded bounding box predictions
	"""

	boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
	priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
	boxes[:, :2] -= boxes[:, 2:] / 2
	boxes[:, 2:] += boxes[:, :2]
	return boxes


	def decode_landm(pre, priors, variances):
	"""Decode landm from predictions using priors to undo
	the encoding we did for offset regression at train time.
	Args:
	pre (tensor): landm predictions for loc layers,
	Shape: [num_priors,10]
	priors (tensor): Prior boxes in center-offset form.
	Shape: [num_priors,4].
	variances: (list[float]) Variances of priorboxes
	Return:
	decoded landm predictions
	"""
	tmp = (
	priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
	priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
	priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
	priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
	priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
	)
	landms = torch.cat(tmp, dim=1)
	return landms


	def batched_decode(b_loc, priors, variances):
	"""Decode locations from predictions using priors to undo
	the encoding we did for offset regression at train time.
	Args:
	b_loc (tensor): location predictions for loc layers,
	Shape: [num_batches,num_priors,4]
	priors (tensor): Prior boxes in center-offset form.
	Shape: [1,num_priors,4].
	variances: (list[float]) Variances of priorboxes
	Return:
	decoded bounding box predictions
	"""
	boxes = (
	priors[:, :, :2] + b_loc[:, :, :2] * variances[0] * priors[:, :, 2:],
	priors[:, :, 2:] * torch.exp(b_loc[:, :, 2:] * variances[1]),
	)
	boxes = torch.cat(boxes, dim=2)

	boxes[:, :, :2] -= boxes[:, :, 2:] / 2
	boxes[:, :, 2:] += boxes[:, :, :2]
	return boxes


	def batched_decode_landm(pre, priors, variances):
	"""Decode landm from predictions using priors to undo
	the encoding we did for offset regression at train time.
	Args:
	pre (tensor): landm predictions for loc layers,
	Shape: [num_batches,num_priors,10]
	priors (tensor): Prior boxes in center-offset form.
	Shape: [1,num_priors,4].
	variances: (list[float]) Variances of priorboxes
	Return:
	decoded landm predictions
	"""
	landms = (
	priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:],
	priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:],
	priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:],
	priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:],
	priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:],
	)
	landms = torch.cat(landms, dim=2)
	return landms


	def log_sum_exp(x):
	"""Utility function for computing log_sum_exp while determining
	This will be used to determine unaveraged confidence loss across
	all examples in a batch.
	Args:
	x (Variable(tensor)): conf_preds from conf layers
	"""
	x_max = x.data.max()
	return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max


	# Original author: Francisco Massa:
	# https://github.com/fmassa/object-detection.torch
	# Ported to PyTorch by Max deGroot (02/01/2017)
	def nms(boxes, scores, overlap=0.5, top_k=200):
	"""Apply non-maximum suppression at test time to avoid detecting too many
	overlapping bounding boxes for a given object.
	Args:
	boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
	scores: (tensor) The class predscores for the img, Shape:[num_priors].
	overlap: (float) The overlap thresh for suppressing unnecessary boxes.
	top_k: (int) The Maximum number of box preds to consider.
	Return:
	The indices of the kept boxes with respect to num_priors.
	"""

	keep = torch.Tensor(scores.size(0)).fill_(0).long()
	if boxes.numel() == 0:
	return keep
	x1 = boxes[:, 0]
	y1 = boxes[:, 1]
	x2 = boxes[:, 2]
	y2 = boxes[:, 3]
	area = torch.mul(x2 - x1, y2 - y1)
	v, idx = scores.sort(0) # sort in ascending order
	# I = I[v >= 0.01]
	idx = idx[-top_k:] # indices of the top-k largest vals
	xx1 = boxes.new()
	yy1 = boxes.new()
	xx2 = boxes.new()
	yy2 = boxes.new()
	w = boxes.new()
	h = boxes.new()

	# keep = torch.Tensor()
	count = 0
	while idx.numel() > 0:
	i = idx[-1] # index of current largest val
	# keep.append(i)
	keep[count] = i
	count += 1
	if idx.size(0) == 1:
	break
	idx = idx[:-1] # remove kept element from view
	# load bboxes of next highest vals
	torch.index_select(x1, 0, idx, out=xx1)
	torch.index_select(y1, 0, idx, out=yy1)
	torch.index_select(x2, 0, idx, out=xx2)
	torch.index_select(y2, 0, idx, out=yy2)
	# store element-wise max with next highest score
	xx1 = torch.clamp(xx1, min=x1[i])
	yy1 = torch.clamp(yy1, min=y1[i])
	xx2 = torch.clamp(xx2, max=x2[i])
	yy2 = torch.clamp(yy2, max=y2[i])
	w.resize_as_(xx2)
	h.resize_as_(yy2)
	w = xx2 - xx1
	h = yy2 - yy1
	# check sizes of xx1 and xx2.. after each iteration
	w = torch.clamp(w, min=0.0)
	h = torch.clamp(h, min=0.0)
	inter = w * h
	# IoU = i / (area(a) + area(b) - i)
	rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
	union = (rem_areas - inter) + area[i]
	IoU = inter / union # store result in iou
	# keep only elements with an IoU <= overlap
	idx = idx[IoU.le(overlap)]
	return keep, count