Spaces:
Running
on
T4
Running
on
T4
File size: 4,369 Bytes
ac4ce84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import numpy as np
import torch
from torch.autograd import Variable
from .get_nets import PNet, RNet, ONet
from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from .first_stage import run_first_stage
def detect_faces(image, min_face_size=20.0,
                 thresholds=(0.6, 0.7, 0.8),
                 nms_thresholds=(0.7, 0.7, 0.7)):
    """Detect faces and facial landmarks with a three-stage cascade.

    The image is scanned by P-Net over a pyramid of scales, the surviving
    candidates are re-scored by R-Net on 24x24 crops, and finally by O-Net
    on 48x48 crops, which also predicts the landmarks. After every stage the
    boxes are filtered by score, de-duplicated with NMS and calibrated with
    the offsets predicted by the network.

    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a sequence of length 3, the score threshold applied
            after P-Net, R-Net and O-Net respectively.
        nms_thresholds: a sequence of length 3, the NMS threshold used
            after P-Net, R-Net and O-Net respectively.

    Returns:
        two float numpy arrays, bounding boxes and facial landmarks.
        Landmarks have shape [n_boxes, 10]: columns 0-4 are scaled by box
        width and shifted by xmin, columns 5-9 are scaled by box height
        and shifted by ymin.
        NOTE(review): the original docstring gives the bounding boxes shape
        as [n_boxes, 4], but the code writes the score into column index 4,
        so the array appears to have 5 columns -- TODO confirm against
        calibrate_box.
        When a stage leaves no candidates, two empty lists are returned
        instead of arrays.
    """
    # LOAD MODELS
    # NOTE(review): the networks are constructed on every call; presumably
    # their constructors load the pretrained weights -- confirm in get_nets.
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    # Only O-Net is switched to eval mode here (kept as in the original).
    onet.eval()

    # BUILD AN IMAGE PYRAMID
    width, height = image.size
    min_length = min(height, width)

    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # scales the image so that
    # minimum size that we can detect equals to
    # minimum face size that we want to detect
    m = min_detection_size / min_face_size
    min_length *= m

    # scales for scaling the image: keep shrinking by `factor` while the
    # shorter image side is still larger than the minimum detection size
    scales = []
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    with torch.no_grad():
        # STAGE 1

        # run P-Net on different scales; scales that produce no result
        # (None) are dropped
        candidates = [
            run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            for s in scales
        ]
        candidates = [boxes for boxes in candidates if boxes is not None]

        # np.vstack raises ValueError on an empty sequence, so bail out
        # with the same "nothing found" value that stage 3 uses.
        if not candidates:
            return [], []

        # collect boxes (and offsets, and scores) from different scales
        bounding_boxes = np.vstack(candidates)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2

        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes)

        output = rnet(img_boxes)
        offsets = output[0].data.numpy()  # shape [n_boxes, 4]
        probs = output[1].data.numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        # Every candidate was rejected by R-Net: nothing left to refine.
        if keep.size == 0:
            return [], []

        bounding_boxes = bounding_boxes[keep]
        # replace the previous score with the R-Net score
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3

        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes)

        output = onet(img_boxes)
        landmarks = output[0].data.numpy()  # shape [n_boxes, 10]
        offsets = output[1].data.numpy()  # shape [n_boxes, 4]
        probs = output[2].data.numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        # replace the previous score with the O-Net score
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points: scale the network output by the box
        # size and shift it by the box's top-left corner
        width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
        landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

        # landmarks are computed from the uncalibrated boxes above; the
        # boxes themselves are calibrated only afterwards
        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

    return bounding_boxes, landmarks
|