Spaces:
Running
on
T4
Running
on
T4
File size: 6,936 Bytes
ac4ce84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
import numpy as np
from PIL import Image
def nms(boxes, overlap_threshold=0.5, mode='union'):
    """Non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose overlap with it exceeds the threshold.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number; boxes whose overlap with a
            picked box is strictly greater than this are suppressed.
        mode: 'union' (intersection over union) or
            'min' (intersection over the smaller of the two areas).

    Returns:
        list with indices of the selected boxes,
        ordered from the highest score to the lowest.

    Raises:
        ValueError: if boxes is non-empty and mode is not 'union' or 'min'.
    """
    # if there are no boxes, return the empty list
    if len(boxes) == 0:
        return []

    # fail early with a clear message instead of hitting an
    # unassigned `overlap` variable inside the loop
    if mode not in ('union', 'min'):
        raise ValueError(
            "mode must be 'union' or 'min', got {!r}".format(mode)
        )

    # list of picked indices
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    # coordinates are treated as inclusive pixel indices, hence the +1
    area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
    ids = np.argsort(score)  # in increasing order

    while len(ids) > 0:
        # the best remaining box sits at the end of the sorted ids
        last = len(ids) - 1
        i = ids[last]
        pick.append(i)
        rest = ids[:last]

        # left top corner of intersection boxes
        ix1 = np.maximum(x1[i], x1[rest])
        iy1 = np.maximum(y1[i], y1[rest])

        # right bottom corner of intersection boxes
        ix2 = np.minimum(x2[i], x2[rest])
        iy2 = np.minimum(y2[i], y2[rest])

        # width and height of intersection boxes (zero if disjoint)
        w = np.maximum(0.0, ix2 - ix1 + 1.0)
        h = np.maximum(0.0, iy2 - iy1 + 1.0)

        # intersections' areas
        inter = w * h

        if mode == 'min':
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            # intersection over union (IoU)
            overlap = inter / (area[i] + area[rest] - inter)

        # drop the picked box and all boxes where overlap is too big;
        # written as a negated '>' so that NaN overlaps are kept,
        # exactly as a plain "delete where overlap > threshold" would
        ids = rest[~(overlap > overlap_threshold)]

    return pick
def convert_to_square(bboxes):
    """Convert bounding boxes to a square form.

    Each box is replaced by a square that shares its center and whose side
    equals the longer side of the original box. The input is not modified.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where the first four columns are (xmin, ymin, xmax, ymax).

    Returns:
        a float numpy array of shape [n, 5],
            squared bounding boxes; columns after the first four
            (e.g. the score) are copied over unchanged.
    """
    # start from a copy rather than zeros so that the non-coordinate
    # columns (the score) are carried over instead of being zeroed
    square_bboxes = bboxes.copy()

    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]

    # coordinates are treated as inclusive pixel indices, hence the +1
    h = y2 - y1 + 1.0
    w = x2 - x1 + 1.0
    max_side = np.maximum(h, w)

    # shift the top-left corner so the square stays centered on the box
    square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
    square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5

    # inclusive bottom-right corner: start + side - 1
    square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
    square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
    return square_bboxes
def calibrate_box(bboxes, offsets):
    """Shift box corners by offsets expressed as fractions of the box size.

    For every box:
        xmin += tx1 * width,  ymin += ty1 * height,
        xmax += tx2 * width,  ymax += ty2 * height,
    where (tx1, ty1, tx2, ty2) is the matching row of 'offsets' and
    width/height are measured on the box before the shift.

    Note that 'bboxes' is updated in place and the same array is returned.
    Nothing here enforces xmin < xmax or ymin < ymax after the shift.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].
        offsets: a float numpy array of shape [n, 4].

    Returns:
        a float numpy array of shape [n, 5] (the same object as 'bboxes').
    """
    # box sizes as column vectors; +1 because coordinates are inclusive
    widths = (bboxes[:, 2] - bboxes[:, 0] + 1.0)[:, np.newaxis]
    heights = (bboxes[:, 3] - bboxes[:, 1] + 1.0)[:, np.newaxis]

    # per-coordinate scale: x offsets scale with width, y offsets with height
    scale = np.hstack([widths, heights, widths, heights])
    shift = scale * offsets

    # write the shifted corners back into the caller's array
    bboxes[:, 0:4] = bboxes[:, 0:4] + shift
    return bboxes
def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Each box is copied out of the image into a zero-filled patch of the
    box's own size (so parts of the box lying outside the image stay
    black), resized to size x size, and passed through _preprocess.

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
        img: an instance of PIL.Image.
            NOTE(review): the patches are allocated with 3 channels, so
            this presumably expects an RGB image - confirm with callers.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """
    num_boxes = len(bounding_boxes)
    width, height = img.size

    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height)
    img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

    # the pixel array does not depend on the box, so convert the image
    # once instead of once per box
    img_array = np.asarray(img, 'uint8')

    for i in range(num_boxes):
        # zero canvas of the full (unclipped) box size
        img_box = np.zeros((h[i], w[i], 3), 'uint8')

        # paste the part of the box that lies inside the image;
        # end indices are inclusive, hence the +1
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        # resize
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, 'float32')

        img_boxes[i, :, :, :] = _preprocess(img_box)

    return img_boxes
def correct_bboxes(bboxes, width, height):
    """Crop boxes that are too big and get coordinates
    with respect to cutouts.

    The input array is left untouched; all clipping is done on copies.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a number, width of the image.
        height: a number, height of the image.

    Returns:
        a list of ten int32 numpy arrays of shape [n], in this order:
        [dy, edy, dx, edx, y, ey, x, ex, w, h], where
            dy, edy, dx, edx: coordinates of the boxes
                with respect to the cutouts,
            y, ey, x, ex: corrected ymin, ymax, xmin, xmax
                (clipped to the image),
            w, h: widths and heights of the original (unclipped) boxes.
        All end coordinates ('e' prefix) are inclusive.
    """
    # 'e' stands for end
    # (x, y) -> (ex, ey)
    # bboxes[:, i] is a view into the caller's array and the clipping
    # below assigns into these arrays, so work on copies to avoid
    # silently modifying the input
    x, y, ex, ey = [bboxes[:, i].copy() for i in range(4)]

    # sizes of the boxes before any clipping; +1 for inclusive coordinates
    w, h = ex - x + 1.0, ey - y + 1.0
    num_boxes = bboxes.shape[0]

    # we need to cut out a box from the image.
    # (x, y, ex, ey) are corrected coordinates of the box
    # in the image.
    # (dx, dy, edx, edy) are coordinates of the box in the cutout
    # from the image.
    dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
    edx, edy = w - 1.0, h - 1.0

    # if box's bottom right corner is too far right
    ind = np.where(ex > width - 1.0)[0]
    edx[ind] = w[ind] + width - 2.0 - ex[ind]
    ex[ind] = width - 1.0

    # if box's bottom right corner is too low
    ind = np.where(ey > height - 1.0)[0]
    edy[ind] = h[ind] + height - 2.0 - ey[ind]
    ey[ind] = height - 1.0

    # if box's top left corner is too far left
    ind = np.where(x < 0.0)[0]
    dx[ind] = 0.0 - x[ind]
    x[ind] = 0.0

    # if box's top left corner is too high
    ind = np.where(y < 0.0)[0]
    dy[ind] = 0.0 - y[ind]
    y[ind] = 0.0

    return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
    return [coords.astype('int32') for coords in return_list]
def _preprocess(img):
    """Normalize an image and reorder it to batch-first, channel-first form.

    Pixel values are mapped with (value - 127.5) * 0.0078125
    (0.0078125 == 1 / 128).

    Arguments:
        img: a float numpy array of shape [h, w, c].

    Returns:
        a float numpy array of shape [1, c, h, w].
    """
    normalized = (img - 127.5) * 0.0078125
    # [h, w, c] -> [c, h, w]
    channels_first = normalized.transpose((2, 0, 1))
    # add the leading batch axis of length one
    return channels_first[np.newaxis]
|