Spaces:
Build error
Build error
File size: 20,859 Bytes
47af768 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 |
import numpy as np
from ._base_metric import _BaseMetric
from .. import _timing
from functools import partial
from .. import utils
from ..utils import TrackEvalException
class TrackMAP(_BaseMetric):
    """Class which implements the TrackMAP metrics"""

    @staticmethod
    def get_default_metric_config():
        """Default class config values"""
        default_config = {
            'USE_AREA_RANGES': True,  # whether to evaluate for certain area ranges
            'AREA_RANGES': [[0 ** 2, 32 ** 2],  # additional area range sets for which TrackMAP is evaluated
                            [32 ** 2, 96 ** 2],  # (all area range always included), default values for TAO
                            [96 ** 2, 1e5 ** 2]],  # evaluation
            'AREA_RANGE_LABELS': ["area_s", "area_m", "area_l"],  # the labels for the area ranges
            'USE_TIME_RANGES': True,  # whether to evaluate for certain time ranges (length of tracks)
            'TIME_RANGES': [[0, 3], [3, 10], [10, 1e5]],  # additional time range sets for which TrackMAP is evaluated
            # (all time range always included) , default values for TAO evaluation
            'TIME_RANGE_LABELS': ["time_s", "time_m", "time_l"],  # the labels for the time ranges
            'IOU_THRESHOLDS': np.arange(0.5, 0.96, 0.05),  # the IoU thresholds
            'RECALL_THRESHOLDS': np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01) + 1), endpoint=True),
            # recall thresholds at which precision is evaluated
            'MAX_DETECTIONS': 0,  # limit the maximum number of considered tracks per sequence (0 for unlimited)
            'PRINT_CONFIG': True
        }
        return default_config

    def __init__(self, config=None):
        """Initialises the metric from the given config (missing entries are taken from the default config).

        One ignore mask (label 'all') is always evaluated; one additional mask is added per configured
        area range and per configured time range when the respective option is enabled.
        """
        super().__init__()
        self.config = utils.init_config(config, self.get_default_metric_config(), self.get_name())

        # the mask over all tracks is always present
        self.num_ig_masks = 1
        self.lbls = ['all']

        self.use_area_rngs = self.config['USE_AREA_RANGES']
        if self.use_area_rngs:
            self.area_rngs = self.config['AREA_RANGES']
            self.area_rng_lbls = self.config['AREA_RANGE_LABELS']
            self.num_ig_masks += len(self.area_rng_lbls)
            self.lbls += self.area_rng_lbls

        self.use_time_rngs = self.config['USE_TIME_RANGES']
        if self.use_time_rngs:
            self.time_rngs = self.config['TIME_RANGES']
            self.time_rng_lbls = self.config['TIME_RANGE_LABELS']
            self.num_ig_masks += len(self.time_rng_lbls)
            self.lbls += self.time_rng_lbls

        self.array_labels = self.config['IOU_THRESHOLDS']
        self.rec_thrs = self.config['RECALL_THRESHOLDS']

        self.maxDet = self.config['MAX_DETECTIONS']
        self.float_array_fields = ['AP_' + lbl for lbl in self.lbls] + ['AR_' + lbl for lbl in self.lbls]
        self.fields = self.float_array_fields
        self.summary_fields = self.float_array_fields

    @_timing.time
    def eval_sequence(self, data):
        """Calculates GT and Tracker matches for one sequence for TrackMAP metrics. Adapted from
        https://github.com/TAO-Dataset/

        :param data: dictionary with the sequence data. Read keys: 'gt_track_ids', 'dt_track_ids', 'gt_tracks',
            'dt_tracks', 'dt_track_scores', 'iou_type' and optionally 'gt_track_areas', 'gt_track_lengths',
            'gt_track_iscrowd', 'dt_track_areas', 'dt_track_lengths', 'not_exhaustively_labeled', 'boxformat'
        :return: dictionary which holds a zero-initialised list for every metric field and, for every ignore mask
            index, either None (sequence without any GT and tracker tracks) or a dictionary with the keys
            'dt_ids', 'gt_ids', 'dt_matches', 'gt_matches', 'dt_scores', 'gt_ignore', 'dt_ignore'
        """
        # Initialise results to zero for each sequence as the fields are only defined over the set of all sequences
        res = {field: [0 for _ in self.array_labels] for field in self.fields}

        gt_ids, dt_ids = data['gt_track_ids'], data['dt_track_ids']

        if len(gt_ids) == 0 and len(dt_ids) == 0:
            for idx in range(self.num_ig_masks):
                res[idx] = None
            return res

        # get track data
        gt_tr_areas = data.get('gt_track_areas', None) if self.use_area_rngs else None
        gt_tr_lengths = data.get('gt_track_lengths', None) if self.use_time_rngs else None
        gt_tr_iscrowd = data.get('gt_track_iscrowd', None)
        dt_tr_areas = data.get('dt_track_areas', None) if self.use_area_rngs else None
        dt_tr_lengths = data.get('dt_track_lengths', None) if self.use_time_rngs else None
        is_nel = data.get('not_exhaustively_labeled', False)

        # compute ignore masks for different track sets to eval
        gt_ig_masks = self._compute_track_ig_masks(len(gt_ids), track_lengths=gt_tr_lengths, track_areas=gt_tr_areas,
                                                   iscrowd=gt_tr_iscrowd)
        dt_ig_masks = self._compute_track_ig_masks(len(dt_ids), track_lengths=dt_tr_lengths, track_areas=dt_tr_areas,
                                                   is_not_exhaustively_labeled=is_nel, is_gt=False)

        boxformat = data.get('boxformat', 'xywh')
        ious = self._compute_track_ious(data['dt_tracks'], data['gt_tracks'], iou_function=data['iou_type'],
                                        boxformat=boxformat)

        num_thrs = len(self.array_labels)
        num_gt = len(gt_ids)
        num_dt = len(dt_ids)

        for mask_idx in range(self.num_ig_masks):
            gt_ig_mask = gt_ig_masks[mask_idx]

            # Sort gt ignore last (stable sort keeps the original order within each group).
            # The permutation is always applied to the ORIGINAL gt_ids/ious so that IDs, ignore flags and IoU
            # columns stay aligned for every mask.
            gt_order = np.argsort(list(gt_ig_mask), kind="mergesort")
            sorted_gt_ids = [gt_ids[i] for i in gt_order]
            ious_sorted = ious[:, gt_order] if len(ious) > 0 else ious

            # Array to store the "id" of the matched dt/gt (-1 -> unmatched)
            gt_m = np.zeros((num_thrs, num_gt)) - 1
            dt_m = np.zeros((num_thrs, num_dt)) - 1

            gt_ig = np.array([gt_ig_mask[idx] for idx in gt_order])
            dt_ig = np.zeros((num_thrs, num_dt))

            for iou_thr_idx, iou_thr in enumerate(self.array_labels):
                if len(ious_sorted) == 0:
                    break

                # NOTE(review): detections are matched greedily in the order of dt_ids; presumably the caller
                # provides them sorted by descending score - confirm against the dataset classes.
                for dt_idx, _dt in enumerate(dt_ids):
                    iou = min([iou_thr, 1 - 1e-10])
                    # information about best match so far (m=-1 -> unmatched)
                    # store the gt index which matched for _dt
                    m = -1
                    for g_idx in range(num_gt):
                        # if this gt already matched continue (compare against the -1 sentinel so that a
                        # matched detection with track ID 0 is recognised as well)
                        if gt_m[iou_thr_idx, g_idx] != -1:
                            continue
                        # if _dt matched to reg gt, and on ignore gt, stop
                        if m > -1 and gt_ig[m] == 0 and gt_ig[g_idx] == 1:
                            break
                        # continue to next gt unless better match made
                        if ious_sorted[dt_idx, g_idx] < iou - np.finfo('float').eps:
                            continue
                        # if match successful and best so far, store appropriately
                        iou = ious_sorted[dt_idx, g_idx]
                        m = g_idx

                    # No match found for _dt, go to next _dt
                    if m == -1:
                        continue

                    # if gt to ignore for some reason update dt_ig.
                    # Should not be used in evaluation.
                    dt_ig[iou_thr_idx, dt_idx] = gt_ig[m]
                    # _dt match found, update gt_m, and dt_m with "id"
                    dt_m[iou_thr_idx, dt_idx] = sorted_gt_ids[m]
                    gt_m[iou_thr_idx, m] = _dt

            dt_ig_mask = np.array(dt_ig_masks[mask_idx]).reshape((1, num_dt))  # 1 X num_dt
            dt_ig_mask = np.repeat(dt_ig_mask, num_thrs, 0)  # num_thrs X num_dt

            # Based on dt_ig_mask ignore any unmatched detection by updating dt_ig
            dt_ig = np.logical_or(dt_ig, np.logical_and(dt_m == -1, dt_ig_mask))

            # store results for given video and category
            res[mask_idx] = {
                "dt_ids": dt_ids,
                "gt_ids": sorted_gt_ids,
                "dt_matches": dt_m,
                "gt_matches": gt_m,
                "dt_scores": data['dt_track_scores'],
                "gt_ignore": gt_ig,
                "dt_ignore": dt_ig,
            }

        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences. Computes precision and recall values based on track matches.
        Adapted from https://github.com/TAO-Dataset/

        :param all_res: dictionary of the per-sequence results as returned by eval_sequence
        :return: dictionary with the raw 'precision' (thresholds x recall thresholds x masks) and 'recall'
            (thresholds x masks) arrays and one 'AP_<label>' / 'AR_<label>' array (one entry per IoU threshold)
            for every ignore mask label; -1 marks entries for which no value could be computed
        :raises Exception: if the configured maximum number of detections is negative
        """
        num_thrs = len(self.array_labels)
        num_recalls = len(self.rec_thrs)

        # -1 for absent categories
        precision = -np.ones((num_thrs, num_recalls, self.num_ig_masks))
        recall = -np.ones((num_thrs, self.num_ig_masks))

        for ig_idx in range(self.num_ig_masks):
            # Remove elements which are None
            ig_idx_results = [seq_res[ig_idx] for seq_res in all_res.values() if seq_res[ig_idx] is not None]
            if len(ig_idx_results) == 0:
                continue

            # limit considered tracks for each sequence if maxDet > 0
            if self.maxDet == 0:
                det_slice = slice(None)
            elif self.maxDet > 0:
                det_slice = slice(0, self.maxDet)
            else:
                raise Exception("Number of maximum detections must be >= 0, but is set to %i" % self.maxDet)

            # Append all scores: shape (N,)
            dt_scores = np.concatenate([e["dt_scores"][det_slice] for e in ig_idx_results], axis=0)
            # stable sort by descending score
            dt_idx = np.argsort(-dt_scores, kind="mergesort")
            dt_m = np.concatenate([e["dt_matches"][:, det_slice] for e in ig_idx_results], axis=1)[:, dt_idx]
            dt_ig = np.concatenate([e["dt_ignore"][:, det_slice] for e in ig_idx_results], axis=1)[:, dt_idx]

            gt_ig = np.concatenate([e["gt_ignore"] for e in ig_idx_results])
            # num gt anns to consider
            num_gt = np.count_nonzero(gt_ig == 0)
            if num_gt == 0:
                continue

            tps = np.logical_and(dt_m != -1, np.logical_not(dt_ig))
            fps = np.logical_and(dt_m == -1, np.logical_not(dt_ig))

            # builtin float instead of the np.float alias, which was removed from NumPy
            tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
            fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)

            for iou_thr_idx, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                tp = np.array(tp)
                fp = np.array(fp)
                num_tp = len(tp)
                rc = tp / num_gt
                recall[iou_thr_idx, ig_idx] = rc[-1] if num_tp else 0

                # np.spacing(1) ~= eps
                pr = (tp / (fp + tp + np.spacing(1))).tolist()

                # Ensure precision values are monotonically decreasing
                for i in range(num_tp - 1, 0, -1):
                    if pr[i] > pr[i - 1]:
                        pr[i - 1] = pr[i]

                # find indices at the predefined recall values
                rec_thrs_insert_idx = np.searchsorted(rc, self.rec_thrs, side="left")

                pr_at_recall = [0.0] * num_recalls
                for rec_idx, pr_idx in enumerate(rec_thrs_insert_idx):
                    # an index past the end means this recall value is never reached; the precision of this
                    # and all following recall thresholds stays 0
                    if pr_idx >= num_tp:
                        break
                    pr_at_recall[rec_idx] = pr[pr_idx]

                precision[iou_thr_idx, :, ig_idx] = np.array(pr_at_recall)

        res = {'precision': precision, 'recall': recall}

        # compute the precision and recall averages for the respective alpha thresholds and ignore masks
        for lbl in self.lbls:
            res['AP_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)
            res['AR_' + lbl] = np.zeros((len(self.array_labels)), dtype=float)

        for a_id in range(num_thrs):
            for lbl_idx, lbl in enumerate(self.lbls):
                p = precision[a_id, :, lbl_idx]
                valid_p = p[p > -1]
                res['AP_' + lbl][a_id] = np.mean(valid_p) if len(valid_p) > 0 else -1
                res['AR_' + lbl][a_id] = recall[a_id, lbl_idx]

        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
        """Combines metrics across all classes by averaging over the class values
        Note mAP is not well defined for 'empty classes' so 'ignore empty classes' is always true here.

        :param all_res: dictionary of the per-class results
        :param ignore_empty_classes: unused, kept for interface compatibility
        :return: dictionary with one averaged array (one entry per IoU threshold) for every metric field
        """
        return self._average_fields_over_classes(all_res)

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values

        :param all_res: dictionary of the per-class results
        :return: dictionary with one averaged array (one entry per IoU threshold) for every metric field
        """
        return self._average_fields_over_classes(all_res)

    def _average_fields_over_classes(self, all_res):
        """Averages every metric field over the entries of all_res, separately for each IoU threshold.

        Only values greater than -1 enter the mean. If no such value exists for a threshold (this includes an
        empty all_res), the result for this threshold is -1.

        :param all_res: dictionary of the per-class results, each holding one array per metric field
        :return: dictionary with one averaged array (one entry per IoU threshold) for every metric field
        """
        num_thrs = len(self.array_labels)
        res = {}
        for field in self.fields:
            averaged = -np.ones(num_thrs, dtype=float)
            if all_res:
                field_stacked = np.array([class_res[field] for class_res in all_res.values()])
                for a_id in range(num_thrs):
                    values = field_stacked[:, a_id]
                    valid = values[values > -1]
                    if len(valid) > 0:
                        averaged[a_id] = np.mean(valid)
            res[field] = averaged
        return res

    def _compute_track_ig_masks(self, num_ids, track_lengths=None, track_areas=None, iscrowd=None,
                                is_not_exhaustively_labeled=False, is_gt=True):
        """
        Computes ignore masks for different track sets to evaluate
        :param num_ids: the number of track IDs
        :param track_lengths: the lengths of the tracks (number of timesteps)
        :param track_areas: the average area of a track
        :param iscrowd: whether a track is marked as crowd
        :param is_not_exhaustively_labeled: whether the track category is not exhaustively labeled
        :param is_gt: whether it is gt
        :return: the track ignore masks (one per evaluated track set, 1/True -> ignore the track)
        """
        eps = np.finfo('float').eps

        # for TAO tracks for classes which are not exhaustively labeled are not evaluated
        if not is_gt and is_not_exhaustively_labeled:
            track_ig_masks = [[1 for _ in range(num_ids)] for _ in range(self.num_ig_masks)]
        else:
            # consider all tracks
            track_ig_masks = [[0 for _ in range(num_ids)]]

            # consider tracks with certain area
            if self.use_area_rngs:
                for rng in self.area_rngs:
                    track_ig_masks.append([0 if rng[0] - eps <= area <= rng[1] + eps else 1
                                           for area in track_areas])

            # consider tracks with certain duration
            if self.use_time_rngs:
                for rng in self.time_rngs:
                    track_ig_masks.append([0 if rng[0] - eps <= length <= rng[1] + eps else 1
                                           for length in track_lengths])

        # for YouTubeVIS evaluation tracks with crowd tag are not evaluated
        # (explicit None/length test: the truth value of a multi-element numpy array is ambiguous)
        if is_gt and iscrowd is not None and len(iscrowd) > 0:
            track_ig_masks = [np.logical_or(mask, iscrowd) for mask in track_ig_masks]

        return track_ig_masks

    @staticmethod
    def _compute_bb_track_iou(dt_track, gt_track, boxformat='xywh'):
        """
        Calculates the track IoU for one detected track and one ground truth track for bounding boxes
        :param dt_track: the detected track (format: dictionary with frame index as keys and
                            numpy arrays as values)
        :param gt_track: the ground truth track (format: dictionary with frame index as keys and
                        numpy array as values)
        :param boxformat: the format of the boxes ('xywh' or 'x0y0x1y1')
        :return: the track IoU (0 if the union is not positive)
        :raises TrackEvalException: for an unknown box format or if the intersection exceeds the union
        """
        intersect = 0
        union = 0
        # every frame in which at least one of the two tracks is present contributes to the union
        image_ids = set(gt_track.keys()) | set(dt_track.keys())
        for image in image_ids:
            g = gt_track.get(image, None)
            d = dt_track.get(image, None)

            if boxformat == 'xywh':
                if d is not None and g is not None:
                    dx, dy, dw, dh = d
                    gx, gy, gw, gh = g
                    w = max(min(dx + dw, gx + gw) - max(dx, gx), 0)
                    h = max(min(dy + dh, gy + gh) - max(dy, gy), 0)
                    i = w * h
                    u = dw * dh + gw * gh - i
                    intersect += i
                    union += u
                elif d is None and g is not None:
                    union += g[2] * g[3]
                elif d is not None and g is None:
                    union += d[2] * d[3]
            elif boxformat == 'x0y0x1y1':
                if d is not None and g is not None:
                    dx0, dy0, dx1, dy1 = d
                    gx0, gy0, gx1, gy1 = g
                    w = max(min(dx1, gx1) - max(dx0, gx0), 0)
                    h = max(min(dy1, gy1) - max(dy0, gy0), 0)
                    i = w * h
                    u = (dx1 - dx0) * (dy1 - dy0) + (gx1 - gx0) * (gy1 - gy0) - i
                    intersect += i
                    union += u
                elif d is None and g is not None:
                    union += (g[2] - g[0]) * (g[3] - g[1])
                elif d is not None and g is None:
                    union += (d[2] - d[0]) * (d[3] - d[1])
            else:
                raise TrackEvalException('BoxFormat not implemented')

        if intersect > union:
            raise TrackEvalException("Intersection value > union value. Are the box values corrupted?")
        return intersect / union if union > 0 else 0

    @staticmethod
    def _compute_mask_track_iou(dt_track, gt_track):
        """
        Calculates the track IoU for one detected track and one ground truth track for segmentation masks
        :param dt_track: the detected track (format: dictionary with frame index as keys and
                            pycocotools rle encoded masks as values)
        :param gt_track: the ground truth track (format: dictionary with frame index as keys and
                            pycocotools rle encoded masks as values)
        :return: the track IoU (0.0 if the union is not positive)
        :raises TrackEvalException: if the union is negative or the intersection exceeds the union
        """
        # only loaded when needed to reduce minimum requirements
        from pycocotools import mask as mask_utils

        eps = np.finfo('float').eps
        intersect = .0
        union = .0
        image_ids = set(gt_track.keys()) | set(dt_track.keys())
        for image in image_ids:
            g = gt_track.get(image, None)
            d = dt_track.get(image, None)
            if d and g:
                # merge(..., True) is called for the intersection and merge(..., False) for the union
                intersect += mask_utils.area(mask_utils.merge([d, g], True))
                union += mask_utils.area(mask_utils.merge([d, g], False))
            elif not d and g:
                union += mask_utils.area(g)
            elif d and not g:
                union += mask_utils.area(d)

        if union < 0.0 - eps:
            raise TrackEvalException("Union value < 0. Are the segmentaions corrupted?")
        if intersect > union:
            raise TrackEvalException("Intersection value > union value. Are the segmentations corrupted?")
        iou = intersect / union if union > 0.0 + eps else 0.0
        return iou

    @staticmethod
    def _compute_track_ious(dt, gt, iou_function='bbox', boxformat='xywh'):
        """
        Calculate track IoUs for a set of ground truth tracks and a set of detected tracks
        :param dt: the detected tracks (indexable sequence of tracks)
        :param gt: the ground truth tracks (indexable sequence of tracks)
        :param iou_function: the type of IoU to compute ('bbox' or 'mask')
        :param boxformat: the format of the boxes, only used for 'bbox'
        :return: an array of shape (len(dt), len(gt)) with the track IoUs, or an empty list if both sets are empty
        :raises Exception: if the IoU function is unknown
        """
        if len(gt) == 0 and len(dt) == 0:
            return []

        if iou_function == 'bbox':
            track_iou_function = partial(TrackMAP._compute_bb_track_iou, boxformat=boxformat)
        elif iou_function == 'mask':
            track_iou_function = partial(TrackMAP._compute_mask_track_iou)
        else:
            raise Exception('IoU function not implemented')

        ious = np.zeros([len(dt), len(gt)])
        for i, j in np.ndindex(ious.shape):
            ious[i, j] = track_iou_function(dt[i], gt[j])
        return ious

    @staticmethod
    def _row_print(*argv):
        """Prints results in an evenly spaced rows, with more space in first row"""
        # a single argument is interpreted as the sequence of values to print
        if len(argv) == 1:
            argv = argv[0]
        to_print = '%-40s' % argv[0] + ''.join('%-12s' % str(v) for v in argv[1:])
        print(to_print)
|