File size: 20,859 Bytes
47af768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
import numpy as np
from ._base_metric import _BaseMetric
from .. import _timing
from functools import partial
from .. import utils
from ..utils import TrackEvalException


class TrackMAP(_BaseMetric):
    """Class which implements the TrackMAP metrics"""

    @staticmethod
    def get_default_metric_config():
        """Default class config values"""
        default_config = {
            'USE_AREA_RANGES': True,  # whether to evaluate for certain area ranges
            'AREA_RANGES': [[0 ** 2, 32 ** 2],  # additional area range sets for which TrackMAP is evaluated
                            [32 ** 2, 96 ** 2],  # (all area range always included), default values for TAO
                            [96 ** 2, 1e5 ** 2]],  # evaluation
            'AREA_RANGE_LABELS': ["area_s", "area_m", "area_l"],  # the labels for the area ranges
            'USE_TIME_RANGES': True,  # whether to evaluate for certain time ranges (length of tracks)
            'TIME_RANGES': [[0, 3], [3, 10], [10, 1e5]],  # additional time range sets for which TrackMAP is evaluated
            # (all time range always included) , default values for TAO evaluation
            'TIME_RANGE_LABELS': ["time_s", "time_m", "time_l"],  # the labels for the time ranges
            'IOU_THRESHOLDS': np.arange(0.5, 0.96, 0.05),  # the IoU thresholds
            'RECALL_THRESHOLDS': np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01) + 1), endpoint=True),
            # recall thresholds at which precision is evaluated
            'MAX_DETECTIONS': 0,  # limit the maximum number of considered tracks per sequence (0 for unlimited)
            'PRINT_CONFIG': True
        }
        return default_config

    def __init__(self, config=None):
        super().__init__()
        self.config = utils.init_config(config, self.get_default_metric_config(), self.get_name())

        self.num_ig_masks = 1
        self.lbls = ['all']
        self.use_area_rngs = self.config['USE_AREA_RANGES']
        if self.use_area_rngs:
            self.area_rngs = self.config['AREA_RANGES']
            self.area_rng_lbls = self.config['AREA_RANGE_LABELS']
            self.num_ig_masks += len(self.area_rng_lbls)
            self.lbls += self.area_rng_lbls

        self.use_time_rngs = self.config['USE_TIME_RANGES']
        if self.use_time_rngs:
            self.time_rngs = self.config['TIME_RANGES']
            self.time_rng_lbls = self.config['TIME_RANGE_LABELS']
            self.num_ig_masks += len(self.time_rng_lbls)
            self.lbls += self.time_rng_lbls

        self.array_labels = self.config['IOU_THRESHOLDS']
        self.rec_thrs = self.config['RECALL_THRESHOLDS']

        self.maxDet = self.config['MAX_DETECTIONS']
        self.float_array_fields = ['AP_' + lbl for lbl in self.lbls] + ['AR_' + lbl for lbl in self.lbls]
        self.fields = self.float_array_fields
        self.summary_fields = self.float_array_fields

    @_timing.time
    def eval_sequence(self, data):
        """Calculates GT and Tracker matches for one sequence for TrackMAP metrics. Adapted from
        https://github.com/TAO-Dataset/"""

        # Initialise results to zero for each sequence as the fields are only defined over the set of all sequences
        res = {}
        for field in self.fields:
            res[field] = [0 for _ in self.array_labels]

        gt_ids, dt_ids = data['gt_track_ids'], data['dt_track_ids']

        if len(gt_ids) == 0 and len(dt_ids) == 0:
            for idx in range(self.num_ig_masks):
                res[idx] = None
            return res

        # get track data
        gt_tr_areas = data.get('gt_track_areas', None) if self.use_area_rngs else None
        gt_tr_lengths = data.get('gt_track_lengths', None) if self.use_time_rngs else None
        gt_tr_iscrowd = data.get('gt_track_iscrowd', None)
        dt_tr_areas = data.get('dt_track_areas', None) if self.use_area_rngs else None
        dt_tr_lengths = data.get('dt_track_lengths', None) if self.use_time_rngs else None
        is_nel = data.get('not_exhaustively_labeled', False)

        # compute ignore masks for different track sets to eval
        gt_ig_masks = self._compute_track_ig_masks(len(gt_ids), track_lengths=gt_tr_lengths, track_areas=gt_tr_areas,
                                                   iscrowd=gt_tr_iscrowd)
        dt_ig_masks = self._compute_track_ig_masks(len(dt_ids), track_lengths=dt_tr_lengths, track_areas=dt_tr_areas,
                                                   is_not_exhaustively_labeled=is_nel, is_gt=False)

        boxformat = data.get('boxformat', 'xywh')
        ious = self._compute_track_ious(data['dt_tracks'], data['gt_tracks'], iou_function=data['iou_type'],
                                        boxformat=boxformat)

        for mask_idx in range(self.num_ig_masks):
            gt_ig_mask = gt_ig_masks[mask_idx]

            # Sort gt ignore last
            gt_idx = np.argsort([g for g in gt_ig_mask], kind="mergesort")
            gt_ids = [gt_ids[i] for i in gt_idx]

            ious_sorted = ious[:, gt_idx] if len(ious) > 0 else ious

            num_thrs = len(self.array_labels)
            num_gt = len(gt_ids)
            num_dt = len(dt_ids)

            # Array to store the "id" of the matched dt/gt
            gt_m = np.zeros((num_thrs, num_gt)) - 1
            dt_m = np.zeros((num_thrs, num_dt)) - 1

            gt_ig = np.array([gt_ig_mask[idx] for idx in gt_idx])
            dt_ig = np.zeros((num_thrs, num_dt))

            for iou_thr_idx, iou_thr in enumerate(self.array_labels):
                if len(ious_sorted) == 0:
                    break

                for dt_idx, _dt in enumerate(dt_ids):
                    iou = min([iou_thr, 1 - 1e-10])
                    # information about best match so far (m=-1 -> unmatched)
                    # store the gt_idx which matched for _dt
                    m = -1
                    for gt_idx, _ in enumerate(gt_ids):
                        # if this gt already matched continue
                        if gt_m[iou_thr_idx, gt_idx] > 0:
                            continue
                        # if _dt matched to reg gt, and on ignore gt, stop
                        if m > -1 and gt_ig[m] == 0 and gt_ig[gt_idx] == 1:
                            break
                        # continue to next gt unless better match made
                        if ious_sorted[dt_idx, gt_idx] < iou - np.finfo('float').eps:
                            continue
                        # if match successful and best so far, store appropriately
                        iou = ious_sorted[dt_idx, gt_idx]
                        m = gt_idx

                    # No match found for _dt, go to next _dt
                    if m == -1:
                        continue

                    # if gt to ignore for some reason update dt_ig.
                    # Should not be used in evaluation.
                    dt_ig[iou_thr_idx, dt_idx] = gt_ig[m]
                    # _dt match found, update gt_m, and dt_m with "id"
                    dt_m[iou_thr_idx, dt_idx] = gt_ids[m]
                    gt_m[iou_thr_idx, m] = _dt

            dt_ig_mask = dt_ig_masks[mask_idx]

            dt_ig_mask = np.array(dt_ig_mask).reshape((1, num_dt))  # 1 X num_dt
            dt_ig_mask = np.repeat(dt_ig_mask, num_thrs, 0)  # num_thrs X num_dt

            # Based on dt_ig_mask ignore any unmatched detection by updating dt_ig
            dt_ig = np.logical_or(dt_ig, np.logical_and(dt_m == -1, dt_ig_mask))
            # store results for given video and category
            res[mask_idx] = {
                "dt_ids": dt_ids,
                "gt_ids": gt_ids,
                "dt_matches": dt_m,
                "gt_matches": gt_m,
                "dt_scores": data['dt_track_scores'],
                "gt_ignore": gt_ig,
                "dt_ignore": dt_ig,
            }

        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences. Computes precision and recall values based on track matches.
        Adapted from https://github.com/TAO-Dataset/
        """
        num_thrs = len(self.array_labels)
        num_recalls = len(self.rec_thrs)

        # -1 for absent categories
        precision = -np.ones(
            (num_thrs, num_recalls, self.num_ig_masks)
        )
        recall = -np.ones((num_thrs, self.num_ig_masks))

        for ig_idx in range(self.num_ig_masks):
            ig_idx_results = [res[ig_idx] for res in all_res.values() if res[ig_idx] is not None]

            # Remove elements which are None
            if len(ig_idx_results) == 0:
                continue

            # Append all scores: shape (N,)
            # limit considered tracks for each sequence if maxDet > 0
            if self.maxDet == 0:
                dt_scores = np.concatenate([res["dt_scores"] for res in ig_idx_results], axis=0)

                dt_idx = np.argsort(-dt_scores, kind="mergesort")

                dt_m = np.concatenate([e["dt_matches"] for e in ig_idx_results],
                                      axis=1)[:, dt_idx]
                dt_ig = np.concatenate([e["dt_ignore"] for e in ig_idx_results],
                                       axis=1)[:, dt_idx]
            elif self.maxDet > 0:
                dt_scores = np.concatenate([res["dt_scores"][0:self.maxDet] for res in ig_idx_results], axis=0)

                dt_idx = np.argsort(-dt_scores, kind="mergesort")

                dt_m = np.concatenate([e["dt_matches"][:, 0:self.maxDet] for e in ig_idx_results],
                                      axis=1)[:, dt_idx]
                dt_ig = np.concatenate([e["dt_ignore"][:, 0:self.maxDet] for e in ig_idx_results],
                                       axis=1)[:, dt_idx]
            else:
                raise Exception("Number of maximum detections must be >= 0, but is set to %i" % self.maxDet)

            gt_ig = np.concatenate([res["gt_ignore"] for res in ig_idx_results])
            # num gt anns to consider
            num_gt = np.count_nonzero(gt_ig == 0)

            if num_gt == 0:
                continue

            tps = np.logical_and(dt_m != -1, np.logical_not(dt_ig))
            fps = np.logical_and(dt_m == -1, np.logical_not(dt_ig))

            tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
            fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)

            for iou_thr_idx, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                tp = np.array(tp)
                fp = np.array(fp)
                num_tp = len(tp)
                rc = tp / num_gt
                if num_tp:
                    recall[iou_thr_idx, ig_idx] = rc[-1]
                else:
                    recall[iou_thr_idx, ig_idx] = 0

                # np.spacing(1) ~= eps
                pr = tp / (fp + tp + np.spacing(1))
                pr = pr.tolist()

                # Ensure precision values are monotonically decreasing
                for i in range(num_tp - 1, 0, -1):
                    if pr[i] > pr[i - 1]:
                        pr[i - 1] = pr[i]

                # find indices at the predefined recall values
                rec_thrs_insert_idx = np.searchsorted(rc, self.rec_thrs, side="left")

                pr_at_recall = [0.0] * num_recalls

                try:
                    for _idx, pr_idx in enumerate(rec_thrs_insert_idx):
                        pr_at_recall[_idx] = pr[pr_idx]
                except IndexError:
                    pass

                precision[iou_thr_idx, :, ig_idx] = (np.array(pr_at_recall))

        res = {'precision': precision, 'recall': recall}

        # compute the precision and recall averages for the respective alpha thresholds and ignore masks
        for lbl in self.lbls:
            res['AP_' + lbl] = np.zeros((len(self.array_labels)), dtype=np.float)
            res['AR_' + lbl] = np.zeros((len(self.array_labels)), dtype=np.float)

        for a_id, alpha in enumerate(self.array_labels):
            for lbl_idx, lbl in enumerate(self.lbls):
                p = precision[a_id, :, lbl_idx]
                if len(p[p > -1]) == 0:
                    mean_p = -1
                else:
                    mean_p = np.mean(p[p > -1])
                res['AP_' + lbl][a_id] = mean_p
                res['AR_' + lbl][a_id] = recall[a_id, lbl_idx]

        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
        """Combines metrics across all classes by averaging over the class values
        Note mAP is not well defined for 'empty classes' so 'ignore empty classes' is always true here.
        """
        res = {}
        for field in self.fields:
            res[field] = np.zeros((len(self.array_labels)), dtype=np.float)
            field_stacked = np.array([res[field] for res in all_res.values()])

            for a_id, alpha in enumerate(self.array_labels):
                values = field_stacked[:, a_id]
                if len(values[values > -1]) == 0:
                    mean = -1
                else:
                    mean = np.mean(values[values > -1])
                res[field][a_id] = mean
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""

        res = {}
        for field in self.fields:
            res[field] = np.zeros((len(self.array_labels)), dtype=np.float)
            field_stacked = np.array([res[field] for res in all_res.values()])

            for a_id, alpha in enumerate(self.array_labels):
                values = field_stacked[:, a_id]
                if len(values[values > -1]) == 0:
                    mean = -1
                else:
                    mean = np.mean(values[values > -1])
                res[field][a_id] = mean
        return res

    def _compute_track_ig_masks(self, num_ids, track_lengths=None, track_areas=None, iscrowd=None,
                                is_not_exhaustively_labeled=False, is_gt=True):
        """
        Computes ignore masks for different track sets to evaluate
        :param num_ids: the number of track IDs
        :param track_lengths: the lengths of the tracks (number of timesteps)
        :param track_areas: the average area of a track
        :param iscrowd: whether a track is marked as crowd
        :param is_not_exhaustively_labeled: whether the track category is not exhaustively labeled
        :param is_gt: whether it is gt
        :return: the track ignore masks
        """
        # for TAO tracks for classes which are not exhaustively labeled are not evaluated
        if not is_gt and is_not_exhaustively_labeled:
            track_ig_masks = [[1 for _ in range(num_ids)] for i in range(self.num_ig_masks)]
        else:
            # consider all tracks
            track_ig_masks = [[0 for _ in range(num_ids)]]

            # consider tracks with certain area
            if self.use_area_rngs:
                for rng in self.area_rngs:
                    track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= area <= rng[1] + np.finfo('float').eps
                                           else 1 for area in track_areas])

            # consider tracks with certain duration
            if self.use_time_rngs:
                for rng in self.time_rngs:
                    track_ig_masks.append([0 if rng[0] - np.finfo('float').eps <= length
                                                <= rng[1] + np.finfo('float').eps else 1 for length in track_lengths])

        # for YouTubeVIS evaluation tracks with crowd tag are not evaluated
        if is_gt and iscrowd:
            track_ig_masks = [np.logical_or(mask, iscrowd) for mask in track_ig_masks]

        return track_ig_masks

    @staticmethod
    def _compute_bb_track_iou(dt_track, gt_track, boxformat='xywh'):
        """
        Calculates the track IoU for one detected track and one ground truth track for bounding boxes
        :param dt_track: the detected track (format: dictionary with frame index as keys and
                            numpy arrays as values)
        :param gt_track: the ground truth track (format: dictionary with frame index as keys and
                        numpy array as values)
        :param boxformat: the format of the boxes
        :return: the track IoU
        """
        intersect = 0
        union = 0
        image_ids = set(gt_track.keys()) | set(dt_track.keys())
        for image in image_ids:
            g = gt_track.get(image, None)
            d = dt_track.get(image, None)
            if boxformat == 'xywh':
                if d is not None and g is not None:
                    dx, dy, dw, dh = d
                    gx, gy, gw, gh = g
                    w = max(min(dx + dw, gx + gw) - max(dx, gx), 0)
                    h = max(min(dy + dh, gy + gh) - max(dy, gy), 0)
                    i = w * h
                    u = dw * dh + gw * gh - i
                    intersect += i
                    union += u
                elif d is None and g is not None:
                    union += g[2] * g[3]
                elif d is not None and g is None:
                    union += d[2] * d[3]
            elif boxformat == 'x0y0x1y1':
                if d is not None and g is not None:
                    dx0, dy0, dx1, dy1 = d
                    gx0, gy0, gx1, gy1 = g
                    w = max(min(dx1, gx1) - max(dx0, gx0), 0)
                    h = max(min(dy1, gy1) - max(dy0, gy0), 0)
                    i = w * h
                    u = (dx1 - dx0) * (dy1 - dy0) + (gx1 - gx0) * (gy1 - gy0) - i
                    intersect += i
                    union += u
                elif d is None and g is not None:
                    union += (g[2] - g[0]) * (g[3] - g[1])
                elif d is not None and g is None:
                    union += (d[2] - d[0]) * (d[3] - d[1])
            else:
                raise TrackEvalException('BoxFormat not implemented')
        if intersect > union:
            raise TrackEvalException("Intersection value > union value. Are the box values corrupted?")
        return intersect / union if union > 0 else 0

    @staticmethod
    def _compute_mask_track_iou(dt_track, gt_track):
        """
        Calculates the track IoU for one detected track and one ground truth track for segmentation masks
        :param dt_track: the detected track (format: dictionary with frame index as keys and
                            pycocotools rle encoded masks as values)
        :param gt_track: the ground truth track (format: dictionary with frame index as keys and
                            pycocotools rle encoded masks as values)
        :return: the track IoU
        """
        # only loaded when needed to reduce minimum requirements
        from pycocotools import mask as mask_utils

        intersect = .0
        union = .0
        image_ids = set(gt_track.keys()) | set(dt_track.keys())
        for image in image_ids:
            g = gt_track.get(image, None)
            d = dt_track.get(image, None)
            if d and g:
                intersect += mask_utils.area(mask_utils.merge([d, g], True))
                union += mask_utils.area(mask_utils.merge([d, g], False))
            elif not d and g:
                union += mask_utils.area(g)
            elif d and not g:
                union += mask_utils.area(d)
        if union < 0.0 - np.finfo('float').eps:
            raise TrackEvalException("Union value < 0. Are the segmentaions corrupted?")
        if intersect > union:
            raise TrackEvalException("Intersection value > union value. Are the segmentations corrupted?")
        iou = intersect / union if union > 0.0 + np.finfo('float').eps else 0.0
        return iou

    @staticmethod
    def _compute_track_ious(dt, gt, iou_function='bbox', boxformat='xywh'):
        """
        Calculate track IoUs for a set of ground truth tracks and a set of detected tracks
        """

        if len(gt) == 0 and len(dt) == 0:
            return []

        if iou_function == 'bbox':
            track_iou_function = partial(TrackMAP._compute_bb_track_iou, boxformat=boxformat)
        elif iou_function == 'mask':
            track_iou_function = partial(TrackMAP._compute_mask_track_iou)
        else:
            raise Exception('IoU function not implemented')

        ious = np.zeros([len(dt), len(gt)])
        for i, j in np.ndindex(ious.shape):
            ious[i, j] = track_iou_function(dt[i], gt[j])
        return ious

    @staticmethod
    def _row_print(*argv):
        """Prints results in an evenly spaced rows, with more space in first row"""
        if len(argv) == 1:
            argv = argv[0]
        to_print = '%-40s' % argv[0]
        for v in argv[1:]:
            to_print += '%-12s' % str(v)
        print(to_print)