import numpy as np
from scipy.optimize import linear_sum_assignment
from ._base_metric import _BaseMetric
from .. import _timing


class VACE(_BaseMetric):
    """Class which implements the VACE metrics.

    The metrics are described in:
    Manohar et al. (2006) "Performance Evaluation of Object Detection and Tracking in Video"
    https://link.springer.com/chapter/10.1007/11612704_16

    This implementation uses the "relaxed" variant of the metrics,
    where an overlap threshold is applied in each frame.
    """

    def __init__(self, config=None):
        super().__init__()
        self.integer_fields = ['VACE_IDs', 'VACE_GT_IDs', 'num_non_empty_timesteps']
        self.float_fields = ['STDA', 'ATA', 'FDA', 'SFDA']
        self.fields = self.integer_fields + self.float_fields
        self.summary_fields = ['SFDA', 'ATA']

        # Fields that are accumulated over multiple videos.
        self._additive_fields = self.integer_fields + ['STDA', 'FDA']

        self.threshold = 0.5

    @_timing.time
    def eval_sequence(self, data):
        """Calculates VACE metrics for one sequence.

        Depends on the fields:
            data['num_gt_ids']
            data['num_tracker_ids']
            data['gt_ids']
            data['tracker_ids']
            data['similarity_scores']
        """
        res = {}

        # Obtain Average Tracking Accuracy (ATA) using track correspondence.
        # Obtain counts necessary to compute temporal IOU.
        # Assume that integer counts can be represented exactly as floats.
        potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        gt_id_count = np.zeros(data['num_gt_ids'])
        tracker_id_count = np.zeros(data['num_tracker_ids'])
        both_present_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Count the number of frames in which two tracks satisfy the overlap criterion.
            matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
            match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
            potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
            # Count the number of frames in which the tracks are present.
            gt_id_count[gt_ids_t] += 1
            tracker_id_count[tracker_ids_t] += 1
            both_present_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += 1
        # Number of frames in which either track is present (union of the two sets of frames).
        union_count = (gt_id_count[:, np.newaxis]
                       + tracker_id_count[np.newaxis, :]
                       - both_present_count)
        # The denominator should always be non-zero if all tracks are non-empty.
        with np.errstate(divide='raise', invalid='raise'):
            temporal_iou = potential_matches_count / union_count
        # Find assignment that maximizes temporal IOU.
        match_rows, match_cols = linear_sum_assignment(-temporal_iou)
        res['STDA'] = temporal_iou[match_rows, match_cols].sum()
        res['VACE_IDs'] = data['num_tracker_ids']
        res['VACE_GT_IDs'] = data['num_gt_ids']

        # Obtain Frame Detection Accuracy (FDA) using per-frame correspondence.
        non_empty_count = 0
        fda = 0
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            n_g = len(gt_ids_t)
            n_d = len(tracker_ids_t)
            if not (n_g or n_d):
                continue
            # n_g > 0 or n_d > 0
            non_empty_count += 1
            if not (n_g and n_d):
                continue
            # n_g > 0 and n_d > 0
            spatial_overlap = data['similarity_scores'][t]
            match_rows, match_cols = linear_sum_assignment(-spatial_overlap)
            overlap_ratio = spatial_overlap[match_rows, match_cols].sum()
            fda += overlap_ratio / (0.5 * (n_g + n_d))
        res['FDA'] = fda
        res['num_non_empty_timesteps'] = non_empty_count

        res.update(self._compute_final_fields(res))
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
        """Combines metrics across all classes by averaging over the class values.
        If 'ignore_empty_classes' is True, then it only averages over classes with at least one gt or predicted detection.
        """
        res = {}
        for field in self.fields:
            if ignore_empty_classes:
                res[field] = np.mean([v[field] for v in all_res.values()
                                      if v['VACE_GT_IDs'] > 0 or v['VACE_IDs'] > 0], axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res
    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self._additive_fields:
            res[field] = _BaseMetric._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for header in self._additive_fields:
            res[header] = _BaseMetric._combine_sum(all_res, header)
        res.update(self._compute_final_fields(res))
        return res

    @staticmethod
    def _compute_final_fields(additive):
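        """Calculates the final ATA and SFDA values from the accumulated additive fields."""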
        final = {}
        with np.errstate(invalid='ignore'):  # Permit nan results.
            final['ATA'] = (additive['STDA'] /
                            (0.5 * (additive['VACE_IDs'] + additive['VACE_GT_IDs'])))
            final['SFDA'] = additive['FDA'] / additive['num_non_empty_timesteps']
        return final
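

# Minimal usage sketch (an illustration, not part of the original module): it
# assumes this file is imported as part of the TrackEval package so that the
# relative imports above resolve; the import path below is hypothetical.
#
#   import numpy as np
#   from trackeval.metrics import VACE
#
#   metric = VACE()
#   data = {
#       'num_gt_ids': 1,
#       'num_tracker_ids': 1,
#       'gt_ids': [np.array([0]), np.array([0])],
#       'tracker_ids': [np.array([0]), np.array([0])],
#       'similarity_scores': [np.array([[0.9]]), np.array([[0.8]])],
#   }
#   res = metric.eval_sequence(data)
#   # Both frames exceed the 0.5 threshold, so the single track pair has
#   # temporal IoU = 1.0 and ATA = 1.0; FDA = 0.9 + 0.8 = 1.7 over 2 non-empty
#   # frames, giving SFDA = 0.85.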