# VACE tracking metrics (Manohar et al., 2006).
import numpy as np | |
from scipy.optimize import linear_sum_assignment | |
from ._base_metric import _BaseMetric | |
from .. import _timing | |
class VACE(_BaseMetric):
    """Class which implements the VACE metrics.

    The metrics are described in:
    Manohar et al. (2006) "Performance Evaluation of Object Detection and Tracking in Video"
    https://link.springer.com/chapter/10.1007/11612704_16

    This implementation uses the "relaxed" variant of the metrics,
    where an overlap threshold is applied in each frame.
    """

    def __init__(self, config=None):
        super().__init__()
        self.integer_fields = ['VACE_IDs', 'VACE_GT_IDs', 'num_non_empty_timesteps']
        self.float_fields = ['STDA', 'ATA', 'FDA', 'SFDA']
        self.fields = self.integer_fields + self.float_fields
        self.summary_fields = ['SFDA', 'ATA']

        # Fields that are accumulated over multiple videos.
        self._additive_fields = self.integer_fields + ['STDA', 'FDA']

        # Per-frame spatial overlap threshold for the "relaxed" variant.
        self.threshold = 0.5

    def eval_sequence(self, data):
        """Calculates VACE metrics for one sequence.

        Depends on the fields:
            data['num_gt_ids']
            data['num_tracker_ids']
            data['gt_ids']
            data['tracker_ids']
            data['similarity_scores']
        """
        res = {}

        # Obtain Average Tracking Accuracy (ATA) using track correspondence.
        # Obtain counts necessary to compute temporal IOU.
        # Assume that integer counts can be represented exactly as floats.
        potential_matches_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        gt_id_count = np.zeros(data['num_gt_ids'])
        tracker_id_count = np.zeros(data['num_tracker_ids'])
        both_present_count = np.zeros((data['num_gt_ids'], data['num_tracker_ids']))
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            # Count the number of frames in which two tracks satisfy the overlap criterion.
            matches_mask = np.greater_equal(data['similarity_scores'][t], self.threshold)
            match_idx_gt, match_idx_tracker = np.nonzero(matches_mask)
            potential_matches_count[gt_ids_t[match_idx_gt], tracker_ids_t[match_idx_tracker]] += 1
            # Count the number of frames in which the tracks are present.
            gt_id_count[gt_ids_t] += 1
            tracker_id_count[tracker_ids_t] += 1
            both_present_count[gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]] += 1
        # Number of frames in which either track is present (union of the two sets of frames).
        union_count = (gt_id_count[:, np.newaxis]
                       + tracker_id_count[np.newaxis, :]
                       - both_present_count)
        # The denominator should always be non-zero if all tracks are non-empty.
        with np.errstate(divide='raise', invalid='raise'):
            temporal_iou = potential_matches_count / union_count
        # Find assignment that maximizes temporal IOU.
        match_rows, match_cols = linear_sum_assignment(-temporal_iou)
        res['STDA'] = temporal_iou[match_rows, match_cols].sum()
        res['VACE_IDs'] = data['num_tracker_ids']
        res['VACE_GT_IDs'] = data['num_gt_ids']

        # Obtain Frame Detection Accuracy (FDA) using per-frame correspondence.
        non_empty_count = 0
        fda = 0
        for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(data['gt_ids'], data['tracker_ids'])):
            n_g = len(gt_ids_t)
            n_d = len(tracker_ids_t)
            if not (n_g or n_d):
                continue
            # n_g > 0 or n_d > 0
            non_empty_count += 1
            if not (n_g and n_d):
                continue
            # n_g > 0 and n_d > 0
            spatial_overlap = data['similarity_scores'][t]
            match_rows, match_cols = linear_sum_assignment(-spatial_overlap)
            overlap_ratio = spatial_overlap[match_rows, match_cols].sum()
            fda += overlap_ratio / (0.5 * (n_g + n_d))
        res['FDA'] = fda
        res['num_non_empty_timesteps'] = non_empty_count

        res.update(self._compute_final_fields(res))
        return res

    def combine_classes_class_averaged(self, all_res, ignore_empty_classes=True):
        """Combines metrics across all classes by averaging over the class values.

        If 'ignore_empty_classes' is True, then it only sums over classes with at least
        one gt or predicted detection.
        """
        res = {}
        for field in self.fields:
            if ignore_empty_classes:
                res[field] = np.mean([v[field] for v in all_res.values()
                                      if v['VACE_GT_IDs'] > 0 or v['VACE_IDs'] > 0], axis=0)
            else:
                res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
        return res

    def combine_classes_det_averaged(self, all_res):
        """Combines metrics across all classes by averaging over the detection values"""
        res = {}
        for field in self._additive_fields:
            res[field] = _BaseMetric._combine_sum(all_res, field)
        res = self._compute_final_fields(res)
        return res

    def combine_sequences(self, all_res):
        """Combines metrics across all sequences"""
        res = {}
        for header in self._additive_fields:
            res[header] = _BaseMetric._combine_sum(all_res, header)
        res.update(self._compute_final_fields(res))
        return res

    @staticmethod
    def _compute_final_fields(additive):
        """Computes the final ATA and SFDA values from their additive components.

        BUG FIX: this was previously a plain instance method taking a single
        positional parameter, yet every call site invokes it as
        `self._compute_final_fields(res)` — binding `self` to `additive` and
        passing `res` as an unexpected second argument, which raises TypeError.
        The missing @staticmethod decorator restores the intended calling
        convention.
        """
        final = {}
        with np.errstate(invalid='ignore'):  # Permit nan results.
            final['ATA'] = (additive['STDA'] /
                            (0.5 * (additive['VACE_IDs'] + additive['VACE_GT_IDs'])))
            final['SFDA'] = additive['FDA'] / additive['num_non_empty_timesteps']
        return final