|
from typing import Iterator, List, Tuple

import numpy as np
|
|
|
|
|
def connected_component(r: np.ndarray, c: np.ndarray) -> Iterator[List[int]]:
    """Group consecutive positions that form horizontal runs.

    Two neighbouring positions ``i - 1`` and ``i`` belong to the same run when
    they share the same row value and the column value at ``i`` is exactly one
    more than the column value at ``i - 1``.

    Args:
    ----
        r (np.ndarray): 1-D array of row indices.
        c (np.ndarray): 1-D array of column indices, same length as ``r``.

    Yields:
    ------
        List[int]: Positions (indices into ``r`` / ``c``) of one run, in
            ascending order. Nothing is yielded when the input is empty.

    """
    count = r.size
    if count == 0:
        # No positions means no components; do not fabricate a [0] entry.
        return

    # A run ends wherever the row changes or the column is not the direct
    # successor of the previous column.
    is_break = (r[1:] != r[:-1]) | (c[1:] != c[:-1] + 1)
    run_starts = np.flatnonzero(is_break) + 1

    bounds = [0, *run_starts.tolist(), count]
    for start, stop in zip(bounds[:-1], bounds[1:]):
        yield list(range(start, stop))
|
|
|
|
|
def nms_horizontal(ratio: np.ndarray, threshold: float) -> np.ndarray:
    """Thin above-threshold responses along each row.

    Entries of ``ratio`` greater than ``threshold`` are grouped into runs of
    consecutive columns within a row; only the largest entry of every run
    (the first one on ties) is kept.

    Args:
    ----
        ratio (np.ndarray): Input ratio matrix.
        threshold (float): Threshold for NMS.

    Returns:
    -------
        np.ndarray: Boolean mask shaped like ``ratio`` marking the kept entries.

    """
    keep = np.zeros_like(ratio, dtype=bool)
    rows, cols = np.nonzero(ratio > threshold)
    if rows.size == 0:
        # Nothing exceeds the threshold, so the mask stays all False.
        return keep

    for run in connected_component(rows, cols):
        run_values = [ratio[rows[k], cols[k]] for k in run]
        winner = run[np.argmax(run_values)]
        keep[rows[winner], cols[winner]] = True
    return keep
|
|
|
|
|
def nms_vertical(ratio: np.ndarray, threshold: float) -> np.ndarray:
    """Thin above-threshold responses along each column.

    Implemented by transposing the input, running the horizontal NMS and
    transposing the resulting mask back.

    Args:
    ----
        ratio (np.ndarray): Input ratio matrix.
        threshold (float): Threshold for NMS.

    Returns:
    -------
        np.ndarray: Binary mask after applying NMS.

    """
    flipped = np.transpose(ratio)
    flipped_mask = nms_horizontal(flipped, threshold)
    return np.transpose(flipped_mask)
|
|
|
|
|
def fgbg_depth(
    d: np.ndarray, t: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compare each pixel with its direct neighbour along the last two axes.

    For every horizontally or vertically adjacent pair, the ratio of one value
    to the other is tested against ``t``.

    Args:
    ----
        d (np.ndarray): Depth matrix.
        t (float): Threshold the neighbour ratio has to exceed.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean arrays in
            the order (left, top, right, bottom). The horizontal ones are one
            column narrower than ``d``; the vertical ones are one row shorter.

    """
    # Views of the two members of every horizontal / vertical neighbour pair.
    left_px, right_px = d[..., :, :-1], d[..., :, 1:]
    upper_px, lower_px = d[..., :-1, :], d[..., 1:, :]

    right_mask = (right_px / left_px) > t
    left_mask = (left_px / right_px) > t
    bottom_mask = (lower_px / upper_px) > t
    top_mask = (upper_px / lower_px) > t
    return left_mask, top_mask, right_mask, bottom_mask
|
|
|
|
|
def fgbg_depth_thinned(
    d: np.ndarray, t: float
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Compare neighbouring pixels and thin the result with NMS.

    The same neighbour ratios as in the un-thinned variant are computed, but
    instead of a plain threshold test the horizontal ratios go through
    ``nms_horizontal`` and the vertical ratios through ``nms_vertical``.

    Args:
    ----
        d (np.ndarray): Depth matrix.
        t (float): Threshold for NMS.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean masks in
            the order (left, top, right, bottom) with NMS applied.

    """
    # Views of the two members of every horizontal / vertical neighbour pair.
    left_px, right_px = d[..., :, :-1], d[..., :, 1:]
    upper_px, lower_px = d[..., :-1, :], d[..., 1:, :]

    right_mask = nms_horizontal(right_px / left_px, t)
    left_mask = nms_horizontal(left_px / right_px, t)
    bottom_mask = nms_vertical(lower_px / upper_px, t)
    top_mask = nms_vertical(upper_px / lower_px, t)
    return left_mask, top_mask, right_mask, bottom_mask
|
|
|
|
|
def fgbg_binary_mask(
    d: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Locate transitions between neighbouring pixels of a boolean mask.

    A pair is flagged when one member is True and the adjacent member along
    the last or second-to-last axis is False.

    Args:
    ----
        d (np.ndarray): Boolean mask; any other dtype trips the assertion.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Boolean arrays in
            the order (left, top, right, bottom). The horizontal ones are one
            column narrower than ``d``; the vertical ones are one row shorter.

    """
    assert d.dtype == bool

    # Views of the two members of every horizontal / vertical neighbour pair.
    left_px, right_px = d[..., :, :-1], d[..., :, 1:]
    upper_px, lower_px = d[..., :-1, :], d[..., 1:, :]

    right_mask = right_px & ~left_px
    left_mask = left_px & ~right_px
    bottom_mask = lower_px & ~upper_px
    top_mask = upper_px & ~lower_px
    return left_mask, top_mask, right_mask, bottom_mask
|
|
|
|
|
def edge_recall_matting(pr: np.ndarray, gt: np.ndarray, t: float) -> float:
    """Compute the mean directional edge recall against a boolean mask.

    Edges of the prediction come from ``fgbg_depth_thinned`` and edges of the
    ground truth from ``fgbg_binary_mask``. For each of the four directions the
    recall is the number of shared edge pixels divided by the number of
    ground-truth edge pixels (denominator floored at 1); the four values are
    averaged.

    Args:
    ----
        pr (np.ndarray): Predicted depth matrix.
        gt (np.ndarray): Ground truth binary mask (boolean dtype is asserted).
        t (float): Threshold for NMS.

    Returns:
    -------
        float: Edge recall value.

    """
    assert gt.dtype == bool

    predicted_edges = fgbg_depth_thinned(pr, t)
    target_edges = fgbg_binary_mask(gt)

    recall_total = 0.0
    for pred_dir, target_dir in zip(predicted_edges, target_edges):
        matched = np.count_nonzero(pred_dir & target_dir)
        # Floor the denominator at 1 so a direction without edges adds 0.
        recall_total += matched / max(np.count_nonzero(target_dir), 1)
    return 0.25 * recall_total
|
|
|
|
|
def boundary_f1(
    pr: np.ndarray,
    gt: np.ndarray,
    t: float,
    return_p: bool = False,
    return_r: bool = False,
) -> float:
    """Calculate Boundary F1 score.

    Edges are extracted from both inputs with ``fgbg_depth`` at threshold
    ``t``. Precision and recall are computed per direction and averaged over
    the four directions; every denominator is floored at 1.

    Args:
    ----
        pr (np.ndarray): Predicted depth matrix.
        gt (np.ndarray): Ground truth depth matrix.
        t (float): Threshold for comparison.
        return_p (bool, optional): If True, return precision. Defaults to False.
        return_r (bool, optional): If True, return recall. Defaults to False.

    Returns:
    -------
        float: Boundary F1 score, or precision, or recall depending on the
            flags. ``return_p`` takes priority when both flags are set. 0.0 is
            returned whenever precision and recall sum to zero.

    """
    predicted_edges = fgbg_depth(pr, t)
    target_edges = fgbg_depth(gt, t)

    r = 0.0
    p = 0.0
    for pred_dir, target_dir in zip(predicted_edges, target_edges):
        # The overlap is shared by precision and recall; count it only once.
        matched = np.count_nonzero(pred_dir & target_dir)
        r += matched / max(np.count_nonzero(target_dir), 1)
        p += matched / max(np.count_nonzero(pred_dir), 1)
    r *= 0.25
    p *= 0.25

    if r + p == 0:
        # Also guards the harmonic mean below against a zero denominator.
        return 0.0
    if return_p:
        return p
    if return_r:
        return r
    return 2 * (r * p) / (r + p)
|
|
|
|
|
def get_thresholds_and_weights(
    t_min: float, t_max: float, N: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Build evenly spaced thresholds and weights proportional to them.

    Args:
    ----
        t_min (float): Minimum threshold.
        t_max (float): Maximum threshold.
        N (int): Number of thresholds.

    Returns:
    -------
        Tuple[np.ndarray, np.ndarray]: ``N`` thresholds linearly spaced from
            ``t_min`` to ``t_max``, and each threshold divided by the sum of
            all thresholds.

    """
    thresholds = np.linspace(t_min, t_max, num=N)
    threshold_total = thresholds.sum()
    return thresholds, thresholds / threshold_total
|
|
|
|
|
def invert_depth(depth: np.ndarray, eps: float = 1e-6) -> np.ndarray:
    """Return the element-wise reciprocal of a depth map.

    Values below ``eps`` are raised to ``eps`` before the division.

    Args:
    ----
        depth (np.ndarray): Depth map to be inverted.
        eps (float): Lower bound applied to ``depth`` before inverting
            (default is 1e-6).

    Returns:
    -------
        np.ndarray: Inverted depth map.

    """
    floored = depth.clip(min=eps)
    return 1.0 / floored
|
|
|
|
|
def SI_boundary_F1(
    predicted_depth: np.ndarray,
    target_depth: np.ndarray,
    t_min: float = 1.05,
    t_max: float = 1.25,
    N: int = 10,
) -> float:
    """Calculate Scale-Invariant Boundary F1 Score for depth-based ground-truth.

    Both depth maps are inverted once, the boundary F1 score is evaluated at
    ``N`` thresholds spaced linearly between ``t_min`` and ``t_max``, and the
    scores are combined using the weights from ``get_thresholds_and_weights``.

    Args:
    ----
        predicted_depth (np.ndarray): Predicted depth matrix (2-D).
        target_depth (np.ndarray): Ground truth depth matrix (2-D).
        t_min (float, optional): Minimum threshold. Defaults to 1.05.
        t_max (float, optional): Maximum threshold. Defaults to 1.25.
        N (int, optional): Number of thresholds. Defaults to 10.

    Returns:
    -------
        float: Scale-Invariant Boundary F1 Score.

    """
    assert predicted_depth.ndim == target_depth.ndim == 2
    thresholds, weights = get_thresholds_and_weights(t_min, t_max, N)

    # The inverted maps do not depend on the threshold; compute them once
    # instead of once per threshold.
    inverse_predicted = invert_depth(predicted_depth)
    inverse_target = invert_depth(target_depth)

    f1_scores = np.array(
        [boundary_f1(inverse_predicted, inverse_target, t) for t in thresholds]
    )
    return np.sum(f1_scores * weights)
|
|
|
|
|
def SI_boundary_Recall(
    predicted_depth: np.ndarray,
    target_mask: np.ndarray,
    t_min: float = 1.05,
    t_max: float = 1.25,
    N: int = 10,
    alpha_threshold: float = 0.1,
) -> float:
    """Calculate Scale-Invariant Boundary Recall Score for mask-based ground-truth.

    The target is binarized with ``target_mask > alpha_threshold`` and the
    predicted depth is inverted once. Edge recall is then evaluated at ``N``
    thresholds spaced linearly between ``t_min`` and ``t_max`` and combined
    using the weights from ``get_thresholds_and_weights``.

    Args:
    ----
        predicted_depth (np.ndarray): Predicted depth matrix (2-D).
        target_mask (np.ndarray): Ground truth mask (2-D), binarized internally.
        t_min (float, optional): Minimum threshold. Defaults to 1.05.
        t_max (float, optional): Maximum threshold. Defaults to 1.25.
        N (int, optional): Number of thresholds. Defaults to 10.
        alpha_threshold (float, optional): Threshold for alpha masking. Defaults to 0.1.

    Returns:
    -------
        float: Scale-Invariant Boundary Recall Score.

    """
    assert predicted_depth.ndim == target_mask.ndim == 2
    thresholds, weights = get_thresholds_and_weights(t_min, t_max, N)
    thresholded_target = target_mask > alpha_threshold

    # The inverted prediction does not depend on the threshold; compute it
    # once instead of once per threshold.
    inverse_predicted = invert_depth(predicted_depth)

    recall_scores = np.array(
        [
            edge_recall_matting(inverse_predicted, thresholded_target, t=float(t))
            for t in thresholds
        ]
    )
    return np.sum(recall_scores * weights)
|
|