Vincentqyw
fix: roma
4c12b36
raw
history blame
17.8 kB
"""
Loss function implementations.
"""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from kornia.geometry import warp_perspective
from ..misc.geometry_utils import keypoints_to_grid, get_dist_mask, get_common_line_mask
def get_loss_and_weights(model_cfg, device=torch.device("cuda")):
"""Get loss functions and either static or dynamic weighting."""
# Get the global weighting policy
w_policy = model_cfg.get("weighting_policy", "static")
if not w_policy in ["static", "dynamic"]:
raise ValueError("[Error] Not supported weighting policy.")
loss_func = {}
loss_weight = {}
# Get junction loss function and weight
w_junc, junc_loss_func = get_junction_loss_and_weight(model_cfg, w_policy)
loss_func["junc_loss"] = junc_loss_func.to(device)
loss_weight["w_junc"] = w_junc
# Get heatmap loss function and weight
w_heatmap, heatmap_loss_func = get_heatmap_loss_and_weight(
model_cfg, w_policy, device
)
loss_func["heatmap_loss"] = heatmap_loss_func.to(device)
loss_weight["w_heatmap"] = w_heatmap
# [Optionally] get descriptor loss function and weight
if model_cfg.get("descriptor_loss_func", None) is not None:
w_descriptor, descriptor_loss_func = get_descriptor_loss_and_weight(
model_cfg, w_policy
)
loss_func["descriptor_loss"] = descriptor_loss_func.to(device)
loss_weight["w_desc"] = w_descriptor
return loss_func, loss_weight
def get_junction_loss_and_weight(model_cfg, global_w_policy):
"""Get the junction loss function and weight."""
junction_loss_cfg = model_cfg.get("junction_loss_cfg", {})
# Get the junction loss weight
w_policy = junction_loss_cfg.get("policy", global_w_policy)
if w_policy == "static":
w_junc = torch.tensor(model_cfg["w_junc"], dtype=torch.float32)
elif w_policy == "dynamic":
w_junc = nn.Parameter(
torch.tensor(model_cfg["w_junc"], dtype=torch.float32), requires_grad=True
)
else:
raise ValueError("[Error] Unknown weighting policy for junction loss weight.")
# Get the junction loss function
junc_loss_name = model_cfg.get("junction_loss_func", "superpoint")
if junc_loss_name == "superpoint":
junc_loss_func = JunctionDetectionLoss(
model_cfg["grid_size"], model_cfg["keep_border_valid"]
)
else:
raise ValueError("[Error] Not supported junction loss function.")
return w_junc, junc_loss_func
def get_heatmap_loss_and_weight(model_cfg, global_w_policy, device):
"""Get the heatmap loss function and weight."""
heatmap_loss_cfg = model_cfg.get("heatmap_loss_cfg", {})
# Get the heatmap loss weight
w_policy = heatmap_loss_cfg.get("policy", global_w_policy)
if w_policy == "static":
w_heatmap = torch.tensor(model_cfg["w_heatmap"], dtype=torch.float32)
elif w_policy == "dynamic":
w_heatmap = nn.Parameter(
torch.tensor(model_cfg["w_heatmap"], dtype=torch.float32),
requires_grad=True,
)
else:
raise ValueError("[Error] Unknown weighting policy for junction loss weight.")
# Get the corresponding heatmap loss based on the config
heatmap_loss_name = model_cfg.get("heatmap_loss_func", "cross_entropy")
if heatmap_loss_name == "cross_entropy":
# Get the heatmap class weight (always static)
heatmap_class_w = model_cfg.get("w_heatmap_class", 1.0)
class_weight = (
torch.tensor(np.array([1.0, heatmap_class_w])).to(torch.float).to(device)
)
heatmap_loss_func = HeatmapLoss(class_weight=class_weight)
else:
raise ValueError("[Error] Not supported heatmap loss function.")
return w_heatmap, heatmap_loss_func
def get_descriptor_loss_and_weight(model_cfg, global_w_policy):
"""Get the descriptor loss function and weight."""
descriptor_loss_cfg = model_cfg.get("descriptor_loss_cfg", {})
# Get the descriptor loss weight
w_policy = descriptor_loss_cfg.get("policy", global_w_policy)
if w_policy == "static":
w_descriptor = torch.tensor(model_cfg["w_desc"], dtype=torch.float32)
elif w_policy == "dynamic":
w_descriptor = nn.Parameter(
torch.tensor(model_cfg["w_desc"], dtype=torch.float32), requires_grad=True
)
else:
raise ValueError("[Error] Unknown weighting policy for descriptor loss weight.")
# Get the descriptor loss function
descriptor_loss_name = model_cfg.get("descriptor_loss_func", "regular_sampling")
if descriptor_loss_name == "regular_sampling":
descriptor_loss_func = TripletDescriptorLoss(
descriptor_loss_cfg["grid_size"],
descriptor_loss_cfg["dist_threshold"],
descriptor_loss_cfg["margin"],
)
else:
raise ValueError("[Error] Not supported descriptor loss function.")
return w_descriptor, descriptor_loss_func
def space_to_depth(input_tensor, grid_size):
"""PixelUnshuffle for pytorch."""
N, C, H, W = input_tensor.size()
# (N, C, H//bs, bs, W//bs, bs)
x = input_tensor.view(N, C, H // grid_size, grid_size, W // grid_size, grid_size)
# (N, bs, bs, C, H//bs, W//bs)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous()
# (N, C*bs^2, H//bs, W//bs)
x = x.view(N, C * (grid_size**2), H // grid_size, W // grid_size)
return x
def junction_detection_loss(
junction_map, junc_predictions, valid_mask=None, grid_size=8, keep_border=True
):
"""Junction detection loss."""
# Convert junc_map to channel tensor
junc_map = space_to_depth(junction_map, grid_size)
map_shape = junc_map.shape[-2:]
batch_size = junc_map.shape[0]
dust_bin_label = (
torch.ones([batch_size, 1, map_shape[0], map_shape[1]])
.to(junc_map.device)
.to(torch.int)
)
junc_map = torch.cat([junc_map * 2, dust_bin_label], dim=1)
labels = torch.argmax(
junc_map.to(torch.float)
+ torch.distributions.Uniform(0, 0.1)
.sample(junc_map.shape)
.to(junc_map.device),
dim=1,
)
# Also convert the valid mask to channel tensor
valid_mask = torch.ones(junction_map.shape) if valid_mask is None else valid_mask
valid_mask = space_to_depth(valid_mask, grid_size)
# Compute junction loss on the border patch or not
if keep_border:
valid_mask = (
torch.sum(valid_mask.to(torch.bool).to(torch.int), dim=1, keepdim=True) > 0
)
else:
valid_mask = (
torch.sum(valid_mask.to(torch.bool).to(torch.int), dim=1, keepdim=True)
>= grid_size * grid_size
)
# Compute the classification loss
loss_func = nn.CrossEntropyLoss(reduction="none")
# The loss still need NCHW format
loss = loss_func(input=junc_predictions, target=labels.to(torch.long))
# Weighted sum by the valid mask
loss_ = torch.sum(
loss * torch.squeeze(valid_mask.to(torch.float), dim=1), dim=[0, 1, 2]
)
loss_final = loss_ / torch.sum(torch.squeeze(valid_mask.to(torch.float), dim=1))
return loss_final
def heatmap_loss(heatmap_gt, heatmap_pred, valid_mask=None, class_weight=None):
"""Heatmap prediction loss."""
# Compute the classification loss on each pixel
if class_weight is None:
loss_func = nn.CrossEntropyLoss(reduction="none")
else:
loss_func = nn.CrossEntropyLoss(class_weight, reduction="none")
loss = loss_func(
input=heatmap_pred, target=torch.squeeze(heatmap_gt.to(torch.long), dim=1)
)
# Weighted sum by the valid mask
# Sum over H and W
loss_spatial_sum = torch.sum(
loss * torch.squeeze(valid_mask.to(torch.float), dim=1), dim=[1, 2]
)
valid_spatial_sum = torch.sum(
torch.squeeze(valid_mask.to(torch.float32), dim=1), dim=[1, 2]
)
# Mean to single scalar over batch dimension
loss = torch.sum(loss_spatial_sum) / torch.sum(valid_spatial_sum)
return loss
class JunctionDetectionLoss(nn.Module):
"""Junction detection loss."""
def __init__(self, grid_size, keep_border):
super(JunctionDetectionLoss, self).__init__()
self.grid_size = grid_size
self.keep_border = keep_border
def forward(self, prediction, target, valid_mask=None):
return junction_detection_loss(
target, prediction, valid_mask, self.grid_size, self.keep_border
)
class HeatmapLoss(nn.Module):
"""Heatmap prediction loss."""
def __init__(self, class_weight):
super(HeatmapLoss, self).__init__()
self.class_weight = class_weight
def forward(self, prediction, target, valid_mask=None):
return heatmap_loss(target, prediction, valid_mask, self.class_weight)
class RegularizationLoss(nn.Module):
"""Module for regularization loss."""
def __init__(self):
super(RegularizationLoss, self).__init__()
self.name = "regularization_loss"
self.loss_init = torch.zeros([])
def forward(self, loss_weights):
# Place it to the same device
loss = self.loss_init.to(loss_weights["w_junc"].device)
for _, val in loss_weights.items():
if isinstance(val, nn.Parameter):
loss += val
return loss
def triplet_loss(
desc_pred1,
desc_pred2,
points1,
points2,
line_indices,
epoch,
grid_size=8,
dist_threshold=8,
init_dist_threshold=64,
margin=1,
):
"""Regular triplet loss for descriptor learning."""
b_size, _, Hc, Wc = desc_pred1.size()
img_size = (Hc * grid_size, Wc * grid_size)
device = desc_pred1.device
# Extract valid keypoints
n_points = line_indices.size()[1]
valid_points = line_indices.bool().flatten()
n_correct_points = torch.sum(valid_points).item()
if n_correct_points == 0:
return torch.tensor(0.0, dtype=torch.float, device=device)
# Check which keypoints are too close to be matched
# dist_threshold is decreased at each epoch for easier training
dist_threshold = max(dist_threshold, 2 * init_dist_threshold // (epoch + 1))
dist_mask = get_dist_mask(points1, points2, valid_points, dist_threshold)
# Additionally ban negative mining along the same line
common_line_mask = get_common_line_mask(line_indices, valid_points)
dist_mask = dist_mask | common_line_mask
# Convert the keypoints to a grid suitable for interpolation
grid1 = keypoints_to_grid(points1, img_size)
grid2 = keypoints_to_grid(points2, img_size)
# Extract the descriptors
desc1 = (
F.grid_sample(desc_pred1, grid1)
.permute(0, 2, 3, 1)
.reshape(b_size * n_points, -1)[valid_points]
)
desc1 = F.normalize(desc1, dim=1)
desc2 = (
F.grid_sample(desc_pred2, grid2)
.permute(0, 2, 3, 1)
.reshape(b_size * n_points, -1)[valid_points]
)
desc2 = F.normalize(desc2, dim=1)
desc_dists = 2 - 2 * (desc1 @ desc2.t())
# Positive distance loss
pos_dist = torch.diag(desc_dists)
# Negative distance loss
max_dist = torch.tensor(4.0, dtype=torch.float, device=device)
desc_dists[
torch.arange(n_correct_points, dtype=torch.long),
torch.arange(n_correct_points, dtype=torch.long),
] = max_dist
desc_dists[dist_mask] = max_dist
neg_dist = torch.min(
torch.min(desc_dists, dim=1)[0], torch.min(desc_dists, dim=0)[0]
)
triplet_loss = F.relu(margin + pos_dist - neg_dist)
return triplet_loss, grid1, grid2, valid_points
class TripletDescriptorLoss(nn.Module):
"""Triplet descriptor loss."""
def __init__(self, grid_size, dist_threshold, margin):
super(TripletDescriptorLoss, self).__init__()
self.grid_size = grid_size
self.init_dist_threshold = 64
self.dist_threshold = dist_threshold
self.margin = margin
def forward(self, desc_pred1, desc_pred2, points1, points2, line_indices, epoch):
return self.descriptor_loss(
desc_pred1, desc_pred2, points1, points2, line_indices, epoch
)
# The descriptor loss based on regularly sampled points along the lines
def descriptor_loss(
self, desc_pred1, desc_pred2, points1, points2, line_indices, epoch
):
return torch.mean(
triplet_loss(
desc_pred1,
desc_pred2,
points1,
points2,
line_indices,
epoch,
self.grid_size,
self.dist_threshold,
self.init_dist_threshold,
self.margin,
)[0]
)
class TotalLoss(nn.Module):
"""Total loss summing junction, heatma, descriptor
and regularization losses."""
def __init__(self, loss_funcs, loss_weights, weighting_policy):
super(TotalLoss, self).__init__()
# Whether we need to compute the descriptor loss
self.compute_descriptors = "descriptor_loss" in loss_funcs.keys()
self.loss_funcs = loss_funcs
self.loss_weights = loss_weights
self.weighting_policy = weighting_policy
# Always add regularization loss (it will return zero if not used)
self.loss_funcs["reg_loss"] = RegularizationLoss().cuda()
def forward(
self, junc_pred, junc_target, heatmap_pred, heatmap_target, valid_mask=None
):
"""Detection only loss."""
# Compute the junction loss
junc_loss = self.loss_funcs["junc_loss"](junc_pred, junc_target, valid_mask)
# Compute the heatmap loss
heatmap_loss = self.loss_funcs["heatmap_loss"](
heatmap_pred, heatmap_target, valid_mask
)
# Compute the total loss.
if self.weighting_policy == "dynamic":
reg_loss = self.loss_funcs["reg_loss"](self.loss_weights)
total_loss = (
junc_loss * torch.exp(-self.loss_weights["w_junc"])
+ heatmap_loss * torch.exp(-self.loss_weights["w_heatmap"])
+ reg_loss
)
return {
"total_loss": total_loss,
"junc_loss": junc_loss,
"heatmap_loss": heatmap_loss,
"reg_loss": reg_loss,
"w_junc": torch.exp(-self.loss_weights["w_junc"]).item(),
"w_heatmap": torch.exp(-self.loss_weights["w_heatmap"]).item(),
}
elif self.weighting_policy == "static":
total_loss = (
junc_loss * self.loss_weights["w_junc"]
+ heatmap_loss * self.loss_weights["w_heatmap"]
)
return {
"total_loss": total_loss,
"junc_loss": junc_loss,
"heatmap_loss": heatmap_loss,
}
else:
raise ValueError("[Error] Unknown weighting policy.")
def forward_descriptors(
self,
junc_map_pred1,
junc_map_pred2,
junc_map_target1,
junc_map_target2,
heatmap_pred1,
heatmap_pred2,
heatmap_target1,
heatmap_target2,
line_points1,
line_points2,
line_indices,
desc_pred1,
desc_pred2,
epoch,
valid_mask1=None,
valid_mask2=None,
):
"""Loss for detection + description."""
# Compute junction loss
junc_loss = self.loss_funcs["junc_loss"](
torch.cat([junc_map_pred1, junc_map_pred2], dim=0),
torch.cat([junc_map_target1, junc_map_target2], dim=0),
torch.cat([valid_mask1, valid_mask2], dim=0),
)
# Get junction loss weight (dynamic or not)
if isinstance(self.loss_weights["w_junc"], nn.Parameter):
w_junc = torch.exp(-self.loss_weights["w_junc"])
else:
w_junc = self.loss_weights["w_junc"]
# Compute heatmap loss
heatmap_loss = self.loss_funcs["heatmap_loss"](
torch.cat([heatmap_pred1, heatmap_pred2], dim=0),
torch.cat([heatmap_target1, heatmap_target2], dim=0),
torch.cat([valid_mask1, valid_mask2], dim=0),
)
# Get heatmap loss weight (dynamic or not)
if isinstance(self.loss_weights["w_heatmap"], nn.Parameter):
w_heatmap = torch.exp(-self.loss_weights["w_heatmap"])
else:
w_heatmap = self.loss_weights["w_heatmap"]
# Compute the descriptor loss
descriptor_loss = self.loss_funcs["descriptor_loss"](
desc_pred1, desc_pred2, line_points1, line_points2, line_indices, epoch
)
# Get descriptor loss weight (dynamic or not)
if isinstance(self.loss_weights["w_desc"], nn.Parameter):
w_descriptor = torch.exp(-self.loss_weights["w_desc"])
else:
w_descriptor = self.loss_weights["w_desc"]
# Update the total loss
total_loss = (
junc_loss * w_junc
+ heatmap_loss * w_heatmap
+ descriptor_loss * w_descriptor
)
outputs = {
"junc_loss": junc_loss,
"heatmap_loss": heatmap_loss,
"w_junc": w_junc.item() if isinstance(w_junc, nn.Parameter) else w_junc,
"w_heatmap": w_heatmap.item()
if isinstance(w_heatmap, nn.Parameter)
else w_heatmap,
"descriptor_loss": descriptor_loss,
"w_desc": w_descriptor.item()
if isinstance(w_descriptor, nn.Parameter)
else w_descriptor,
}
# Compute the regularization loss
reg_loss = self.loss_funcs["reg_loss"](self.loss_weights)
total_loss += reg_loss
outputs.update({"reg_loss": reg_loss, "total_loss": total_loss})
return outputs