Spaces:
Running
Running
import torch | |
import torch.nn as nn | |
import torchvision.transforms as tvf | |
from .modules import InterestPointModule, CorrespondenceModule | |
def warp_homography_batch(sources, homographies): | |
""" | |
Batch warp keypoints given homographies. From https://github.com/TRI-ML/KP2D. | |
Parameters | |
---------- | |
sources: torch.Tensor (B,H,W,C) | |
Keypoints vector. | |
homographies: torch.Tensor (B,3,3) | |
Homographies. | |
Returns | |
------- | |
warped_sources: torch.Tensor (B,H,W,C) | |
Warped keypoints vector. | |
""" | |
B, H, W, _ = sources.shape | |
warped_sources = [] | |
for b in range(B): | |
source = sources[b].clone() | |
source = source.view(-1,2) | |
''' | |
[X, [M11, M12, M13 [x, M11*x + M12*y + M13 [M11, M12 [M13, | |
Y, = M21, M22, M23 * y, = M21*x + M22*y + M23 = [x, y] * M21, M22 + M23, | |
Z] M31, M32, M33] 1] M31*x + M32*y + M33 M31, M32].T M33] | |
''' | |
source = torch.addmm(homographies[b,:,2], source, homographies[b,:,:2].t()) | |
source.mul_(1/source[:,2].unsqueeze(1)) | |
source = source[:,:2].contiguous().view(H,W,2) | |
warped_sources.append(source) | |
return torch.stack(warped_sources, dim=0) | |
class PointModel(nn.Module): | |
def __init__(self, is_test=True): | |
super(PointModel, self).__init__() | |
self.is_test = is_test | |
self.interestpoint_module = InterestPointModule(is_test=self.is_test) | |
self.correspondence_module = CorrespondenceModule() | |
self.norm_rgb = tvf.Normalize(mean=[0.5, 0.5, 0.5], std=[0.225, 0.225, 0.225]) | |
def forward(self, *args): | |
if self.is_test: | |
img = args[0] | |
img = self.norm_rgb(img) | |
score, coord, desc = self.interestpoint_module(img) | |
return score, coord, desc | |
else: | |
source_score, source_coord, source_desc_block = self.interestpoint_module(args[0]) | |
target_score, target_coord, target_desc_block = self.interestpoint_module(args[1]) | |
B, _, H, W = args[0].shape | |
B, _, hc, wc = source_score.shape | |
device = source_score.device | |
# Normalize the coordinates from ([0, h], [0, w]) to ([0, 1], [0, 1]). | |
source_coord_norm = source_coord.clone() | |
source_coord_norm[:, 0] = (source_coord_norm[:, 0] / (float(W - 1) / 2.)) - 1. | |
source_coord_norm[:, 1] = (source_coord_norm[:, 1] / (float(H - 1) / 2.)) - 1. | |
source_coord_norm = source_coord_norm.permute(0, 2, 3, 1) | |
target_coord_norm = target_coord.clone() | |
target_coord_norm[:, 0] = (target_coord_norm[:, 0] / (float(W - 1) / 2.)) - 1. | |
target_coord_norm[:, 1] = (target_coord_norm[:, 1] / (float(H - 1) / 2.)) - 1. | |
target_coord_norm = target_coord_norm.permute(0, 2, 3, 1) | |
target_coord_warped_norm = warp_homography_batch(source_coord_norm, args[2]) | |
target_coord_warped = target_coord_warped_norm.clone() | |
# de-normlize the coordinates | |
target_coord_warped[:, :, :, 0] = (target_coord_warped[:, :, :, 0] + 1) * (float(W - 1) / 2.) | |
target_coord_warped[:, :, :, 1] = (target_coord_warped[:, :, :, 1] + 1) * (float(H - 1) / 2.) | |
target_coord_warped = target_coord_warped.permute(0, 3, 1, 2) | |
# Border mask | |
border_mask_ori = torch.ones(B, hc, wc) | |
border_mask_ori[:, 0] = 0 | |
border_mask_ori[:, hc - 1] = 0 | |
border_mask_ori[:, :, 0] = 0 | |
border_mask_ori[:, :, wc - 1] = 0 | |
border_mask_ori = border_mask_ori.gt(1e-3).to(device) | |
oob_mask2 = target_coord_warped_norm[:, :, :, 0].lt(1) & target_coord_warped_norm[:, :, :, 0].gt(-1) & target_coord_warped_norm[:, :, :, 1].lt(1) & target_coord_warped_norm[:, :, :, 1].gt(-1) | |
border_mask = border_mask_ori & oob_mask2 | |
# score | |
target_score_warped = torch.nn.functional.grid_sample(target_score, target_coord_warped_norm.detach(), align_corners=False) | |
# descriptor | |
source_desc2 = torch.nn.functional.grid_sample(source_desc_block[0], source_coord_norm.detach()) | |
source_desc3 = torch.nn.functional.grid_sample(source_desc_block[1], source_coord_norm.detach()) | |
source_aware = source_desc_block[2] | |
source_desc = torch.mul(source_desc2, source_aware[:, 0, :, :].unsqueeze(1).contiguous()) + torch.mul(source_desc3, source_aware[:, 1, :, :].unsqueeze(1).contiguous()) | |
target_desc2 = torch.nn.functional.grid_sample(target_desc_block[0], target_coord_norm.detach()) | |
target_desc3 = torch.nn.functional.grid_sample(target_desc_block[1], target_coord_norm.detach()) | |
target_aware = target_desc_block[2] | |
target_desc = torch.mul(target_desc2, target_aware[:, 0, :, :].unsqueeze(1).contiguous()) + torch.mul(target_desc3, target_aware[:, 1, :, :].unsqueeze(1).contiguous()) | |
target_desc2_warped = torch.nn.functional.grid_sample(target_desc_block[0], target_coord_warped_norm.detach()) | |
target_desc3_warped = torch.nn.functional.grid_sample(target_desc_block[1], target_coord_warped_norm.detach()) | |
target_aware_warped = torch.nn.functional.grid_sample(target_desc_block[2], target_coord_warped_norm.detach()) | |
target_desc_warped = torch.mul(target_desc2_warped, target_aware_warped[:, 0, :, :].unsqueeze(1).contiguous()) + torch.mul(target_desc3_warped, target_aware_warped[:, 1, :, :].unsqueeze(1).contiguous()) | |
confidence_matrix = self.correspondence_module(source_desc, target_desc) | |
confidence_matrix = torch.clamp(confidence_matrix, 1e-12, 1 - 1e-12) | |
output = { | |
'source_score': source_score, | |
'source_coord': source_coord, | |
'source_desc': source_desc, | |
'source_aware': source_aware, | |
'target_score': target_score, | |
'target_coord': target_coord, | |
'target_score_warped': target_score_warped, | |
'target_coord_warped': target_coord_warped, | |
'target_desc_warped': target_desc_warped, | |
'target_aware_warped': target_aware_warped, | |
'border_mask': border_mask, | |
'confidence_matrix': confidence_matrix | |
} | |
return output | |