Vincentqyw
update: features and matchers
404d2af
raw
history blame
7.16 kB
# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use
import pdb
import torch
import torch.nn as nn
import torch.nn.functional as F
class BaseNet(nn.Module):
    """Base class for networks that output per-pixel descriptors and confidences.

    Takes a list of images as input, and returns for each image:
        - a pixelwise descriptor
        - a pixelwise confidence

    Subclasses implement :meth:`forward_one`, which processes a single image
    and returns a dictionary; :meth:`forward` merges those dictionaries.
    """

    def softmax(self, ux):
        """Turn raw confidence scores into a single-channel map.

        Args:
            ux: tensor whose dimension 1 has size 1 or 2.

        Returns:
            For a size-1 dimension: ``softplus(ux) / (1 + softplus(ux))``.
            For a size-2 dimension: the softmax over dimension 1, keeping
            only index 1 (as a size-1 slice, so the rank is unchanged).

        Raises:
            ValueError: if dimension 1 of ``ux`` is neither 1 nor 2
                (previously this silently returned ``None``).
        """
        n_chan = ux.shape[1]
        if n_chan == 1:
            x = F.softplus(ux)
            return x / (1 + x)  # for sure in [0,1], much less plateaus than softmax
        if n_chan == 2:
            return F.softmax(ux, dim=1)[:, 1:2]
        raise ValueError(
            f"softmax expects 1 or 2 channels along dim 1, got {n_chan}"
        )

    def normalize(self, x, ureliability, urepeatability):
        """Build the output dictionary for one image.

        Args:
            x: raw descriptors; L2-normalized along dimension 1.
            ureliability: raw reliability scores, passed through :meth:`softmax`.
            urepeatability: raw repeatability scores, passed through :meth:`softmax`.

        Returns:
            dict with keys ``descriptors``, ``repeatability`` and ``reliability``.
        """
        return dict(
            descriptors=F.normalize(x, p=2, dim=1),
            repeatability=self.softmax(urepeatability),
            reliability=self.softmax(ureliability),
        )

    def forward_one(self, x):
        """Process a single image; must be provided by subclasses."""
        raise NotImplementedError()

    def forward(self, imgs, **kw):
        """Run :meth:`forward_one` on every image and merge the results.

        Args:
            imgs: iterable of inputs, each handed to :meth:`forward_one`.
            **kw: extra entries copied verbatim into the returned dictionary.

        Returns:
            dict mapping each key produced by :meth:`forward_one` to the list
            of per-image values (images lacking a key are skipped for that
            key), plus ``imgs`` and the entries of ``kw``.
        """
        res = [self.forward_one(img) for img in imgs]
        # merge all dictionaries into one; dict.fromkeys keeps first-seen key
        # order so the layout of the result is deterministic
        keys = dict.fromkeys(k for r in res for k in r)
        merged = {k: [r[k] for r in res if k in r] for k in keys}
        return dict(merged, imgs=imgs, **kw)
class PatchNet(BaseNet):
    """Helper class to construct a fully-convolutional network that
    extracts an l2-normalized patch descriptor.

    Layers are appended one at a time with :meth:`_add_conv` and stored in
    ``self.ops``; :meth:`forward_one` applies them in order.
    """

    def __init__(self, inchan=3, dilated=True, dilation=1, bn=True, bn_affine=False):
        """
        Args:
            inchan: number of input channels of the first convolution.
            dilated: if True, strides requested in :meth:`_add_conv` are
                converted into dilation of the following layers.
            dilation: initial dilation factor.
            bn: globally enable/disable batch normalization layers.
            bn_affine: ``affine`` flag forwarded to ``nn.BatchNorm2d``.
        """
        BaseNet.__init__(self)
        self.inchan = inchan
        self.curchan = inchan  # channel count produced by the last added conv
        self.dilated = dilated
        self.dilation = dilation  # running dilation, updated by _add_conv
        self.bn = bn
        self.bn_affine = bn_affine
        self.ops = nn.ModuleList([])

    def _make_bn(self, outd):
        """Return a ``BatchNorm2d`` over ``outd`` channels."""
        return nn.BatchNorm2d(outd, affine=self.bn_affine)

    def _add_conv(self, outd, k=3, stride=1, dilation=1, bn=True, relu=True, k_pool=1, pool_type='max'):
        """Append a convolution (plus optional BN, ReLU and pooling) to ``self.ops``.

        Args:
            outd: number of output channels.
            k: kernel size.
            stride: stride; in dilated mode the conv itself keeps stride 1 and
                the running dilation is multiplied by ``stride`` instead.
            dilation: extra dilation multiplier for this layer.
            bn: add batch normalization (only if ``self.bn`` is also True).
            relu: add an in-place ReLU.
            k_pool: pooling kernel size; pooling is added only when > 1.
            pool_type: ``'avg'`` or ``'max'``; only consulted when ``k_pool > 1``.

        Raises:
            ValueError: if pooling is requested with an unknown ``pool_type``.
                Checked before any layer is appended so the network is never
                left half-built (the original only printed a message and
                silently omitted the pooling layer).
        """
        pool_cls = None
        if k_pool > 1:
            pool_classes = {'avg': torch.nn.AvgPool2d, 'max': torch.nn.MaxPool2d}
            if pool_type not in pool_classes:
                raise ValueError(f"Error, unknown pooling type {pool_type}...")
            pool_cls = pool_classes[pool_type]

        # as in the original implementation, dilation is applied at the end of
        # layer, so it will have impact only from next layer
        d = self.dilation * dilation
        if self.dilated:
            conv_params = dict(padding=((k - 1) * d) // 2, dilation=d, stride=1)
            self.dilation *= stride
        else:
            conv_params = dict(padding=((k - 1) * d) // 2, dilation=d, stride=stride)

        self.ops.append(nn.Conv2d(self.curchan, outd, kernel_size=k, **conv_params))
        if bn and self.bn:
            self.ops.append(self._make_bn(outd))
        if relu:
            self.ops.append(nn.ReLU(inplace=True))
        self.curchan = outd

        if pool_cls is not None:
            self.ops.append(pool_cls(kernel_size=k_pool))

    def forward_one(self, x):
        """Apply all layers to ``x`` and return the L2-normalized descriptors.

        Returns:
            dict with the single key ``descriptors``. This class has no
            confidence heads, so it cannot call ``BaseNet.normalize`` (which
            requires reliability and repeatability scores); the original call
            ``self.normalize(x)`` raised ``TypeError``.
        """
        assert self.ops, "You need to add convolutions first"
        for op in self.ops:
            x = op(x)
        return dict(descriptors=F.normalize(x, p=2, dim=1))
class L2_Net(PatchNet):
    """Compute a 128D descriptor for all overlapping 32x32 patches.

    From the L2Net paper (CVPR'17).
    """

    def __init__(self, dim=128, **kw):
        """
        Args:
            dim: descriptor dimension; every layer width below is given for
                ``dim=128`` and rescaled as ``(width * dim) // 128``.
            **kw: forwarded to ``PatchNet.__init__``.
        """
        PatchNet.__init__(self, **kw)
        # (reference width, extra _add_conv options), in network order
        layer_table = (
            (32, {}),
            (32, {}),
            (64, dict(stride=2)),
            (64, {}),
            (128, dict(stride=2)),
            (128, {}),
            (128, dict(k=7, stride=8, bn=False, relu=False)),
        )
        for width, options in layer_table:
            self._add_conv((width * dim) // 128, **options)
        self.out_dim = dim
class Quad_L2Net(PatchNet):
    """Same as L2_Net, but the final 8x8 conv is replaced by 3 successive 2x2 convs."""

    def __init__(self, dim=128, mchan=4, relu22=False, **kw):
        """
        Args:
            dim: number of output channels of the last convolution.
            mchan: channel multiplier; layer widths are 8, 16 and 32 times it.
            relu22: whether the first two 2x2 convolutions are followed by a ReLU.
            **kw: forwarded to ``PatchNet.__init__``.
        """
        PatchNet.__init__(self, **kw)

        # 3x3 trunk: (width multiplier, stride)
        for mult, step in ((8, 1), (8, 1), (16, 2), (16, 1), (32, 2), (32, 1)):
            self._add_conv(mult * mchan, stride=step)

        # replace last 8x8 convolution with 3 2x2 convolutions
        for _ in range(2):
            self._add_conv(32 * mchan, k=2, stride=2, relu=relu22)
        self._add_conv(dim, k=2, stride=2, bn=False, relu=False)

        self.out_dim = dim
class Quad_L2Net_ConfCFS(Quad_L2Net):
    """Quad_L2Net extended with two confidence maps: repeatability and reliability."""

    def __init__(self, **kw):
        """Build the Quad_L2Net trunk, then add the two 1x1 confidence heads."""
        Quad_L2Net.__init__(self, **kw)
        # reliability head: 2 output channels
        self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1)
        # repeatability head: a single output channel, so it is squashed with
        # a softplus rather than a softmax (noted upstream as a probable
        # long-standing mistake, kept as is)
        self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1)

    def forward_one(self, x):
        """Return descriptors, reliability and repeatability for one input."""
        assert self.ops, "You need to add convolutions first"
        feats = x
        for layer in self.ops:
            feats = layer(feats)
        # both confidence heads read the element-wise squared features
        squared = feats ** 2
        return self.normalize(
            feats,
            ureliability=self.clf(squared),
            urepeatability=self.sal(squared),
        )
class Fast_Quad_L2Net(PatchNet):
    """Faster variant of Quad_L2Net.

    One strided (dilated) convolution is replaced by a pooling layer that
    lowers the working resolution, and a final bilinear upsampling restores
    it, with the aim of reducing inference time.

    Dilation factors and pooling (default settings):
        1,1,1, pool2, 1,1, 2,2, 4, 8, upsample2
    """

    def __init__(self, dim=128, mchan=4, relu22=False, downsample_factor=2, **kw):
        """
        Args:
            dim: number of output channels of the last convolution.
            mchan: channel multiplier; layer widths are 8, 16 and 32 times it.
            relu22: whether the first two 2x2 convolutions are followed by a ReLU.
            downsample_factor: pooling kernel size after the third convolution
                and scale factor of the final upsampling.
            **kw: forwarded to ``PatchNet.__init__``.
        """
        PatchNet.__init__(self, **kw)
        narrow, medium, wide = 8 * mchan, 16 * mchan, 32 * mchan

        # (output channels, extra _add_conv options), in network order
        plan = (
            (narrow, {}),
            (narrow, {}),
            # pooling (type left to _add_conv's default) lowers the resolution
            (medium, dict(k_pool=downsample_factor)),
            (medium, {}),
            (wide, dict(stride=2)),
            (wide, {}),
            # replace last 8x8 convolution with 3 2x2 convolutions
            (wide, dict(k=2, stride=2, relu=relu22)),
            (wide, dict(k=2, stride=2, relu=relu22)),
            (dim, dict(k=2, stride=2, bn=False, relu=False)),
        )
        for channels, options in plan:
            self._add_conv(channels, **options)

        # Go back to initial image resolution with upsampling
        upsample = torch.nn.Upsample(
            scale_factor=downsample_factor, mode='bilinear', align_corners=False
        )
        self.ops.append(upsample)
        self.out_dim = dim
class Fast_Quad_L2Net_ConfCFS(Fast_Quad_L2Net):
    """Fast r2d2 architecture: Fast_Quad_L2Net plus reliability and repeatability heads."""

    def __init__(self, **kw):
        """Build the Fast_Quad_L2Net trunk, then add the two 1x1 confidence heads."""
        Fast_Quad_L2Net.__init__(self, **kw)
        # reliability head: 2 output channels
        self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1)
        # repeatability head: a single output channel, so it is squashed with
        # a softplus rather than a softmax (noted upstream as a probable
        # long-standing mistake, kept as is)
        self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1)

    def forward_one(self, x):
        """Return descriptors, reliability and repeatability for one input."""
        assert self.ops, "You need to add convolutions first"
        feats = x
        for layer in self.ops:
            feats = layer(feats)
        # both confidence heads read the element-wise squared features
        squared = feats ** 2
        return self.normalize(
            feats,
            ureliability=self.clf(squared),
            urepeatability=self.sal(squared),
        )