diff --git a/third_party/ALIKE/LICENSE b/third_party/ALIKE/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..4ee705bf59834a4b0195b1b0e499ee950469668e --- /dev/null +++ b/third_party/ALIKE/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2022, Zhao Xiaoming +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/third_party/ALIKE/README.md b/third_party/ALIKE/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8f40f15c56f6c54b14bb438e47096737a440fe89 --- /dev/null +++ b/third_party/ALIKE/README.md @@ -0,0 +1,131 @@ +# News + +- The [ALIKED](https://github.com/Shiaoming/ALIKED) is released. +- The [ALIKE training code](https://github.com/Shiaoming/ALIKE/raw/main/assets/ALIKE_code.zip) is released. + +# ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction + +ALIKE applies a differentiable keypoint detection module to detect accurate sub-pixel keypoints. The network can run at 95 frames per second for 640 x 480 images on an NVIDIA Titan X (Pascal) GPU and achieves performance on par with the state of the art. ALIKE benefits real-time applications on resource-limited platforms/devices. Technical details are described in [this paper](https://arxiv.org/pdf/2112.02906.pdf). + +> ``` +> Xiaoming Zhao, Xingming Wu, Jinyu Miao, Weihai Chen, Peter C. Y. Chen, Zhengguo Li, "ALIKE: Accurate and Lightweight Keypoint +> Detection and Descriptor Extraction," IEEE Transactions on Multimedia, 2022. +> ``` + +![](./assets/alike.png) + + +If you use ALIKE in an academic work, please cite: + +``` +@article{Zhao2023ALIKED, + title = {ALIKED: A Lighter Keypoint and Descriptor Extraction Network via Deformable Transformation}, + url = {https://arxiv.org/pdf/2304.03608.pdf}, + doi = {10.1109/TIM.2023.3271000}, + journal = {IEEE Transactions on Instrumentation & Measurement}, + author = {Zhao, Xiaoming and Wu, Xingming and Chen, Weihai and Chen, Peter C. Y. 
and Xu, Qingsong and Li, Zhengguo}, + year = {2023}, + volume = {72}, + pages = {1-16}, +} + +@article{Zhao2022ALIKE, + title = {ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction}, + url = {http://arxiv.org/abs/2112.02906}, + doi = {10.1109/TMM.2022.3155927}, + journal = {IEEE Transactions on Multimedia}, + author = {Zhao, Xiaoming and Wu, Xingming and Miao, Jinyu and Chen, Weihai and Chen, Peter C. Y. and Li, Zhengguo}, + month = march, + year = {2022}, +} +``` + + + +## 1. Prerequisites + +The required packages are listed in the `requirements.txt` : + +```shell +pip install -r requirements.txt +``` + + + +## 2. Models + +The off-the-shelf weights of four variant ALIKE models are provided in `models/` . + + + +## 3. Run demo + +```shell +$ python demo.py -h +usage: demo.py [-h] [--model {alike-t,alike-s,alike-n,alike-l}] + [--device DEVICE] [--top_k TOP_K] [--scores_th SCORES_TH] + [--n_limit N_LIMIT] [--no_display] [--no_sub_pixel] + input + +ALike Demo. + +positional arguments: + input Image directory or movie file or "camera0" (for + webcam0). + +optional arguments: + -h, --help show this help message and exit + --model {alike-t,alike-s,alike-n,alike-l} + The model configuration + --device DEVICE Running device (default: cuda). + --top_k TOP_K Detect top K keypoints. -1 for threshold based mode, + >0 for top K mode. (default: -1) + --scores_th SCORES_TH + Detector score threshold (default: 0.2). + --n_limit N_LIMIT Maximum number of keypoints to be detected (default: + 5000). + --no_display Do not display images to screen. Useful if running + remotely (default: False). + --no_sub_pixel Do not detect sub-pixel keypoints (default: False). +``` + + + +## 4. Examples + +### KITTI example +```shell +python demo.py assets/kitti +``` +![](./assets/kitti.gif) + +### TUM example +```shell +python demo.py assets/tum +``` +![](./assets/tum.gif) + +## 5. 
Efficiency and performance + +| Models | Parameters | GFLOPs(640x480) | MHA@3 on Hpatches | mAA(10°) on [IMW2020-test](https://www.cs.ubc.ca/research/image-matching-challenge/2021/leaderboard) (Stereo) | +|:---:|:---:|:---:|:-----------------:|:-------------------------------------------------------------------------------------------------------------:| +| D2-Net(MS) | 7653KB | 889.40 | 38.33% | 12.27% | +| LF-Net(MS) | 2642KB | 24.37 | 57.78% | 23.44% | +| SuperPoint | 1301KB | 26.11 | 70.19% | 28.97% | +| R2D2(MS) | 484KB | 464.55 | 71.48% | 39.02% | +| ASLFeat(MS) | 823KB | 77.58 | 73.52% | 33.65% | +| DISK | 1092KB | 98.97 | 70.56% | 51.22% | +| ALike-N | 318KB | 7.909 | 75.74% | 47.18% | +| ALike-L | 653KB | 19.685 | 76.85% | 49.58% | + +### Evaluation on Hpatches + +- Download [hpatches-sequences-release](https://hpatches.github.io/) and put it into `hseq/hpatches-sequences-release`. +- Remove the unreliable sequences as D2-Net. +- Run the following command to evaluate the performance: + ```shell + python hseq/eval.py + ``` + + +For more details, please refer to the [paper](https://arxiv.org/abs/2112.02906). 
# Constructor keyword arguments for the four bundled ALIKE variants: encoder
# channel widths (c1..c4), descriptor dimension, head type, detection radius
# and the checkpoint path, resolved relative to this file's `models/` folder.
configs = {
    'alike-t': {'c1': 8, 'c2': 16, 'c3': 32, 'c4': 64, 'dim': 64, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-t.pth')},
    'alike-s': {'c1': 8, 'c2': 16, 'c3': 48, 'c4': 96, 'dim': 96, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-s.pth')},
    'alike-n': {'c1': 16, 'c2': 32, 'c3': 64, 'c4': 128, 'dim': 128, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-n.pth')},
    'alike-l': {'c1': 32, 'c2': 64, 'c3': 128, 'c4': 128, 'dim': 128, 'single_head': False, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-l.pth')},
}


class ALike(ALNet):
    """ALNet backbone combined with a DKD detector for single-image inference.

    The class adds three things on top of ``ALNet``: optional checkpoint
    loading, padding of arbitrary image sizes to a multiple of 32, and a
    ``forward`` that takes a NumPy image and returns NumPy keypoints,
    descriptors and scores.
    """

    def __init__(self,
                 # ================================== feature encoder
                 c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
                 single_head: bool = False,
                 # ================================== detect parameters
                 radius: int = 2,
                 top_k: int = 500, scores_th: float = 0.5,
                 n_limit: int = 5000,
                 device: str = 'cpu',
                 model_path: str = ''
                 ):
        """
        :param c1: encoder width, forwarded to ``ALNet``
        :param c2: encoder width, forwarded to ``ALNet``
        :param c3: encoder width, forwarded to ``ALNet``
        :param c4: encoder width, forwarded to ``ALNet``
        :param dim: descriptor dimension, forwarded to ``ALNet``
        :param single_head: head type, forwarded to ``ALNet``
        :param radius: forwarded to ``DKD``
        :param top_k: forwarded to ``DKD``
        :param scores_th: forwarded to ``DKD``
        :param n_limit: forwarded to ``DKD``
        :param device: device the module is moved to and that ``forward`` sends images to
        :param model_path: checkpoint to load; the empty string skips loading
        """
        super().__init__(c1, c2, c3, c4, dim, single_head)
        self.radius = radius
        self.top_k = top_k
        self.n_limit = n_limit
        self.scores_th = scores_th
        self.dkd = DKD(radius=self.radius, top_k=self.top_k,
                       scores_th=self.scores_th, n_limit=self.n_limit)
        self.device = device

        if model_path != '':
            # NOTE(security): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            state_dict = torch.load(model_path, map_location=self.device)
            self.load_state_dict(state_dict)
            self.eval()
            logging.info(f'Loaded model parameters from {model_path}')
            logging.info(
                f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3}KB")

        # Always place the parameters on the requested device. Previously this
        # only happened when a checkpoint was loaded, so a weight-less model
        # with device='cuda' stayed on the CPU while `forward` sent its input
        # to CUDA, causing a device-mismatch error.
        self.to(self.device)

    def extract_dense_map(self, image, ret_dict=False):
        """Run the backbone on an image batch of arbitrary spatial size.

        :param image: tensor of shape BxCxHxW
        :param ret_dict: return a dict with keys 'descriptor_map' and
            'scores_map' instead of a tuple
        :return: ``(descriptor_map, scores_map)``, both cropped back to HxW;
            the descriptor map is L2-normalised along the channel dimension
        """
        # ====================================================
        # The encoder pools by a total factor of 32, so H and W must be
        # multiples of 2^5. Zero-pad at the bottom/right when they are not.
        # F.pad keeps the dtype and device of `image`.
        _, _, h, w = image.shape
        pad_h = -h % 32
        pad_w = -w % 32
        padded = bool(pad_h or pad_w)
        if padded:
            image = F.pad(image, (0, pad_w, 0, pad_h))
        # ====================================================

        scores_map, descriptor_map = super().forward(image)

        # ====================================================
        # Drop the rows/columns that correspond to the padding.
        if padded:
            descriptor_map = descriptor_map[:, :, :h, :w]
            scores_map = scores_map[:, :, :h, :w]  # Bx1xHxW
        # ====================================================

        # BxCxHxW
        descriptor_map = torch.nn.functional.normalize(descriptor_map, p=2, dim=1)

        if ret_dict:
            return {'descriptor_map': descriptor_map, 'scores_map': scores_map, }
        else:
            return descriptor_map, scores_map

    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
        """
        :param img: np.array HxWx3, RGB
        :param image_size_max: maximum image size, otherwise, the image will be resized
        :param sort: sort keypoints by scores
        :param sub_pixel: whether to use sub-pixel accuracy
        :return: a dictionary with 'keypoints', 'descriptors', 'scores',
            'scores_map' (all NumPy arrays) and 'time' (seconds spent in
            extraction and detection)
        """
        H, W, three = img.shape
        assert three == 3, "input image shape should be [HxWx3]"

        # ==================== image size constraint
        # The copy is kept on purpose: it detaches the tensor built below from
        # the caller's buffer (torch.from_numpy shares memory).
        # NOTE(review): it presumably also lets negatively-strided or read-only
        # views (e.g. bgr[:, :, ::-1]) reach torch.from_numpy -- confirm
        # before removing.
        image = deepcopy(img)
        max_hw = max(H, W)
        if max_hw > image_size_max:
            ratio = float(image_size_max / max_hw)
            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)

        # ==================== convert image to tensor
        # HxWx3 in [0, 255] -> 1x3xHxW float32 in [0, 1]
        image = torch.from_numpy(image).to(self.device).to(torch.float32).permute(2, 0, 1)[None] / 255.0

        # ==================== extract keypoints
        # perf_counter is monotonic, so the interval cannot be distorted by
        # wall-clock adjustments.
        start = time.perf_counter()

        with torch.no_grad():
            descriptor_map, scores_map = self.extract_dense_map(image)
            keypoints, descriptors, scores, _ = self.dkd(scores_map, descriptor_map,
                                                         sub_pixel=sub_pixel)
            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
            # Map coordinates from [-1, 1] to pixel coordinates of the
            # ORIGINAL (un-resized) image.
            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])

        if sort:
            indices = torch.argsort(scores, descending=True)
            keypoints = keypoints[indices]
            descriptors = descriptors[indices]
            scores = scores[indices]

        end = time.perf_counter()

        return {'keypoints': keypoints.cpu().numpy(),
                'descriptors': descriptors.cpu().numpy(),
                'scores': scores.cpu().numpy(),
                'scores_map': scores_map.cpu().numpy(),
                'time': end - start, }


if __name__ == '__main__':
    import numpy as np
    from thop import profile

    net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)

    image = np.random.random((640, 480, 3)).astype(np.float32)
    flops, params = profile(net, inputs=(image, 9999, False), verbose=False)
    print('{:<30}  {:<8} GFLops'.format('Computational complexity: ', flops / 1e9))
    print('{:<30}  {:<8} KB'.format('Number of parameters: ', params / 1e3))
def _conv3x3(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Build a bias-free 3x3 convolution with padding 1 (spatial size kept at stride 1)."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


def _conv1x1(in_planes: int, out_planes: int) -> nn.Conv2d:
    """Build a bias-free 1x1 convolution."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False)


class ConvBlock(nn.Module):
    """Two stacked (3x3 conv -> norm -> activation) stages."""

    def __init__(self, in_channels, out_channels,
                 gate: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        """
        :param in_channels: channels of the input feature map
        :param out_channels: channels produced by both convolutions
        :param gate: activation module instance; defaults to ``nn.ReLU(inplace=True)``
        :param norm_layer: normalisation layer factory; defaults to ``nn.BatchNorm2d``
        """
        super().__init__()
        if gate is None:
            self.gate = nn.ReLU(inplace=True)
        else:
            self.gate = gate
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self.conv1 = _conv3x3(in_channels, out_channels)
        self.bn1 = norm_layer(out_channels)
        self.conv2 = _conv3x3(out_channels, out_channels)
        self.bn2 = norm_layer(out_channels)

    def forward(self, x):
        x = self.gate(self.bn1(self.conv1(x)))  # B x out_channels x H x W
        x = self.gate(self.bn2(self.conv2(x)))  # B x out_channels x H x W
        return x


# copied from torchvision\models\resnet.py#27->BasicBlock
class ResBlock(nn.Module):
    """Residual block: two 3x3 conv/norm stages plus a skip connection."""

    expansion: int = 1

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            gate: Optional[Callable[..., nn.Module]] = None,
            norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        """
        :param inplanes: channels of the input feature map
        :param planes: channels of the output feature map
        :param stride: stride of the first convolution
        :param downsample: optional module applied to the input before the
            skip addition (needed whenever the input shape differs from the output)
        :param groups: must be 1
        :param base_width: must be 64
        :param dilation: must not exceed 1
        :param gate: activation module instance; defaults to ``nn.ReLU(inplace=True)``
        :param norm_layer: normalisation layer factory; defaults to ``nn.BatchNorm2d``
        :raises ValueError: if ``groups != 1`` or ``base_width != 64``
        :raises NotImplementedError: if ``dilation > 1``
        """
        super().__init__()
        if gate is None:
            self.gate = nn.ReLU(inplace=True)
        else:
            self.gate = gate
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('ResBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in ResBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = _conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.conv2 = _conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.gate(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.gate(out)

        return out


class ALNet(nn.Module):
    """Four-stage encoder with multi-scale aggregation and a joint score/descriptor head.

    ``forward`` returns a Bx1xHxW score map (sigmoid output) and a BxdimxHxW
    descriptor map. H and W must be multiples of 32 so that the pooled
    features upsample back to the input resolution.
    """

    def __init__(self, c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
                 single_head: bool = True,
                 ):
        """
        :param c1: output channels of encoder stage 1 (full resolution)
        :param c2: output channels of encoder stage 2 (1/2 resolution)
        :param c3: output channels of encoder stage 3 (1/8 resolution)
        :param c4: output channels of encoder stage 4 (1/32 resolution)
        :param dim: descriptor dimension; each stage contributes ``dim // 4``
            channels, so ``dim`` should be divisible by 4
        :param single_head: if False, an extra 1x1 conv + activation precedes
            the final 1x1 head
        """
        super().__init__()

        self.gate = nn.ReLU(inplace=True)

        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool4 = nn.MaxPool2d(kernel_size=4, stride=4)

        self.block1 = ConvBlock(3, c1, self.gate, nn.BatchNorm2d)

        self.block2 = ResBlock(inplanes=c1, planes=c2, stride=1,
                               downsample=nn.Conv2d(c1, c2, 1),
                               gate=self.gate,
                               norm_layer=nn.BatchNorm2d)
        self.block3 = ResBlock(inplanes=c2, planes=c3, stride=1,
                               downsample=nn.Conv2d(c2, c3, 1),
                               gate=self.gate,
                               norm_layer=nn.BatchNorm2d)
        self.block4 = ResBlock(inplanes=c3, planes=c4, stride=1,
                               downsample=nn.Conv2d(c3, c4, 1),
                               gate=self.gate,
                               norm_layer=nn.BatchNorm2d)

        # ================================== feature aggregation
        self.conv1 = _conv1x1(c1, dim // 4)
        self.conv2 = _conv1x1(c2, dim // 4)
        self.conv3 = _conv1x1(c3, dim // 4)
        # block4 emits c4 channels, so conv4 must consume c4 (it previously
        # consumed `dim`, which only worked when c4 == dim). The weight shape
        # is unchanged for every configuration that worked before.
        self.conv4 = _conv1x1(c4, dim // 4)
        self.upsample2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        # upsample4 is not used by forward(); kept so the module's attributes stay unchanged.
        self.upsample4 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
        self.upsample8 = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True)
        self.upsample32 = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True)

        # ================================== detector and descriptor head
        self.single_head = single_head
        if not self.single_head:
            self.convhead1 = _conv1x1(dim, dim)
        self.convhead2 = _conv1x1(dim, dim + 1)

    def forward(self, image):
        """
        :param image: tensor of shape Bx3xHxW, with H and W multiples of 32
        :return: ``(scores_map, descriptor_map)`` of shapes Bx1xHxW and BxdimxHxW
        """
        # ================================== feature encoder
        x1 = self.block1(image)  # B x c1 x H x W
        x2 = self.pool2(x1)
        x2 = self.block2(x2)  # B x c2 x H/2 x W/2
        x3 = self.pool4(x2)
        x3 = self.block3(x3)  # B x c3 x H/8 x W/8
        x4 = self.pool4(x3)
        x4 = self.block4(x4)  # B x c4 x H/32 x W/32

        # ================================== feature aggregation
        x1 = self.gate(self.conv1(x1))  # B x dim//4 x H x W
        x2 = self.gate(self.conv2(x2))  # B x dim//4 x H//2 x W//2
        x3 = self.gate(self.conv3(x3))  # B x dim//4 x H//8 x W//8
        x4 = self.gate(self.conv4(x4))  # B x dim//4 x H//32 x W//32
        x2_up = self.upsample2(x2)  # B x dim//4 x H x W
        x3_up = self.upsample8(x3)  # B x dim//4 x H x W
        x4_up = self.upsample32(x4)  # B x dim//4 x H x W
        x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)

        # ================================== detector and descriptor head
        if not self.single_head:
            x1234 = self.gate(self.convhead1(x1234))
        x = self.convhead2(x1234)  # B x dim+1 x H x W

        # The last channel is the detection score; the rest is the descriptor.
        descriptor_map = x[:, :-1, :, :]
        scores_map = torch.sigmoid(x[:, -1, :, :]).unsqueeze(1)

        return scores_map, descriptor_map


if __name__ == '__main__':
    from thop import profile

    net = ALNet(c1=16, c2=32, c3=64, c4=128, dim=128, single_head=True)

    image = torch.randn(1, 3, 640, 480)
    flops, params = profile(net, inputs=(image,), verbose=False)
    print('{:<30}  {:<8} GFLops'.format('Computational complexity: ', flops / 1e9))
    print('{:<30}  {:<8} KB'.format('Number of parameters: ', params / 1e3))
a/third_party/ALIKE/assets/kitti.gif b/third_party/ALIKE/assets/kitti.gif new file mode 100644 index 0000000000000000000000000000000000000000..a2e5232941b0c2f60a999f2954eab011036e5853 --- /dev/null +++ b/third_party/ALIKE/assets/kitti.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b05e4dc0000b9abf53183a3ebdfc0b95a92513952e235ea24f27f2945389ea1 +size 7032794 diff --git a/third_party/ALIKE/assets/kitti/000100.png b/third_party/ALIKE/assets/kitti/000100.png new file mode 100644 index 0000000000000000000000000000000000000000..da51dfdfdf23c593b8eb441a091e8b52bfe87218 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000100.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d4a81ad91c7945cabd15de286aacf27ab661163b5eee0177128721782d5405 +size 273062 diff --git a/third_party/ALIKE/assets/kitti/000101.png b/third_party/ALIKE/assets/kitti/000101.png new file mode 100644 index 0000000000000000000000000000000000000000..3256afa05966521824d0b66d3905dad813cc6d30 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000101.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539c684432726e903191a2471c8dae8c4b0012b88e1b3af7590de08c24890327 +size 271723 diff --git a/third_party/ALIKE/assets/kitti/000102.png b/third_party/ALIKE/assets/kitti/000102.png new file mode 100644 index 0000000000000000000000000000000000000000..00dc0b5ef67bb8cdfc53ba8b7376f6f7d83eac95 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000102.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bbc9a5b04bd425a5e146f3ba114027041086477a5fa123a50463932ab62617e +size 270490 diff --git a/third_party/ALIKE/assets/kitti/000103.png b/third_party/ALIKE/assets/kitti/000103.png new file mode 100644 index 0000000000000000000000000000000000000000..5cf8b1796c42286c7194e2534118d71060772b25 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000103.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2041e633aeb85022b1222277cace17132bed09ca19856d1e6787984b05d61339 +size 271246 diff --git a/third_party/ALIKE/assets/kitti/000104.png b/third_party/ALIKE/assets/kitti/000104.png new file mode 100644 index 0000000000000000000000000000000000000000..616183a428187af96bd59ed3a5d5ec79d8088c3e --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000104.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca8a30c0edb7d2c6d6e5c2f5317bdffdae2269157d69e71f9602e0bbf2090ab +size 270873 diff --git a/third_party/ALIKE/assets/kitti/000105.png b/third_party/ALIKE/assets/kitti/000105.png new file mode 100644 index 0000000000000000000000000000000000000000..1d3839a9f59d5265721d5048cfdf57eff96cfa76 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000105.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8bca67672e8b2181b193f0577a9a3b42b64df9bb57d98608dbdbb54e79925bd +size 269647 diff --git a/third_party/ALIKE/assets/kitti/000106.png b/third_party/ALIKE/assets/kitti/000106.png new file mode 100644 index 0000000000000000000000000000000000000000..0cc544cfda2ffeac8367e4f00b80b8e84755717c --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000106.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccc83d57703afdcda4afd746dd99458b425fbc11ce3155583abde25e988e389 +size 268717 diff --git a/third_party/ALIKE/assets/kitti/000107.png b/third_party/ALIKE/assets/kitti/000107.png new file mode 100644 index 0000000000000000000000000000000000000000..92b3d9f54f894b13cb8a729ba5c34e5c56cddd5e --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000107.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980f4c74ac9117020f954cc75718cf0a09baeb30894aea123db59f9e4555ecef +size 269361 diff --git a/third_party/ALIKE/assets/kitti/000108.png b/third_party/ALIKE/assets/kitti/000108.png new file mode 100644 index 0000000000000000000000000000000000000000..4a9bfb75d1e550e3559a9428feafa52ce7ed9530 --- /dev/null +++ 
b/third_party/ALIKE/assets/kitti/000108.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c2234c8ba8c056c452a0d625db6eac09c8963b0c5e8a5d0b1c3af15a4b7516 +size 271453 diff --git a/third_party/ALIKE/assets/kitti/000109.png b/third_party/ALIKE/assets/kitti/000109.png new file mode 100644 index 0000000000000000000000000000000000000000..8bdfe7f16ac41ded8455f234532c0c03d310162a --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000109.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a34b9639806e7deefe1cb24ae7b376343d394d2d032f95e763e4b6921cd61c7 +size 275767 diff --git a/third_party/ALIKE/assets/kitti/000110.png b/third_party/ALIKE/assets/kitti/000110.png new file mode 100644 index 0000000000000000000000000000000000000000..cecaf12f471442aa32538dd8199bd63e5f35afd7 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000110.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af1b3e55b9c1eac208c887c44592f93e8ae7cc0196acaa2639c265f8bf959e3 +size 274569 diff --git a/third_party/ALIKE/assets/kitti/000111.png b/third_party/ALIKE/assets/kitti/000111.png new file mode 100644 index 0000000000000000000000000000000000000000..825ecf590398c03d88d125340b7a22654b3a7bbd --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000111.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215ed5306f4976458110836a620dcf55030d8dd20618e6365d60176988c1cfa6 +size 276191 diff --git a/third_party/ALIKE/assets/kitti/000112.png b/third_party/ALIKE/assets/kitti/000112.png new file mode 100644 index 0000000000000000000000000000000000000000..9bc56a5eb236cbbea7ff42216b47d95d73c28e8e --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000112.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a265252457871d4dd2f17c42eafa1c0da99df90d103c653c8097aad26073d22 +size 275704 diff --git a/third_party/ALIKE/assets/kitti/000113.png b/third_party/ALIKE/assets/kitti/000113.png new file mode 100644 
index 0000000000000000000000000000000000000000..c86b79c0a1dd9db12c7dc467260f86250390c49c --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000113.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83f220b29b5d04ead44c9304f9eccde3a4ff4e60627d7014f8fe424afb873f4 +size 276252 diff --git a/third_party/ALIKE/assets/kitti/000114.png b/third_party/ALIKE/assets/kitti/000114.png new file mode 100644 index 0000000000000000000000000000000000000000..772819a718f58268e7e717dfdf837e590a5a2a59 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000114.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1abad021db35c21f2e9ac0ce7e54a5721eec3ff32bc4ce820f5b7091af4d6fac +size 275917 diff --git a/third_party/ALIKE/assets/kitti/000115.png b/third_party/ALIKE/assets/kitti/000115.png new file mode 100644 index 0000000000000000000000000000000000000000..3f859249dc3f021e93734bfc8ac9edb8f0aa672f --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000115.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be815b2b0aa8aa3dc47e314ed6645eeb474996e9a920fab2abe8a35fb3ea089 +size 274239 diff --git a/third_party/ALIKE/assets/kitti/000116.png b/third_party/ALIKE/assets/kitti/000116.png new file mode 100644 index 0000000000000000000000000000000000000000..96e9559ae51e8edf81bc43f459ce3136bdfa73fd --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000116.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b8df04ee570d877a04e43f1f4c30abc7e7383b24ce70a1a83a82dcbd863293 +size 270547 diff --git a/third_party/ALIKE/assets/kitti/000117.png b/third_party/ALIKE/assets/kitti/000117.png new file mode 100644 index 0000000000000000000000000000000000000000..20d8f84e9b6e2c2d5d8826dba9094c73265d4f83 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000117.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32567394c096442df0c768822af1e21f2163f373eec94b7a36f2941ae08b199 +size 267343 diff --git 
a/third_party/ALIKE/assets/kitti/000118.png b/third_party/ALIKE/assets/kitti/000118.png new file mode 100644 index 0000000000000000000000000000000000000000..953cb198ab2fd6767dc8fadc97dd9392afc5d805 --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000118.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76476a8856d33960302b29cbd339c8bc513c52e7b81b21ba7d9f07dd0e4b096 +size 268085 diff --git a/third_party/ALIKE/assets/kitti/000119.png b/third_party/ALIKE/assets/kitti/000119.png new file mode 100644 index 0000000000000000000000000000000000000000..28db31e43a28fae867b975a2f5327e0b6de7908c --- /dev/null +++ b/third_party/ALIKE/assets/kitti/000119.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c818d19b8a1ce7051b006361bc14f638d8df2989b0bba8a96472e8551e02e5d1 +size 270004 diff --git a/third_party/ALIKE/assets/tum.gif b/third_party/ALIKE/assets/tum.gif new file mode 100644 index 0000000000000000000000000000000000000000..481d036bf683ae0f8c58d0712da0deafb473197b --- /dev/null +++ b/third_party/ALIKE/assets/tum.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df6ecf9666386bfa5925c8e57d196f15c077d550eb84dd392f5f49b90e86a5dc +size 4040012 diff --git a/third_party/ALIKE/assets/tum/1311868169.163498.png b/third_party/ALIKE/assets/tum/1311868169.163498.png new file mode 100644 index 0000000000000000000000000000000000000000..47d2ca57576dceecf89730b08178f4f7a254d7ca --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.163498.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20bc06c1249727c16efc812082454bc8305438f756bcc95f913b9f79819f08e3 +size 511982 diff --git a/third_party/ALIKE/assets/tum/1311868169.263274.png b/third_party/ALIKE/assets/tum/1311868169.263274.png new file mode 100644 index 0000000000000000000000000000000000000000..85242f0f6ed952c9e3d84ee021ebc38f431b4782 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.263274.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0954d005c8f9ab146718f52601136c513b96a4414b0a0cbc02a01184686fb01e +size 516093 diff --git a/third_party/ALIKE/assets/tum/1311868169.363470.png b/third_party/ALIKE/assets/tum/1311868169.363470.png new file mode 100644 index 0000000000000000000000000000000000000000..a34621c3143e1cb31739b497a6f9a753c4d4f4f0 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.363470.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d2681bb2b8a907d53469d9e67f6d1809b9ec435ec210622bf255c66c8918efd +size 505590 diff --git a/third_party/ALIKE/assets/tum/1311868169.463229.png b/third_party/ALIKE/assets/tum/1311868169.463229.png new file mode 100644 index 0000000000000000000000000000000000000000..3e7952773564794a8cda5aa0f7c5285dea74015f --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.463229.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba2cd89601523665d0bee9dd3ea2117d9249e7ea4c7b43753298c1bab74cd532 +size 509438 diff --git a/third_party/ALIKE/assets/tum/1311868169.563501.png b/third_party/ALIKE/assets/tum/1311868169.563501.png new file mode 100644 index 0000000000000000000000000000000000000000..e64857bb40b474e79464137bd0d474ec750fa976 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.563501.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0239c7cb08fefbe4f5ec87f1c5e5fd5a32be11349744dc45158caa7d403744 +size 526168 diff --git a/third_party/ALIKE/assets/tum/1311868169.663240.png b/third_party/ALIKE/assets/tum/1311868169.663240.png new file mode 100644 index 0000000000000000000000000000000000000000..78120e0b5527404eca9191d6df1ad2fa2122e96e --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.663240.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e538c9dbaf4242072949920b3105ccdcfac68af955d623a701b9eea0e6e0f6f +size 520924 diff --git a/third_party/ALIKE/assets/tum/1311868169.763417.png 
b/third_party/ALIKE/assets/tum/1311868169.763417.png new file mode 100644 index 0000000000000000000000000000000000000000..109d96a4956ea4988e72eabf961d3bc06a130d06 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.763417.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a4fadfc031c36efd4cee5f70d0b501557bf820fa4b39a1c77f4268d0c12e86 +size 543908 diff --git a/third_party/ALIKE/assets/tum/1311868169.863396.png b/third_party/ALIKE/assets/tum/1311868169.863396.png new file mode 100644 index 0000000000000000000000000000000000000000..0696353fe74f5316e9da2ac0330cf665b5111c68 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.863396.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae0ee5be82b14aa1ed19e0b20a72bc37964c64732c7016739a5b30158453049 +size 549088 diff --git a/third_party/ALIKE/assets/tum/1311868169.963415.png b/third_party/ALIKE/assets/tum/1311868169.963415.png new file mode 100644 index 0000000000000000000000000000000000000000..9310b9a4f1afd36578a11535a724960deef3a363 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868169.963415.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a590b6fdb98c4a4ee8e13aafcd9d2392c78a7881b4cc7fd1109231adc3cc8b91 +size 541362 diff --git a/third_party/ALIKE/assets/tum/1311868170.063469.png b/third_party/ALIKE/assets/tum/1311868170.063469.png new file mode 100644 index 0000000000000000000000000000000000000000..12514256b4eb22826bc301c1b11b0d3fa1fce10d --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.063469.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d2d6058e036b307efa7d6008a02103b9c31ed8d0edd4b2f1e9ad49717b89684 +size 550211 diff --git a/third_party/ALIKE/assets/tum/1311868170.163416.png b/third_party/ALIKE/assets/tum/1311868170.163416.png new file mode 100644 index 0000000000000000000000000000000000000000..3c76ee1ab9f1ec86465ab10abb06a8b25f532f77 --- /dev/null +++ 
b/third_party/ALIKE/assets/tum/1311868170.163416.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741d1e0ede775dd4b7054314c1a95ed3e5116792245b9eb1a5e2492ffe4d935c +size 549592 diff --git a/third_party/ALIKE/assets/tum/1311868170.263521.png b/third_party/ALIKE/assets/tum/1311868170.263521.png new file mode 100644 index 0000000000000000000000000000000000000000..1c30ce373f54133dec17e5c3eea93e84843e3e2d --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.263521.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ce12ed16c6fa89a9fdb3b64e7471335d13b82b84c7a554b3f9fd08f6e254a0 +size 545606 diff --git a/third_party/ALIKE/assets/tum/1311868170.363400.png b/third_party/ALIKE/assets/tum/1311868170.363400.png new file mode 100644 index 0000000000000000000000000000000000000000..09ae86f21246ee986d678b64ec973dc508ced9b5 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.363400.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6be184df6fd2ca2e287bc64ada937ce2cec3f5d90e15c244fffa8aa44b11b1 +size 545166 diff --git a/third_party/ALIKE/assets/tum/1311868170.463383.png b/third_party/ALIKE/assets/tum/1311868170.463383.png new file mode 100644 index 0000000000000000000000000000000000000000..3470eb7117c391cb0b9a97feed3884d6829f812e --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.463383.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82953d4580894111f15a5b57e0059dca0baf02e788e0726a2849647cf570b63 +size 541845 diff --git a/third_party/ALIKE/assets/tum/1311868170.563345.png b/third_party/ALIKE/assets/tum/1311868170.563345.png new file mode 100644 index 0000000000000000000000000000000000000000..75054626b291976386ae729de421b19d3b59162c --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.563345.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d498847d7b8bc2389550941b01e95b1bf6459c70ff645d9893637d59e129ae29 +size 549261 diff 
--git a/third_party/ALIKE/assets/tum/1311868170.663430.png b/third_party/ALIKE/assets/tum/1311868170.663430.png new file mode 100644 index 0000000000000000000000000000000000000000..bc7d196020c94a120d483d8b80f8449cc36e321f --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.663430.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b299c55e430afecb9f5d0ff6e1485ce72d90f5ddf1ec1a186fbcb2b110e035f2 +size 540815 diff --git a/third_party/ALIKE/assets/tum/1311868170.763453.png b/third_party/ALIKE/assets/tum/1311868170.763453.png new file mode 100644 index 0000000000000000000000000000000000000000..720f2e7f4ba69d7c3b07c375e351c1794641b9ea --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.763453.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8073cc59711d6bea5038b698fb74eaa72eeca663dcc35850e0b334e234605385 +size 541019 diff --git a/third_party/ALIKE/assets/tum/1311868170.863446.png b/third_party/ALIKE/assets/tum/1311868170.863446.png new file mode 100644 index 0000000000000000000000000000000000000000..78f725e414fb4f35dd4cf620b40369375561e036 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.863446.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b27a2d1c9e30ad0b164af13eb992b9c54c11aa7b408221515b6b106de87763 +size 543505 diff --git a/third_party/ALIKE/assets/tum/1311868170.963440.png b/third_party/ALIKE/assets/tum/1311868170.963440.png new file mode 100644 index 0000000000000000000000000000000000000000..259d37d63734018c2d52d2f155cb8f06d7543db6 --- /dev/null +++ b/third_party/ALIKE/assets/tum/1311868170.963440.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c02db5125b37725ce2c6fb502ba80e3ff85755dabf1a21d952e186480b8e56 +size 535141 diff --git a/third_party/ALIKE/assets/tum/1311868171.063438.png b/third_party/ALIKE/assets/tum/1311868171.063438.png new file mode 100644 index 
class ImageLoader(object):
    """Index-based frame source for a webcam, a video file or an image folder.

    The input kind is chosen from ``filepath``:

    * ``'camera<N>'``   -> OpenCV capture device ``N`` (``mode == 'camera'``)
    * an existing file  -> opened as a video (``mode == 'video'``)
    * an existing dir   -> every ``*.png`` / ``*.jpg`` / ``*.ppm`` in it, sorted
      by name (``mode == 'images'``)

    Instances are consumed through ``__getitem__`` / ``__len__``.
    """

    def __init__(self, filepath: str):
        """
        Args:
            filepath: ``'camera<N>'``, a path to a video file, or a path to a
                directory of images.

        Raises:
            IOError: if the camera / video cannot be opened or the path does
                not exist.
        """
        # Default length; kept as-is for cameras, overwritten for videos and folders.
        self.N = 3000
        if filepath.startswith('camera'):
            camera = int(filepath[6:])
            self.cap = cv2.VideoCapture(camera)
            if not self.cap.isOpened():
                raise IOError(f"Can't open camera {camera}!")
            logging.info(f'Opened camera {camera}')
            self.mode = 'camera'
        elif os.path.exists(filepath):
            if os.path.isfile(filepath):
                self.cap = cv2.VideoCapture(filepath)
                if not self.cap.isOpened():
                    raise IOError(f"Can't open video {filepath}!")
                rate = self.cap.get(cv2.CAP_PROP_FPS)
                self.N = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
                # A reported rate of 0 would otherwise abort start-up with
                # ZeroDivisionError just to build a log message.
                duration = self.N / rate if rate > 0 else float('nan')
                logging.info(f'Opened video {filepath}')
                logging.info(f'Frames: {self.N}, FPS: {rate}, Duration: {duration}s')
                self.mode = 'video'
            else:
                self.images = glob.glob(os.path.join(filepath, '*.png')) + \
                              glob.glob(os.path.join(filepath, '*.jpg')) + \
                              glob.glob(os.path.join(filepath, '*.ppm'))
                self.images.sort()
                self.N = len(self.images)
                logging.info(f'Loading {self.N} images')
                self.mode = 'images'
        else:
            raise IOError('Error filepath (camerax/path of images/path of videos): ', filepath)

    def __getitem__(self, item):
        """Return the image for index ``item``.

        In camera / video mode ``None`` is returned once ``item`` exceeds
        ``self.N``. In image mode an out-of-range index raises ``IndexError``
        from the underlying list.

        Raises:
            IOError: if the capture device fails to deliver a frame.
            Exception: if an image file cannot be decoded.
        """
        if self.mode == 'camera' or self.mode == 'video':
            if item > self.N:
                return None
            ret, img = self.cap.read()
            if not ret:
                # The original raised a plain string here, which Python 3
                # rejects with an unrelated TypeError.
                raise IOError("Can't read image from camera")
            if self.mode == 'video':
                self.cap.set(cv2.CAP_PROP_POS_FRAMES, item)
        elif self.mode == 'images':
            filename = self.images[item]
            img = cv2.imread(filename)
            if img is None:
                raise Exception('Error reading image %s' % filename)
        return img

    def __len__(self):
        """Return the frame count ``self.N`` determined in ``__init__``."""
        return self.N
parser.add_argument('--top_k', type=int, default=-1, + help='Detect top K keypoints. -1 for threshold based mode, >0 for top K mode. (default: -1)') + parser.add_argument('--scores_th', type=float, default=0.2, + help='Detector score threshold (default: 0.2).') + parser.add_argument('--n_limit', type=int, default=5000, + help='Maximum number of keypoints to be detected (default: 5000).') + parser.add_argument('--no_display', action='store_true', + help='Do not display images to screen. Useful if running remotely (default: False).') + parser.add_argument('--no_sub_pixel', action='store_true', + help='Do not detect sub-pixel keypoints (default: False).') + args = parser.parse_args() + + logging.basicConfig(level=logging.INFO) + + image_loader = ImageLoader(args.input) + model = ALike(**configs[args.model], + device=args.device, + top_k=args.top_k, + scores_th=args.scores_th, + n_limit=args.n_limit) + tracker = SimpleTracker() + + if not args.no_display: + logging.info("Press 'q' to stop!") + cv2.namedWindow(args.model) + + runtime = [] + progress_bar = tqdm(image_loader) + for img in progress_bar: + if img is None: + break + + img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + pred = model(img_rgb, sub_pixel=not args.no_sub_pixel) + kpts = pred['keypoints'] + desc = pred['descriptors'] + runtime.append(pred['time']) + + out, N_matches = tracker.update(img, kpts, desc) + + ave_fps = (1. 
/ np.stack(runtime)).mean() + status = f"Fps:{ave_fps:.1f}, Keypoints/Matches: {len(kpts)}/{N_matches}" + progress_bar.set_description(status) + + if not args.no_display: + cv2.setWindowTitle(args.model, args.model + ': ' + status) + cv2.imshow(args.model, out) + if cv2.waitKey(1) == ord('q'): + break + + logging.info('Finished!') + if not args.no_display: + logging.info('Press any key to exit!') + cv2.waitKey() diff --git a/third_party/ALIKE/hseq/cache/alike-l-ms.npy b/third_party/ALIKE/hseq/cache/alike-l-ms.npy new file mode 100644 index 0000000000000000000000000000000000000000..bd988fb065ecd4a900178a3cb974bbbf56de0dc0 --- /dev/null +++ b/third_party/ALIKE/hseq/cache/alike-l-ms.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1350ab826afdd9b7542a556e2fda9ad9f94388a875c8edb7874e4bcdfebc63ca +size 13124 diff --git a/third_party/ALIKE/hseq/cache/alike-l.npy b/third_party/ALIKE/hseq/cache/alike-l.npy new file mode 100644 index 0000000000000000000000000000000000000000..7c63bbec1588af102721df60d0ab8043586036d1 --- /dev/null +++ b/third_party/ALIKE/hseq/cache/alike-l.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999daff1155f3d4736bb7374fb2058f520b0cb4c75b5d7d87fc1e7025a7d2a7d +size 13124 diff --git a/third_party/ALIKE/hseq/cache/alike-n-ms.npy b/third_party/ALIKE/hseq/cache/alike-n-ms.npy new file mode 100644 index 0000000000000000000000000000000000000000..02e2d32258dcaed882ca7a28e7dd47c97c4bb65a --- /dev/null +++ b/third_party/ALIKE/hseq/cache/alike-n-ms.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5967048eddb61e423bf2ea05a2a626e18d8a716b6a0ad42471059aec0b934c +size 13124 diff --git a/third_party/ALIKE/hseq/cache/alike-n.npy b/third_party/ALIKE/hseq/cache/alike-n.npy new file mode 100644 index 0000000000000000000000000000000000000000..3ec339ab8cd7a629d752576e8b275cba215614da --- /dev/null +++ b/third_party/ALIKE/hseq/cache/alike-n.npy @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8e2eba5ff96b25d0a100b6c7273549de91586e6069dcb5320a20edbb24ea462e +size 13124 diff --git a/third_party/ALIKE/hseq/cache/aslfeat.npy b/third_party/ALIKE/hseq/cache/aslfeat.npy new file mode 100644 index 0000000000000000000000000000000000000000..24fb50ccae5d7fa86fb6d4224beb983e54160895 --- /dev/null +++ b/third_party/ALIKE/hseq/cache/aslfeat.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce06fd1b6265e09ed3b26768b68f624e2d556358ab98addd8ebdb7a5a076abe8 +size 15352 diff --git a/third_party/ALIKE/hseq/cache/d2.npy b/third_party/ALIKE/hseq/cache/d2.npy new file mode 100644 index 0000000000000000000000000000000000000000..741588a2e42c40fd8a3f7c097d56898ef66c5ceb --- /dev/null +++ b/third_party/ALIKE/hseq/cache/d2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976d81c6b51a98f89eac60c6d25990130c1df571ef6536280f4b00577eab56f0 +size 15352 diff --git a/third_party/ALIKE/hseq/cache/disk.npy b/third_party/ALIKE/hseq/cache/disk.npy new file mode 100644 index 0000000000000000000000000000000000000000..27871bccf7a206df33b94f25db28259b2b7cd456 --- /dev/null +++ b/third_party/ALIKE/hseq/cache/disk.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df2d9e0dfd0baa19f2af12f4604368ca65a1643159e7e3438e25efc41ab15357 +size 15352 diff --git a/third_party/ALIKE/hseq/cache/lfnet.npy b/third_party/ALIKE/hseq/cache/lfnet.npy new file mode 100644 index 0000000000000000000000000000000000000000..2b3fc3514b2c85a856aae46f5f75bcf6cc6e2afd --- /dev/null +++ b/third_party/ALIKE/hseq/cache/lfnet.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417327dee726cffccc6dfbc9b0e6b3c06b277ea8878ccf87b87475d1cd6e65ca +size 15352 diff --git a/third_party/ALIKE/hseq/cache/r2d2.npy b/third_party/ALIKE/hseq/cache/r2d2.npy new file mode 100644 index 0000000000000000000000000000000000000000..247b6e2952cf7a2a2e86479c4b888eb55f63cdd2 --- /dev/null +++ 
def generate_read_function(method, extension='ppm'):
    """Build a loader for the features one method stored next to the images.

    Args:
        method: method tag used as the last suffix of the feature file name.
        extension: image extension that precedes the method suffix.

    Returns:
        ``read_function(seq_name, im_idx) -> (keypoints, descriptors)``.
        It reads ``<dataset_path>/<seq_name>/<im_idx>.<extension>.<method>``.
        When the module-level ``top_k`` is not ``None``, only the ``top_k``
        entries with the highest ``scores`` are returned, in ascending score
        order.
    """
    def read_function(seq_name, im_idx):
        feature_path = os.path.join(dataset_path, seq_name, '%d.%s.%s' % (im_idx, extension, method))
        # np.load hands back a lazy archive object that keeps the file open;
        # the context manager closes it once the arrays have been read, so
        # repeated calls no longer accumulate open file handles.
        with np.load(feature_path) as aux:
            if top_k is None:
                return aux['keypoints'], aux['descriptors']
            assert ('scores' in aux)
            ids = np.argsort(aux['scores'])[-top_k:]
            return aux['keypoints'][ids, :], aux['descriptors'][ids, :]

    return read_function
def homo_trans(coord, H):
    """Apply the 3x3 homography ``H`` to an array of 2-D points.

    Args:
        coord: array with one point per row, two columns.
        H: 3x3 homography matrix.

    Returns:
        Array with the same number of rows and two columns: the projected
        points after division by the homogeneous coordinate.
    """
    ones_column = np.ones((coord.shape[0], 1))
    lifted = np.concatenate((coord, ones_column), axis=-1)  # append w = 1
    projected = np.matmul(H, lifted.T).T
    depth = projected[:, 2][..., None]
    return (projected / depth)[:, 0:2]
if __name__ == '__main__':
    # Evaluate every method, reusing a cached result file when one exists,
    # then print the accuracy at thresholds 1..3 px averaged over all pairs.
    errors = {}
    for method in methods:
        output_file = os.path.join(cache_dir, method + '.npy')
        read_function = generate_read_function(method)
        if os.path.exists(output_file):
            # NOTE(review): allow_pickle=True unpickles the cache file; only
            # load caches from a trusted source.
            errors[method] = np.load(output_file, allow_pickle=True)
        else:
            extract_method(method)
            result = benchmark_features(read_function)
            # The result mixes dicts with a list of differently sized arrays.
            # Letting np.save infer an array from that ragged sequence is
            # rejected by newer NumPy releases, so pack it explicitly into a
            # 1-D object array (same layout the cached files unpack to).
            packed = np.empty(len(result), dtype=object)
            for slot, entry in enumerate(result):
                packed[slot] = entry
            np.save(output_file, packed)
            errors[method] = result

    for name, method in zip(names, methods):
        i_err, v_err, i_err_hom, v_err_hom, _ = errors[method]

        print(f"====={name}=====")
        print("MMA@1 MMA@2 MMA@3 MHA@1 MHA@2 MHA@3: ", end='')
        for thr in range(1, 4):
            # n_i + n_v sequences, 5 image pairs each.
            err = (i_err[thr] + v_err[thr]) / ((n_i + n_v) * 5)
            print(f"{err * 100:.2f}%", end=' ')
        for thr in range(1, 4):
            err_hom = (i_err_hom[thr] + v_err_hom[thr]) / ((n_i + n_v) * 5)
            print(f"{err_hom * 100:.2f}%", end=' ')
        print('')
class HPatchesDataset(data.Dataset):
    """Sequence-level dataset over an HPatches-style directory tree.

    Every sub-directory of ``root`` is one item. An item holds the six images
    ``1.ppm`` .. ``6.ppm`` and the five homographies ``H_1_2`` .. ``H_1_6``.
    """

    def __init__(self, root: str = dataset_root, alteration: str = 'all'):
        """
        Args:
            root: dataset root path
            alteration: 'all', 'i' for illumination or 'v' for viewpoint
        """
        assert (Path(root).exists()), f"Dataset root path {root} dose not exist!"
        self.root = root

        # Never filled in this class; kept because they are public attributes.
        self.image0_list = []
        self.image1_list = []
        self.homographies = []

        self.seqs = []
        for folder in (x for x in Path(self.root).iterdir() if x.is_dir()):
            # With 'i' / 'v' keep only folders whose name starts with that
            # letter; any other value keeps every folder.
            if alteration in ('i', 'v') and folder.stem[0] != alteration:
                continue
            self.seqs.append(folder)

        self.len = len(self.seqs)
        assert (self.len > 0), f'Can not find PatchDataset in path {self.root}'

    def __getitem__(self, item):
        """Load one sequence.

        Returns:
            ``(imgs, homos, name)``: six RGB images (HxWxC), five float32
            homography matrices read from ``H_1_2`` .. ``H_1_6``, and the
            folder name.

        Raises:
            IOError: if one of the images cannot be read.
        """
        folder = self.seqs[item]

        imgs = []
        homos = []
        for i in range(1, 7):
            image_path = folder / f'{i}.ppm'
            img = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread signals failure by returning None; fail here with
                # the offending path instead of inside cvtColor.
                raise IOError(f"Can't read image {image_path}!")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # HxWxC
            imgs.append(img)

            if i != 1:
                homo = np.loadtxt(str(folder / f'H_1_{i}')).astype('float32')
                homos.append(homo)

        return imgs, homos, folder.stem

    def __len__(self):
        """Number of sequences that passed the ``alteration`` filter."""
        return self.len

    def name(self):
        """Return the class object of this dataset."""
        return self.__class__
0.5, + min_scale=1., max_scale=1., + min_size=0., max_size=99999., + image_size_max=99999, + n_k=0, sort=False): + H_, W_, three = img.shape + assert three == 3, "input image shape should be [HxWx3]" + + old_bm = torch.backends.cudnn.benchmark + torch.backends.cudnn.benchmark = False # speedup + + # ==================== image size constraint + image = deepcopy(img) + max_hw = max(H_, W_) + if max_hw > image_size_max: + ratio = float(image_size_max / max_hw) + image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio) + + # ==================== convert image to tensor + H, W, three = image.shape + image = ToTensor()(image).unsqueeze(0) + image = image.to(device) + + s = 1.0 # current scale factor + keypoints, descriptors, scores, scores_maps, descriptor_maps = [], [], [], [], [] + while s + 0.001 >= max(min_scale, min_size / max(H, W)): + if s - 0.001 <= min(max_scale, max_size / max(H, W)): + nh, nw = image.shape[2:] + + # extract descriptors + with torch.no_grad(): + descriptor_map, scores_map = model.extract_dense_map(image) + keypoints_, descriptors_, scores_, _ = model.dkd(scores_map, descriptor_map) + + keypoints.append(keypoints_[0]) + descriptors.append(descriptors_[0]) + scores.append(scores_[0]) + + s /= scale_f + + # down-scale the image for next iteration + nh, nw = round(H * s), round(W * s) + image = torch.nn.functional.interpolate(image, (nh, nw), mode='bilinear', align_corners=False) + + # restore value + torch.backends.cudnn.benchmark = old_bm + + keypoints = torch.cat(keypoints) + descriptors = torch.cat(descriptors) + scores = torch.cat(scores) + keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W_ - 1, H_ - 1]]) + + if sort or 0 < n_k < len(keypoints): + indices = torch.argsort(scores, descending=True) + keypoints = keypoints[indices] + descriptors = descriptors[indices] + scores = scores[indices] + + if 0 < n_k < len(keypoints): + keypoints = keypoints[0:n_k] + descriptors = descriptors[0:n_k] + scores = scores[0:n_k] + + return 
def extract_method(m):
    """Extract features for every image of the dataset and store them on disk.

    ``m`` is split positionally: its first seven characters select the entry
    of ``configs``; if the text from the ninth character on is ``'ms'`` the
    minimum scale passed to ``extract_multiscale`` is 0.3, otherwise 1.0.
    For image ``i`` of a sequence the keypoints, scores and descriptors are
    written with ``np.savez`` to ``<dataset_root>/<seq_name>/<i>.ppm.<m>``.
    """
    hpatches = HPatchesDataset(root=dataset_root, alteration='all')
    config_key, scale_tag = m[:7], m[8:]
    smallest_scale = 0.3 if scale_tag == 'ms' else 1.0

    network = ALike(**configs[config_key], device=device, top_k=0, scores_th=0.2, n_limit=5000)

    for imgs, homos, seq_name in tqdm(hpatches, desc='Extracting for {}'.format(m)):
        for i in range(1, 7):
            pred = extract_multiscale(network, imgs[i - 1], min_scale=smallest_scale, max_scale=1,
                                      sort=False, n_k=5000)
            target = os.path.join(dataset_root, seq_name, f'{i}.ppm.{m}')
            with open(target, 'wb') as f:
                np.savez(f,
                         keypoints=pred['keypoints'].cpu().numpy(),
                         scores=pred['scores'].cpu().numpy(),
                         descriptors=pred['descriptors'].cpu().numpy())
+ 'Color',[0.87058824300766 0.490196079015732 0]); + +% Uncomment the following line to preserve the X-limits of the axes +% xlim(axes1,[-1.1 1.1]); +% Uncomment the following line to preserve the Y-limits of the axes +ylim(axes1,[0 2.2]); +box(axes1,'on'); +hold(axes1,'off'); +% Set the remaining axes properties +set(axes1,'XColor',[0 0 0],'YColor',[0 0 0],'YTick',[0 0.5 1 1.5 2 2.5]); +% Create axes +axes2 = axes('Parent',figure1); +hold(axes2,'on'); +colororder([0.494 0.184 0.556;0.466 0.674 0.188;0.301 0.745 0.933;0.635 0.078 0.184;0 0.447 0.741;0.85 0.325 0.098;0.929 0.694 0.125]); + +% Create plot +plot(X1,Y1,'Parent',axes2,'LineWidth',1,'LineStyle',':','Color',[0 0 1]); + +% Uncomment the following line to preserve the X-limits of the axes +% xlim(axes2,[-1.1 1.1]); +% Uncomment the following line to preserve the Y-limits of the axes +ylim(axes2,[0 1.6]); +hold(axes2,'off'); +% Set the remaining axes properties +set(axes2,'Color','none','HitTest','off','XColor',[0 0 0],'YAxisLocation',... + 'right','YColor',[0 0 0],'YTick',[0 0.5 1 1.5]); +% Create textbox +annotation(figure1,'textbox',... + [0.255427607968038,0.605539475745798,0.304947448327989,0.235148519909872],... + 'Color',[0.8 0 0],... + 'String',{sprintf('peak loss=%.4f',l1)},... + 'EdgeColor','none'); + +% Create textbox +annotation(figure1,'textbox',... + [0.631790371410027,0.083530640355914,0.178879315581032,0.235148519909871],... + 'Color',[0 0 1],... + 'String',{'keypoint'},... + 'EdgeColor','none'); + +% Create textbox +annotation(figure1,'textbox',... + [0.59663112557549,0.640686239621974,0.318247136419826,0.22093023731067],... + 'Color',[0 0.498039215803146 0],... + 'String',{sprintf('peak loss=%.4f',l2)},... + 'EdgeColor','none'); + +% Create textbox +annotation(figure1,'textbox',... + [0.595423071596731,0.415858983920567,0.318247136419826,0.235148519909871],... + 'Color',[0.87058824300766 0.490196079015732 0],... + 'String',{sprintf('peak loss=%.4f',l3)},... + 'FitBoxToText','off',... 
% Toy 1-D example: evaluates the "peak loss" of three rectangular score
% profiles with respect to a point at p0 and plots them with createfigure.
clear;
close all;

% sample positions
x = -1:0.01:1;

p0 = 0.5;
p1 = -0.5;  % not used below

% distance of every sample to p0
d = abs(x - p0);

% rectangular profiles: height 2 on [-0.75,-0.25], height 2 on [0.25,0.75],
% height 1.25 on [0.1,0.9]
c0 = 2 .* (x>=-0.75 & x <= -0.25);
c1 = 2 .* (x>=0.25 & x <= 0.75);
c2 = 1.25 .* (x>=0.1 & x <= 0.9);

% distance-weighted sum of each profile, averaged over the number of samples
% (no trailing semicolon, so the values are echoed)
peak_loss0 = sum(d.*c0) / length(x)
peak_loss1 = sum(d.*c1) / length(x)
peak_loss2 = sum(d.*c2) / length(x)

createfigure(x, [c0;c1;c2], d, peak_loss0,peak_loss1, peak_loss2);
def simple_nms(scores, nms_radius: int):
    """ Fast non-maximum suppression that zeroes out points near a stronger one.

    A point is kept when it equals the maximum of its
    (2 * nms_radius + 1)-sized window. Two further rounds then admit maxima
    of what remains outside the windows of already kept points. Every other
    position of the returned tensor is zero.
    """
    assert (nms_radius >= 0)
    window = nms_radius * 2 + 1

    def dilate(t):
        return torch.nn.functional.max_pool2d(
            t, kernel_size=window, stride=1, padding=nms_radius)

    blank = torch.zeros_like(scores)
    keep = scores == dilate(scores)

    rounds_left = 2
    while rounds_left > 0:
        # everything within the window of an already kept point is suppressed
        suppressed = dilate(keep.float()) > 0
        remaining = torch.where(suppressed, blank, scores)
        fresh_peaks = remaining == dilate(remaining)
        keep = keep | (fresh_peaks & (~suppressed))
        rounds_left -= 1
    return torch.where(keep, scores, blank)
class DKD(nn.Module):
    """Keypoint detector over a dense score map with optional soft refinement.

    Candidate pixels are chosen on a detached, NMS-filtered copy of the score
    map (top-k mode or threshold mode).  With ``sub_pixel=True`` each candidate
    is then refined by a soft-argmax over its local
    ``kernel_size x kernel_size`` window, which keeps the result connected to
    ``scores_map`` in the autograd graph.
    """

    def __init__(self, radius=2, top_k=0, scores_th=0.2, n_limit=20000):
        """
        Args:
            radius: soft detection radius, kernel size is (2 * radius + 1)
            top_k: top_k > 0: return the top k keypoints (at most H*W of them)
            scores_th: used when top_k <= 0 (threshold mode):
                scores_th > 0: return keypoints with scores > scores_th
                else: return keypoints with scores > scores.mean()
            n_limit: max number of keypoints per image in threshold mode
        """
        super().__init__()
        self.radius = radius
        self.top_k = top_k
        self.scores_th = scores_th
        self.n_limit = n_limit
        self.kernel_size = 2 * self.radius + 1
        self.temperature = 0.1  # tuned temperature
        self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius)

        # Local offset grid of the detection window.
        x = torch.linspace(-self.radius, self.radius, self.kernel_size)
        # (kernel_size*kernel_size) x 2; the column swap makes each row (w, h),
        # i.e. (x offset, y offset).
        self.hw_grid = torch.stack(torch.meshgrid([x, x])).view(2, -1).t()[:, [1, 0]]

    def _select_keypoint_indices(self, nms_scores, scores_nograd):
        """Pick flat (row-major) pixel indices of the keypoints for every image.

        :param nms_scores: BxCxHxW scores after NMS and border removal
        :param scores_nograd: BxCxHxW detached raw scores
        :return: top-k mode: LongTensor of shape B x min(top_k, C*H*W);
                 threshold mode: list of B 1-D LongTensors of varying length
        """
        b = nms_scores.shape[0]

        if self.top_k > 0:
            flat_scores = nms_scores.reshape(b, -1)
            # torch.topk raises when k exceeds the dimension size, so small
            # score maps must not be asked for more entries than they have.
            k = min(self.top_k, flat_scores.shape[1])
            return torch.topk(flat_scores, k).indices

        if self.scores_th > 0:
            masks = nms_scores > self.scores_th
            # Fallback to the per-image mean only triggers when no pixel in the
            # whole batch exceeds the fixed threshold.
            if masks.sum() == 0:
                th = scores_nograd.reshape(b, -1).mean(dim=1)
                masks = nms_scores > th.reshape(b, 1, 1, 1)
        else:
            th = scores_nograd.reshape(b, -1).mean(dim=1)
            masks = nms_scores > th.reshape(b, 1, 1, 1)
        masks = masks.reshape(b, -1)

        indices_keypoints = []  # list of length B, each entry of any size
        scores_view = scores_nograd.reshape(b, -1)
        for mask, scores in zip(masks, scores_view):
            indices = mask.nonzero(as_tuple=False)[:, 0]
            if len(indices) > self.n_limit:
                # Too many candidates: keep the n_limit highest raw scores.
                kpts_sc = scores[indices]
                sort_idx = kpts_sc.sort(descending=True)[1]
                sel_idx = sort_idx[:self.n_limit]
                indices = indices[sel_idx]
            indices_keypoints.append(indices)
        return indices_keypoints

    def detect_keypoints(self, scores_map, sub_pixel=True):
        """Detect keypoints on a score map.

        :param scores_map: Bx1xHxW (see ``forward``)
        :param sub_pixel: refine positions with a soft-argmax when True
        :return: three lists of length B:
                 keypoints, each Mx2 as (x, y) normalised to -1.0 ~ 1.0;
                 score dispersities, each of size M (``None`` entries when
                 ``sub_pixel`` is False);
                 keypoint scores, each of size M, bilinearly sampled from
                 ``scores_map``
        """
        b, _, h, w = scores_map.shape
        scores_nograd = scores_map.detach()
        # NOTE(review): the NMS radius is fixed to 2 rather than self.radius
        # (the self.radius variant was commented out upstream); simple_nms is
        # defined elsewhere in this file.
        nms_scores = simple_nms(scores_nograd, 2)

        # remove border
        nms_scores[:, :, :self.radius + 1, :] = 0
        nms_scores[:, :, :, :self.radius + 1] = 0
        nms_scores[:, :, h - self.radius:, :] = 0
        nms_scores[:, :, :, w - self.radius:] = 0

        # detect keypoints without grad
        indices_keypoints = self._select_keypoint_indices(nms_scores, scores_nograd)

        keypoints = []
        scoredispersitys = []
        kptscores = []
        if sub_pixel:
            # detect soft keypoints with grad backpropagation
            patches = self.unfold(scores_map)  # B x (kernel**2) x (H*W)
            self.hw_grid = self.hw_grid.to(patches)  # match device and dtype
            for b_idx in range(b):
                patch = patches[b_idx].t()  # (H*W) x (kernel**2)
                indices_kpt = indices_keypoints[b_idx]  # 1-D vector, say of size M
                patch_scores = patch[indices_kpt]  # M x (kernel**2)

                # max is detached to prevent undesired backprop loops in the graph
                max_v = patch_scores.max(dim=1).values.detach()[:, None]
                # M x (kernel**2), in (0, 1] because the row maximum is subtracted
                x_exp = ((patch_scores - max_v) / self.temperature).exp()

                # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} }
                xy_residual = x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None]  # Soft-argmax, Mx2

                # Softmax-weighted squared distance (in units of radius)
                # between the window offsets and the soft-argmax position.
                hw_grid_dist2 = torch.norm(
                    (self.hw_grid[None, :, :] - xy_residual[:, None, :]) / self.radius,
                    dim=-1) ** 2
                scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1)

                # compute result keypoints
                keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1)  # Mx2
                keypoints_xy = keypoints_xy_nms + xy_residual
                keypoints_xy = keypoints_xy / keypoints_xy.new_tensor(
                    [w - 1, h - 1]) * 2 - 1  # (w,h) -> (-1~1,-1~1)

                kptscore = torch.nn.functional.grid_sample(
                    scores_map[b_idx].unsqueeze(0),
                    keypoints_xy.view(1, 1, -1, 2),
                    mode='bilinear', align_corners=True)[0, 0, 0, :]  # M

                keypoints.append(keypoints_xy)
                scoredispersitys.append(scoredispersity)
                kptscores.append(kptscore)
        else:
            for b_idx in range(b):
                indices_kpt = indices_keypoints[b_idx]  # 1-D vector, say of size M
                keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1)  # Mx2
                keypoints_xy = keypoints_xy_nms / keypoints_xy_nms.new_tensor(
                    [w - 1, h - 1]) * 2 - 1  # (w,h) -> (-1~1,-1~1)
                kptscore = torch.nn.functional.grid_sample(
                    scores_map[b_idx].unsqueeze(0),
                    keypoints_xy.view(1, 1, -1, 2),
                    mode='bilinear', align_corners=True)[0, 0, 0, :]  # M
                keypoints.append(keypoints_xy)
                scoredispersitys.append(None)
                kptscores.append(kptscore)

        return keypoints, scoredispersitys, kptscores

    def forward(self, scores_map, descriptor_map, sub_pixel=False):
        """
        :param scores_map: Bx1xHxW
        :param descriptor_map: BxCxHxW
        :param sub_pixel: whether to use sub-pixel keypoint detection; the same
                          flag is passed to sample_descriptor as its
                          bilinear_interp argument
        :return: keypoints, descriptors, kptscores, scoredispersitys; each is a
                 list of length B.  Keypoints are Mx2 with normalised
                 position: -1.0 ~ 1.0
        """
        keypoints, scoredispersitys, kptscores = self.detect_keypoints(scores_map,
                                                                       sub_pixel)

        descriptors = sample_descriptor(descriptor_map, keypoints, sub_pixel)

        # keypoints: list of B tensors, each M x 2
        # descriptors: list of B tensors, each M x D
        # scoredispersitys: list of B tensors of size M, or None entries
        return keypoints, descriptors, kptscores, scoredispersitys
+ exit 1 diff --git a/third_party/ASpanFormer/.gitignore b/third_party/ASpanFormer/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a4b668777112a4fbc96b1763c8da4ad91c9bcac9 --- /dev/null +++ b/third_party/ASpanFormer/.gitignore @@ -0,0 +1,32 @@ +.vscode/ +__pycache__/ +*.pyc +*.DS_Store +*.swp +*.pth +tmp.* +*/.ipynb_checkpoints/* + +logs/ +# weights/ +dump/ +demo/*.mp4 +demo/demo_images/ +src/loftr/utils/superglue.py +demo/utils.py + +demo/*.jpg +demo/*.png + +notebooks/QccDayNight.ipynb +notebooks/westlake.ipynb +assets/westlake +assets/qcc_pairs.txt +configs/.petrel* +tools/draw_QccDayNights.py + +scripts/slurm/ +scripts/sbatch_submit.sh +src/utils/client.py + +scannet_indices/ diff --git a/third_party/ASpanFormer/CODE_OF_CONDUCT.md b/third_party/ASpanFormer/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..c991377a60951acbcd7f586ebcf0184840e30e55 --- /dev/null +++ b/third_party/ASpanFormer/CODE_OF_CONDUCT.md @@ -0,0 +1,71 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the open source team at [opensource-conduct@group.apple.com](mailto:opensource-conduct@group.apple.com). All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 1.4, +available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html) \ No newline at end of file diff --git a/third_party/ASpanFormer/CONTRIBUTING.md b/third_party/ASpanFormer/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..03d1703dce5cbd70896fcb8abc0fbdc664751320 --- /dev/null +++ b/third_party/ASpanFormer/CONTRIBUTING.md @@ -0,0 +1,7 @@ +# Contribution Guide + +Thanks for your interest in contributing. This project was released to accompany a research paper for purposes of reproducibility, and beyond its publication there are limited plans for future development of the repository. + +## Before you get started + +We ask that all community members read and observe our [Code of Conduct](CODE_OF_CONDUCT.md). 
\ No newline at end of file diff --git a/third_party/ASpanFormer/LICENSE b/third_party/ASpanFormer/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..e20657c86559c67eb94e9b9269ba802de8cc9189 --- /dev/null +++ b/third_party/ASpanFormer/LICENSE @@ -0,0 +1,9 @@ +Copyright (C) 2021, 2022 Apple Inc. All Rights Reserved. + +IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") in consideration of your agreement to the following terms, and your use, installation, modification or redistribution of this Apple software constitutes acceptance of these terms. If you do not agree with these terms, please do not use, install, modify or redistribute this Apple software. + +In consideration of your agreement to abide by the following terms, and subject to these terms, Apple grants you a personal, non-commercial, non-exclusive license, under Apple's copyrights in this original Apple software (the "Apple Software"), to use, reproduce, modify and redistribute the Apple Software, with or without modifications, in source and/or binary forms for non-commercial purposes only; provided that if you redistribute the Apple Software in its entirety and without modifications, you must retain this notice and the following text and disclaimers in all such redistributions of the Apple Software. Neither the name, trademarks, service marks or logos of Apple Inc. may be used to endorse or promote products derived from the Apple Software without specific prior written permission from Apple. Except as expressly stated in this notice, no other rights or licenses, express or implied, are granted by Apple herein, including but not limited to any patent rights that may be infringed by your derivative works or by other works in which the Apple Software may be incorporated. + +The Apple Software is provided by Apple on an "AS IS" basis. 
APPLE MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS. + +IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/third_party/ASpanFormer/README.md b/third_party/ASpanFormer/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e1b788606b6acf4a1b5e0e40d07789ac8ea8ea5b --- /dev/null +++ b/third_party/ASpanFormer/README.md @@ -0,0 +1,98 @@ +# Submodule used in [hloc](https://github.com/Vincentqyw/Hierarchical-Localization) toolbox + +# ASpanFormer Implementation + +![Framework](assets/teaser.png) + +This is a PyTorch implementation of ASpanFormer for ECCV'22 [paper](https://arxiv.org/abs/2208.14201), “ASpanFormer: Detector-Free Image Matching with Adaptive Span Transformer”, and can be used to reproduce the results in the paper. + +This work focuses on detector-free image matching. We propose a hierarchical attention framework for cross-view feature update, which adaptively adjusts attention span based on region-wise matchability. + +This repo contains training, evaluation and basic demo scripts used in our paper. + +A large part of the code base is borrowed from the [LoFTR Repository](https://github.com/zju3dv/LoFTR) under its own separate license, terms and conditions. 
The authors of this software are not responsible for the contents of third-party websites. + +## Installation +```bash +conda env create -f environment.yaml +conda activate ASpanFormer +``` + +## Get started +Download model weights from [here](https://drive.google.com/file/d/1eavM9dTkw9nbc-JqlVVfGPU5UvTTfc6k/view?usp=share_link) + +Extract weights by +```bash +tar -xvf weights_aspanformer.tar +``` + +A demo to match one image pair is provided. To get a quick start, + +```bash +cd demo +python demo.py +``` + + +## Data Preparation +Please follow the [training doc](docs/TRAINING.md) for data organization + + + +## Evaluation + + +### 1. ScanNet Evaluation +```bash +cd scripts/reproduce_test +bash indoor.sh +``` +Similar results as below should be obtained, +```bash +'auc@10': 0.46640095171012563, +'auc@20': 0.6407042320049785, +'auc@5': 0.26241231577189295, +'prec@5e-04': 0.8827665604024288, +'prec_flow@2e-03': 0.810938751342228 +``` + +### 2. MegaDepth Evaluation + ```bash +cd scripts/reproduce_test +bash outdoor.sh +``` +Similar results as below should be obtained, +```bash +'auc@10': 0.7184113573584142, +'auc@20': 0.8333835724453831, +'auc@5': 0.5567622479156181, +'prec@5e-04': 0.9901741341790503, +'prec_flow@2e-03': 0.7188964321862907 +``` + + +## Training + +### 1. ScanNet Training +```bash +cd scripts/reproduce_train +bash indoor.sh +``` + +### 2. 
MegaDepth Training +```bash +cd scripts/reproduce_train +bash outdoor.sh +``` + + +If you find this project useful, please cite: + +``` +@article{chen2022aspanformer, + title={ASpanFormer: Detector-Free Image Matching with Adaptive Span Transformer}, + author={Chen, Hongkai and Luo, Zixin and Zhou, Lei and Tian, Yurun and Zhen, Mingmin and Fang, Tian and McKinnon, David and Tsin, Yanghai and Quan, Long}, + journal={European Conference on Computer Vision (ECCV)}, + year={2022} +} +``` diff --git a/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz new file mode 100644 index 0000000000000000000000000000000000000000..f4b1b79acff510aab203a8b604955dd89edffc45 --- /dev/null +++ b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d441df1d380b2ed34449b944d9f13127e695542fa275098d38a6298835672f22 +size 231253 diff --git a/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz new file mode 100644 index 0000000000000000000000000000000000000000..2b2de7bda22dc6e78e01e3f56ba1dafd46c1c581 --- /dev/null +++ b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f34b5231d04a84d84378c671dd26854869663b5eafeae2ebaf624a279325139 +size 231253 diff --git a/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz new file mode 100644 index 0000000000000000000000000000000000000000..5680f3747296a4d565dc9a95c719dce0472c7e63 --- /dev/null +++ b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ba46e6b9ec291fc7271eb9741d5c75ca04b83d3d7281e049815de9cb9024f4d9 +size 272610 diff --git a/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz new file mode 100644 index 0000000000000000000000000000000000000000..79f5a30dd0a8cd8b60263fa721a4e5ef8394801c --- /dev/null +++ b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4465da174b96deba61e5328886e4f2e687d34b890efca69e0c838736f8ae12 +size 272610 diff --git a/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz new file mode 100644 index 0000000000000000000000000000000000000000..0c1315698e217f3be3dbcc85be72fcd16477b9dd --- /dev/null +++ b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684ae10f03001917c3ca0d12d441f372ce3c7e6637bd1277a3cda60df4207fe9 +size 272610 diff --git a/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt new file mode 100644 index 0000000000000000000000000000000000000000..85a2e16722183d3fe209a9ceb60c43d8315c32cf --- /dev/null +++ b/third_party/ASpanFormer/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt @@ -0,0 +1,5 @@ +0022_0.1_0.3 +0015_0.1_0.3 +0015_0.3_0.5 +0022_0.3_0.5 +0022_0.5_0.7 \ No newline at end of file diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca687eeca4471e7bb9806059586fb23863a808a2 --- /dev/null 
+++ b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45167ac6ca1ca2e4f5b4f3b88cea886cbcedf75cdddc6cd3214b93fe5cce93ab +size 295643 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_49190386_5209386933.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_49190386_5209386933.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca220b680bb89610b0ed28b4cd45ec65ecacc5f0 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_49190386_5209386933.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999d61b530e23ab7da3605de46676d0e89a7947b239ee77e74f6acd2a427ab5c +size 381816 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg new file mode 100644 index 0000000000000000000000000000000000000000..30b481f19532e3939ebaa85fd9e14d6571f72c41 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b95c1f0c56ead99a87530f7862ca80996b6039267f44c37f7c260cab8757c26 +size 293798 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_94185272_3874562886.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_94185272_3874562886.jpg new file mode 100644 index 0000000000000000000000000000000000000000..eb928ab921ad5f9d558a1c8976e55ea826e8bbe7 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/london_bridge_94185272_3874562886.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b78b9b7e909ccf2f297265c9922ad34fa35ed580e0fc9edf376bb4e89d3f03 +size 
368048 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c417181146161214a70ae2a0be0d5f40fa8c1d5d --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a07bc272b315ff3eaa12ade6aa9a6a9b99cae34a896517695a159bfada3398 +size 469610 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg new file mode 100644 index 0000000000000000000000000000000000000000..80cc9d56ec68d59ec7870ef5f538cfc98cf9c817 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e95beadf2601a89edc69d66bb565300ed32d44498146ce02fc32f14a47f7c70 +size 457136 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_18627786_5929294590.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_18627786_5929294590.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8250dacf14805c073177e4a10c8ae96e92c2e126 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_18627786_5929294590.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421ea0ef24a6f6480afdf13e1d5483c6f40d4dc6928fd59af6943d26bafad790 +size 145430 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_43351518_2659980686.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_43351518_2659980686.jpg new 
file mode 100644 index 0000000000000000000000000000000000000000..ad666990d8cc65f6e0d76825e000b88409e43ed5 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_43351518_2659980686.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a1247908eacbb0dc9d383edc03ee83b50ea5f4779c7c006df32959770ba28a +size 506435 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_58751010_4849458397.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_58751010_4849458397.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f0fd5f68f21e54b4b4033e1d9c3b29193bab7f91 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/piazza_san_marco_58751010_4849458397.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd9e43d253516b23756339f0e82979a69f2f01fef9484c8ca1da5a8c9b3ba98 +size 601365 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/st_pauls_cathedral_30776973_2635313996.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/st_pauls_cathedral_30776973_2635313996.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c9ee7aca8caeb5bc6a22ecf0c4f789d467741079 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/st_pauls_cathedral_30776973_2635313996.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68de07942d852f81915367de73adfb5ff612646f33d5a4d523d83df5d6bbdab7 +size 531254 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/st_pauls_cathedral_37347628_10902811376.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/st_pauls_cathedral_37347628_10902811376.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1828d6e5831c63925e60cfc4e2334beb73a601b2 --- /dev/null +++ 
b/third_party/ASpanFormer/assets/phototourism_sample_images/st_pauls_cathedral_37347628_10902811376.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1e6f984286998887ccbd1c6c99632d6e97936eea185b9ee93476badacbde11 +size 646814 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b61efcbf0dc78652eae119d6e8ada4c087f9d70d --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ad1e66d7fee2f9e11766160522ad823f1fcc0ab8a5740a6c89b1765228ea32 +size 334048 diff --git a/third_party/ASpanFormer/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg b/third_party/ASpanFormer/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg new file mode 100644 index 0000000000000000000000000000000000000000..11f51edc25202ed31722422798c87f88dcb296c9 --- /dev/null +++ b/third_party/ASpanFormer/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed3a68939b922bc2362b1d8051c24d2ca03be6a431fcc7c423e157012debd5a +size 424584 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0711_00_frame-001680.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0711_00_frame-001680.jpg new file mode 100644 index 0000000000000000000000000000000000000000..352d91fbf3d08d2aef8bf75377a302419e1d5c59 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0711_00_frame-001680.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373126837fbd4c6f202dbade2e87fd310df5a98ad493069beed4809bc78c6d07 
+size 190290 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0711_00_frame-001995.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0711_00_frame-001995.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bef3f16c0403c0884cfea5423ba8ed7972f964c0 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0711_00_frame-001995.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6955a68c1f053682660c0c1f9c6ed84b76dc617199d966860c2e11edf0a0f782 +size 188834 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0713_00_frame-001320.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0713_00_frame-001320.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a52758a630c65d28f6f2bc5f95df0b2a456a8e67 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0713_00_frame-001320.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef5f58bd71b9243c5d29e5dad56541a16a206b282ab0105a75b14a49b38105e +size 194198 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0713_00_frame-002025.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0713_00_frame-002025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dbfc7200dbc2aa575f6869bbc5bf1f380872eff3 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0713_00_frame-002025.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58867c9f45092ec39343819b37e2ea7fdeae8d0a4afaa9c1e8bbef4db122a426 +size 188245 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0721_00_frame-000375.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0721_00_frame-000375.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e5fb4c244187ab2881b419a748c3af8c7b02dbc9 --- /dev/null +++ 
b/third_party/ASpanFormer/assets/scannet_sample_images/scene0721_00_frame-000375.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe34bbe584aeece49b40371c883e82377e49cb54deb78411fef2d0a8c943919 +size 255959 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0721_00_frame-002745.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0721_00_frame-002745.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2b9028997f58178252f95a6120247adab0d96cd7 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0721_00_frame-002745.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68427065749354bbcec51210d24975ee5c4edd79000f45071e7453ce91c49011 +size 255148 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0722_00_frame-000045.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0722_00_frame-000045.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e4f07218fb796a01a68721ff313660d707e40149 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0722_00_frame-000045.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5daf283a35fb1be211e91e9926d2d1fb727139fd339804852ff0216bedd217 +size 229016 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0722_00_frame-000735.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0722_00_frame-000735.jpg new file mode 100644 index 0000000000000000000000000000000000000000..72832063aeed533308643299e2264990d31f3e53 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0722_00_frame-000735.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c0f39b70a6aeb95b1646f607def5481d27ce486195a6cfce9c5e180ccdac2b +size 192257 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0726_00_frame-000135.jpg 
b/third_party/ASpanFormer/assets/scannet_sample_images/scene0726_00_frame-000135.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f089613968b0ad42fa88119c331869002538a74d --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0726_00_frame-000135.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ec3d969f7d80a239a865ac834cad1a9d28728ef5632ebbf766b0827b7fe66c +size 245104 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0726_00_frame-000210.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0726_00_frame-000210.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f07340d43409ef2e0c5b15946c0cca9f2363c44d --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0726_00_frame-000210.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8946de363045246897817ed54e30e2bf2994315549a734af966f894290f99da4 +size 209391 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0737_00_frame-000930.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0737_00_frame-000930.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7d4790ffaeeead0505a4ba64873a91c5b5769d57 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0737_00_frame-000930.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8311d78e2d2eddfb3bf6b5b6a3c9dab7b497bf4eeef2ad9def7c3b15d31040da +size 238814 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0737_00_frame-001095.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0737_00_frame-001095.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9fa7fc0a3e973b2e3f90ead2d7f4e00c2b96c5da --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0737_00_frame-001095.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6eb7668082d2f5b331e2e4a7240182f800d3d4e8cd7d641f6d78813dba463954 +size 320123 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0738_00_frame-000885.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0738_00_frame-000885.jpg new file mode 100644 index 0000000000000000000000000000000000000000..db55a757d035353bc49ac154157bdafe64fb9080 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0738_00_frame-000885.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38192f0256e15d7698b56914292028ce7645e160087f1ab1f803a953f7d64a70 +size 277514 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0738_00_frame-001065.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0738_00_frame-001065.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a61cca5f9226eb48fb82112b2aa974ebc37e7db6 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0738_00_frame-001065.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51fee9e83147b95fe6ba536b76d52081f2e3fb39cfd1d5a3754683d5bdaaf9a0 +size 266111 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0743_00_frame-000000.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0743_00_frame-000000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..39d9da4d99aa2c3a4ea47c2ddd68af11d4690067 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0743_00_frame-000000.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c9ed6ea66bba27339b663c851ab3a62e69c3b19cd36540f0db55ae6553e296c +size 531877 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0743_00_frame-001275.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0743_00_frame-001275.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..e8b5e757b0be61ff2dd2b78186279b077398f760 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0743_00_frame-001275.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da47f11f97b2c0f85d41e7948305840f0914482ba84cbcf15fdbf7b771eac3a5 +size 301332 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0744_00_frame-000585.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0744_00_frame-000585.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5985d0f8c759afd000a39d0ea2a6ff6488b6986f --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0744_00_frame-000585.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881e500d00f573bffbceb7faf571f041458b40bf8cffeb0f2d169f3af37b37c8 +size 339129 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0744_00_frame-002310.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0744_00_frame-002310.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4f10fbab7241fb5187ced07e5742038918a7b7d4 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0744_00_frame-002310.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad6c569339b1eaf043e1c025856664d18175d6f6656f2312a3aaa090db27971 +size 319981 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0747_00_frame-000000.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0747_00_frame-000000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5a82086cef0c0c912b6be5fa01c778e4a7917c36 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0747_00_frame-000000.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e277630621e1acc86c4e47d5bdf1d572af7bd77feb5750f6a99045fe5b9cc1 +size 287817 diff --git 
a/third_party/ASpanFormer/assets/scannet_sample_images/scene0747_00_frame-001530.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0747_00_frame-001530.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c61fbdc3f24850e2a32da0a66ee67e8cbb50ed98 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0747_00_frame-001530.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8071f4744379f3d75dc59fa0c1716c4501a147d252303815305560ec255a895b +size 279427 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0752_00_frame-000075.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0752_00_frame-000075.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cc436f44daecf1075fd483052827bb1402912d37 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0752_00_frame-000075.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6aa1f094cd37533405bda109573f1bf06ee8f1c1f25dbc94818eac09752d321 +size 279868 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0752_00_frame-001440.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0752_00_frame-001440.jpg new file mode 100644 index 0000000000000000000000000000000000000000..90e42bb1cddde26a96316e19e18ba809bd288162 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0752_00_frame-001440.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cff68e82a7d7c93cf8ebd8a8d658d3f6e90c3e14f87e7c4e0f1321581f305e4 +size 255363 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0755_00_frame-000120.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0755_00_frame-000120.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e2a1816ce729263c49ab3cd185928f5c977f5a7b --- /dev/null +++ 
b/third_party/ASpanFormer/assets/scannet_sample_images/scene0755_00_frame-000120.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247d99cdb6adff64c8048a0a5e19ffc6f441e4e994e03bd8b8f248de43e9dc13 +size 207851 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0755_00_frame-002055.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0755_00_frame-002055.jpg new file mode 100644 index 0000000000000000000000000000000000000000..843b610b9832d07b1c5e46379b64561ec8ac8d84 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0755_00_frame-002055.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d5c5a5e0b6014c00092ba056b62f88940e793c7bd657ca4cf405c143c9aeff +size 160356 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0758_00_frame-000165.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0758_00_frame-000165.jpg new file mode 100644 index 0000000000000000000000000000000000000000..54b90160fdf012866cbce737ad1014e47ca32100 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0758_00_frame-000165.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd77334cd42cbdd6daaaee0b155df32040221a8f56e51f527846fcfebf54d53 +size 218723 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0758_00_frame-000510.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0758_00_frame-000510.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8e992e4038e0901dc59b4507f45de683eafdacfb --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0758_00_frame-000510.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f870f406c8eaf019a6b6df888789f31a6f17f3594413c4dd413b7873e2346e +size 202939 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0768_00_frame-001095.jpg 
b/third_party/ASpanFormer/assets/scannet_sample_images/scene0768_00_frame-001095.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b7f423ebbcb227104e061758ac3cc5069a89981c --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0768_00_frame-001095.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f34afdb891dca6cde7d15e34aa840d0e1a562605ba304ed7aae3f809fb0525 +size 222502 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0768_00_frame-003435.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0768_00_frame-003435.jpg new file mode 100644 index 0000000000000000000000000000000000000000..94bcaf82e10997a0ef6d8567a80ab66d67bc7cd7 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0768_00_frame-003435.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91bf06e557c452b70e6e097b44d4d6a9d21af694d704e5623929576de4b0c093 +size 262356 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0806_00_frame-000225.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0806_00_frame-000225.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dfaaafa5ca05cb8627716bc5993fadd0131f07d6 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0806_00_frame-000225.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901e55cc1f250519a4a54cc32e9472dabafaf192933f11f402b893a5fdc0a282 +size 255317 diff --git a/third_party/ASpanFormer/assets/scannet_sample_images/scene0806_00_frame-001095.jpg b/third_party/ASpanFormer/assets/scannet_sample_images/scene0806_00_frame-001095.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8c1c103e835ce22d55869eb8ca2e39ae5c0b9c87 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_sample_images/scene0806_00_frame-001095.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:35a95e0d17f07cd705bdfa89da9ae577a7c4c1df82a7ecf97383eec41c4ad180 +size 259540 diff --git a/third_party/ASpanFormer/assets/scannet_test_1500/intrinsics.npz b/third_party/ASpanFormer/assets/scannet_test_1500/intrinsics.npz new file mode 100644 index 0000000000000000000000000000000000000000..bcba553dab19a57fcea336e69abd77ca9e87bce1 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_test_1500/intrinsics.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ac102c69e2e4e2f0ab9c0d64f4da2b815e0901630768bdfde30080ced3605c +size 23922 diff --git a/third_party/ASpanFormer/assets/scannet_test_1500/scannet_test.txt b/third_party/ASpanFormer/assets/scannet_test_1500/scannet_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..45cc7ffd9ca2fb5750ce3e545f58410674d7ab9d --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_test_1500/scannet_test.txt @@ -0,0 +1 @@ +test.npz \ No newline at end of file diff --git a/third_party/ASpanFormer/assets/scannet_test_1500/statistics.json b/third_party/ASpanFormer/assets/scannet_test_1500/statistics.json new file mode 100644 index 0000000000000000000000000000000000000000..0e3ff582943ac12711da7a392a55f0a42d3b4449 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_test_1500/statistics.json @@ -0,0 +1,102 @@ +{ + "scene0707_00": 15, + "scene0708_00": 15, + "scene0709_00": 15, + "scene0710_00": 15, + "scene0711_00": 15, + "scene0712_00": 15, + "scene0713_00": 15, + "scene0714_00": 15, + "scene0715_00": 15, + "scene0716_00": 15, + "scene0717_00": 15, + "scene0718_00": 15, + "scene0719_00": 15, + "scene0720_00": 15, + "scene0721_00": 15, + "scene0722_00": 15, + "scene0723_00": 15, + "scene0724_00": 15, + "scene0725_00": 15, + "scene0726_00": 15, + "scene0727_00": 15, + "scene0728_00": 15, + "scene0729_00": 15, + "scene0730_00": 15, + "scene0731_00": 15, + "scene0732_00": 15, + "scene0733_00": 15, + "scene0734_00": 15, + "scene0735_00": 15, + "scene0736_00": 15, + 
"scene0737_00": 15, + "scene0738_00": 15, + "scene0739_00": 15, + "scene0740_00": 15, + "scene0741_00": 15, + "scene0742_00": 15, + "scene0743_00": 15, + "scene0744_00": 15, + "scene0745_00": 15, + "scene0746_00": 15, + "scene0747_00": 15, + "scene0748_00": 15, + "scene0749_00": 15, + "scene0750_00": 15, + "scene0751_00": 15, + "scene0752_00": 15, + "scene0753_00": 15, + "scene0754_00": 15, + "scene0755_00": 15, + "scene0756_00": 15, + "scene0757_00": 15, + "scene0758_00": 15, + "scene0759_00": 15, + "scene0760_00": 15, + "scene0761_00": 15, + "scene0762_00": 15, + "scene0763_00": 15, + "scene0764_00": 15, + "scene0765_00": 15, + "scene0766_00": 15, + "scene0767_00": 15, + "scene0768_00": 15, + "scene0769_00": 15, + "scene0770_00": 15, + "scene0771_00": 15, + "scene0772_00": 15, + "scene0773_00": 15, + "scene0774_00": 15, + "scene0775_00": 15, + "scene0776_00": 15, + "scene0777_00": 15, + "scene0778_00": 15, + "scene0779_00": 15, + "scene0780_00": 15, + "scene0781_00": 15, + "scene0782_00": 15, + "scene0783_00": 15, + "scene0784_00": 15, + "scene0785_00": 15, + "scene0786_00": 15, + "scene0787_00": 15, + "scene0788_00": 15, + "scene0789_00": 15, + "scene0790_00": 15, + "scene0791_00": 15, + "scene0792_00": 15, + "scene0793_00": 15, + "scene0794_00": 15, + "scene0795_00": 15, + "scene0796_00": 15, + "scene0797_00": 15, + "scene0798_00": 15, + "scene0799_00": 15, + "scene0800_00": 15, + "scene0801_00": 15, + "scene0802_00": 15, + "scene0803_00": 15, + "scene0804_00": 15, + "scene0805_00": 15, + "scene0806_00": 15 +} \ No newline at end of file diff --git a/third_party/ASpanFormer/assets/scannet_test_1500/test.npz b/third_party/ASpanFormer/assets/scannet_test_1500/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..d2011c2913a9ae1311d18b08c089bd999ba3ad30 --- /dev/null +++ b/third_party/ASpanFormer/assets/scannet_test_1500/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b982b9c1f762e7d31af552ecc1ccf1a6add013197f74ec69c84a6deaa6f580ad +size 71687 diff --git a/third_party/ASpanFormer/assets/teaser.pdf b/third_party/ASpanFormer/assets/teaser.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9e826ee0d43982068c60528017f93481e0c7cd1e --- /dev/null +++ b/third_party/ASpanFormer/assets/teaser.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb83d72b2ff7929cb99a820620562205237147aaf5952acd9152185926c6b81 +size 2671548 diff --git a/third_party/ASpanFormer/assets/teaser.png b/third_party/ASpanFormer/assets/teaser.png new file mode 100644 index 0000000000000000000000000000000000000000..c7adcde5f6f35b2e274303dba763bab5d78f43b7 --- /dev/null +++ b/third_party/ASpanFormer/assets/teaser.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eea1427c6c092f5db0720b39f55cb15584e8b7aea11b28244f2e7f8da1d0967 +size 6957484 diff --git a/third_party/ASpanFormer/configs/aspan/indoor/aspan_test.py b/third_party/ASpanFormer/configs/aspan/indoor/aspan_test.py new file mode 100644 index 0000000000000000000000000000000000000000..fc2b44807696ec280672c8f40650fd04fa4d8a36 --- /dev/null +++ b/third_party/ASpanFormer/configs/aspan/indoor/aspan_test.py @@ -0,0 +1,10 @@ +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent / '../../../')) +from src.config.default import _CN as cfg + +cfg.ASPAN.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' + +cfg.ASPAN.MATCH_COARSE.BORDER_RM = 0 +cfg.ASPAN.COARSE.COARSEST_LEVEL= [15,20] +cfg.ASPAN.COARSE.TRAIN_RES = [480,640] diff --git a/third_party/ASpanFormer/configs/aspan/indoor/aspan_train.py b/third_party/ASpanFormer/configs/aspan/indoor/aspan_train.py new file mode 100644 index 0000000000000000000000000000000000000000..886d10d8f55533c8021bcca8395b5a2897fb8734 --- /dev/null +++ b/third_party/ASpanFormer/configs/aspan/indoor/aspan_train.py @@ -0,0 +1,11 @@ +import sys +from pathlib import Path 
+sys.path.append(str(Path(__file__).parent / '../../../')) +from src.config.default import _CN as cfg + +cfg.ASPAN.COARSE.COARSEST_LEVEL= [15,20] +cfg.ASPAN.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' + +cfg.ASPAN.MATCH_COARSE.SPARSE_SPVS = False +cfg.ASPAN.MATCH_COARSE.BORDER_RM = 0 +cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12, 17, 20, 23, 26, 29] diff --git a/third_party/ASpanFormer/configs/aspan/outdoor/aspan_test.py b/third_party/ASpanFormer/configs/aspan/outdoor/aspan_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f0b9c04cbf3f466e413b345272afe7d7fe4274ea --- /dev/null +++ b/third_party/ASpanFormer/configs/aspan/outdoor/aspan_test.py @@ -0,0 +1,21 @@ +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent / '../../../')) +from src.config.default import _CN as cfg + +cfg.ASPAN.COARSE.COARSEST_LEVEL= [36,36] +cfg.ASPAN.COARSE.TRAIN_RES = [832,832] +cfg.ASPAN.COARSE.TEST_RES = [1152,1152] +cfg.ASPAN.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' + +cfg.TRAINER.CANONICAL_LR = 8e-3 +cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs +cfg.TRAINER.WARMUP_RATIO = 0.1 +cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] + +# pose estimation +cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 + +cfg.TRAINER.OPTIMIZER = "adamw" +cfg.TRAINER.ADAMW_DECAY = 0.1 +cfg.ASPAN.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 diff --git a/third_party/ASpanFormer/configs/aspan/outdoor/aspan_train.py b/third_party/ASpanFormer/configs/aspan/outdoor/aspan_train.py new file mode 100644 index 0000000000000000000000000000000000000000..1202080b234562d8cc65d924d7cccf0336b9f7c0 --- /dev/null +++ b/third_party/ASpanFormer/configs/aspan/outdoor/aspan_train.py @@ -0,0 +1,20 @@ +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent / '../../../')) +from src.config.default import _CN as cfg + +cfg.ASPAN.COARSE.COARSEST_LEVEL= [26,26] +cfg.ASPAN.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' +cfg.ASPAN.MATCH_COARSE.SPARSE_SPVS = False + +cfg.TRAINER.CANONICAL_LR = 
8e-3 +cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs +cfg.TRAINER.WARMUP_RATIO = 0.1 +cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] + +# pose estimation +cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 + +cfg.TRAINER.OPTIMIZER = "adamw" +cfg.TRAINER.ADAMW_DECAY = 0.1 +cfg.ASPAN.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 diff --git a/third_party/ASpanFormer/configs/data/__init__.py b/third_party/ASpanFormer/configs/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/ASpanFormer/configs/data/base.py b/third_party/ASpanFormer/configs/data/base.py new file mode 100644 index 0000000000000000000000000000000000000000..03aab160fa4137ccc04380f94854a56fbb549074 --- /dev/null +++ b/third_party/ASpanFormer/configs/data/base.py @@ -0,0 +1,35 @@ +""" +The data config will be the last one merged into the main config. +Setups in data configs will override all existed setups! +""" + +from yacs.config import CfgNode as CN +_CN = CN() +_CN.DATASET = CN() +_CN.TRAINER = CN() + +# training data config +_CN.DATASET.TRAIN_DATA_ROOT = None +_CN.DATASET.TRAIN_POSE_ROOT = None +_CN.DATASET.TRAIN_NPZ_ROOT = None +_CN.DATASET.TRAIN_LIST_PATH = None +_CN.DATASET.TRAIN_INTRINSIC_PATH = None +# validation set config +_CN.DATASET.VAL_DATA_ROOT = None +_CN.DATASET.VAL_POSE_ROOT = None +_CN.DATASET.VAL_NPZ_ROOT = None +_CN.DATASET.VAL_LIST_PATH = None +_CN.DATASET.VAL_INTRINSIC_PATH = None + +# testing data config +_CN.DATASET.TEST_DATA_ROOT = None +_CN.DATASET.TEST_POSE_ROOT = None +_CN.DATASET.TEST_NPZ_ROOT = None +_CN.DATASET.TEST_LIST_PATH = None +_CN.DATASET.TEST_INTRINSIC_PATH = None + +# dataset config +_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4 +_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val + +cfg = _CN diff --git a/third_party/ASpanFormer/configs/data/debug/.gitignore b/third_party/ASpanFormer/configs/data/debug/.gitignore new file mode 100644 index 
0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/ASpanFormer/configs/data/debug/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/ASpanFormer/configs/data/megadepth_test_1500.py b/third_party/ASpanFormer/configs/data/megadepth_test_1500.py new file mode 100644 index 0000000000000000000000000000000000000000..9616432f52a693ed84f3f12b9b85470b23410eee --- /dev/null +++ b/third_party/ASpanFormer/configs/data/megadepth_test_1500.py @@ -0,0 +1,13 @@ +from configs.data.base import cfg + +TEST_BASE_PATH = "assets/megadepth_test_1500_scene_info" + +cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" +cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" +cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}" +cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/megadepth_test_1500.txt" + +cfg.DATASET.MGDPT_IMG_RESIZE = 1152 +cfg.DATASET.MGDPT_IMG_PAD=True +cfg.DATASET.MGDPT_DF =8 +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 \ No newline at end of file diff --git a/third_party/ASpanFormer/configs/data/megadepth_trainval_832.py b/third_party/ASpanFormer/configs/data/megadepth_trainval_832.py new file mode 100644 index 0000000000000000000000000000000000000000..8f9b01fdaed254e10b3d55980499b88a00060f04 --- /dev/null +++ b/third_party/ASpanFormer/configs/data/megadepth_trainval_832.py @@ -0,0 +1,22 @@ +from configs.data.base import cfg + + +TRAIN_BASE_PATH = "data/megadepth/index" +cfg.DATASET.TRAINVAL_DATA_SOURCE = "MegaDepth" +cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train" +cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7" +cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt" +cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0 + +TEST_BASE_PATH = "data/megadepth/index" +cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" +cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" +cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = 
f"{TEST_BASE_PATH}/scene_info_val_1500" +cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/trainvaltest_list/val_list.txt" +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val + +# 368 scenes in total for MegaDepth +# (with difficulty balanced (further split each scene to 3 sub-scenes)) +cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100 + +cfg.DATASET.MGDPT_IMG_RESIZE = 832 # for training on 32GB meme GPUs diff --git a/third_party/ASpanFormer/configs/data/scannet_test_1500.py b/third_party/ASpanFormer/configs/data/scannet_test_1500.py new file mode 100644 index 0000000000000000000000000000000000000000..60e560fa01d73345200aaca10961449fdf3e9fbe --- /dev/null +++ b/third_party/ASpanFormer/configs/data/scannet_test_1500.py @@ -0,0 +1,11 @@ +from configs.data.base import cfg + +TEST_BASE_PATH = "assets/scannet_test_1500" + +cfg.DATASET.TEST_DATA_SOURCE = "ScanNet" +cfg.DATASET.TEST_DATA_ROOT = "data/scannet/test" +cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}" +cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/scannet_test.txt" +cfg.DATASET.TEST_INTRINSIC_PATH = f"{TEST_BASE_PATH}/intrinsics.npz" + +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 diff --git a/third_party/ASpanFormer/configs/data/scannet_trainval.py b/third_party/ASpanFormer/configs/data/scannet_trainval.py new file mode 100644 index 0000000000000000000000000000000000000000..c38d6440e2b4ec349e5f168909c7f8c367408813 --- /dev/null +++ b/third_party/ASpanFormer/configs/data/scannet_trainval.py @@ -0,0 +1,17 @@ +from configs.data.base import cfg + + +TRAIN_BASE_PATH = "data/scannet/index" +cfg.DATASET.TRAINVAL_DATA_SOURCE = "ScanNet" +cfg.DATASET.TRAIN_DATA_ROOT = "data/scannet/train" +cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_data/train" +cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/scene_data/train_list/scannet_all.txt" +cfg.DATASET.TRAIN_INTRINSIC_PATH = f"{TRAIN_BASE_PATH}/intrinsics.npz" + +TEST_BASE_PATH = "assets/scannet_test_1500" +cfg.DATASET.TEST_DATA_SOURCE = 
"ScanNet" +cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/scannet/test" +cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = TEST_BASE_PATH +cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/scannet_test.txt" +cfg.DATASET.VAL_INTRINSIC_PATH = cfg.DATASET.TEST_INTRINSIC_PATH = f"{TEST_BASE_PATH}/intrinsics.npz" +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val diff --git a/third_party/ASpanFormer/data/megadepth/index/.gitignore b/third_party/ASpanFormer/data/megadepth/index/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/ASpanFormer/data/megadepth/index/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/ASpanFormer/data/megadepth/test/.gitignore b/third_party/ASpanFormer/data/megadepth/test/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/ASpanFormer/data/megadepth/test/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/ASpanFormer/data/megadepth/train/.gitignore b/third_party/ASpanFormer/data/megadepth/train/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/ASpanFormer/data/megadepth/train/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/ASpanFormer/data/scannet/index/.gitignore b/third_party/ASpanFormer/data/scannet/index/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/ASpanFormer/data/scannet/index/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# 
Except this file +!.gitignore diff --git a/third_party/ASpanFormer/data/scannet/test/.gitignore b/third_party/ASpanFormer/data/scannet/test/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/ASpanFormer/data/scannet/test/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/ASpanFormer/data/scannet/train/.gitignore b/third_party/ASpanFormer/data/scannet/train/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/ASpanFormer/data/scannet/train/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/ASpanFormer/demo/demo.py b/third_party/ASpanFormer/demo/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d95b10dc3166c18ad8493be7a3d36a25d8fc3b --- /dev/null +++ b/third_party/ASpanFormer/demo/demo.py @@ -0,0 +1,63 @@ +import os +import sys +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, ROOT_DIR) + +from src.ASpanFormer.aspanformer import ASpanFormer +from src.config.default import get_cfg_defaults +from src.utils.misc import lower_config +import demo_utils + +import cv2 +import torch +import numpy as np + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('--config_path', type=str, default='../configs/aspan/outdoor/aspan_test.py', + help='path for config file.') +parser.add_argument('--img0_path', type=str, default='../assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg', + help='path for image0.') +parser.add_argument('--img1_path', type=str, default='../assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg', + help='path for image1.') +parser.add_argument('--weights_path', type=str, default='../weights/outdoor.ckpt', + help='path for model 
weights.') +parser.add_argument('--long_dim0', type=int, default=1024, + help='resize for longest dim of image0.') +parser.add_argument('--long_dim1', type=int, default=1024, + help='resize for longest dim of image1.') + +args = parser.parse_args() + + +if __name__=='__main__': + config = get_cfg_defaults() + config.merge_from_file(args.config_path) + _config = lower_config(config) + matcher = ASpanFormer(config=_config['aspan']) + state_dict = torch.load(args.weights_path, map_location='cpu')['state_dict'] + matcher.load_state_dict(state_dict,strict=False) + matcher.cuda(),matcher.eval() + + img0,img1=cv2.imread(args.img0_path),cv2.imread(args.img1_path) + img0_g,img1_g=cv2.imread(args.img0_path,0),cv2.imread(args.img1_path,0) + img0,img1=demo_utils.resize(img0,args.long_dim0),demo_utils.resize(img1,args.long_dim1) + img0_g,img1_g=demo_utils.resize(img0_g,args.long_dim0),demo_utils.resize(img1_g,args.long_dim1) + data={'image0':torch.from_numpy(img0_g/255.)[None,None].cuda().float(), + 'image1':torch.from_numpy(img1_g/255.)[None,None].cuda().float()} + with torch.no_grad(): + matcher(data,online_resize=True) + corr0,corr1=data['mkpts0_f'].cpu().numpy(),data['mkpts1_f'].cpu().numpy() + + F_hat,mask_F=cv2.findFundamentalMat(corr0,corr1,method=cv2.FM_RANSAC,ransacReprojThreshold=1) + if mask_F is not None: + mask_F=mask_F[:,0].astype(bool) + else: + mask_F=np.zeros_like(corr0[:,0]).astype(bool) + + #visualize match + display=demo_utils.draw_match(img0,img1,corr0,corr1) + display_ransac=demo_utils.draw_match(img0,img1,corr0[mask_F],corr1[mask_F]) + cv2.imwrite('match.png',display) + cv2.imwrite('match_ransac.png',display_ransac) + print(len(corr1),len(corr1[mask_F])) \ No newline at end of file diff --git a/third_party/ASpanFormer/demo/demo_utils.py b/third_party/ASpanFormer/demo/demo_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a104e25d3f5ee8b7efb6cc5fa0dc27378e22c83f --- /dev/null +++ b/third_party/ASpanFormer/demo/demo_utils.py @@ 
import cv2
import numpy as np


def resize(image,long_dim):
    """Resize ``image`` so that its longer side becomes ``long_dim`` pixels.

    The aspect ratio is kept; each target side length is truncated to an int.

    Args:
        image: array whose first two axes are (height, width).
        long_dim: target length, in pixels, of the longer image side.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    h,w=image.shape[0],image.shape[1]
    longest=max(h,w)
    # cv2.resize takes the target size as (width, height).
    image=cv2.resize(image,(int(w*long_dim/longest),int(h*long_dim/longest)))
    return image


def draw_points(img,points,color=(0,255,0),radius=3):
    """Draw one circle per row of ``points`` onto ``img`` and return ``img``.

    Args:
        img: image passed to ``cv2.circle``; it is drawn on in place.
        points: 2-D array; columns 0 and 1 are used as x and y.
        color: circle color handed to ``cv2.circle``.
        radius: circle radius in pixels.
    """
    # Convert every point first so a bad coordinate fails before any drawing.
    centers = [(int(point[0]), int(point[1])) for point in points]
    for center in centers:
        cv2.circle(img, center,radius=radius,color=color)
    return img


def draw_match(img1, img2, corr1, corr2,inlier=(True,),color=None,radius1=1,radius2=1,resize=None):
    """Render the correspondences between two images side by side.

    Two rendering paths exist:

    * exactly one color (the default): ``cv2.drawMatches`` draws every match
      with that color;
    * several colors: the images are pasted next to each other on a black
      3-channel canvas and one anti-aliased line is drawn per match, using
      ``color[i]`` for match ``i``.  ``color`` therefore needs at least
      ``len(corr1)`` entries, and the images are expected to be 3-channel
      here because they are copied into a 3-channel canvas.

    Args:
        img1, img2: left and right image.
        corr1, corr2: arrays with one (x, y) row per match; row ``i`` of
            ``corr1`` corresponds to row ``i`` of ``corr2``.
        inlier: per-match truth values, only used when ``color`` is None:
            truthy entries become (0, 255, 0), falsy ones (0, 0, 255).
        color: optional sequence of colors (one entry, or one per match).
        radius1, radius2: keypoint size passed to ``cv2.KeyPoint`` for the
            left / right keypoints.
        resize: optional (width, height); both images are resized to it and
            the correspondences are rescaled accordingly.

    Returns:
        The rendered image.

    Raises:
        ValueError: if ``corr1`` and ``corr2`` differ in length.
    """
    # Validate up front with a real exception: an ``assert`` disappears under
    # ``python -O`` and previously ran only after the keypoints were built.
    if len(corr1) != len(corr2):
        raise ValueError(
            'corr1 and corr2 must have the same length, got %d and %d'
            % (len(corr1), len(corr2)))

    if resize is not None:
        # Scale factors come from the ORIGINAL image sizes, so compute them
        # before the images are replaced by their resized versions.
        scale1=[img1.shape[1]/resize[0],img1.shape[0]/resize[1]]
        scale2=[img2.shape[1]/resize[0],img2.shape[0]/resize[1]]
        img1=cv2.resize(img1, resize, interpolation=cv2.INTER_AREA)
        img2=cv2.resize(img2, resize, interpolation=cv2.INTER_AREA)
        corr1,corr2=corr1/np.asarray(scale1)[np.newaxis],corr2/np.asarray(scale2)[np.newaxis]

    # Hand built-in floats to the constructor so it receives the same
    # argument type whatever the dtype of the correspondence arrays is.
    corr1_key = [cv2.KeyPoint(float(pt[0]), float(pt[1]), radius1) for pt in corr1]
    corr2_key = [cv2.KeyPoint(float(pt[0]), float(pt[1]), radius2) for pt in corr2]

    # Match i links keypoint i on the left with keypoint i on the right.
    draw_matches = [cv2.DMatch(i, i, 0) for i in range(len(corr1))]
    if color is None:
        color = [(0, 255, 0) if cur_inlier else (0,0,255) for cur_inlier in inlier]

    if len(color)==1:
        # flags=4 is kept verbatim from the original call
        # (presumably DrawMatchesFlags DRAW_RICH_KEYPOINTS - confirm).
        display = cv2.drawMatches(img1, corr1_key, img2, corr2_key, draw_matches, None,
                                  matchColor=color[0],
                                  singlePointColor=color[0],
                                  flags=4
                                  )
    else:
        height,width=max(img1.shape[0],img2.shape[0]),img1.shape[1]+img2.shape[1]
        display=np.zeros([height,width,3],np.uint8)
        display[:img1.shape[0],:img1.shape[1]]=img1
        display[:img2.shape[0],img1.shape[1]:]=img2
        for i in range(len(corr1)):
            # Right-image x coordinates are shifted by the left image width.
            left_x,left_y=int(corr1[i][0]),int(corr1[i][1])
            right_x,right_y=int(corr2[i][0]+img1.shape[1]),int(corr2[i][1])
            cur_color=(int(color[i][0]),int(color[i][1]),int(color[i][2]))
            cv2.line(display, (left_x,left_y), (right_x,right_y),cur_color,1,lineType=cv2.LINE_AA)
    return display
newline at end of file diff --git a/third_party/ASpanFormer/docs/TRAINING.md b/third_party/ASpanFormer/docs/TRAINING.md new file mode 100644 index 0000000000000000000000000000000000000000..99238b612d961a5a6aa29885bad23808c7aa6e07 --- /dev/null +++ b/third_party/ASpanFormer/docs/TRAINING.md @@ -0,0 +1,72 @@ + +# Training ASpanFormer + +## Dataset setup +Generally, two parts of data are needed for training ASpanFormer, the original dataset, i.e., ScanNet and MegaDepth, and the offline generated dataset indices. The dataset indices store scenes, image pairs, and other metadata within each dataset used for training/validation/testing. For the MegaDepth dataset, the relative poses between images used for training are directly cached in the indexing files. However, the relative poses of ScanNet image pairs are not stored due to the enormous resulting file size. + +### Download datasets +#### MegaDepth +We use depth maps provided in the [original MegaDepth dataset](https://www.cs.cornell.edu/projects/megadepth/) as well as undistorted images, corresponding camera intrinsics and extrinsics preprocessed by [D2-Net](https://github.com/mihaidusmanu/d2-net#downloading-and-preprocessing-the-megadepth-dataset). You can download them separately from the following links. +- [MegaDepth undistorted images and processed depths](https://www.cs.cornell.edu/projects/megadepth/dataset/Megadepth_v1/MegaDepth_v1.tar.gz) + - Note that we only use depth maps. + - Path of the downloaded data will be referred to as `/path/to/megadepth` +- [D2-Net preprocessed images](https://drive.google.com/drive/folders/1hxpOsqOZefdrba_BqnW490XpNX_LgXPB) + - Images are undistorted manually in D2-Net since the undistorted images from MegaDepth do not come with corresponding intrinsics.
+ - Path of the downloaded data will be referred to as `/path/to/megadepth_d2net` + +#### ScanNet +Please set up the ScanNet dataset following [the official guide](https://github.com/ScanNet/ScanNet#scannet-data) +> NOTE: We use the [python exported data](https://github.com/ScanNet/ScanNet/tree/master/SensReader/python), +instead of the [c++ exported one](https://github.com/ScanNet/ScanNet/tree/master/SensReader/c%2B%2B). + +### Download the dataset indices + +You can download the required dataset indices from the [following link](https://drive.google.com/drive/folders/1DOcOPZb3-5cWxLqn256AhwUVjBPifhuf). +After downloading, unzip the required files. +```shell +unzip downloaded-file.zip + +# extract dataset indices +tar xf train-data/megadepth_indices.tar +tar xf train-data/scannet_indices.tar + +# extract testing data (optional) +tar xf testdata/megadepth_test_1500.tar +tar xf testdata/scannet_test_1500.tar +``` + +### Build the dataset symlinks + +We symlink the datasets to the `data` directory under the main ASpanFormer project directory. + +```shell +# scannet +# -- # train and test dataset +ln -s /path/to/scannet_train/* /path/to/ASpanFormer/data/scannet/train +ln -s /path/to/scannet_test/* /path/to/ASpanFormer/data/scannet/test +# -- # dataset indices +ln -s /path/to/scannet_indices/* /path/to/ASpanFormer/data/scannet/index + +# megadepth +# -- # train and test dataset (train and test share the same dataset) +ln -sv /path/to/megadepth/phoenix /path/to/megadepth_d2net/Undistorted_SfM /path/to/ASpanFormer/data/megadepth/train +ln -sv /path/to/megadepth/phoenix /path/to/megadepth_d2net/Undistorted_SfM /path/to/ASpanFormer/data/megadepth/test +# -- # dataset indices +ln -s /path/to/megadepth_indices/* /path/to/ASpanFormer/data/megadepth/index +``` + + +## Training +We provide training scripts of ScanNet and MegaDepth. The results in the ASpanFormer paper can be reproduced with 8 V100 GPUs. 
For a different setup, we scale the learning rate and its warm-up linearly, but the final evaluation results might vary due to the different batch size & learning rate used. Thus the reproduction of results in our paper is not guaranteed. + + +### Training on ScanNet +``` shell +scripts/reproduce_train/indoor.sh +``` + + +### Training on MegaDepth +``` shell +scripts/reproduce_train/outdoor.sh +``` \ No newline at end of file diff --git a/third_party/ASpanFormer/environment.yaml b/third_party/ASpanFormer/environment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c52328762e971c94b447198869ec0036771bf76 --- /dev/null +++ b/third_party/ASpanFormer/environment.yaml @@ -0,0 +1,12 @@ +name: ASpanFormer +channels: + - pytorch + - conda-forge + - defaults +dependencies: + - python=3.8 + - cudatoolkit=10.2 + - pytorch=1.8.1 + - pip + - pip: + - -r requirements.txt diff --git a/third_party/ASpanFormer/requirements.txt b/third_party/ASpanFormer/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..815830f7bd8115b858bf5e49e85aed4f62d3f3b0 --- /dev/null +++ b/third_party/ASpanFormer/requirements.txt @@ -0,0 +1,18 @@ +#opencv_python==4.4.0.46 +albumentations==0.5.1 --no-binary=imgaug,albumentations +ray>=1.0.1 +einops==0.3.0 +kornia==0.4.1 +loguru==0.5.3 +yacs>=0.1.8 +tqdm +autopep8 +pylint +ipython +jupyterlab +matplotlib +h5py +pytorch-lightning==1.3.5 +loguru +joblib>=1.0.1 +torchmetrics==0.4 \ No newline at end of file diff --git a/third_party/ASpanFormer/scripts/reproduce_test/indoor.sh b/third_party/ASpanFormer/scripts/reproduce_test/indoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..41e5c76a146fb84a2296f7fc63e6da881c0c8e03 --- /dev/null +++ b/third_party/ASpanFormer/scripts/reproduce_test/indoor.sh @@ -0,0 +1,31 @@ +#!/bin/bash -l +# a indoor_ds model with the pos_enc impl bug fixed. 
+ +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +data_cfg_path="configs/data/scannet_test_1500.py" +main_cfg_path="configs/aspan/indoor/aspan_test.py" +ckpt_path='weights/indoor.ckpt' +dump_dir="dump/indoor_dump" +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +python -u ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --mode integrated + \ No newline at end of file diff --git a/third_party/ASpanFormer/scripts/reproduce_test/outdoor.sh b/third_party/ASpanFormer/scripts/reproduce_test/outdoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..817fe50b47f52dfa3f9b2d664f415527a7a9ea6d --- /dev/null +++ b/third_party/ASpanFormer/scripts/reproduce_test/outdoor.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l + +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +data_cfg_path="configs/data/megadepth_test_1500.py" +main_cfg_path="configs/aspan/outdoor/aspan_test.py" +ckpt_path="weights/outdoor.ckpt" +dump_dir="dump/outdoor_dump" +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +python -u ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ 
+ --mode integrated + \ No newline at end of file diff --git a/third_party/ASpanFormer/scripts/reproduce_train/indoor.sh b/third_party/ASpanFormer/scripts/reproduce_train/indoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..705723bf14a6e6fbe949df64bbc3a68a9159e659 --- /dev/null +++ b/third_party/ASpanFormer/scripts/reproduce_train/indoor.sh @@ -0,0 +1,34 @@ +#!/bin/bash -l + +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +data_cfg_path="configs/data/scannet_trainval.py" +main_cfg_path="configs/aspan/indoor/aspan_train.py" + +n_nodes=1 +n_gpus_per_node=8 +torch_num_workers=36 +batch_size=3 +pin_memory=true +exp_name="indoor-ds-bs-aspan-bs=$(($n_gpus_per_node * $batch_size))" + +CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' python -u ./train.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --exp_name=${exp_name} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ + --check_val_every_n_epoch=1 \ + --log_every_n_steps=100 \ + --flush_logs_every_n_steps=100 \ + --limit_val_batches=1. 
\ + --num_sanity_val_steps=10 \ + --benchmark=True \ + --max_epochs=30 \ + --parallel_load_data \ + --mode integrated \ No newline at end of file diff --git a/third_party/ASpanFormer/scripts/reproduce_train/outdoor.sh b/third_party/ASpanFormer/scripts/reproduce_train/outdoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..c447e8feaa5c7ef7ff74da3b622151c7018447a6 --- /dev/null +++ b/third_party/ASpanFormer/scripts/reproduce_train/outdoor.sh @@ -0,0 +1,34 @@ +#!/bin/bash -l + +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +TRAIN_IMG_SIZE=832 +data_cfg_path="configs/data/megadepth_trainval_${TRAIN_IMG_SIZE}.py" +main_cfg_path="configs/aspan/outdoor/aspan_train.py" + +n_nodes=1 +n_gpus_per_node=8 +torch_num_workers=8 +batch_size=1 +pin_memory=true +exp_name="outdoor-ds-aspan-${TRAIN_IMG_SIZE}-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))" + +CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7' python -u ./train.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --exp_name=${exp_name} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ + --check_val_every_n_epoch=1 \ + --log_every_n_steps=100 \ + --flush_logs_every_n_steps=100 \ + --limit_val_batches=1. 
\ + --num_sanity_val_steps=10 \ + --benchmark=True \ + --max_epochs=30 \ + --mode integrated diff --git a/third_party/ASpanFormer/src/ASpanFormer/__init__.py b/third_party/ASpanFormer/src/ASpanFormer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfd5a901e83c7e8d3b439f21afa20ac8237635e --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/__init__.py @@ -0,0 +1,2 @@ +from .aspanformer import LocalFeatureTransformer_Flow +from .utils.cvpr_ds_config import default_cfg diff --git a/third_party/ASpanFormer/src/ASpanFormer/aspan_module/__init__.py b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dff6704976cbe9e916c6de6af9e3b755dfbd20bf --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/__init__.py @@ -0,0 +1,3 @@ +from .transformer import LocalFeatureTransformer_Flow +from .loftr import LocalFeatureTransformer +from .fine_preprocess import FinePreprocess diff --git a/third_party/ASpanFormer/src/ASpanFormer/aspan_module/attention.py b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..632dd22077806d2b53f66a09d0567925a30d1523 --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/attention.py @@ -0,0 +1,198 @@ +import torch +from torch.nn import Module +import torch.nn as nn +from itertools import product +from torch.nn import functional as F + +class layernorm2d(nn.Module): + + def __init__(self,dim) : + super().__init__() + self.dim=dim + self.affine=nn.parameter.Parameter(torch.ones(dim), requires_grad=True) + self.bias=nn.parameter.Parameter(torch.zeros(dim), requires_grad=True) + + def forward(self,x): + #x: B*C*H*W + mean,std=x.mean(dim=1,keepdim=True),x.std(dim=1,keepdim=True) + return self.affine[None,:,None,None]*(x-mean)/(std+1e-6)+self.bias[None,:,None,None] + + +class HierachicalAttention(Module): + def 
class HierachicalAttention(Module):
    """Three-level attention guided by a predicted flow field.

    The coarsest level runs global ``FullAttention`` on pooled feature maps.
    Each finer level splits the queries into ``nsample[0] x nsample[0]``
    groups; every group attends to ``nsample[1] ** 2`` key/value tokens
    sampled on a grid centred at the pooled flow offset, with the grid spacing
    scaled by the pooled flow variance. The per-level messages are upsampled
    to the query resolution, concatenated and fused by ``merge_head``.
    """

    def __init__(self, d_model, nhead, nsample, radius_scale, nlevel=3):
        """
        Args:
            d_model: channel width of queries, keys and values.
            nhead: number of attention heads (``d_model`` is split across them).
            nsample: pair ``(group_size, span_size)``; see the class docstring.
            radius_scale: multiplier applied to the decoded flow variance.
            nlevel: number of levels. ``merge_head`` expects ``3 * d_model``
                input channels and ``forward`` builds three pooling levels,
                so the wiring matches ``nlevel == 3``.
        """
        super().__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.nsample = nsample
        self.nlevel = nlevel
        self.radius_scale = radius_scale
        self.merge_head = nn.Sequential(
            nn.Conv1d(d_model * 3, d_model, kernel_size=1, bias=False),
            nn.ReLU(True),
            nn.Conv1d(d_model, d_model, kernel_size=1, bias=False),
        )
        self.fullattention = FullAttention(d_model, nhead)
        self.temp = nn.parameter.Parameter(torch.tensor(1.), requires_grad=True)
        # Regular span_size x span_size grid of offsets centred on zero: r^2*2.
        sample_offset = torch.tensor(
            [[pos[0] - nsample[1] / 2 + 0.5, pos[1] - nsample[1] / 2 + 0.5]
             for pos in product(range(nsample[1]), range(nsample[1]))])
        self.sample_offset = nn.parameter.Parameter(sample_offset, requires_grad=False)

    def forward(self, query, key, value, flow, size_q, size_kv,
                mask0=None, mask1=None, ds0=(4, 4), ds1=(4, 4)):
        """
        Args:
            query (torch.Tensor): viewable as [B, C, h0, w0], e.g. [B, C, h0*w0]
            key, value (torch.Tensor): viewable as [B, C, h1, w1]
            flow (torch.Tensor): [B, h0, w0, 4]; channels ``:2`` are the (x, y)
                offsets, channels ``2:`` are mapped through ``exp(0.5 * .)``
                and scaled by ``radius_scale`` to give the sampling span
            size_q: (h0, w0)
            size_kv: (h1, w1)
            mask0, mask1 (torch.Tensor): optional float masks viewable as
                [B, 1, h0, w0] / [B, 1, h1, w1]; masking is applied only when
                ``mask0`` is given, in which case ``mask1`` is required too
            ds0, ds1: pooling factors (h, w) of the coarsest level
        Return:
            all_message (torch.Tensor): [B, C, h0, w0]
        """
        variance = flow[:, :, :, 2:]
        offset = flow[:, :, :, :2]  # B*H*W*2
        bs = query.shape[0]
        h0, w0 = size_q[0], size_q[1]
        h1, w1 = size_kv[0], size_kv[1]
        variance = torch.exp(0.5 * variance) * self.radius_scale  # b*h*w*2 (pixel scale)
        span_scale = torch.clamp(variance * 2 / self.nsample[1], min=1)  # b*h*w*2

        sub_sample0, sub_sample1 = [ds0, 2, 1], [ds1, 2, 1]
        q_map = query.view(bs, -1, h0, w0)
        k_map = key.view(bs, -1, h1, w1)
        v_map = value.view(bs, -1, h1, w1)
        q_list = [F.avg_pool2d(q_map, kernel_size=sub, stride=sub) for sub in sub_sample0]
        k_list = [F.avg_pool2d(k_map, kernel_size=sub, stride=sub) for sub in sub_sample1]
        v_list = [F.avg_pool2d(v_map, kernel_size=sub, stride=sub) for sub in sub_sample1]  # n_level

        # One offset / span per query group at each fine level (n_level - 1 entries).
        # Offsets are divided by the level stride to express them in that
        # level's pixel units.
        offset_list = []
        span_list = []
        for sub in sub_sample0[1:]:
            window = sub * self.nsample[0]
            pooled_offset = F.avg_pool2d(offset.permute(0, 3, 1, 2), kernel_size=window, stride=window)
            offset_list.append(pooled_offset.permute(0, 2, 3, 1) / sub)
            pooled_span = F.avg_pool2d(span_scale.permute(0, 3, 1, 2), kernel_size=window, stride=window)
            span_list.append(pooled_span.permute(0, 2, 3, 1))

        if mask0 is not None:
            mask0, mask1 = mask0.view(bs, 1, h0, w0), mask1.view(bs, 1, h1, w1)
            # -max_pool(-x) is a min-pool: a pooled cell is valid only if all
            # of its source cells are valid.
            mask0_list = [-F.max_pool2d(-mask0, kernel_size=sub, stride=sub) for sub in sub_sample0]
            mask1_list = [-F.max_pool2d(-mask1, kernel_size=sub, stride=sub) for sub in sub_sample1]
        else:
            mask0_list = mask1_list = [None] * len(sub_sample0)

        message_list = []
        # full attention at coarse scale
        mask0_flatten = mask0_list[0].view(bs, -1) if mask0_list[0] is not None else None
        mask1_flatten = mask1_list[0].view(bs, -1) if mask1_list[0] is not None else None
        coarse_msg = self.fullattention(q_list[0], k_list[0], v_list[0],
                                        mask0_flatten, mask1_flatten, self.temp)
        message_list.append(coarse_msg.view(bs, self.d_model, h0 // ds0[0], w0 // ds0[1]))

        for index in range(1, self.nlevel):
            q, k, v = q_list[index], k_list[index], v_list[index]
            s, o = span_list[index - 1], offset_list[index - 1]  # B*h*w(*2)
            # NOTE(review): the query-side mask is the one sampled for the
            # keys/values here, exactly as before; confirm whether the
            # key/value mask (mask1_list) was intended.
            q, k, v, sample_pixel, mask_sample = self.partition_token(
                q, k, v, o, s, mask0_list[index])  # B*Head*D*G*N (G*N=H*W for q)
            level_msg = self.group_attention(q, k, v, 1, mask_sample)
            message_list.append(level_msg.view(
                bs, self.d_model, h0 // sub_sample0[index], w0 // sub_sample0[index]))

        # fuse: bring every level back to the query resolution and concatenate
        all_message = torch.cat(
            [F.interpolate(message_list[idx], scale_factor=sub_sample0[idx], mode='nearest')
             for idx in range(self.nlevel)], dim=1).view(bs, -1, h0 * w0)  # b*3d*(H*W)

        all_message = self.merge_head(all_message).view(bs, -1, h0, w0)  # b*d*H*W
        return all_message

    def partition_token(self, q, k, v, offset, span_scale, maskv):
        """Group the queries and sample the keys/values each group attends to.

        Args:
            q, k, v: [B, C, H, W] maps of one level
            offset: [B, H/g, W/g, 2] sampling centre per query group (x, y)
            span_scale: [B, H/g, W/g, 2] grid spacing per query group
            maskv: optional mask viewable as [B, 1, H, W] (H, W of ``q``)
        Returns:
            q: [B, head, D, G, g*g], k and v: [B, head, D, G, r*r],
            sample_pixel: [B, G, r*r, 2], mask_sample: bool [B, 1, G, r*r] or None
            (g = nsample[0], r = nsample[1])
        """
        bs = q.shape[0]
        h, w = q.shape[2], q.shape[3]
        hk, wk = k.shape[2], k.shape[3]
        offset = offset.view(bs, -1, 2)  # B*G*2
        span_scale = span_scale.view(bs, -1, 1, 2)
        offset_sample = self.sample_offset[None, None] * span_scale
        sample_pixel = offset[:, :, None] + offset_sample  # B*G*r^2*2
        # Map pixel coordinates to grid_sample's [-1, 1] range. The normaliser
        # is created on the device of the coordinates instead of a hard-coded
        # .cuda(), so CPU and non-default GPUs work as well.
        half_extent = torch.tensor([wk / 2, hk / 2], device=sample_pixel.device)
        sample_norm = sample_pixel / half_extent[None, None, None] - 1

        group = self.nsample[0]
        q = q.view(bs, -1, h // group, group, w // group, group) \
            .permute(0, 1, 2, 4, 3, 5).contiguous() \
            .view(bs, self.nhead, self.d_model // self.nhead, -1, group ** 2)  # B*head*D*G*N
        # sample token; align_corners=False is the library default, spelled
        # out to pin the behaviour and avoid the default-changed warning
        head_dim = self.d_model // self.nhead
        span = self.nsample[1] ** 2
        k = F.grid_sample(k, grid=sample_norm, align_corners=False) \
            .view(bs, self.nhead, head_dim, -1, span)  # B*head*D*G*r^2
        v = F.grid_sample(v, grid=sample_norm, align_corners=False) \
            .view(bs, self.nhead, head_dim, -1, span)  # B*head*D*G*r^2
        if maskv is not None:
            mask_sample = F.grid_sample(maskv.view(bs, -1, h, w).float(), grid=sample_norm,
                                        mode='nearest', align_corners=False) == 1  # B*1*G*r^2
        else:
            mask_sample = None
        return q, k, v, sample_pixel, mask_sample

    def group_attention(self, query, key, value, temp, mask_sample=None):
        """Softmax attention inside each query group.

        Args:
            query: [B, head, D, G, N]
            key, value: [B, head, D, G, M]
            temp: temperature multiplied into the 1/sqrt(D) scaling
            mask_sample: optional bool [B, 1, G, M]; False entries are masked
        Returns:
            [B, d_model, G*N]
        """
        bs = query.shape[0]
        QK = torch.einsum("bhdgn,bhdgm->bhgnm", query, key)
        if mask_sample is not None:
            num_head, number_n = QK.shape[1], QK.shape[3]
            keep = mask_sample[:, :, :, None].expand(-1, num_head, -1, number_n, -1).bool()
            QK.masked_fill_(~keep, float(-1e8))
        # Compute the attention and the weighted average
        softmax_temp = temp / query.size(2) ** .5  # sqrt(D)
        A = torch.softmax(softmax_temp * QK, dim=-1)
        queried_values = torch.einsum("bhgnm,bhdgm->bhdgn", A, value) \
            .contiguous().view(bs, self.d_model, -1)
        return queried_values


class FullAttention(Module):
    """Multi-head scaled dot-product attention over channel-first tensors."""

    def __init__(self, d_model, nhead):
        super().__init__()
        self.d_model = d_model
        self.nhead = nhead

    def forward(self, q, k, v, mask0=None, mask1=None, temp=1):
        """ Multi-head scaled dot-product attention, a.k.a full attention.
        Args:
            q: viewable as [N, nhead, D/nhead, L], e.g. [N, D, L]
            k, v: viewable as [N, nhead, D/nhead, S], e.g. [N, D, S]
            mask0: [N, L] (optional); when given, mask1 [N, S] is required too
            temp: temperature multiplied into the 1/sqrt(head_dim) scaling
        Returns:
            msg: [N, D, L]
        """
        bs = q.shape[0]
        head_dim = self.d_model // self.nhead
        q = q.view(bs, self.nhead, head_dim, -1)
        k = k.view(bs, self.nhead, head_dim, -1)
        v = v.view(bs, self.nhead, head_dim, -1)
        # Compute the unnormalized attention and apply the masks
        QK = torch.einsum("nhdl,nhds->nhls", q, k)
        if mask0 is not None:
            valid = (mask0[:, None, :, None] * mask1[:, None, None]).bool()
            QK.masked_fill_(~valid, float(-1e8))
        # Compute the attention and the weighted average
        softmax_temp = temp / q.size(2) ** .5  # sqrt(D)
        A = torch.softmax(softmax_temp * QK, dim=-1)
        queried_values = torch.einsum("nhls,nhds->nhdl", A, v) \
            .contiguous().view(bs, self.d_model, -1)
        return queried_values


def elu_feature_map(x):
    """Positive feature map ``elu(x) + 1`` used by linear attention."""
    return F.elu(x) + 1


class LinearAttention(Module):
    """Linear-complexity attention using the ``elu(x) + 1`` feature map."""

    def __init__(self, eps=1e-6):
        super().__init__()
        self.feature_map = elu_feature_map
        self.eps = eps

    def forward(self, queries, keys, values, q_mask=None, kv_mask=None):
        """ Multi-Head linear attention proposed in "Transformers are RNNs"
        Args:
            queries: [N, L, H, D]
            keys: [N, S, H, D]
            values: [N, S, H, D]
            q_mask: [N, L]
            kv_mask: [N, S]
        Returns:
            queried_values: (N, L, H, D)
        """
        Q = self.feature_map(queries)
        K = self.feature_map(keys)

        # set padded position to zero
        if q_mask is not None:
            Q = Q * q_mask[:, :, None, None]
        if kv_mask is not None:
            K = K * kv_mask[:, :, None, None]
            values = values * kv_mask[:, :, None, None]

        v_length = values.size(1)
        values = values / v_length  # prevent fp16 overflow
        KV = torch.einsum("nshd,nshv->nhdv", K, values)  # (S,D)' @ S,V
        Z = 1 / (torch.einsum("nlhd,nhd->nlh", Q, K.sum(dim=1)) + self.eps)
        # multiply v_length back in to undo the pre-scaling above
        queried_values = torch.einsum("nlhd,nhdv,nlh->nlhv", Q, KV, Z) * v_length

        return queried_values.contiguous()
b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/fine_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..5bb8eefd362240a9901a335f0e6e07770ff04567 --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/fine_preprocess.py @@ -0,0 +1,59 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops.einops import rearrange, repeat + + +class FinePreprocess(nn.Module): + def __init__(self, config): + super().__init__() + + self.config = config + self.cat_c_feat = config['fine_concat_coarse_feat'] + self.W = self.config['fine_window_size'] + + d_model_c = self.config['coarse']['d_model'] + d_model_f = self.config['fine']['d_model'] + self.d_model_f = d_model_f + if self.cat_c_feat: + self.down_proj = nn.Linear(d_model_c, d_model_f, bias=True) + self.merge_feat = nn.Linear(2*d_model_f, d_model_f, bias=True) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.kaiming_normal_(p, mode="fan_out", nonlinearity="relu") + + def forward(self, feat_f0, feat_f1, feat_c0, feat_c1, data): + W = self.W + stride = data['hw0_f'][0] // data['hw0_c'][0] + + data.update({'W': W}) + if data['b_ids'].shape[0] == 0: + feat0 = torch.empty(0, self.W**2, self.d_model_f, device=feat_f0.device) + feat1 = torch.empty(0, self.W**2, self.d_model_f, device=feat_f0.device) + return feat0, feat1 + + # 1. unfold(crop) all local windows + feat_f0_unfold = F.unfold(feat_f0, kernel_size=(W, W), stride=stride, padding=W//2) + feat_f0_unfold = rearrange(feat_f0_unfold, 'n (c ww) l -> n l ww c', ww=W**2) + feat_f1_unfold = F.unfold(feat_f1, kernel_size=(W, W), stride=stride, padding=W//2) + feat_f1_unfold = rearrange(feat_f1_unfold, 'n (c ww) l -> n l ww c', ww=W**2) + + # 2. 
select only the predicted matches + feat_f0_unfold = feat_f0_unfold[data['b_ids'], data['i_ids']] # [n, ww, cf] + feat_f1_unfold = feat_f1_unfold[data['b_ids'], data['j_ids']] + + # option: use coarse-level loftr feature as context: concat and linear + if self.cat_c_feat: + feat_c_win = self.down_proj(torch.cat([feat_c0[data['b_ids'], data['i_ids']], + feat_c1[data['b_ids'], data['j_ids']]], 0)) # [2n, c] + feat_cf_win = self.merge_feat(torch.cat([ + torch.cat([feat_f0_unfold, feat_f1_unfold], 0), # [2n, ww, cf] + repeat(feat_c_win, 'n c -> n ww c', ww=W**2), # [2n, ww, cf] + ], -1)) + feat_f0_unfold, feat_f1_unfold = torch.chunk(feat_cf_win, 2, dim=0) + + return feat_f0_unfold, feat_f1_unfold diff --git a/third_party/ASpanFormer/src/ASpanFormer/aspan_module/loftr.py b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/loftr.py new file mode 100644 index 0000000000000000000000000000000000000000..7dcebaa7beee978b9b8abcec8bb1bd2cc6b60870 --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/loftr.py @@ -0,0 +1,112 @@ +import copy +import torch +import torch.nn as nn +from .attention import LinearAttention + +class LoFTREncoderLayer(nn.Module): + def __init__(self, + d_model, + nhead, + attention='linear'): + super(LoFTREncoderLayer, self).__init__() + + self.dim = d_model // nhead + self.nhead = nhead + + # multi-head attention + self.q_proj = nn.Linear(d_model, d_model, bias=False) + self.k_proj = nn.Linear(d_model, d_model, bias=False) + self.v_proj = nn.Linear(d_model, d_model, bias=False) + self.attention = LinearAttention() + self.merge = nn.Linear(d_model, d_model, bias=False) + + # feed-forward network + self.mlp = nn.Sequential( + nn.Linear(d_model*2, d_model*2, bias=False), + nn.ReLU(True), + nn.Linear(d_model*2, d_model, bias=False), + ) + + # norm and dropout + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + + def forward(self, x, source, x_mask=None, source_mask=None, type=None, index=0): + """ + Args: + x 
(torch.Tensor): [N, L, C] + source (torch.Tensor): [N, S, C] + x_mask (torch.Tensor): [N, L] (optional) + source_mask (torch.Tensor): [N, S] (optional) + """ + bs = x.size(0) + query, key, value = x, source, source + + # multi-head attention + query = self.q_proj(query).view( + bs, -1, self.nhead, self.dim) # [N, L, (H, D)] + key = self.k_proj(key).view(bs, -1, self.nhead, + self.dim) # [N, S, (H, D)] + value = self.v_proj(value).view(bs, -1, self.nhead, self.dim) + + message = self.attention( + query, key, value, q_mask=x_mask, kv_mask=source_mask) # [N, L, (H, D)] + message = self.merge(message.view( + bs, -1, self.nhead*self.dim)) # [N, L, C] + message = self.norm1(message) + + # feed-forward network + message = self.mlp(torch.cat([x, message], dim=2)) + message = self.norm2(message) + + return x + message + + +class LocalFeatureTransformer(nn.Module): + """A Local Feature Transformer (LoFTR) module.""" + + def __init__(self, config): + super(LocalFeatureTransformer, self).__init__() + + self.config = config + self.d_model = config['d_model'] + self.nhead = config['nhead'] + self.layer_names = config['layer_names'] + encoder_layer = LoFTREncoderLayer( + config['d_model'], config['nhead'], config['attention']) + self.layers = nn.ModuleList( + [copy.deepcopy(encoder_layer) for _ in range(len(self.layer_names))]) + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def forward(self, feat0, feat1, mask0=None, mask1=None): + """ + Args: + feat0 (torch.Tensor): [N, L, C] + feat1 (torch.Tensor): [N, S, C] + mask0 (torch.Tensor): [N, L] (optional) + mask1 (torch.Tensor): [N, S] (optional) + """ + + assert self.d_model == feat0.size( + 2), "the feature number of src and transformer must be equal" + + index = 0 + for layer, name in zip(self.layers, self.layer_names): + if name == 'self': + feat0 = layer(feat0, feat0, mask0, mask0, + type='self', index=index) + feat1 = layer(feat1, 
feat1, mask1, mask1) + elif name == 'cross': + feat0 = layer(feat0, feat1, mask0, mask1) + feat1 = layer(feat1, feat0, mask1, mask0, + type='cross', index=index) + index += 1 + else: + raise KeyError + return feat0, feat1 + diff --git a/third_party/ASpanFormer/src/ASpanFormer/aspan_module/transformer.py b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..c398f770833bf2066cda60a7ff546ec29640d433 --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/aspan_module/transformer.py @@ -0,0 +1,244 @@ +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +from .attention import FullAttention, HierachicalAttention ,layernorm2d + + +class messageLayer_ini(nn.Module): + + def __init__(self, d_model, d_flow,d_value, nhead): + super().__init__() + super(messageLayer_ini, self).__init__() + + self.d_model = d_model + self.d_flow = d_flow + self.d_value=d_value + self.nhead = nhead + self.attention = FullAttention(d_model,nhead) + + self.q_proj = nn.Conv1d(d_model, d_model, kernel_size=1,bias=False) + self.k_proj = nn.Conv1d(d_model, d_model, kernel_size=1,bias=False) + self.v_proj = nn.Conv1d(d_value, d_model, kernel_size=1,bias=False) + self.merge_head=nn.Conv1d(d_model,d_model,kernel_size=1,bias=False) + + self.merge_f= self.merge_f = nn.Sequential( + nn.Conv2d(d_model*2, d_model*2, kernel_size=1, bias=False), + nn.ReLU(True), + nn.Conv2d(d_model*2, d_model, kernel_size=1, bias=False), + ) + + self.norm1 = layernorm2d(d_model) + self.norm2 = layernorm2d(d_model) + + + def forward(self, x0, x1,pos0,pos1,mask0=None,mask1=None): + #x1,x2: b*d*L + x0,x1=self.update(x0,x1,pos1,mask0,mask1),\ + self.update(x1,x0,pos0,mask1,mask0) + return x0,x1 + + + def update(self,f0,f1,pos1,mask0,mask1): + """ + Args: + f0: [N, D, H, W] + f1: [N, D, H, W] + Returns: + f0_new: (N, d, h, w) + """ + bs,h,w=f0.shape[0],f0.shape[2],f0.shape[3] + + 
f0_flatten,f1_flatten=f0.view(bs,self.d_model,-1),f1.view(bs,self.d_model,-1) + pos1_flatten=pos1.view(bs,self.d_value-self.d_model,-1) + f1_flatten_v=torch.cat([f1_flatten,pos1_flatten],dim=1) + + queries,keys=self.q_proj(f0_flatten),self.k_proj(f1_flatten) + values=self.v_proj(f1_flatten_v).view(bs,self.nhead,self.d_model//self.nhead,-1) + + queried_values=self.attention(queries,keys,values,mask0,mask1) + msg=self.merge_head(queried_values).view(bs,-1,h,w) + msg=self.norm2(self.merge_f(torch.cat([f0,self.norm1(msg)],dim=1))) + return f0+msg + + + +class messageLayer_gla(nn.Module): + + def __init__(self,d_model,d_flow,d_value, + nhead,radius_scale,nsample,update_flow=True): + super().__init__() + self.d_model = d_model + self.d_flow=d_flow + self.d_value=d_value + self.nhead = nhead + self.radius_scale=radius_scale + self.update_flow=update_flow + self.flow_decoder=nn.Sequential( + nn.Conv1d(d_flow, d_flow//2, kernel_size=1, bias=False), + nn.ReLU(True), + nn.Conv1d(d_flow//2, 4, kernel_size=1, bias=False)) + self.attention=HierachicalAttention(d_model,nhead,nsample,radius_scale) + + self.q_proj = nn.Conv1d(d_model, d_model, kernel_size=1,bias=False) + self.k_proj = nn.Conv1d(d_model, d_model, kernel_size=1,bias=False) + self.v_proj = nn.Conv1d(d_value, d_model, kernel_size=1,bias=False) + + d_extra=d_flow if update_flow else 0 + self.merge_f=nn.Sequential( + nn.Conv2d(d_model*2+d_extra, d_model+d_flow, kernel_size=1, bias=False), + nn.ReLU(True), + nn.Conv2d(d_model+d_flow, d_model+d_extra, kernel_size=3,padding=1, bias=False), + ) + self.norm1 = layernorm2d(d_model) + self.norm2 = layernorm2d(d_model+d_extra) + + def forward(self, x0, x1, flow_feature0,flow_feature1,pos0,pos1,mask0=None,mask1=None,ds0=[4,4],ds1=[4,4]): + """ + Args: + x0 (torch.Tensor): [B, C, H, W] + x1 (torch.Tensor): [B, C, H, W] + flow_feature0 (torch.Tensor): [B, C', H, W] + flow_feature1 (torch.Tensor): [B, C', H, W] + """ + 
flow0,flow1=self.decode_flow(flow_feature0,flow_feature1.shape[2:]),self.decode_flow(flow_feature1,flow_feature0.shape[2:]) + x0_new,flow_feature0_new=self.update(x0,x1,flow0.detach(),flow_feature0,pos1,mask0,mask1,ds0,ds1) + x1_new,flow_feature1_new=self.update(x1,x0,flow1.detach(),flow_feature1,pos0,mask1,mask0,ds1,ds0) + return x0_new,x1_new,flow_feature0_new,flow_feature1_new,flow0,flow1 + + def update(self,x0,x1,flow0,flow_feature0,pos1,mask0,mask1,ds0,ds1): + bs=x0.shape[0] + queries,keys=self.q_proj(x0.view(bs,self.d_model,-1)),self.k_proj(x1.view(bs,self.d_model,-1)) + x1_pos=torch.cat([x1,pos1],dim=1) + values=self.v_proj(x1_pos.view(bs,self.d_value,-1)) + msg=self.attention(queries,keys,values,flow0,x0.shape[2:],x1.shape[2:],mask0,mask1,ds0,ds1) + + if self.update_flow: + update_feature=torch.cat([x0,flow_feature0],dim=1) + else: + update_feature=x0 + msg=self.norm2(self.merge_f(torch.cat([update_feature,self.norm1(msg)],dim=1))) + update_feature=update_feature+msg + + x0_new,flow_feature0_new=update_feature[:,:self.d_model],update_feature[:,self.d_model:] + return x0_new,flow_feature0_new + + def decode_flow(self,flow_feature,kshape): + bs,h,w=flow_feature.shape[0],flow_feature.shape[2],flow_feature.shape[3] + scale_factor=torch.tensor([kshape[1],kshape[0]]).cuda()[None,None,None] + flow=self.flow_decoder(flow_feature.view(bs,-1,h*w)).permute(0,2,1).view(bs,h,w,4) + flow_coordinates=torch.sigmoid(flow[:,:,:,:2])*scale_factor + flow_var=flow[:,:,:,2:] + flow=torch.cat([flow_coordinates,flow_var],dim=-1) #B*H*W*4 + return flow + + +class flow_initializer(nn.Module): + + def __init__(self, dim, dim_flow, nhead, layer_num): + super().__init__() + self.layer_num= layer_num + self.dim = dim + self.dim_flow = dim_flow + + encoder_layer = messageLayer_ini( + dim ,dim_flow,dim+dim_flow , nhead) + self.layers_coarse = nn.ModuleList( + [copy.deepcopy(encoder_layer) for _ in range(layer_num)]) + self.decoupler = nn.Conv2d( + self.dim, self.dim+self.dim_flow, 
kernel_size=1) + self.up_merge = nn.Conv2d(2*dim, dim, kernel_size=1) + + def forward(self, feat0, feat1,pos0,pos1,mask0=None,mask1=None,ds0=[4,4],ds1=[4,4]): + # feat0: [B, C, H0, W0] + # feat1: [B, C, H1, W1] + # use low-res MHA to initialize flow feature + bs = feat0.size(0) + h0,w0,h1,w1=feat0.shape[2],feat0.shape[3],feat1.shape[2],feat1.shape[3] + + # coarse level + sub_feat0, sub_feat1 = F.avg_pool2d(feat0, ds0, stride=ds0), \ + F.avg_pool2d(feat1, ds1, stride=ds1) + + sub_pos0,sub_pos1=F.avg_pool2d(pos0, ds0, stride=ds0), \ + F.avg_pool2d(pos1, ds1, stride=ds1) + + if mask0 is not None: + mask0,mask1=-F.max_pool2d(-mask0.view(bs,1,h0,w0),ds0,stride=ds0).view(bs,-1),\ + -F.max_pool2d(-mask1.view(bs,1,h1,w1),ds1,stride=ds1).view(bs,-1) + + for layer in self.layers_coarse: + sub_feat0, sub_feat1 = layer(sub_feat0, sub_feat1,sub_pos0,sub_pos1,mask0,mask1) + # decouple flow and visual features + decoupled_feature0, decoupled_feature1 = self.decoupler(sub_feat0),self.decoupler(sub_feat1) + + sub_feat0, sub_flow_feature0 = decoupled_feature0[:,:self.dim], decoupled_feature0[:, self.dim:] + sub_feat1, sub_flow_feature1 = decoupled_feature1[:,:self.dim], decoupled_feature1[:, self.dim:] + update_feat0, flow_feature0 = F.upsample(sub_feat0, scale_factor=ds0, mode='bilinear'),\ + F.upsample(sub_flow_feature0, scale_factor=ds0, mode='bilinear') + update_feat1, flow_feature1 = F.upsample(sub_feat1, scale_factor=ds1, mode='bilinear'),\ + F.upsample(sub_flow_feature1, scale_factor=ds1, mode='bilinear') + + feat0 = feat0+self.up_merge(torch.cat([feat0, update_feat0], dim=1)) + feat1 = feat1+self.up_merge(torch.cat([feat1, update_feat1], dim=1)) + + return feat0,feat1,flow_feature0,flow_feature1 #b*c*h*w + + +class LocalFeatureTransformer_Flow(nn.Module): + """A Local Feature Transformer (LoFTR) module.""" + + def __init__(self, config): + super(LocalFeatureTransformer_Flow, self).__init__() + + self.config = config + self.d_model = config['d_model'] + self.nhead = 
config['nhead'] + + self.pos_transform=nn.Conv2d(config['d_model'],config['d_flow'],kernel_size=1,bias=False) + self.ini_layer = flow_initializer(self.d_model, config['d_flow'], config['nhead'],config['ini_layer_num']) + + encoder_layer = messageLayer_gla( + config['d_model'], config['d_flow'], config['d_flow']+config['d_model'], config['nhead'],config['radius_scale'],config['nsample']) + encoder_layer_last=messageLayer_gla( + config['d_model'], config['d_flow'], config['d_flow']+config['d_model'], config['nhead'],config['radius_scale'],config['nsample'],update_flow=False) + self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(config['layer_num']-1)]+[encoder_layer_last]) + self._reset_parameters() + + def _reset_parameters(self): + for name,p in self.named_parameters(): + if 'temp' in name or 'sample_offset' in name: + continue + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def forward(self, feat0, feat1,pos0,pos1,mask0=None,mask1=None,ds0=[4,4],ds1=[4,4]): + """ + Args: + feat0 (torch.Tensor): [N, C, H, W] + feat1 (torch.Tensor): [N, C, H, W] + pos1,pos2: [N, C, H, W] + Outputs: + feat0: [N,-1,C] + feat1: [N,-1,C] + flow_list: [L,N,H,W,4]*1(2) + """ + bs = feat0.size(0) + + pos0,pos1=self.pos_transform(pos0),self.pos_transform(pos1) + pos0,pos1=pos0.expand(bs,-1,-1,-1),pos1.expand(bs,-1,-1,-1) + assert self.d_model == feat0.size( + 1), "the feature number of src and transformer must be equal" + + flow_list=[[],[]]# [px,py,sx,sy] + if mask0 is not None: + mask0,mask1=mask0[:,None].float(),mask1[:,None].float() + feat0,feat1, flow_feature0, flow_feature1 = self.ini_layer(feat0, feat1,pos0,pos1,mask0,mask1,ds0,ds1) + for layer in self.layers: + feat0,feat1,flow_feature0,flow_feature1,flow0,flow1=layer(feat0,feat1,flow_feature0,flow_feature1,pos0,pos1,mask0,mask1,ds0,ds1) + flow_list[0].append(flow0) + flow_list[1].append(flow1) + flow_list[0]=torch.stack(flow_list[0],dim=0) + flow_list[1]=torch.stack(flow_list[1],dim=0) + feat0, feat1 = 
class ASpanFormer(nn.Module):
    """Detector-free image matcher (backbone -> coarse transformer -> coarse match -> fine refine).

    All intermediate and final results are written into the ``data`` dict that is
    passed to :meth:`forward`; the method itself returns ``None``.
    """

    def __init__(self, config):
        super().__init__()
        # Misc
        self.config = config

        # Modules
        self.backbone = build_backbone(config)
        self.pos_encoding = PositionEncodingSine(
            config['coarse']['d_model'],
            pre_scaling=[config['coarse']['train_res'], config['coarse']['test_res']])
        self.loftr_coarse = LocalFeatureTransformer_Flow(config['coarse'])
        self.coarse_matching = CoarseMatching(config['match_coarse'])
        self.fine_preprocess = FinePreprocess(config)
        self.loftr_fine = LocalFeatureTransformer(config["fine"])
        self.fine_matching = FineMatching()
        self.coarsest_level = config['coarse']['coarsest_level']

    def forward(self, data, online_resize=False):
        """Run the full matching pipeline, updating ``data`` in place.

        Args:
            data (dict): {
                'image0': (torch.Tensor): (N, 1, H, W)
                'image1': (torch.Tensor): (N, 1, H, W)
                'mask0'(optional) : (torch.Tensor): (N, H, W) '0' indicates a padded position
                'mask1'(optional) : (torch.Tensor): (N, H, W)
            }
            online_resize (bool): resize both images to a multiple of 32 before
                matching and map the fine matches back afterwards. Only a batch
                size of 1 is supported in this mode.
        """
        if online_resize:
            # The rescale factors are stored once per call, so both inputs must be
            # single-image batches (the original check tested image1's channel dim).
            assert data['image0'].shape[0] == 1 and data['image1'].shape[0] == 1, \
                "online_resize only supports batch size 1"
            self.resize_input(data, self.config['coarse']['train_res'])
        else:
            data['pos_scale0'], data['pos_scale1'] = None, None

        # 1. Local Feature CNN
        data.update({
            'bs': data['image0'].size(0),
            'hw0_i': data['image0'].shape[2:], 'hw1_i': data['image1'].shape[2:]
        })

        if data['hw0_i'] == data['hw1_i']:  # faster & better BN convergence
            feats_c, feats_f = self.backbone(
                torch.cat([data['image0'], data['image1']], dim=0))
            (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(
                data['bs']), feats_f.split(data['bs'])
        else:  # handle different input shapes
            (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(
                data['image0']), self.backbone(data['image1'])

        data.update({
            'hw0_c': feat_c0.shape[2:], 'hw1_c': feat_c1.shape[2:],
            'hw0_f': feat_f0.shape[2:], 'hw1_f': feat_f1.shape[2:]
        })

        # 2. coarse-level loftr module
        # Add the positional encoding to the feature maps. They stay in
        # [N, C, H, W] layout here (the former identity `rearrange` calls were no-ops).
        feat_c0, pos_encoding0 = self.pos_encoding(feat_c0, data['pos_scale0'])
        feat_c1, pos_encoding1 = self.pos_encoding(feat_c1, data['pos_scale1'])

        # TODO: adjust ds
        ds0 = [int(data['hw0_c'][0] / self.coarsest_level[0]),
               int(data['hw0_c'][1] / self.coarsest_level[1])]
        ds1 = [int(data['hw1_c'][0] / self.coarsest_level[0]),
               int(data['hw1_c'][1] / self.coarsest_level[1])]
        if online_resize:
            ds0, ds1 = [4, 4], [4, 4]

        mask_c0 = mask_c1 = None  # mask is useful in training
        if 'mask0' in data:
            mask_c0, mask_c1 = data['mask0'].flatten(-2), data['mask1'].flatten(-2)
        feat_c0, feat_c1, flow_list = self.loftr_coarse(
            feat_c0, feat_c1, pos_encoding0, pos_encoding1, mask_c0, mask_c1, ds0, ds1)

        # 3. match coarse-level and register predicted offset
        self.coarse_matching(feat_c0, feat_c1, flow_list, data,
                             mask_c0=mask_c0, mask_c1=mask_c1)

        # 4. fine-level refinement
        feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(
            feat_f0, feat_f1, feat_c0, feat_c1, data)
        if feat_f0_unfold.size(0) != 0:  # at least one coarse level predicted
            feat_f0_unfold, feat_f1_unfold = self.loftr_fine(
                feat_f0_unfold, feat_f1_unfold)

        # 5. match fine-level
        self.fine_matching(feat_f0_unfold, feat_f1_unfold, data)

        # 6. resize match coordinates back to input resolution
        if online_resize:
            data['mkpts0_f'] *= data['online_resize_scale0']
            data['mkpts1_f'] *= data['online_resize_scale1']

    def load_state_dict(self, state_dict, *args, **kwargs):
        """Load weights, accepting checkpoints whose keys carry a ``matcher.`` prefix.

        ``matcher.``-prefixed keys are renamed without the prefix, except those
        containing ``sample_offset``, which are dropped. The caller's dict is left
        untouched; the remapping is applied to a shallow copy.
        """
        import copy

        # copy.copy (unlike OrderedDict.copy) keeps instance attributes such as the
        # `_metadata` that torch attaches to state dicts.
        state_dict = copy.copy(state_dict)
        for k in list(state_dict.keys()):
            if k.startswith('matcher.'):
                if 'sample_offset' in k:
                    state_dict.pop(k)
                else:
                    state_dict[k.replace('matcher.', '', 1)] = state_dict.pop(k)
        return super().load_state_dict(state_dict, *args, **kwargs)

    def resize_input(self, data, train_res, df=32):
        """Resize both images to multiples of ``df`` and record the scale factors.

        Writes into ``data``:
            'image0', 'image1': resized images.
            'pos_scale0', 'pos_scale1': [train_h / H, train_w / W] of the resized
                images, passed to the positional encoding.
            'online_resize_scale0', 'online_resize_scale1': (1, 2) tensors
                [w_orig / w_new, h_orig / h_new] on the images' device, used to map
                matches back to the original resolution.

        Args:
            train_res: training resolution as a number, a 1-element sequence
                (square) or an (h, w) sequence.
            df (int): divisor the resized height/width must be a multiple of.
        """
        h0, w0 = data['image0'].shape[2], data['image0'].shape[3]
        h1, w1 = data['image1'].shape[2], data['image1'].shape[3]
        data['image0'], data['image1'] = self.resize_df(data['image0'], df), self.resize_df(data['image1'], df)

        if isinstance(train_res, (int, float)):
            train_res_h = train_res_w = train_res
        elif len(train_res) == 1:
            # Take the element, not the list itself (the list cannot be divided below).
            train_res_h = train_res_w = train_res[0]
        else:
            train_res_h, train_res_w = train_res[0], train_res[1]

        new_h0, new_w0 = data['image0'].shape[2], data['image0'].shape[3]
        new_h1, new_w1 = data['image1'].shape[2], data['image1'].shape[3]
        data['pos_scale0'] = [train_res_h / new_h0, train_res_w / new_w0]
        data['pos_scale1'] = [train_res_h / new_h1, train_res_w / new_w1]
        # Keep the scale tensors on the same device as the images instead of
        # forcing CUDA, so CPU inference works too.
        data['online_resize_scale0'] = torch.tensor(
            [w0 / new_w0, h0 / new_h0])[None].to(data['image0'].device)
        data['online_resize_scale1'] = torch.tensor(
            [w1 / new_w1, h1 / new_h1])[None].to(data['image1'].device)

    def resize_df(self, image, df=32):
        """Return ``image`` resized so height and width are multiples of ``df`` (rounded down)."""
        h, w = image.shape[2], image.shape[3]
        h_new, w_new = h // df * df, w // df * df
        if h != h_new or w != w_new:
            img_new = transforms.Resize([h_new, w_new]).forward(image)
        else:
            img_new = image
        return img_new
b/third_party/ASpanFormer/src/ASpanFormer/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b6e731b3f53ab367c89ef0ea8e1cbffb0d990775 --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/backbone/__init__.py @@ -0,0 +1,11 @@ +from .resnet_fpn import ResNetFPN_8_2, ResNetFPN_16_4 + + +def build_backbone(config): + if config['backbone_type'] == 'ResNetFPN': + if config['resolution'] == (8, 2): + return ResNetFPN_8_2(config['resnetfpn']) + elif config['resolution'] == (16, 4): + return ResNetFPN_16_4(config['resnetfpn']) + else: + raise ValueError(f"LOFTR.BACKBONE_TYPE {config['backbone_type']} not supported.") diff --git a/third_party/ASpanFormer/src/ASpanFormer/backbone/resnet_fpn.py b/third_party/ASpanFormer/src/ASpanFormer/backbone/resnet_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..985e5b3f273a51e51447a8025ca3aadbe46752eb --- /dev/null +++ b/third_party/ASpanFormer/src/ASpanFormer/backbone/resnet_fpn.py @@ -0,0 +1,199 @@ +import torch.nn as nn +import torch.nn.functional as F + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution without padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False) + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + + +class BasicBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1): + super().__init__() + self.conv1 = conv3x3(in_planes, planes, stride) + self.conv2 = conv3x3(planes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + + if stride == 1: + self.downsample = None + else: + self.downsample = nn.Sequential( + conv1x1(in_planes, planes, stride=stride), + nn.BatchNorm2d(planes) + ) + + def forward(self, x): + y = x + y = self.relu(self.bn1(self.conv1(y))) + y = 
self.bn2(self.conv2(y)) + + if self.downsample is not None: + x = self.downsample(x) + + return self.relu(x+y) + + +class ResNetFPN_8_2(nn.Module): + """ + ResNet+FPN, output resolution are 1/8 and 1/2. + Each block has 2 layers. + """ + + def __init__(self, config): + super().__init__() + # Config + block = BasicBlock + initial_dim = config['initial_dim'] + block_dims = config['block_dims'] + + # Class Variable + self.in_planes = initial_dim + + # Networks + self.conv1 = nn.Conv2d(1, initial_dim, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(initial_dim) + self.relu = nn.ReLU(inplace=True) + + self.layer1 = self._make_layer(block, block_dims[0], stride=1) # 1/2 + self.layer2 = self._make_layer(block, block_dims[1], stride=2) # 1/4 + self.layer3 = self._make_layer(block, block_dims[2], stride=2) # 1/8 + + # 3. FPN upsample + self.layer3_outconv = conv1x1(block_dims[2], block_dims[2]) + self.layer2_outconv = conv1x1(block_dims[1], block_dims[2]) + self.layer2_outconv2 = nn.Sequential( + conv3x3(block_dims[2], block_dims[2]), + nn.BatchNorm2d(block_dims[2]), + nn.LeakyReLU(), + conv3x3(block_dims[2], block_dims[1]), + ) + self.layer1_outconv = conv1x1(block_dims[0], block_dims[1]) + self.layer1_outconv2 = nn.Sequential( + conv3x3(block_dims[1], block_dims[1]), + nn.BatchNorm2d(block_dims[1]), + nn.LeakyReLU(), + conv3x3(block_dims[1], block_dims[0]), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, dim, stride=1): + layer1 = block(self.in_planes, dim, stride=stride) + layer2 = block(dim, dim, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + def forward(self, x): + # ResNet Backbone + x0 = self.relu(self.bn1(self.conv1(x))) + x1 = self.layer1(x0) # 1/2 + 
x2 = self.layer2(x1) # 1/4 + x3 = self.layer3(x2) # 1/8 + + # FPN + x3_out = self.layer3_outconv(x3) + + x3_out_2x = F.interpolate(x3_out, scale_factor=2., mode='bilinear', align_corners=True) + x2_out = self.layer2_outconv(x2) + x2_out = self.layer2_outconv2(x2_out+x3_out_2x) + + x2_out_2x = F.interpolate(x2_out, scale_factor=2., mode='bilinear', align_corners=True) + x1_out = self.layer1_outconv(x1) + x1_out = self.layer1_outconv2(x1_out+x2_out_2x) + + return [x3_out, x1_out] + + +class ResNetFPN_16_4(nn.Module): + """ + ResNet+FPN, output resolution are 1/16 and 1/4. + Each block has 2 layers. + """ + + def __init__(self, config): + super().__init__() + # Config + block = BasicBlock + initial_dim = config['initial_dim'] + block_dims = config['block_dims'] + + # Class Variable + self.in_planes = initial_dim + + # Networks + self.conv1 = nn.Conv2d(1, initial_dim, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(initial_dim) + self.relu = nn.ReLU(inplace=True) + + self.layer1 = self._make_layer(block, block_dims[0], stride=1) # 1/2 + self.layer2 = self._make_layer(block, block_dims[1], stride=2) # 1/4 + self.layer3 = self._make_layer(block, block_dims[2], stride=2) # 1/8 + self.layer4 = self._make_layer(block, block_dims[3], stride=2) # 1/16 + + # 3. 
FPN upsample + self.layer4_outconv = conv1x1(block_dims[3], block_dims[3]) + self.layer3_outconv = conv1x1(block_dims[2], block_dims[3]) + self.layer3_outconv2 = nn.Sequential( + conv3x3(block_dims[3], block_dims[3]), + nn.BatchNorm2d(block_dims[3]), + nn.LeakyReLU(), + conv3x3(block_dims[3], block_dims[2]), + ) + + self.layer2_outconv = conv1x1(block_dims[1], block_dims[2]) + self.layer2_outconv2 = nn.Sequential( + conv3x3(block_dims[2], block_dims[2]), + nn.BatchNorm2d(block_dims[2]), + nn.LeakyReLU(), + conv3x3(block_dims[2], block_dims[1]), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, dim, stride=1): + layer1 = block(self.in_planes, dim, stride=stride) + layer2 = block(dim, dim, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + def forward(self, x): + # ResNet Backbone + x0 = self.relu(self.bn1(self.conv1(x))) + x1 = self.layer1(x0) # 1/2 + x2 = self.layer2(x1) # 1/4 + x3 = self.layer3(x2) # 1/8 + x4 = self.layer4(x3) # 1/16 + + # FPN + x4_out = self.layer4_outconv(x4) + + x4_out_2x = F.interpolate(x4_out, scale_factor=2., mode='bilinear', align_corners=True) + x3_out = self.layer3_outconv(x3) + x3_out = self.layer3_outconv2(x3_out+x4_out_2x) + + x3_out_2x = F.interpolate(x3_out, scale_factor=2., mode='bilinear', align_corners=True) + x2_out = self.layer2_outconv(x2) + x2_out = self.layer2_outconv2(x2_out+x3_out_2x) + + return [x4_out, x2_out] diff --git a/third_party/ASpanFormer/src/ASpanFormer/utils/coarse_matching.py b/third_party/ASpanFormer/src/ASpanFormer/utils/coarse_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..953ee55a09144a4ce0099e709f3a992d021aa0ab --- /dev/null +++ 
INF = 1e9


def mask_border(m, b: int, v):
    """ Mask borders with value (in place)
    Args:
        m (torch.Tensor): [N, H0, W0, H1, W1]
        b (int): border width; nothing is done when b <= 0
        v (m.dtype): fill value
    """
    if b <= 0:
        return

    m[:, :b] = v
    m[:, :, :b] = v
    m[:, :, :, :b] = v
    m[:, :, :, :, :b] = v
    m[:, -b:] = v
    m[:, :, -b:] = v
    m[:, :, :, -b:] = v
    m[:, :, :, :, -b:] = v


def mask_border_with_padding(m, bd, v, p_m0, p_m1):
    """Like ``mask_border``, but the far borders follow each sample's valid (unpadded) extent.

    Args:
        m (torch.Tensor): [N, H0, W0, H1, W1], modified in place
        bd (int): border width; nothing is done when bd <= 0
        v (m.dtype): fill value
        p_m0, p_m1 (torch.Tensor): padded masks [N, H, W]; the valid height/width of
            each sample is taken as the largest column/row sum of its mask
    """
    if bd <= 0:
        return

    m[:, :bd] = v
    m[:, :, :bd] = v
    m[:, :, :, :bd] = v
    m[:, :, :, :, :bd] = v

    h0s, w0s = p_m0.sum(1).max(-1)[0].int(), p_m0.sum(-1).max(-1)[0].int()
    h1s, w1s = p_m1.sum(1).max(-1)[0].int(), p_m1.sum(-1).max(-1)[0].int()
    for b_idx, (h0, w0, h1, w1) in enumerate(zip(h0s, w0s, h1s, w1s)):
        m[b_idx, h0 - bd:] = v
        m[b_idx, :, w0 - bd:] = v
        m[b_idx, :, :, h1 - bd:] = v
        m[b_idx, :, :, :, w1 - bd:] = v


def compute_max_candidates(p_m0, p_m1):
    """Compute the max candidates of all pairs within a batch

    For each pair this is min(h0 * w0, h1 * w1) of the valid regions; the result is
    the sum over the batch.

    Args:
        p_m0, p_m1 (torch.Tensor): padded masks
    """
    h0s, w0s = p_m0.sum(1).max(-1)[0], p_m0.sum(-1).max(-1)[0]
    h1s, w1s = p_m1.sum(1).max(-1)[0], p_m1.sum(-1).max(-1)[0]
    max_cand = torch.sum(
        torch.min(torch.stack([h0s * w0s, h1s * w1s], -1), -1)[0])
    return max_cand


class CoarseMatching(nn.Module):
    """Coarse-level matching from a confidence matrix, plus flow-derived offset matches.

    Config keys read: 'thr', 'border_rm', 'train_coarse_percent',
    'train_pad_num_gt_min', 'match_type' ('dual_softmax' or 'sinkhorn'); for
    sinkhorn also 'skh_init_bin_score', 'skh_iters', 'skh_prefilter' and
    'sparse_spvs'.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # general config
        self.thr = config['thr']
        self.border_rm = config['border_rm']
        # -- # for training fine-level LoFTR
        self.train_coarse_percent = config['train_coarse_percent']
        self.train_pad_num_gt_min = config['train_pad_num_gt_min']

        # we provide 2 options for differentiable matching
        self.match_type = config['match_type']
        if self.match_type == 'dual_softmax':
            self.temperature = nn.parameter.Parameter(torch.tensor(10.), requires_grad=True)
        elif self.match_type == 'sinkhorn':
            try:
                from .superglue import log_optimal_transport
            except ImportError as err:
                # Chain the cause so the real import failure stays visible.
                raise ImportError("download superglue.py first!") from err
            self.log_optimal_transport = log_optimal_transport
            self.bin_score = nn.Parameter(
                torch.tensor(config['skh_init_bin_score'], requires_grad=True))
            self.skh_iters = config['skh_iters']
            self.skh_prefilter = config['skh_prefilter']
        else:
            raise NotImplementedError()

    def forward(self, feat_c0, feat_c1, flow_list, data, mask_c0=None, mask_c1=None):
        """
        Args:
            feat0 (torch.Tensor): [N, L, C]
            feat1 (torch.Tensor): [N, S, C]
            flow_list: pair of per-image flow tensors, each [layer, B, H, W, 4]
            data (dict)
            mask_c0 (torch.Tensor): [N, L] (optional)
            mask_c1 (torch.Tensor): [N, S] (optional)
        Update:
            data (dict): {
                'b_ids' (torch.Tensor): [M'],
                'i_ids' (torch.Tensor): [M'],
                'j_ids' (torch.Tensor): [M'],
                'gt_mask' (torch.Tensor): [M'],
                'mkpts0_c' (torch.Tensor): [M, 2],
                'mkpts1_c' (torch.Tensor): [M, 2],
                'mconf' (torch.Tensor): [M]}
            plus 'conf_matrix', 'predict_flow' and the 'offset_*' entries written
            by ``get_offset_match``.
            NOTE: M' != M during training.
        """
        N, L, S, C = feat_c0.size(0), feat_c0.size(1), feat_c1.size(1), feat_c0.size(2)
        # normalize
        feat_c0, feat_c1 = map(lambda feat: feat / feat.shape[-1]**.5,
                               [feat_c0, feat_c1])

        if self.match_type == 'dual_softmax':
            sim_matrix = torch.einsum("nlc,nsc->nls", feat_c0,
                                      feat_c1) * self.temperature
            if mask_c0 is not None:
                sim_matrix.masked_fill_(
                    ~(mask_c0[..., None] * mask_c1[:, None]).bool(),
                    -INF)
            conf_matrix = F.softmax(sim_matrix, 1) * F.softmax(sim_matrix, 2)

        elif self.match_type == 'sinkhorn':
            # sinkhorn, dustbin included
            sim_matrix = torch.einsum("nlc,nsc->nls", feat_c0, feat_c1)
            if mask_c0 is not None:
                sim_matrix[:, :L, :S].masked_fill_(
                    ~(mask_c0[..., None] * mask_c1[:, None]).bool(),
                    -INF)

            # build uniform prior & use sinkhorn
            log_assign_matrix = self.log_optimal_transport(
                sim_matrix, self.bin_score, self.skh_iters)
            assign_matrix = log_assign_matrix.exp()
            conf_matrix = assign_matrix[:, :-1, :-1]

            # filter prediction with dustbin score (only in evaluation mode)
            if not self.training and self.skh_prefilter:
                filter0 = (assign_matrix.max(dim=2)[1] == S)[:, :-1]  # [N, L]
                filter1 = (assign_matrix.max(dim=1)[1] == L)[:, :-1]  # [N, S]
                conf_matrix[filter0[..., None].repeat(1, 1, S)] = 0
                conf_matrix[filter1[:, None].repeat(1, L, 1)] = 0

            if self.config['sparse_spvs']:
                data.update({'conf_matrix_with_bin': assign_matrix.clone()})

        data.update({'conf_matrix': conf_matrix})
        # predict coarse matches from conf_matrix
        data.update(**self.get_coarse_match(conf_matrix, data))

        # update predicted offset; the two flows can only be stacked into one
        # tensor when both coarse grids have the same size
        if flow_list[0].shape[2] == flow_list[1].shape[2] and flow_list[0].shape[3] == flow_list[1].shape[3]:
            flow_list = torch.stack(flow_list, dim=0)
        data.update({'predict_flow': flow_list})  # [2*L*B*H*W*4]
        self.get_offset_match(flow_list, data, mask_c0, mask_c1)

    @torch.no_grad()
    def get_coarse_match(self, conf_matrix, data):
        """
        Args:
            conf_matrix (torch.Tensor): [N, L, S]
            data (dict): with keys ['hw0_i', 'hw1_i', 'hw0_c', 'hw1_c']
        Returns:
            coarse_matches (dict): {
                'b_ids' (torch.Tensor): [M'],
                'i_ids' (torch.Tensor): [M'],
                'j_ids' (torch.Tensor): [M'],
                'gt_mask' (torch.Tensor): [M'],
                'm_bids' (torch.Tensor): [M],
                'mkpts0_c' (torch.Tensor): [M, 2],
                'mkpts1_c' (torch.Tensor): [M, 2],
                'mconf' (torch.Tensor): [M]}
        """
        h0c, w0c = data['hw0_c'][0], data['hw0_c'][1]
        h1c, w1c = data['hw1_c'][0], data['hw1_c'][1]
        _device = conf_matrix.device
        # 1. confidence thresholding
        mask = conf_matrix > self.thr
        # unflatten to [b, h0c, w0c, h1c, w1c] so borders can be cut per axis
        mask = mask.reshape(mask.size(0), h0c, w0c, h1c, w1c)
        if 'mask0' not in data:
            mask_border(mask, self.border_rm, False)
        else:
            mask_border_with_padding(mask, self.border_rm, False,
                                     data['mask0'], data['mask1'])
        mask = mask.reshape(mask.size(0), h0c * w0c, h1c * w1c)

        # 2. mutual nearest
        mask = mask \
            * (conf_matrix == conf_matrix.max(dim=2, keepdim=True)[0]) \
            * (conf_matrix == conf_matrix.max(dim=1, keepdim=True)[0])

        # 3. find all valid coarse matches
        # this only works when at most one `True` in each row
        mask_v, all_j_ids = mask.max(dim=2)
        b_ids, i_ids = torch.where(mask_v)
        j_ids = all_j_ids[b_ids, i_ids]
        mconf = conf_matrix[b_ids, i_ids, j_ids]

        # 4. Random sampling of training samples for fine-level LoFTR
        # (optional) pad samples with gt coarse-level matches
        if self.training:
            # NOTE:
            # The sampling is performed across all pairs in a batch without manually balancing
            # #samples for fine-level increases w.r.t. batch_size
            if 'mask0' not in data:
                num_candidates_max = mask.size(0) * max(
                    mask.size(1), mask.size(2))
            else:
                num_candidates_max = compute_max_candidates(
                    data['mask0'], data['mask1'])
            num_matches_train = int(num_candidates_max *
                                    self.train_coarse_percent)
            num_matches_pred = len(b_ids)
            assert self.train_pad_num_gt_min < num_matches_train, "min-num-gt-pad should be less than num-train-matches"

            # pred_indices is to select from prediction
            if num_matches_pred <= num_matches_train - self.train_pad_num_gt_min:
                pred_indices = torch.arange(num_matches_pred, device=_device)
            else:
                pred_indices = torch.randint(
                    num_matches_pred,
                    (num_matches_train - self.train_pad_num_gt_min, ),
                    device=_device)

            # gt_pad_indices is to select from gt padding. e.g. max(3787-4800, 200)
            gt_pad_indices = torch.randint(
                len(data['spv_b_ids']),
                (max(num_matches_train - num_matches_pred,
                     self.train_pad_num_gt_min), ),
                device=_device)
            mconf_gt = torch.zeros(len(data['spv_b_ids']), device=_device)  # set conf of gt paddings to all zero

            b_ids, i_ids, j_ids, mconf = (
                torch.cat([pred[pred_indices], gt[gt_pad_indices]], dim=0)
                for pred, gt in ((b_ids, data['spv_b_ids']),
                                 (i_ids, data['spv_i_ids']),
                                 (j_ids, data['spv_j_ids']),
                                 (mconf, mconf_gt)))

        # These matches select patches that feed into fine-level network
        coarse_matches = {'b_ids': b_ids, 'i_ids': i_ids, 'j_ids': j_ids}

        # 5. Update with matches in original image resolution
        scale = data['hw0_i'][0] / data['hw0_c'][0]
        scale0 = scale * data['scale0'][b_ids] if 'scale0' in data else scale
        scale1 = scale * data['scale1'][b_ids] if 'scale1' in data else scale
        mkpts0_c = torch.stack(
            [i_ids % data['hw0_c'][1], i_ids // data['hw0_c'][1]],
            dim=1) * scale0
        mkpts1_c = torch.stack(
            [j_ids % data['hw1_c'][1], j_ids // data['hw1_c'][1]],
            dim=1) * scale1

        # These matches is the current prediction (for visualization)
        coarse_matches.update({
            'gt_mask': mconf == 0,
            'm_bids': b_ids[mconf != 0],  # mconf == 0 => gt matches
            'mkpts0_c': mkpts0_c[mconf != 0],
            'mkpts1_c': mkpts1_c[mconf != 0],
            'mconf': mconf[mconf != 0]
        })

        return coarse_matches

    @torch.no_grad()
    def get_offset_match(self, flow_list, data, mask1, mask2):
        """Turn the predicted flows into dense offset matches stored in ``data``.

        Args:
            flow_list: indexable pair of flows; ``flow_list[0]`` (and, when present,
                ``flow_list[1]``) has shape [L, B, H, W, 4]. The first two channels
                are used as the target coordinate and the mean of the last two as
                an uncertainty score (lower is kept).
            data (dict): with keys ['hw0_i', 'hw0_c', 'hw1_c']
            mask1, mask2 (torch.Tensor or None): [B, H*W] validity masks; masked
                positions get a score of 100 so they never pass the threshold.
        Returns:
            None; see ``get_offset_match_work`` for the keys written to ``data``.
        """
        offset1 = flow_list[0]
        bs, layer_num = offset1.shape[1], offset1.shape[0]

        # left side
        offset1 = offset1.view(layer_num, bs, -1, 4)
        conf1 = offset1[:, :, :, 2:].mean(dim=-1)
        if mask1 is not None:
            conf1.masked_fill_(~mask1.bool()[None].expand(layer_num, -1, -1), 100)
        offset1 = offset1[:, :, :, :2]
        self.get_offset_match_work(offset1, conf1, data, 'left')

        # right side
        if len(flow_list) == 2:
            offset2 = flow_list[1].view(layer_num, bs, -1, 4)
            conf2 = offset2[:, :, :, 2:].mean(dim=-1)
            if mask2 is not None:
                conf2.masked_fill_(~mask2.bool()[None].expand(layer_num, -1, -1), 100)
            offset2 = offset2[:, :, :, :2]
            self.get_offset_match_work(offset2, conf2, data, 'right')

    @torch.no_grad()
    def get_offset_match_work(self, offset, conf, data, side):
        """Select confident flow predictions of one side and store them in ``data``.

        Args:
            offset (torch.Tensor): [L, B, H*W, 2] predicted target coordinates
                (coarse-grid units) for every source position.
            conf (torch.Tensor): [L, B, H*W] score; positions with score < 2 are kept.
            data (dict): with keys ['hw0_i', 'hw0_c', 'hw1_c']
            side (str): 'left' (sources live in image 0) or 'right' (image 1).
        Update:
            data: 'offset_bids_<side>', 'offset_lids_<side>', 'conf<side>',
            'offset_kpts0_f_<side>', 'offset_kpts1_f_<side>'.
        """
        # 1. confidence thresholding
        mask_conf = conf < 2
        mask_conf[:, :, 0] = True  # safe guard in case that no match survives
        # 2. find offset matches
        scale = data['hw0_i'][0] / data['hw0_c'][0]
        l_ids, b_ids, i_ids = torch.where(mask_conf)
        j_coor = offset[l_ids, b_ids, i_ids, :2] * scale  # [N,2]
        # Flat source indices must be decoded with the width of the grid they
        # index: image 1 for the right side, image 0 for the left side.
        src_w = data['hw1_c'][1] if side == 'right' else data['hw0_c'][1]
        i_coor = torch.stack([i_ids % src_w, i_ids // src_w], dim=1) * scale
        # These matches is the current prediction (for visualization)
        data.update({
            'offset_bids_'+side: b_ids,
            'offset_lids_'+side: l_ids,
            'conf'+side: conf[mask_conf]
        })

        if side == 'right':
            data.update({'offset_kpts0_f_'+side: j_coor.detach(),
                         'offset_kpts1_f_'+side: i_coor})
        else:
            data.update({'offset_kpts0_f_'+side: i_coor,
                         'offset_kpts1_f_'+side: j_coor.detach()})
class FineMatching(nn.Module):
    """FineMatching with s2d paradigm"""

    def __init__(self):
        super().__init__()

    def forward(self, feat_f0, feat_f1, data):
        """
        Args:
            feat0 (torch.Tensor): [M, WW, C]
            feat1 (torch.Tensor): [M, WW, C]
            data (dict): needs 'hw0_i', 'hw0_f', 'mkpts0_c', 'mkpts1_c', 'mconf'
                (and 'scale1' / 'b_ids' when per-image scales are used)
        Update:
            data (dict):{
                'expec_f' (torch.Tensor): [M, 3],
                'mkpts0_f' (torch.Tensor): [M, 2],
                'mkpts1_f' (torch.Tensor): [M, 2]}
        """
        M, WW, C = feat_f0.shape
        W = int(math.sqrt(WW))
        scale = data['hw0_i'][0] / data['hw0_f'][0]
        # cached for get_fine_match
        self.M, self.W, self.WW, self.C, self.scale = M, W, WW, C, scale

        # corner case: if no coarse matches found
        if M == 0:
            assert not self.training, "M is always >0, when training, see coarse_matching.py"
            # logger.warning('No matches found in coarse-level.')
            data.update({
                'expec_f': torch.empty(0, 3, device=feat_f0.device),
                'mkpts0_f': data['mkpts0_c'],
                'mkpts1_f': data['mkpts1_c'],
            })
            return

        # correlate the centre feature of each window in image 0 with the whole
        # window in image 1
        feat_f0_picked = feat_f0[:, WW//2, :]
        sim_matrix = torch.einsum('mc,mrc->mr', feat_f0_picked, feat_f1)
        softmax_temp = 1. / C**.5
        heatmap = torch.softmax(softmax_temp * sim_matrix, dim=1).view(-1, W, W)

        # compute coordinates from heatmap
        coords_normalized = dsnt.spatial_expectation2d(heatmap[None], True)[0]  # [M, 2]
        grid_normalized = create_meshgrid(W, W, True, heatmap.device).reshape(1, -1, 2)  # [1, WW, 2]

        # compute std over <x, y>
        var = torch.sum(grid_normalized**2 * heatmap.view(-1, WW, 1), dim=1) - coords_normalized**2  # [M, 2]
        std = torch.sum(torch.sqrt(torch.clamp(var, min=1e-10)), -1)  # [M]  clamp needed for numerical stability

        # for fine-level supervision
        data.update({'expec_f': torch.cat([coords_normalized, std.unsqueeze(1)], -1)})

        # compute absolute kpt coords
        self.get_fine_match(coords_normalized, data)

    @torch.no_grad()
    def get_fine_match(self, coords_normed, data):
        """Convert normalized in-window offsets into absolute fine keypoints.

        Args:
            coords_normed (torch.Tensor): [M, 2] expected offset inside the window
            data (dict): needs 'mkpts0_c', 'mkpts1_c', 'mconf'; uses 'scale1' and
                'b_ids' when 'scale1' is present
        Update:
            data: 'mkpts0_f' (unchanged coarse points of image 0) and 'mkpts1_f'
            (coarse points of image 1 plus the scaled offset).
        """
        W, scale = self.W, self.scale

        # mkpts0_f and mkpts1_f
        mkpts0_f = data['mkpts0_c']
        # Guard on the key that is actually read (the check used to test 'scale0').
        scale1 = scale * data['scale1'][data['b_ids']] if 'scale1' in data else scale
        mkpts1_f = data['mkpts1_c'] + (coords_normed * (W // 2) * scale1)[:len(data['mconf'])]

        data.update({
            "mkpts0_f": mkpts0_f,
            "mkpts1_f": mkpts1_f
        })
@torch.no_grad()
def warp_kpts(kpts0, depth0, depth1, T_0to1, K0, K1):
    """ Warp kpts0 from I0 to I1 with depth, K and Rt
    Also check covisibility and depth consistency.
    Depth is consistent if relative error < 0.2 (hard-coded).

    Args:
        kpts0 (torch.Tensor): [N, L, 2] - <x, y>,
        depth0 (torch.Tensor): [N, H, W],
        depth1 (torch.Tensor): [N, H, W],
        T_0to1 (torch.Tensor): [N, 3, 4],
        K0 (torch.Tensor): [N, 3, 3],
        K1 (torch.Tensor): [N, 3, 3],
    Returns:
        calculable_mask (torch.Tensor): [N, L]
        warped_keypoints0 (torch.Tensor): [N, L, 2] <x0_hat, y1_hat>
    """
    kpts0_long = kpts0.round().long()

    # Sample depth, get calculable_mask on depth != 0
    kpts0_depth = torch.stack(
        [depth0[i, kpts0_long[i, :, 1], kpts0_long[i, :, 0]] for i in range(kpts0.shape[0])], dim=0
    )  # (N, L)
    nonzero_mask = kpts0_depth != 0

    # Unproject
    kpts0_h = torch.cat([kpts0, torch.ones_like(kpts0[:, :, [0]])], dim=-1) * kpts0_depth[..., None]  # (N, L, 3)
    kpts0_cam = K0.inverse() @ kpts0_h.transpose(2, 1)  # (N, 3, L)

    # Rigid Transform
    w_kpts0_cam = T_0to1[:, :3, :3] @ kpts0_cam + T_0to1[:, :3, [3]]    # (N, 3, L)
    w_kpts0_depth_computed = w_kpts0_cam[:, 2, :]

    # Project
    w_kpts0_h = (K1 @ w_kpts0_cam).transpose(2, 1)  # (N, L, 3)
    w_kpts0 = w_kpts0_h[:, :, :2] / (w_kpts0_h[:, :, [2]] + 1e-4)  # (N, L, 2), +1e-4 to avoid zero depth

    # Covisible Check
    h, w = depth1.shape[1:3]
    covisible_mask = (w_kpts0[:, :, 0] > 0) * (w_kpts0[:, :, 0] < w-1) * \
        (w_kpts0[:, :, 1] > 0) * (w_kpts0[:, :, 1] < h-1)
    w_kpts0_long = w_kpts0.long()
    # out-of-view points are redirected to pixel (0, 0) so the lookup below stays in bounds
    w_kpts0_long[~covisible_mask, :] = 0

    w_kpts0_depth = torch.stack(
        [depth1[i, w_kpts0_long[i, :, 1], w_kpts0_long[i, :, 0]] for i in range(w_kpts0_long.shape[0])], dim=0
    )  # (N, L)
    consistent_mask = ((w_kpts0_depth - w_kpts0_depth_computed) / w_kpts0_depth).abs() < 0.2
    valid_mask = nonzero_mask * covisible_mask * consistent_mask

    return valid_mask, w_kpts0


class PositionEncodingSine(nn.Module):
    """
    This is a sinusoidal position encoding that generalized to 2-dimensional images
    """

    def __init__(self, d_model, max_shape=(256, 256), pre_scaling=None):
        """
        Args:
            d_model (int): number of feature channels (the interleaved layout below
                needs it to be a multiple of 4)
            max_shape (tuple): for 1/8 featmap, the max length of 256 corresponds to 2048 pixels
            pre_scaling (sequence or None): ``[train_res, test_res]``, each an
                (h, w) pair or None. When both are given, positions are multiplied
                by train_res / test_res per axis. ``None`` (the default) or a None
                entry disables the rescaling.
        """
        super().__init__()
        self.d_model = d_model
        self.max_shape = max_shape
        self.pre_scaling = pre_scaling

        y_position = torch.ones(max_shape).cumsum(0).float().unsqueeze(0)
        x_position = torch.ones(max_shape).cumsum(1).float().unsqueeze(0)

        # `pre_scaling` itself may be None (its default), not only its entries.
        if pre_scaling is not None and pre_scaling[0] is not None and pre_scaling[1] is not None:
            train_res, test_res = pre_scaling[0], pre_scaling[1]
            x_position, y_position = x_position*train_res[1]/test_res[1], y_position*train_res[0]/test_res[0]

        pe = self._encode(y_position, x_position)
        self.register_buffer('pe', pe.unsqueeze(0), persistent=False)  # [1, C, H, W]

    def _encode(self, y_position, x_position):
        """Build the [C, H, W] sin/cos table for [1, H, W] position grids."""
        d_model = self.d_model
        pe = torch.zeros((d_model, *y_position.shape[1:]))
        div_term = torch.exp(torch.arange(0, d_model//2, 2).float() * (-math.log(10000.0) / (d_model//2)))
        div_term = div_term[:, None, None]  # [C//4, 1, 1]
        pe[0::4, :, :] = torch.sin(x_position * div_term)
        pe[1::4, :, :] = torch.cos(x_position * div_term)
        pe[2::4, :, :] = torch.sin(y_position * div_term)
        pe[3::4, :, :] = torch.cos(y_position * div_term)
        return pe

    def forward(self, x, scaling=None):
        """
        Args:
            x: [N, C, H, W]
            scaling: optional (y_scale, x_scale); when given, the encoding is
                rebuilt with positions multiplied by these factors and the
                constructor's ``pre_scaling`` is ignored.
        Returns:
            (x + pe, pe) with pe of shape [1, C, H, W]
        """
        if scaling is None:  # online scaling overwrites pre_scaling
            pe = self.pe[:, :, :x.size(2), :x.size(3)]
            return x + pe, pe

        # Only the part of the table that is actually used is built (still capped
        # at max_shape, like the precomputed buffer).
        shape = (min(x.size(2), self.max_shape[0]), min(x.size(3), self.max_shape[1]))
        y_position = torch.ones(shape).cumsum(0).float().unsqueeze(0)*scaling[0]
        x_position = torch.ones(shape).cumsum(1).float().unsqueeze(0)*scaling[1]
        pe = self._encode(y_position, x_position).unsqueeze(0).to(x.device)
        return x + pe, pe
############## ↓ Coarse-Level supervision ↓ ##############


@torch.no_grad()
def mask_pts_at_padded_regions(grid_pt, mask):
    """For megadepth dataset, zero-padding exists in images.

    Sets every point of ``grid_pt`` whose ``mask`` entry is falsy to (0, 0).
    ``grid_pt`` is modified in place and also returned.
    """
    mask = repeat(mask, 'n h w -> n (h w) c', c=2)
    grid_pt[~mask.bool()] = 0
    return grid_pt


@torch.no_grad()
def spvs_coarse(data, config):
    """
    Update:
        data (dict): {
            "conf_matrix_gt": [N, hw0, hw1],
            'spv_b_ids': [M]
            'spv_i_ids': [M]
            'spv_j_ids': [M]
            'spv_w_pt0_i': [N, hw0, 2], in original image resolution
            'spv_pt1_i': [N, hw1, 2], in original image resolution
        }

    NOTE:
        - for scannet dataset, there're 3 kinds of resolution {i, c, f}
        - for megadepth dataset, there're 4 kinds of resolution {i, i_resize, c, f}
    """
    # 1. misc
    device = data['image0'].device
    N, _, H0, W0 = data['image0'].shape
    _, _, H1, W1 = data['image1'].shape
    scale = config['ASPAN']['RESOLUTION'][0]
    scale0 = scale * data['scale0'][:, None] if 'scale0' in data else scale
    # Guard on the key that is actually read (the original tested 'scale0' here).
    scale1 = scale * data['scale1'][:, None] if 'scale1' in data else scale
    h0, w0, h1, w1 = (dim // scale for dim in (H0, W0, H1, W1))

    # 2. warp grids
    # create kpts in meshgrid and resize them to image resolution
    grid_pt0_c = create_meshgrid(h0, w0, False, device).reshape(1, h0 * w0, 2).repeat(N, 1, 1)  # [N, hw, 2]
    grid_pt0_i = scale0 * grid_pt0_c
    grid_pt1_c = create_meshgrid(h1, w1, False, device).reshape(1, h1 * w1, 2).repeat(N, 1, 1)
    grid_pt1_i = scale1 * grid_pt1_c

    # mask padded region to (0, 0), so no need to manually mask conf_matrix_gt
    if 'mask0' in data:
        grid_pt0_i = mask_pts_at_padded_regions(grid_pt0_i, data['mask0'])
        grid_pt1_i = mask_pts_at_padded_regions(grid_pt1_i, data['mask1'])

    # warp kpts bi-directionally and resize them to coarse-level resolution
    # (no depth consistency check, since it leads to worse results experimentally)
    # (unhandled edge case: points with 0-depth will be warped to the left-up corner)
    _, w_pt0_i = warp_kpts(grid_pt0_i, data['depth0'], data['depth1'], data['T_0to1'], data['K0'], data['K1'])
    _, w_pt1_i = warp_kpts(grid_pt1_i, data['depth1'], data['depth0'], data['T_1to0'], data['K1'], data['K0'])
    w_pt0_c = w_pt0_i / scale1
    w_pt1_c = w_pt1_i / scale0

    # 3. check if mutual nearest neighbor
    w_pt0_c_round = w_pt0_c.round().long()
    nearest_index1 = w_pt0_c_round[..., 0] + w_pt0_c_round[..., 1] * w1
    w_pt1_c_round = w_pt1_c.round().long()
    nearest_index0 = w_pt1_c_round[..., 0] + w_pt1_c_round[..., 1] * w0

    # corner case: out of boundary
    def out_bound_mask(pt, w, h):
        return (pt[..., 0] < 0) | (pt[..., 0] >= w) | (pt[..., 1] < 0) | (pt[..., 1] >= h)
    nearest_index1[out_bound_mask(w_pt0_c_round, w1, h1)] = 0
    nearest_index0[out_bound_mask(w_pt1_c_round, w0, h0)] = 0

    # Follow each cell of image0 to image1 and back; a match is kept only if
    # the round trip lands on the starting cell.
    loop_back = torch.stack([nearest_index0[_b][_i] for _b, _i in enumerate(nearest_index1)], dim=0)
    correct_0to1 = loop_back == torch.arange(h0 * w0, device=device)[None].repeat(N, 1)
    correct_0to1[:, 0] = False  # ignore the top-left corner

    # 4. construct a gt conf_matrix
    conf_matrix_gt = torch.zeros(N, h0 * w0, h1 * w1, device=device)
    b_ids, i_ids = torch.where(correct_0to1)
    j_ids = nearest_index1[b_ids, i_ids]

    conf_matrix_gt[b_ids, i_ids, j_ids] = 1
    data.update({'conf_matrix_gt': conf_matrix_gt})

    # 5. save coarse matches(gt) for training fine level
    if len(b_ids) == 0:
        logger.warning(f"No groundtruth coarse match found for: {data['pair_names']}")
        # this won't affect fine-level loss calculation
        b_ids = torch.tensor([0], device=device)
        i_ids = torch.tensor([0], device=device)
        j_ids = torch.tensor([0], device=device)

    data.update({
        'spv_b_ids': b_ids,
        'spv_i_ids': i_ids,
        'spv_j_ids': j_ids
    })

    # 6. save intermediate results (for fast fine-level computation)
    data.update({
        'spv_w_pt0_i': w_pt0_i,
        'spv_pt1_i': grid_pt1_i
    })


def compute_supervision_coarse(data, config):
    """Run coarse-level supervision for a batch drawn from a single dataset.

    Raises:
        AssertionError: if ``data['dataset_name']`` mixes several datasets.
        ValueError: if the dataset is neither ScanNet nor MegaDepth.
    """
    assert len(set(data['dataset_name'])) == 1, "Do not support mixed datasets training!"
    data_source = data['dataset_name'][0]
    if data_source.lower() in ['scannet', 'megadepth']:
        spvs_coarse(data, config)
    else:
        raise ValueError(f'Unknown data source: {data_source}')


############## ↓ Fine-Level supervision ↓ ##############

@torch.no_grad()
def spvs_fine(data, config):
    """
    Update:
        data (dict):{
            "expec_f_gt": [M, 2]}
    """
    # 1. misc
    w_pt0_i, pt1_i = data['spv_w_pt0_i'], data['spv_pt1_i']
    scale = config['ASPAN']['RESOLUTION'][1]
    radius = config['ASPAN']['FINE_WINDOW_SIZE'] // 2

    # 2. get coarse prediction
    b_ids, i_ids, j_ids = data['b_ids'], data['i_ids'], data['j_ids']

    # 3. compute gt
    # Guard on the key that is actually read (the original tested 'scale0' here).
    scale = scale * data['scale1'][b_ids] if 'scale1' in data else scale
    # `expec_f_gt` might exceed the window, i.e. abs(*) > 1, which would be filtered later
    expec_f_gt = (w_pt0_i[b_ids, i_ids] - pt1_i[b_ids, j_ids]) / scale / radius  # [M, 2]
    data.update({"expec_f_gt": expec_f_gt})


def compute_supervision_fine(data, config):
    """Run fine-level supervision; only ScanNet and MegaDepth are supported.

    Raises:
        NotImplementedError: for any other dataset name.
    """
    data_source = data['dataset_name'][0]
    if data_source.lower() in ['scannet', 'megadepth']:
        spvs_fine(data, config)
    else:
        raise NotImplementedError
Coarse-Matching config +_CN.ASPAN.MATCH_COARSE = CN() +_CN.ASPAN.MATCH_COARSE.THR = 0.2 +_CN.ASPAN.MATCH_COARSE.BORDER_RM = 2 +_CN.ASPAN.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' # options: ['dual_softmax, 'sinkhorn'] +_CN.ASPAN.MATCH_COARSE.SKH_ITERS = 3 +_CN.ASPAN.MATCH_COARSE.SKH_INIT_BIN_SCORE = 1.0 +_CN.ASPAN.MATCH_COARSE.SKH_PREFILTER = False +_CN.ASPAN.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.2 # training tricks: save GPU memory +_CN.ASPAN.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200 # training tricks: avoid DDP deadlock +_CN.ASPAN.MATCH_COARSE.SPARSE_SPVS = True +_CN.ASPAN.MATCH_COARSE.LEARNABLE_DS_TEMP = True + +# 4. ASPAN-fine module config +_CN.ASPAN.FINE = CN() +_CN.ASPAN.FINE.D_MODEL = 128 +_CN.ASPAN.FINE.D_FFN = 128 +_CN.ASPAN.FINE.NHEAD = 8 +_CN.ASPAN.FINE.LAYER_NAMES = ['self', 'cross'] * 1 +_CN.ASPAN.FINE.ATTENTION = 'linear' + +# 5. ASPAN Losses +# -- # coarse-level +_CN.ASPAN.LOSS = CN() +_CN.ASPAN.LOSS.COARSE_TYPE = 'focal' # ['focal', 'cross_entropy'] +_CN.ASPAN.LOSS.COARSE_WEIGHT = 1.0 +# _CN.ASPAN.LOSS.SPARSE_SPVS = False +# -- - -- # focal loss (coarse) +_CN.ASPAN.LOSS.FOCAL_ALPHA = 0.25 +_CN.ASPAN.LOSS.FOCAL_GAMMA = 2.0 +_CN.ASPAN.LOSS.POS_WEIGHT = 1.0 +_CN.ASPAN.LOSS.NEG_WEIGHT = 1.0 +# _CN.ASPAN.LOSS.DUAL_SOFTMAX = False # whether coarse-level use dual-softmax or not. +# use `_CN.ASPAN.MATCH_COARSE.MATCH_TYPE` + +# -- # fine-level +_CN.ASPAN.LOSS.FINE_TYPE = 'l2_with_std' # ['l2_with_std', 'l2'] +_CN.ASPAN.LOSS.FINE_WEIGHT = 1.0 +_CN.ASPAN.LOSS.FINE_CORRECT_THR = 1.0 # for filtering valid fine-level gts (some gt matches might fall out of the fine-level window) + +# -- # flow-sloss +_CN.ASPAN.LOSS.FLOW_WEIGHT = 0.1 + + +############## Dataset ############## +_CN.DATASET = CN() +# 1. 
data config +# training and validating +_CN.DATASET.TRAINVAL_DATA_SOURCE = None # options: ['ScanNet', 'MegaDepth'] +_CN.DATASET.TRAIN_DATA_ROOT = None +_CN.DATASET.TRAIN_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.TRAIN_NPZ_ROOT = None +_CN.DATASET.TRAIN_LIST_PATH = None +_CN.DATASET.TRAIN_INTRINSIC_PATH = None +_CN.DATASET.VAL_DATA_ROOT = None +_CN.DATASET.VAL_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.VAL_NPZ_ROOT = None +_CN.DATASET.VAL_LIST_PATH = None # None if val data from all scenes are bundled into a single npz file +_CN.DATASET.VAL_INTRINSIC_PATH = None +# testing +_CN.DATASET.TEST_DATA_SOURCE = None +_CN.DATASET.TEST_DATA_ROOT = None +_CN.DATASET.TEST_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.TEST_NPZ_ROOT = None +_CN.DATASET.TEST_LIST_PATH = None # None if test data from all scenes are bundled into a single npz file +_CN.DATASET.TEST_INTRINSIC_PATH = None + +# 2. dataset config +# general options +_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4 # discard data with overlap_score < min_overlap_score +_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 +_CN.DATASET.AUGMENTATION_TYPE = None # options: [None, 'dark', 'mobile'] + +# MegaDepth options +_CN.DATASET.MGDPT_IMG_RESIZE = 640 # resize the longer side, zero-pad bottom-right to square. +_CN.DATASET.MGDPT_IMG_PAD = True # pad img to square with size = MGDPT_IMG_RESIZE +_CN.DATASET.MGDPT_DEPTH_PAD = True # pad depthmap to square with size = 2000 +_CN.DATASET.MGDPT_DF = 8 + +############## Trainer ############## +_CN.TRAINER = CN() +_CN.TRAINER.WORLD_SIZE = 1 +_CN.TRAINER.CANONICAL_BS = 64 +_CN.TRAINER.CANONICAL_LR = 6e-3 +_CN.TRAINER.SCALING = None # this will be calculated automatically +_CN.TRAINER.FIND_LR = False # use learning rate finder from pytorch-lightning + +# optimizer +_CN.TRAINER.OPTIMIZER = "adamw" # [adam, adamw] +_CN.TRAINER.TRUE_LR = None # this will be calculated automatically at runtime +_CN.TRAINER.ADAM_DECAY = 0. 
# ADAM: for adam +_CN.TRAINER.ADAMW_DECAY = 0.1 + +# step-based warm-up +_CN.TRAINER.WARMUP_TYPE = 'linear' # [linear, constant] +_CN.TRAINER.WARMUP_RATIO = 0. +_CN.TRAINER.WARMUP_STEP = 4800 + +# learning rate scheduler +_CN.TRAINER.SCHEDULER = 'MultiStepLR' # [MultiStepLR, CosineAnnealing, ExponentialLR] +_CN.TRAINER.SCHEDULER_INTERVAL = 'epoch' # [epoch, step] +_CN.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12] # MSLR: MultiStepLR +_CN.TRAINER.MSLR_GAMMA = 0.5 +_CN.TRAINER.COSA_TMAX = 30 # COSA: CosineAnnealing +_CN.TRAINER.ELR_GAMMA = 0.999992 # ELR: ExponentialLR, this value for 'step' interval + +# plotting related +_CN.TRAINER.ENABLE_PLOTTING = True +_CN.TRAINER.N_VAL_PAIRS_TO_PLOT = 32 # number of val/test paris for plotting +_CN.TRAINER.PLOT_MODE = 'evaluation' # ['evaluation', 'confidence'] +_CN.TRAINER.PLOT_MATCHES_ALPHA = 'dynamic' + +# geometric metrics and pose solver +_CN.TRAINER.EPI_ERR_THR = 5e-4 # recommendation: 5e-4 for ScanNet, 1e-4 for MegaDepth (from SuperGlue) +_CN.TRAINER.POSE_GEO_MODEL = 'E' # ['E', 'F', 'H'] +_CN.TRAINER.POSE_ESTIMATION_METHOD = 'RANSAC' # [RANSAC, DEGENSAC, MAGSAC] +_CN.TRAINER.RANSAC_PIXEL_THR = 0.5 +_CN.TRAINER.RANSAC_CONF = 0.99999 +_CN.TRAINER.RANSAC_MAX_ITERS = 10000 +_CN.TRAINER.USE_MAGSACPP = False + +# data sampler for train_dataloader +_CN.TRAINER.DATA_SAMPLER = 'scene_balance' # options: ['scene_balance', 'random', 'normal'] +# 'scene_balance' config +_CN.TRAINER.N_SAMPLES_PER_SUBSET = 200 +_CN.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT = True # whether sample each scene with replacement or not +_CN.TRAINER.SB_SUBSET_SHUFFLE = True # after sampling from scenes, whether shuffle within the epoch or not +_CN.TRAINER.SB_REPEAT = 1 # repeat N times for training the sampled data +# 'random' config +_CN.TRAINER.RDM_REPLACEMENT = True +_CN.TRAINER.RDM_NUM_SAMPLES = None + +# gradient clipping +_CN.TRAINER.GRADIENT_CLIPPING = 0.5 + +# reproducibility +# This seed affects the data sampling. 
class MegaDepthDataset(Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='train',
                 min_overlap_score=0.4,
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 depth_padding=False,
                 augment_fn=None,
                 **kwargs):
        """
        Manage one scene(npz_path) of MegaDepth dataset.

        Args:
            root_dir (str): megadepth root directory that has `phoenix`.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['train', 'val', 'test']
            min_overlap_score (float): how much a pair should have in common. In range of [0, 1]. Set to 0 when testing.
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                                        This is useful during training with batches and testing with memory intensive algorithms.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            **kwargs: only ``coarse_scale`` (float, default 0.125) is read; it is the
                factor used to downsample the padding masks. A falsy value disables
                the mask output.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        # NOTE: everything after the first '.' of the whole path is dropped.
        self.scene_id = npz_path.split('.')[0]

        # prepare scene_info and pair_info
        if mode == 'test' and min_overlap_score != 0:
            logger.warning("You are using `min_overlap_score`!=0 in test mode. Set to 0.")
            min_overlap_score = 0
        # NOTE(security): allow_pickle=True unpickles the file; only load trusted indices.
        loaded = np.load(npz_path, allow_pickle=True)
        try:
            # Materialise into a plain dict: an NpzFile does not support item
            # deletion, while a pickled dict is simply shallow-copied.
            self.scene_info = dict(loaded)
        finally:
            close = getattr(loaded, 'close', None)
            if callable(close):
                close()
        pair_infos = self.scene_info.pop('pair_infos')
        self.pair_infos = [pair_info for pair_info in pair_infos if pair_info[1] > min_overlap_score]

        # parameters for image resizing, padding and depthmap padding
        if mode == 'train':
            assert img_resize is not None and img_padding and depth_padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding
        self.depth_max_size = 2000 if depth_padding else None  # the upperbound of depthmaps size in megadepth.

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None
        # `kwargs` is a dict, so it must be queried with .get(); getattr() on it
        # never finds the key and silently fell back to the default.
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)

    def __len__(self):
        return len(self.pair_infos)

    def __getitem__(self, idx):
        """Load one image pair and return it as a dict (see the keys below)."""
        (idx0, idx1), overlap_score, central_matches = self.pair_infos[idx]

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, self.scene_info['image_paths'][idx0])
        img_name1 = osp.join(self.root_dir, self.scene_info['image_paths'][idx1])

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0, mask0, scale0 = read_megadepth_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None)
        image1, mask1, scale1 = read_megadepth_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None)

        # read depth. shape: (h, w)
        if self.mode in ['train', 'val']:
            depth0 = read_megadepth_depth(
                osp.join(self.root_dir, self.scene_info['depth_paths'][idx0]), pad_to=self.depth_max_size)
            depth1 = read_megadepth_depth(
                osp.join(self.root_dir, self.scene_info['depth_paths'][idx1]), pad_to=self.depth_max_size)
        else:
            depth0 = depth1 = torch.tensor([])

        # read intrinsics of original size
        K_0 = torch.tensor(self.scene_info['intrinsics'][idx0].copy(), dtype=torch.float).reshape(3, 3)
        K_1 = torch.tensor(self.scene_info['intrinsics'][idx1].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T0 = self.scene_info['poses'][idx0]
        T1 = self.scene_info['poses'][idx1]
        T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'MegaDepth',
            'scene_id': self.scene_id,
            'pair_id': idx,
            'pair_names': (self.scene_info['image_paths'][idx0], self.scene_info['image_paths'][idx1]),
        }

        # for LoFTR training
        if mask0 is not None:  # img_padding is True
            if self.coarse_scale:
                # Downsample both masks together to the coarse feature resolution.
                [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                       scale_factor=self.coarse_scale,
                                                       mode='nearest',
                                                       recompute_scale_factor=False)[0].bool()
                data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data
+ TODO: Add a `set_epoch()` method to fullfill sampling without replacement across epochs. + ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373 + """ + def __init__(self, + data_source: ConcatDataset, + n_samples_per_subset: int, + subset_replacement: bool=True, + shuffle: bool=True, + repeat: int=1, + seed: int=None): + if not isinstance(data_source, ConcatDataset): + raise TypeError("data_source should be torch.utils.data.ConcatDataset") + + self.data_source = data_source + self.n_subset = len(self.data_source.datasets) + self.n_samples_per_subset = n_samples_per_subset + self.n_samples = self.n_subset * self.n_samples_per_subset * repeat + self.subset_replacement = subset_replacement + self.repeat = repeat + self.shuffle = shuffle + self.generator = torch.manual_seed(seed) + assert self.repeat >= 1 + + def __len__(self): + return self.n_samples + + def __iter__(self): + indices = [] + # sample from each sub-dataset + for d_idx in range(self.n_subset): + low = 0 if d_idx==0 else self.data_source.cumulative_sizes[d_idx-1] + high = self.data_source.cumulative_sizes[d_idx] + if self.subset_replacement: + rand_tensor = torch.randint(low, high, (self.n_samples_per_subset, ), + generator=self.generator, dtype=torch.int64) + else: # sample without replacement + len_subset = len(self.data_source.datasets[d_idx]) + rand_tensor = torch.randperm(len_subset, generator=self.generator) + low + if len_subset >= self.n_samples_per_subset: + rand_tensor = rand_tensor[:self.n_samples_per_subset] + else: # padding with replacement + rand_tensor_replacement = torch.randint(low, high, (self.n_samples_per_subset - len_subset, ), + generator=self.generator, dtype=torch.int64) + rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement]) + indices.append(rand_tensor) + indices = torch.cat(indices) + if self.shuffle: # shuffle the sampled dataset (from multiple subsets) + rand_tensor 
class ScanNetDataset(utils.data.Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 intrinsic_path,
                 mode='train',
                 min_overlap_score=0.4,
                 augment_fn=None,
                 pose_dir=None,
                 **kwargs):
        """Manage one scene of ScanNet Dataset.
        Args:
            root_dir (str): ScanNet root directory that contains scene folders.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            intrinsic_path (str): path to depth-camera intrinsic file.
            mode (str): options are ['train', 'val', 'test'].
            min_overlap_score (float): pairs whose 'score' is not above this value are
                dropped. Only applied outside 'val'/'test' mode and when the npz has a
                'score' entry.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            pose_dir (str): ScanNet root directory that contains all poses.
                (we use a separate (optional) pose_dir since we store images and poses separately.)
        """
        super().__init__()
        self.root_dir = root_dir
        self.pose_dir = pose_dir if pose_dir is not None else root_dir
        self.mode = mode

        # prepare data_names, intrinsics and extrinsics(T)
        with np.load(npz_path) as data:
            self.data_names = data['name']
            # The original wrote `['val' or 'test']`, which evaluates to ['val'],
            # so test pairs were filtered as well.
            if 'score' in data.keys() and mode not in ['val', 'test']:
                kept_mask = data['score'] > min_overlap_score
                self.data_names = self.data_names[kept_mask]
        # Copy the arrays out and close the archive instead of leaving it open.
        with np.load(intrinsic_path) as intrinsics:
            self.intrinsics = dict(intrinsics)

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None

    def __len__(self):
        return len(self.data_names)

    def _read_abs_pose(self, scene_name, name):
        """Read the pose stored at ``<pose_dir>/<scene_name>/pose/<name>.txt``."""
        pth = osp.join(self.pose_dir,
                       scene_name,
                       'pose', f'{name}.txt')
        return read_scannet_pose(pth)

    def _compute_rel_pose(self, scene_name, name0, name1):
        """Return ``pose1 @ inv(pose0)`` for the two frames of a scene."""
        pose0 = self._read_abs_pose(scene_name, name0)
        pose1 = self._read_abs_pose(scene_name, name1)

        return np.matmul(pose1, inv(pose0))  # (4, 4)

    def __getitem__(self, idx):
        """Load one image pair and return it as a dict (see the keys below)."""
        data_name = self.data_names[idx]
        scene_name, scene_sub_name, stem_name_0, stem_name_1 = data_name
        scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'

        # read the grayscale image which will be resized to (1, 480, 640)
        img_name0 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_0}.jpg')
        img_name1 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_1}.jpg')
        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0 = read_scannet_gray(img_name0, resize=(640, 480), augment_fn=None)
        image1 = read_scannet_gray(img_name1, resize=(640, 480), augment_fn=None)

        # read the depthmap which is stored as (480, 640)
        if self.mode in ['train', 'val']:
            depth0 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_0}.png'))
            depth1 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_1}.png'))
        else:
            depth0 = depth1 = torch.tensor([])

        # read the intrinsic of depthmap (one matrix per scene, shared by both views)
        K_0 = K_1 = torch.tensor(self.intrinsics[scene_name].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1),
                              dtype=torch.float32)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'dataset_name': 'ScanNet',
            'scene_id': scene_name,
            'pair_id': idx,
            'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'),
                           osp.join(scene_name, 'color', f'{stem_name_1}.jpg'))
        }

        return data
Dataset, + DataLoader, + ConcatDataset, + DistributedSampler, + RandomSampler, + dataloader +) + +from src.utils.augment import build_augmentor +from src.utils.dataloader import get_local_split +from src.utils.misc import tqdm_joblib +from src.utils import comm +from src.datasets.megadepth import MegaDepthDataset +from src.datasets.scannet import ScanNetDataset +from src.datasets.sampler import RandomConcatSampler + + +class MultiSceneDataModule(pl.LightningDataModule): + """ + For distributed training, each training process is assgined + only a part of the training scenes to reduce memory overhead. + """ + def __init__(self, args, config): + super().__init__() + + # 1. data config + # Train and Val should from the same data source + self.trainval_data_source = config.DATASET.TRAINVAL_DATA_SOURCE + self.test_data_source = config.DATASET.TEST_DATA_SOURCE + # training and validating + self.train_data_root = config.DATASET.TRAIN_DATA_ROOT + self.train_pose_root = config.DATASET.TRAIN_POSE_ROOT # (optional) + self.train_npz_root = config.DATASET.TRAIN_NPZ_ROOT + self.train_list_path = config.DATASET.TRAIN_LIST_PATH + self.train_intrinsic_path = config.DATASET.TRAIN_INTRINSIC_PATH + self.val_data_root = config.DATASET.VAL_DATA_ROOT + self.val_pose_root = config.DATASET.VAL_POSE_ROOT # (optional) + self.val_npz_root = config.DATASET.VAL_NPZ_ROOT + self.val_list_path = config.DATASET.VAL_LIST_PATH + self.val_intrinsic_path = config.DATASET.VAL_INTRINSIC_PATH + # testing + self.test_data_root = config.DATASET.TEST_DATA_ROOT + self.test_pose_root = config.DATASET.TEST_POSE_ROOT # (optional) + self.test_npz_root = config.DATASET.TEST_NPZ_ROOT + self.test_list_path = config.DATASET.TEST_LIST_PATH + self.test_intrinsic_path = config.DATASET.TEST_INTRINSIC_PATH + + # 2. 
dataset config + # general options + self.min_overlap_score_test = config.DATASET.MIN_OVERLAP_SCORE_TEST # 0.4, omit data with overlap_score < min_overlap_score + self.min_overlap_score_train = config.DATASET.MIN_OVERLAP_SCORE_TRAIN + self.augment_fn = build_augmentor(config.DATASET.AUGMENTATION_TYPE) # None, options: [None, 'dark', 'mobile'] + + # MegaDepth options + self.mgdpt_img_resize = config.DATASET.MGDPT_IMG_RESIZE # 840 + self.mgdpt_img_pad = config.DATASET.MGDPT_IMG_PAD # True + self.mgdpt_depth_pad = config.DATASET.MGDPT_DEPTH_PAD # True + self.mgdpt_df = config.DATASET.MGDPT_DF # 8 + self.coarse_scale = 1 / config.ASPAN.RESOLUTION[0] # 0.125. for training loftr. + + # 3.loader parameters + self.train_loader_params = { + 'batch_size': args.batch_size, + 'num_workers': args.num_workers, + 'pin_memory': getattr(args, 'pin_memory', True) + } + self.val_loader_params = { + 'batch_size': 1, + 'shuffle': False, + 'num_workers': args.num_workers, + 'pin_memory': getattr(args, 'pin_memory', True) + } + self.test_loader_params = { + 'batch_size': 1, + 'shuffle': False, + 'num_workers': args.num_workers, + 'pin_memory': True + } + + # 4. sampler + self.data_sampler = config.TRAINER.DATA_SAMPLER + self.n_samples_per_subset = config.TRAINER.N_SAMPLES_PER_SUBSET + self.subset_replacement = config.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT + self.shuffle = config.TRAINER.SB_SUBSET_SHUFFLE + self.repeat = config.TRAINER.SB_REPEAT + + # (optional) RandomSampler for debugging + + # misc configurations + self.parallel_load_data = getattr(args, 'parallel_load_data', False) + self.seed = config.TRAINER.SEED # 66 + + def setup(self, stage=None): + """ + Setup train / val / test dataset. This method will be called by PL automatically. + Args: + stage (str): 'fit' in training phase, and 'test' in testing phase. 
+ """ + + assert stage in ['fit', 'test'], "stage must be either fit or test" + + try: + self.world_size = dist.get_world_size() + self.rank = dist.get_rank() + logger.info(f"[rank:{self.rank}] world_size: {self.world_size}") + except AssertionError as ae: + self.world_size = 1 + self.rank = 0 + logger.warning(str(ae) + " (set wolrd_size=1 and rank=0)") + + if stage == 'fit': + self.train_dataset = self._setup_dataset( + self.train_data_root, + self.train_npz_root, + self.train_list_path, + self.train_intrinsic_path, + mode='train', + min_overlap_score=self.min_overlap_score_train, + pose_dir=self.train_pose_root) + # setup multiple (optional) validation subsets + if isinstance(self.val_list_path, (list, tuple)): + self.val_dataset = [] + if not isinstance(self.val_npz_root, (list, tuple)): + self.val_npz_root = [self.val_npz_root for _ in range(len(self.val_list_path))] + for npz_list, npz_root in zip(self.val_list_path, self.val_npz_root): + self.val_dataset.append(self._setup_dataset( + self.val_data_root, + npz_root, + npz_list, + self.val_intrinsic_path, + mode='val', + min_overlap_score=self.min_overlap_score_test, + pose_dir=self.val_pose_root)) + else: + self.val_dataset = self._setup_dataset( + self.val_data_root, + self.val_npz_root, + self.val_list_path, + self.val_intrinsic_path, + mode='val', + min_overlap_score=self.min_overlap_score_test, + pose_dir=self.val_pose_root) + logger.info(f'[rank:{self.rank}] Train & Val Dataset loaded!') + else: # stage == 'test + self.test_dataset = self._setup_dataset( + self.test_data_root, + self.test_npz_root, + self.test_list_path, + self.test_intrinsic_path, + mode='test', + min_overlap_score=self.min_overlap_score_test, + pose_dir=self.test_pose_root) + logger.info(f'[rank:{self.rank}]: Test Dataset loaded!') + + def _setup_dataset(self, + data_root, + split_npz_root, + scene_list_path, + intri_path, + mode='train', + min_overlap_score=0., + pose_dir=None): + """ Setup train / val / test set""" + with 
open(scene_list_path, 'r') as f: + npz_names = [name.split()[0] for name in f.readlines()] + + if mode == 'train': + local_npz_names = get_local_split(npz_names, self.world_size, self.rank, self.seed) + else: + local_npz_names = npz_names + logger.info(f'[rank {self.rank}]: {len(local_npz_names)} scene(s) assigned.') + + dataset_builder = self._build_concat_dataset_parallel \ + if self.parallel_load_data \ + else self._build_concat_dataset + return dataset_builder(data_root, local_npz_names, split_npz_root, intri_path, + mode=mode, min_overlap_score=min_overlap_score, pose_dir=pose_dir) + + def _build_concat_dataset( + self, + data_root, + npz_names, + npz_dir, + intrinsic_path, + mode, + min_overlap_score=0., + pose_dir=None + ): + datasets = [] + augment_fn = self.augment_fn if mode == 'train' else None + data_source = self.trainval_data_source if mode in ['train', 'val'] else self.test_data_source + if data_source=='GL3D' and mode=='val': + data_source='MegaDepth' + if str(data_source).lower() == 'megadepth': + npz_names = [f'{n}.npz' for n in npz_names] + if str(data_source).lower() == 'gl3d': + npz_names = [f'{n}.txt' for n in npz_names] + #npz_names=npz_names[:8] + for npz_name in tqdm(npz_names, + desc=f'[rank:{self.rank}] loading {mode} datasets', + disable=int(self.rank) != 0): + # `ScanNetDataset`/`MegaDepthDataset` load all data from npz_path when initialized, which might take time. 
def _build_concat_dataset_parallel(
    self,
    data_root,
    npz_names,
    npz_dir,
    intrinsic_path,
    mode,
    min_overlap_score=0.,
    pose_dir=None,
):
    """Build one dataset per scene file in joblib workers and concatenate them.

    Only the 'ScanNet' data source is supported.

    Args:
        data_root: forwarded to the dataset constructor.
        npz_names: scene file names, joined onto ``npz_dir``.
        npz_dir: directory containing the scene files.
        intrinsic_path: forwarded to the dataset constructor.
        mode (str): 'train', 'val' or 'test'; augmentation is only used for 'train'.
        min_overlap_score (float): forwarded to the dataset constructor.
        pose_dir: forwarded to the dataset constructor.

    Returns:
        ConcatDataset over the per-scene datasets.

    Raises:
        NotImplementedError: for the 'MegaDepth' data source.
        ValueError: for any other unknown data source.
    """
    data_source = self.trainval_data_source if mode in ['train', 'val'] else self.test_data_source
    # Reject unsupported sources before creating the progress bar / workers.
    if data_source == 'MegaDepth':
        # TODO: _pickle.PicklingError: Could not pickle the task to send it to the workers.
        raise NotImplementedError('Parallel data loading is not implemented for MegaDepth.')
    if data_source != 'ScanNet':
        raise ValueError(f'Unknown dataset: {data_source}')

    augment_fn = self.augment_fn if mode == 'train' else None
    # Use ~90% of the CPUs available to this process, shared between the local
    # processes. The floor can reach 0 on small machines, which joblib rejects,
    # so always keep at least one worker.
    n_jobs = max(1, math.floor(len(os.sched_getaffinity(0)) * 0.9 / comm.get_local_size()))

    def _load_scene(npz_name):
        return _build_dataset(
            ScanNetDataset,
            data_root,
            osp.join(npz_dir, npz_name),
            intrinsic_path,
            mode=mode,
            min_overlap_score=min_overlap_score,
            augment_fn=augment_fn,
            pose_dir=pose_dir)

    with tqdm_joblib(tqdm(desc=f'[rank:{self.rank}] loading {mode} datasets',
                          total=len(npz_names), disable=int(self.rank) != 0)):
        datasets = Parallel(n_jobs=n_jobs)(delayed(_load_scene)(name) for name in npz_names)
    return ConcatDataset(datasets)
""" + logger.info(f'[rank:{self.rank}/{self.world_size}]: Val Sampler and DataLoader re-init.') + if not isinstance(self.val_dataset, abc.Sequence): + sampler = DistributedSampler(self.val_dataset, shuffle=False) + return DataLoader(self.val_dataset, sampler=sampler, **self.val_loader_params) + else: + dataloaders = [] + for dataset in self.val_dataset: + sampler = DistributedSampler(dataset, shuffle=False) + dataloaders.append(DataLoader(dataset, sampler=sampler, **self.val_loader_params)) + return dataloaders + + def test_dataloader(self, *args, **kwargs): + logger.info(f'[rank:{self.rank}/{self.world_size}]: Test Sampler and DataLoader re-init.') + sampler = DistributedSampler(self.test_dataset, shuffle=False) + return DataLoader(self.test_dataset, sampler=sampler, **self.test_loader_params) + + +def _build_dataset(dataset: Dataset, *args, **kwargs): + return dataset(*args, **kwargs) diff --git a/third_party/ASpanFormer/src/lightning/lightning_aspanformer.py b/third_party/ASpanFormer/src/lightning/lightning_aspanformer.py new file mode 100644 index 0000000000000000000000000000000000000000..ee20cbec4628b73c08358ebf1e1906fb2c0ac13c --- /dev/null +++ b/third_party/ASpanFormer/src/lightning/lightning_aspanformer.py @@ -0,0 +1,276 @@ + +from collections import defaultdict +import pprint +from loguru import logger +from pathlib import Path + +import torch +import numpy as np +import pytorch_lightning as pl +from matplotlib import pyplot as plt + +from src.ASpanFormer.aspanformer import ASpanFormer +from src.ASpanFormer.utils.supervision import compute_supervision_coarse, compute_supervision_fine +from src.losses.aspan_loss import ASpanLoss +from src.optimizers import build_optimizer, build_scheduler +from src.utils.metrics import ( + compute_symmetrical_epipolar_errors,compute_symmetrical_epipolar_errors_offset_bidirectional, + compute_pose_errors, + aggregate_metrics +) +from src.utils.plotting import make_matching_figures,make_matching_figures_offset +from 
def __init__(self, config, pretrained_ckpt=None, profiler=None, dump_dir=None):
    """
    Args:
        config: full experiment config (attribute-style access).
        pretrained_ckpt: optional checkpoint path; its 'state_dict' entry is
            loaded into the matcher non-strictly.
        profiler: optional profiler; a PassThroughProfiler is used when omitted.
        dump_dir: optional directory where test results are dumped.

    TODO:
        - use the new version of PL logging API.
    """
    super().__init__()
    # Misc
    self.config = config  # full config
    _config = lower_config(self.config)
    self.loftr_cfg = lower_config(_config['aspan'])
    self.profiler = profiler or PassThroughProfiler()
    self.n_vals_plot = max(config.TRAINER.N_VAL_PAIRS_TO_PLOT // config.TRAINER.WORLD_SIZE, 1)

    # Matcher: LoFTR
    self.matcher = ASpanFormer(config=_config['aspan'])
    self.loss = ASpanLoss(_config)

    # Pretrained weights
    if pretrained_ckpt:
        state_dict = torch.load(pretrained_ckpt, map_location='cpu')['state_dict']
        # strict=False: report what did not match instead of failing.
        load_result = self.matcher.load_state_dict(state_dict, strict=False)
        logger.info(f"Load \'{pretrained_ckpt}\' as pretrained checkpoint")
        logger.info(f"Checkpoint load result: {load_result}")

    # Testing
    self.dump_dir = dump_dir
def training_step(self, batch, batch_idx):
    """Run one training iteration and log scalars / figures on rank 0.

    Args:
        batch (dict): input batch; updated in place by the inference, loss
            and metric helpers.
        batch_idx (int): index of the batch (unused here).

    Returns:
        dict: ``{'loss': batch['loss']}``.
    """
    self._trainval_inference(batch)

    # logging
    if self.trainer.global_rank == 0 and self.global_step % self.trainer.log_every_n_steps == 0:
        tb = self.logger.experiment
        loss_scalars = batch['loss_scalars']

        # scalars (per-layer flow / confidence entries are logged separately below)
        for k, v in loss_scalars.items():
            if not k.startswith('loss_flow') and not k.startswith('conf_'):
                tb.add_scalar(f'train/{k}', v, self.global_step)

        # log offset_loss and conf for each layer and level
        layer_num = self.loftr_cfg['coarse']['layer_num']
        for layer_index in range(layer_num):
            log_title = 'layer_' + str(layer_index)
            tb.add_scalar(log_title + '/offset_loss', loss_scalars['loss_flow_' + str(layer_index)], self.global_step)
            tb.add_scalar(log_title + '/conf_', loss_scalars['conf_' + str(layer_index)], self.global_step)

        # net-params
        if self.config.ASPAN.MATCH_COARSE.MATCH_TYPE == 'sinkhorn':
            tb.add_scalar(
                'skh_bin_score', self.matcher.coarse_matching.bin_score.clone().detach().cpu().data, self.global_step)

        # figures
        if self.config.TRAINER.ENABLE_PLOTTING:
            compute_symmetrical_epipolar_errors(batch)  # compute epi_errs for each match
            figures = make_matching_figures(batch, self.config, self.config.TRAINER.PLOT_MODE)
            for k, v in figures.items():
                tb.add_figure(f'train_match/{k}', v, self.global_step)

        # plot offset
        # NOTE(review): assumed to be gated by the logging condition above but
        # not by ENABLE_PLOTTING -- confirm against the intended nesting.
        if self.global_step % 200 == 0:
            compute_symmetrical_epipolar_errors_offset_bidirectional(batch)
            # Each side is rendered exactly once.
            figures_left = make_matching_figures_offset(batch, self.config, self.config.TRAINER.PLOT_MODE, side='_left')
            figures_right = make_matching_figures_offset(batch, self.config, self.config.TRAINER.PLOT_MODE, side='_right')
            for k, v in figures_left.items():
                tb.add_figure(f'train_offset/{k}' + '_left', v, self.global_step)
            for k, v in figures_right.items():
                tb.add_figure(f'train_offset/{k}' + '_right', v, self.global_step)

    return {'loss': batch['loss']}
def validation_epoch_end(self, outputs):
    """Aggregate validation outputs, log them on rank 0 and log AUC on all ranks.

    Args:
        outputs: list of ``validation_step`` results, or a list of such lists
            when several validation sets are used.
    """
    # handle multiple validation sets
    multi_outputs = [outputs] if not isinstance(outputs[0], (list, tuple)) else outputs
    multi_val_metrics = defaultdict(list)

    for valset_idx, valset_outputs in enumerate(multi_outputs):
        # since pl performs sanity_check at the very beginning of the training
        cur_epoch = self.trainer.current_epoch
        if not self.trainer.resume_from_checkpoint and self.trainer.running_sanity_check:
            cur_epoch = -1

        # 1. loss_scalars: dict of list, on cpu
        _loss_scalars = [o['loss_scalars'] for o in valset_outputs]
        loss_scalars = {k: flattenList(all_gather([_ls[k] for _ls in _loss_scalars])) for k in _loss_scalars[0]}

        # 2. val metrics: dict of list, numpy
        _metrics = [o['metrics'] for o in valset_outputs]
        metrics = {k: flattenList(all_gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}
        # NOTE: all ranks need to `aggregate_metrics`, but only log at rank-0
        val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)
        for thr in [5, 10, 20]:
            multi_val_metrics[f'auc@{thr}'].append(val_metrics_4tb[f'auc@{thr}'])

        # 3. figures
        _figures = [o['figures'] for o in valset_outputs]
        figures = {k: flattenList(gather(flattenList([_me[k] for _me in _figures]))) for k in _figures[0]}

        # tensorboard records only on rank 0
        if self.trainer.global_rank == 0:
            tb = self.logger.experiment
            for k, v in loss_scalars.items():
                mean_v = torch.stack(v).mean()
                tb.add_scalar(f'val_{valset_idx}/avg_{k}', mean_v, global_step=cur_epoch)

            for k, v in val_metrics_4tb.items():
                tb.add_scalar(f"metrics_{valset_idx}/{k}", v, global_step=cur_epoch)

            for k, v in figures.items():
                for plot_idx, fig in enumerate(v):
                    tb.add_figure(
                        f'val_match_{valset_idx}/{k}/pair-{plot_idx}', fig, cur_epoch, close=True)
        plt.close('all')

    for thr in [5, 10, 20]:
        # log on all ranks for ModelCheckpoint callback to work properly
        self.log(f'auc@{thr}', torch.tensor(np.mean(multi_val_metrics[f'auc@{thr}'])))  # ckpt monitors on this
_metrics[0]} + + # [{key: [{...}, *#bs]}, *#batch] + if self.dump_dir is not None: + Path(self.dump_dir).mkdir(parents=True, exist_ok=True) + _dumps = flattenList([o['dumps'] for o in outputs]) # [{...}, #bs*#batch] + dumps = flattenList(gather(_dumps)) # [{...}, #proc*#bs*#batch] + logger.info(f'Prediction and evaluation results will be saved to: {self.dump_dir}') + + if self.trainer.global_rank == 0: + print(self.profiler.summary()) + val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR) + logger.info('\n' + pprint.pformat(val_metrics_4tb)) + if self.dump_dir is not None: + np.save(Path(self.dump_dir) / 'LoFTR_pred_eval', dumps) diff --git a/third_party/ASpanFormer/src/losses/aspan_loss.py b/third_party/ASpanFormer/src/losses/aspan_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0cca52b36fc997415937969f26caba8c41ac2b8e --- /dev/null +++ b/third_party/ASpanFormer/src/losses/aspan_loss.py @@ -0,0 +1,231 @@ +from loguru import logger + +import torch +import torch.nn as nn + +class ASpanLoss(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config # config under the global namespace + self.loss_config = config['aspan']['loss'] + self.match_type = self.config['aspan']['match_coarse']['match_type'] + self.sparse_spvs = self.config['aspan']['match_coarse']['sparse_spvs'] + self.flow_weight=self.config['aspan']['loss']['flow_weight'] + + # coarse-level + self.correct_thr = self.loss_config['fine_correct_thr'] + self.c_pos_w = self.loss_config['pos_weight'] + self.c_neg_w = self.loss_config['neg_weight'] + # fine-level + self.fine_type = self.loss_config['fine_type'] + + def compute_flow_loss(self,coarse_corr_gt,flow_list,h0,w0,h1,w1): + #coarse_corr_gt:[[batch_indices],[left_indices],[right_indices]] + #flow_list: [L,B,H,W,4] + loss1=self.flow_loss_worker(flow_list[0],coarse_corr_gt[0],coarse_corr_gt[1],coarse_corr_gt[2],w1) + 
def flow_loss_worker(self, flow, batch_indicies, self_indicies, cross_indicies, w):
    """Per-layer flow loss at the supervised coarse locations.

    Args:
        flow (torch.Tensor): leading dims are (layer, batch), last dim is 4;
            it is flattened to [layer, batch, -1, 4]. Channels 0:2 are read as
            the predicted flow and channels 2:4 as its confidence term.
        batch_indicies (torch.Tensor): batch index of each supervised match.
        self_indicies (torch.Tensor): flattened location of each match in ``flow``.
        cross_indicies (torch.Tensor): flattened location of each match in the
            other image; decoded into (index % w, index // w) targets.
        w (int): row width used to decode ``cross_indicies``.

    Returns:
        torch.Tensor: one loss value per layer, scaled by ``self.flow_weight``.
    """
    layer_num, bs = flow.shape[0], flow.shape[1]
    # reshape (rather than view) also accepts non-contiguous input
    flow = flow.reshape(layer_num, bs, -1, 4)
    gt_flow = torch.stack([cross_indicies % w, cross_indicies // w], dim=1)  # [#coarse,2]

    # Gather the supervised locations once for all layers: [layer,#coarse,4]
    spv = flow[:, batch_indicies, self_indicies]
    spv_flow, spv_conf = spv[..., :2], spv[..., 2:]
    l2_flow_dis = (gt_flow - spv_flow) ** 2  # [layer,#coarse,2]
    per_element = spv_conf + torch.exp(-spv_conf) * l2_flow_dis
    # NOTE(review): forward() multiplies the flow loss by flow_weight again, so
    # the weight is effectively applied twice -- confirm this is intended.
    return per_element.mean(dim=(1, 2)) * self.flow_weight
+ conf = torch.clamp(conf, 1e-6, 1-1e-6) + loss_pos = - torch.log(conf[pos_mask]) + loss_neg = - torch.log(1 - conf[neg_mask]) + if weight is not None: + loss_pos = loss_pos * weight[pos_mask] + loss_neg = loss_neg * weight[neg_mask] + return c_pos_w * loss_pos.mean() + c_neg_w * loss_neg.mean() + elif self.loss_config['coarse_type'] == 'focal': + conf = torch.clamp(conf, 1e-6, 1-1e-6) + alpha = self.loss_config['focal_alpha'] + gamma = self.loss_config['focal_gamma'] + + if self.sparse_spvs: + pos_conf = conf[:, :-1, :-1][pos_mask] \ + if self.match_type == 'sinkhorn' \ + else conf[pos_mask] + loss_pos = - alpha * torch.pow(1 - pos_conf, gamma) * pos_conf.log() + # calculate losses for negative samples + if self.match_type == 'sinkhorn': + neg0, neg1 = conf_gt.sum(-1) == 0, conf_gt.sum(1) == 0 + neg_conf = torch.cat([conf[:, :-1, -1][neg0], conf[:, -1, :-1][neg1]], 0) + loss_neg = - alpha * torch.pow(1 - neg_conf, gamma) * neg_conf.log() + else: + # These is no dustbin for dual_softmax, so we left unmatchable patches without supervision. + # we could also add 'pseudo negtive-samples' + pass + # handle loss weights + if weight is not None: + # Different from dense-spvs, the loss w.r.t. padded regions aren't directly zeroed out, + # but only through manually setting corresponding regions in sim_matrix to '-inf'. + loss_pos = loss_pos * weight[pos_mask] + if self.match_type == 'sinkhorn': + neg_w0 = (weight.sum(-1) != 0)[neg0] + neg_w1 = (weight.sum(1) != 0)[neg1] + neg_mask = torch.cat([neg_w0, neg_w1], 0) + loss_neg = loss_neg[neg_mask] + + loss = c_pos_w * loss_pos.mean() + c_neg_w * loss_neg.mean() \ + if self.match_type == 'sinkhorn' \ + else c_pos_w * loss_pos.mean() + return loss + # positive and negative elements occupy similar propotions. => more balanced loss weights needed + else: # dense supervision (in the case of match_type=='sinkhorn', the dustbin is not supervised.) 
+ loss_pos = - alpha * torch.pow(1 - conf[pos_mask], gamma) * (conf[pos_mask]).log() + loss_neg = - alpha * torch.pow(conf[neg_mask], gamma) * (1 - conf[neg_mask]).log() + if weight is not None: + loss_pos = loss_pos * weight[pos_mask] + loss_neg = loss_neg * weight[neg_mask] + return c_pos_w * loss_pos.mean() + c_neg_w * loss_neg.mean() + # each negative element occupy a smaller propotion than positive elements. => higher negative loss weight needed + else: + raise ValueError('Unknown coarse loss: {type}'.format(type=self.loss_config['coarse_type'])) + + def compute_fine_loss(self, expec_f, expec_f_gt): + if self.fine_type == 'l2_with_std': + return self._compute_fine_loss_l2_std(expec_f, expec_f_gt) + elif self.fine_type == 'l2': + return self._compute_fine_loss_l2(expec_f, expec_f_gt) + else: + raise NotImplementedError() + + def _compute_fine_loss_l2(self, expec_f, expec_f_gt): + """ + Args: + expec_f (torch.Tensor): [M, 2] + expec_f_gt (torch.Tensor): [M, 2] + """ + correct_mask = torch.linalg.norm(expec_f_gt, ord=float('inf'), dim=1) < self.correct_thr + if correct_mask.sum() == 0: + if self.training: # this seldomly happen when training, since we pad prediction with gt + logger.warning("assign a false supervision to avoid ddp deadlock") + correct_mask[0] = True + else: + return None + flow_l2 = ((expec_f_gt[correct_mask] - expec_f[correct_mask]) ** 2).sum(-1) + return flow_l2.mean() + + def _compute_fine_loss_l2_std(self, expec_f, expec_f_gt): + """ + Args: + expec_f (torch.Tensor): [M, 3] + expec_f_gt (torch.Tensor): [M, 2] + """ + # correct_mask tells you which pair to compute fine-loss + correct_mask = torch.linalg.norm(expec_f_gt, ord=float('inf'), dim=1) < self.correct_thr + + # use std as weight that measures uncertainty + std = expec_f[:, 2] + inverse_std = 1. 
/ torch.clamp(std, min=1e-10) + weight = (inverse_std / torch.mean(inverse_std)).detach() # avoid minizing loss through increase std + + # corner case: no correct coarse match found + if not correct_mask.any(): + if self.training: # this seldomly happen during training, since we pad prediction with gt + # sometimes there is not coarse-level gt at all. + logger.warning("assign a false supervision to avoid ddp deadlock") + correct_mask[0] = True + weight[0] = 0. + else: + return None + + # l2 loss with std + flow_l2 = ((expec_f_gt[correct_mask] - expec_f[correct_mask, :2]) ** 2).sum(-1) + loss = (flow_l2 * weight[correct_mask]).mean() + + return loss + + @torch.no_grad() + def compute_c_weight(self, data): + """ compute element-wise weights for computing coarse-level loss. """ + if 'mask0' in data: + c_weight = (data['mask0'].flatten(-2)[..., None] * data['mask1'].flatten(-2)[:, None]).float() + else: + c_weight = None + return c_weight + + def forward(self, data): + """ + Update: + data (dict): update{ + 'loss': [1] the reduced loss across a batch, + 'loss_scalars' (dict): loss scalars for tensorboard_record + } + """ + loss_scalars = {} + # 0. compute element-wise loss weight + c_weight = self.compute_c_weight(data) + + # 1. coarse-level loss + loss_c = self.compute_coarse_loss( + data['conf_matrix_with_bin'] if self.sparse_spvs and self.match_type == 'sinkhorn' \ + else data['conf_matrix'], + data['conf_matrix_gt'], + weight=c_weight) + loss = loss_c * self.loss_config['coarse_weight'] + loss_scalars.update({"loss_c": loss_c.clone().detach().cpu()}) + + # 2. fine-level loss + loss_f = self.compute_fine_loss(data['expec_f'], data['expec_f_gt']) + if loss_f is not None: + loss += loss_f * self.loss_config['fine_weight'] + loss_scalars.update({"loss_f": loss_f.clone().detach().cpu()}) + else: + assert self.training is False + loss_scalars.update({'loss_f': torch.tensor(1.)}) # 1 is the upper bound + + # 3. 
def build_scheduler(config, optimizer):
    """Build the learning-rate scheduler description selected by the config.

    Only the config entries of the selected scheduler are read.

    Args:
        config: config object; reads ``TRAINER.SCHEDULER``,
            ``TRAINER.SCHEDULER_INTERVAL`` and the scheduler-specific entries.
        optimizer: optimizer the scheduler is attached to.

    Returns:
        scheduler (dict):{
            'scheduler': lr_scheduler,
            'interval': 'step',  # or 'epoch'
        }

    Raises:
        NotImplementedError: if ``TRAINER.SCHEDULER`` is not one of
            'MultiStepLR', 'CosineAnnealing' or 'ExponentialLR'.
    """
    scheduler = {'interval': config.TRAINER.SCHEDULER_INTERVAL}
    name = config.TRAINER.SCHEDULER

    if name == 'MultiStepLR':
        lr_scheduler = MultiStepLR(optimizer, config.TRAINER.MSLR_MILESTONES, gamma=config.TRAINER.MSLR_GAMMA)
    elif name == 'CosineAnnealing':
        lr_scheduler = CosineAnnealingLR(optimizer, config.TRAINER.COSA_TMAX)
    elif name == 'ExponentialLR':
        lr_scheduler = ExponentialLR(optimizer, config.TRAINER.ELR_GAMMA)
    else:
        # Same exception type as before, now naming the offending value.
        raise NotImplementedError(f"TRAINER.SCHEDULER = {name} is not a supported scheduler!")

    scheduler.update({'scheduler': lr_scheduler})
    return scheduler
+ """ + + def __init__(self): + self.augmentor = A.Compose([ + A.MotionBlur(p=0.25), + A.ColorJitter(p=0.5), + A.RandomRain(p=0.1), # random occlusion + A.RandomSunFlare(p=0.1), + A.JpegCompression(p=0.25), + A.ISONoise(p=0.25) + ], p=1.0) + + def __call__(self, x): + return self.augmentor(image=x)['image'] + + +def build_augmentor(method=None, **kwargs): + if method is not None: + raise NotImplementedError('Using of augmentation functions are not supported yet!') + if method == 'dark': + return DarkAug() + elif method == 'mobile': + return MobileAug() + elif method is None: + return None + else: + raise ValueError(f'Invalid augmentation method: {method}') + + +if __name__ == '__main__': + augmentor = build_augmentor('FDA') diff --git a/third_party/ASpanFormer/src/utils/comm.py b/third_party/ASpanFormer/src/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..26ec9517cc47e224430106d8ae9aa99a3fe49167 --- /dev/null +++ b/third_party/ASpanFormer/src/utils/comm.py @@ -0,0 +1,265 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +[Copied from detectron2] +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +""" + +import functools +import logging +import numpy as np +import pickle +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". 
+""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. 
+ """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. 
def gather(data, dst=0, group=None):
    """
    Run gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group=group) == 1:
        return [data]

    size_list, padded = _pad_to_largest_tensor(_serialize_to_tensor(data, group), group)

    # non-destination ranks only send
    if dist.get_rank(group=group) != dst:
        dist.gather(padded, [], dst=dst, group=group)
        return []

    # destination rank: receive one padded buffer per rank, then unpickle
    max_size = max(size_list)
    recv_buffers = [
        torch.empty((max_size,), dtype=torch.uint8, device=padded.device) for _ in size_list
    ]
    dist.gather(padded, recv_buffers, dst=dst, group=group)
    return [
        pickle.loads(chunk.cpu().numpy().tobytes()[:size])
        for size, chunk in zip(size_list, recv_buffers)
    ]
def shared_random_seed():
    """
    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.

    All workers must call this function, otherwise it will deadlock.
    """
    candidate = np.random.randint(2 ** 31)
    # every rank draws its own number; all of them adopt the first entry
    return all_gather(candidate)[0]


def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the reduced results.

    Args:
        input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
        average (bool): whether to do average or sum

    Returns:
        a dict with the same keys as input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        # sorted keys give every process the same stacking order
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[name] for name in names], dim=0)
        dist.reduce(values, dst=0)
        if dist.get_rank() == 0 and average:
            # only the main process holds the accumulated sum, so only it divides
            values /= world_size
        reduced_dict = dict(zip(names, values))
    return reduced_dict
def get_local_split(items: list, world_size: int, rank: int, seed: int):
    """ Return the slice of a seeded permutation of `items` that belongs to `rank`.

    When len(items) is not a multiple of world_size, the permutation is extended
    with items re-drawn (with replacement) from `items` so every rank gets the
    same number of entries.
    """
    n_items = len(items)
    shuffled = np.random.RandomState(seed).permutation(items)
    remainder = n_items % world_size
    if remainder:
        padding = np.random.RandomState(seed).choice(
            items,
            world_size - remainder,
            replace=True)
        padded_items = np.concatenate([shuffled, padding])
    else:
        padded_items = shuffled
    assert len(padded_items) % world_size == 0, \
        f'len(padded_items): {len(padded_items)}; world_size: {world_size}; len(padding): {len(padding)}'
    n_per_rank = len(padded_items) // world_size
    start = n_per_rank * rank
    return padded_items[start: start + n_per_rank]
def load_array_from_s3(
    path, client, cv_type,
    use_h5py=False,
):
    """Fetch `path` through `client.Get` and decode the payload into an array.

    Args:
        path: object path handed to `client.Get`.
        client: object exposing `Get(path)`; presumably returns bytes (TODO confirm).
        cv_type: OpenCV imread flag used when decoding an image (ignored for h5py).
        use_h5py (bool): decode the payload as an HDF5 file and read '/depth'
            instead of decoding it as an image.

    Returns:
        np.ndarray: the decoded image or depth map.

    Raises:
        Whatever the decoder raises (after logging the failing path), or
        AssertionError when decoding yields None.
    """
    byte_str = client.Get(path)
    try:
        if not use_h5py:
            # np.fromstring's binary mode is deprecated; frombuffer is the replacement
            raw_array = np.frombuffer(byte_str, np.uint8)
            data = cv2.imdecode(raw_array, cv_type)
        else:
            # copy the dataset out before the in-memory file is closed
            with h5py.File(io.BytesIO(byte_str), 'r') as h5_file:
                data = np.array(h5_file['/depth'])
    except Exception:
        print(f"==> Data loading failure: {path}")
        raise

    assert data is not None
    return data


def imread_gray(path, augment_fn=None, client=SCANNET_CLIENT):
    """Read an image as grayscale, optionally applying a color augmentation first.

    Args:
        path: local path or 's3://' path of the image.
        augment_fn (callable, optional): applied to the RGB image before the
            grayscale conversion.
        client: S3 client used for 's3://' paths.

    Returns:
        np.ndarray: (h, w) grayscale image.
    """
    cv_type = cv2.IMREAD_GRAYSCALE if augment_fn is None \
        else cv2.IMREAD_COLOR
    if str(path).startswith('s3://'):
        image = load_array_from_s3(str(path), client, cv_type)
    else:
        image = cv2.imread(str(path), cv_type)

    if augment_fn is not None:
        # The image was already loaded in color above. The previous second
        # cv2.imread here discarded the S3 payload and returned None for s3:// paths.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = augment_fn(image)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return image  # (h, w)
def get_resized_wh(w, h, resize=None):
    """Scale (w, h) so that the longer edge equals `resize`; unchanged when `resize` is None."""
    if resize is None:
        return w, h
    scale = resize / max(h, w)
    return int(round(w * scale)), int(round(h * scale))


def get_divisible_wh(w, h, df=None):
    """Round w and h down to multiples of `df`; unchanged when `df` is None."""
    if df is None:
        return w, h
    return int(w // df * df), int(h // df * df)


def pad_bottom_right(inp, pad_size, ret_mask=False):
    """Zero-pad the last two dims of a 2-D or 3-D array to (pad_size, pad_size).

    Returns:
        padded (np.ndarray): input placed in the top-left corner of a zero array.
        mask (np.ndarray | None): boolean array, True where the input lies;
            None unless `ret_mask` is set.
    """
    assert isinstance(pad_size, int) and pad_size >= max(inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}"
    if inp.ndim not in (2, 3):
        raise NotImplementedError()

    h, w = inp.shape[-2:]
    out_shape = tuple(inp.shape[:-2]) + (pad_size, pad_size)
    padded = np.zeros(out_shape, dtype=inp.dtype)
    padded[..., :h, :w] = inp

    mask = None
    if ret_mask:
        mask = np.zeros(out_shape, dtype=bool)
        mask[..., :h, :w] = True
    return padded, mask


# --- MEGADEPTH ---

def read_megadepth_gray(path, resize=None, df=None, padding=False, augment_fn=None):
    """
    Args:
        resize (int, optional): the longer edge of resized images. None for no resize.
        df (int, optional): the resized width/height are rounded down to multiples of df.
        padding (bool): If set to 'True', zero-pad resized images to squared size.
        augment_fn (callable, optional): augments images with pre-defined visual effects
    Returns:
        image (torch.tensor): (1, h, w)
        mask (torch.tensor): (h, w)
        scale (torch.tensor): [w/w_new, h/h_new]
    """
    image = imread_gray(path, augment_fn, client=MEGADEPTH_CLIENT)

    # target size: resize the longer edge first, then snap to multiples of df
    h, w = image.shape[0], image.shape[1]
    w_new, h_new = get_divisible_wh(*get_resized_wh(w, h, resize), df)

    image = cv2.resize(image, (w_new, h_new))
    scale = torch.tensor([w / w_new, h / h_new], dtype=torch.float)

    mask = None
    if padding:
        image, mask = pad_bottom_right(image, max(h_new, w_new), ret_mask=True)

    image = torch.from_numpy(image).float()[None] / 255  # (h, w) -> (1, h, w) and normalized
    if mask is not None:
        mask = torch.from_numpy(mask)

    return image, mask, scale
def read_megadepth_depth(path, pad_to=None):
    """Load a MegaDepth depth map from an HDF5 file (local or 's3://').

    Args:
        path: local path or 's3://' path of the HDF5 file holding a 'depth' dataset.
        pad_to (int, optional): zero-pad the map to (pad_to, pad_to) when given.

    Returns:
        depth (torch.tensor): (h, w) float tensor.
    """
    if str(path).startswith('s3://'):
        depth = load_array_from_s3(path, MEGADEPTH_CLIENT, None, use_h5py=True)
    else:
        # close the HDF5 file deterministically; np.array copies the data out first
        with h5py.File(path, 'r') as h5_file:
            depth = np.array(h5_file['depth'])
    if pad_to is not None:
        depth, _ = pad_bottom_right(depth, pad_to, ret_mask=False)
    depth = torch.from_numpy(depth).float()  # (h, w)
    return depth


# --- ScanNet ---

def read_scannet_gray(path, resize=(640, 480), augment_fn=None):
    """
    Args:
        resize (tuple): align image to depthmap, in (w, h).
        augment_fn (callable, optional): augments images with pre-defined visual effects
    Returns:
        image (torch.tensor): (1, h, w), values scaled by 1/255.
            Only the image is returned; no mask or scale is produced here.
    """
    # read and resize image
    image = imread_gray(path, augment_fn)
    image = cv2.resize(image, resize)

    # (h, w) -> (1, h, w) and normalized
    image = torch.from_numpy(image).float()[None] / 255
    return image
def read_scannet_depth(path):
    """Read a ScanNet depth image and return it divided by 1000 as a float (h, w) tensor."""
    path_str = str(path)
    if path_str.startswith('s3://'):
        raw = load_array_from_s3(path_str, SCANNET_CLIENT, cv2.IMREAD_UNCHANGED)
    else:
        raw = cv2.imread(path_str, cv2.IMREAD_UNCHANGED)
    return torch.from_numpy(raw / 1000).float()  # (h, w)


def read_scannet_pose(path):
    """ Read ScanNet's Camera2World pose and transform it to World2Camera.

    Returns:
        pose_w2c (np.ndarray): (4, 4)
    """
    return inv(np.loadtxt(path, delimiter=' '))


def read_scannet_intrinsic(path):
    """ Read ScanNet's intrinsic matrix and return it without its last row and column.
    """
    return np.loadtxt(path, delimiter=' ')[:-1, :-1]
def read_gl3d_gray(path, resize):
    """Read an image as grayscale, resize it to a (resize, resize) square and scale by 1/255.

    Returns:
        img (torch.tensor): (1, resize, resize)
    """
    side = int(resize)
    img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (side, side))
    return torch.from_numpy(img).float()[None] / 255  # (h, w) -> (1, h, w) and normalized


def compute_pose_errors(data, config):
    """
    Estimate the relative pose of every pair in the batch and record its errors.

    Update:
        data (dict):{
            "R_errs" List[float]: [N]
            "t_errs" List[float]: [N]
            "inliers" List[np.ndarray]: [N]
        }
    Pairs for which `estimate_pose` returns None get infinite errors and an
    empty boolean inlier array.
    """
    pixel_thr = config.TRAINER.RANSAC_PIXEL_THR  # 0.5
    conf = config.TRAINER.RANSAC_CONF  # 0.99999
    data.update({'R_errs': [], 't_errs': [], 'inliers': []})

    m_bids = data['m_bids'].cpu().numpy()
    pts0 = data['mkpts0_f'].cpu().numpy()
    pts1 = data['mkpts1_f'].cpu().numpy()
    K0 = data['K0'].cpu().numpy()
    K1 = data['K1'].cpu().numpy()
    T_0to1 = data['T_0to1'].cpu().numpy()

    for bs in range(K0.shape[0]):
        mask = m_bids == bs
        ret = estimate_pose(pts0[mask], pts1[mask], K0[bs], K1[bs], pixel_thr, conf=conf)

        if ret is None:
            data['R_errs'].append(np.inf)
            data['t_errs'].append(np.inf)
            # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype
            data['inliers'].append(np.array([]).astype(bool))
        else:
            R, t, inliers = ret
            t_err, R_err = relative_pose_error(T_0to1[bs], R, t, ignore_gt_t_thr=0.0)
            data['R_errs'].append(R_err)
            data['t_errs'].append(t_err)
            data['inliers'].append(inliers)
# --- METRIC AGGREGATION ---

def error_auc(errors, thresholds):
    """
    Area under the recall-vs-error curve, normalized by each threshold.

    Args:
        errors (list): [N,]
        thresholds (list): positive error thresholds to evaluate the AUC at.
    Returns:
        dict: {'auc@<thr>': value} for every entry of `thresholds`.
    """
    errors = [0] + sorted(errors)
    recall = list(np.linspace(0, 1, len(errors)))
    # np.trapz is deprecated in NumPy 2 in favour of np.trapezoid
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

    aucs = []
    # honour the caller's thresholds; they used to be overwritten with [5, 10, 20]
    for thr in thresholds:
        last_index = np.searchsorted(errors, thr)
        # close the curve at x == thr with the last recall value reached
        y = recall[:last_index] + [recall[last_index - 1]]
        x = errors[:last_index] + [thr]
        aucs.append(integrate(y, x) / thr)

    return {f'auc@{t}': auc for t, auc in zip(thresholds, aucs)}


def epidist_prec(errors, thresholds, ret_dict=False, offset=False):
    """
    Mean, over items, of the fraction of errors below each threshold.

    Args:
        errors: iterable of per-item error arrays; an empty array counts as 0.
        thresholds (list): error thresholds.
        ret_dict (bool): return a dict keyed 'prec@<thr>' instead of a list.
        offset (bool): with ret_dict, key the entries 'prec_flow@<thr>' instead.
    """
    precs = []
    for thr in thresholds:
        per_item = []
        for errs in errors:
            correct_mask = errs < thr
            per_item.append(np.mean(correct_mask) if len(correct_mask) > 0 else 0)
        precs.append(np.mean(per_item) if len(per_item) > 0 else 0)
    if not ret_dict:
        return precs
    prefix = 'prec_flow' if offset else 'prec'
    return {f'{prefix}@{t:.0e}': prec for t, prec in zip(thresholds, precs)}


def aggregate_metrics(metrics, epi_err_thr=5e-4):
    """ Aggregate metrics for the whole dataset:
    (This method should be called once per dataset)
    1. AUC of the pose error (angular) at the threshold [5, 10, 20]
    2. Mean matching precision at the threshold 5e-4(ScanNet), 1e-4(MegaDepth)
    3. Offset precision at 2e-3, only when metrics contains 'epi_errs_offset'
    """
    # filter duplicates: keep one index per identifier
    unq_ids = OrderedDict((iden, idx) for idx, iden in enumerate(metrics['identifiers']))
    unq_ids = list(unq_ids.values())
    logger.info(f'Aggregating metrics over {len(unq_ids)} unique items...')

    # pose auc
    angular_thresholds = [5, 10, 20]
    pose_errors = np.max(np.stack([metrics['R_errs'], metrics['t_errs']]), axis=0)[unq_ids]
    aucs = error_auc(pose_errors, angular_thresholds)  # (auc@5, auc@10, auc@20)

    # matching precision
    dist_thresholds = [epi_err_thr]
    precs = epidist_prec(np.array(metrics['epi_errs'], dtype=object)[unq_ids], dist_thresholds, True)  # (prec@err_thr)

    # offset precision is optional; a bare `except:` used to hide every other failure too
    if 'epi_errs_offset' not in metrics:
        return {**aucs, **precs}
    precs_offset = epidist_prec(
        np.array(metrics['epi_errs_offset'], dtype=object)[unq_ids], [2e-3], True, offset=True)
    return {**aucs, **precs, **precs_offset}
def lower_config(yacs_cfg):
    """Recursively convert a yacs CfgNode into a plain dict with lower-cased keys."""
    if isinstance(yacs_cfg, CN):
        return {k.lower(): lower_config(v) for k, v in yacs_cfg.items()}
    return yacs_cfg


def upper_config(dict_cfg):
    """Recursively copy a nested dict with upper-cased keys; non-dict values pass through."""
    if isinstance(dict_cfg, dict):
        return {k.upper(): upper_config(v) for k, v in dict_cfg.items()}
    return dict_cfg


def log_on(condition, message, level):
    """Log `message` at `level` through the module logger when `condition` is truthy."""
    if condition:
        assert level in ['INFO', 'DEBUG', 'WARNING', 'ERROR', 'CRITICAL']
        logger.log(level, message)


def get_rank_zero_only_logger(logger: "_Logger"):
    """Return `logger` unchanged on rank 0; on other ranks silence it in place.

    On non-zero ranks every per-level method (one per entry of
    `logger._core.levels`) and `logger._log` are replaced by a no-op.
    """
    if rank_zero_only.rank == 0:
        return logger

    def _noop(*args, **kwargs):
        # Accept any call signature: the former `lambda x: None` raised TypeError
        # for calls such as logger.info("{}", value) or logger.log(level, message).
        return None

    for level_name in logger._core.levels.keys():
        setattr(logger, level_name.lower(), _noop)
    logger._log = _noop
    return logger
def setup_gpus(gpus: Union[str, int]) -> int:
    """ A temporary fix for pytorch-lighting 1.3.x

    Returns the number of GPUs requested. A comma-separated id list is also
    exported through CUDA_VISIBLE_DEVICES unless that variable is already set.
    """
    gpus = str(gpus)

    # a single number is a GPU count; -1 means every visible device
    if ',' not in gpus:
        n_gpus = int(gpus)
        return torch.cuda.device_count() if n_gpus == -1 else n_gpus

    gpu_ids = [token.strip() for token in gpus.split(',') if token != '']

    # setup environment variables
    if os.getenv('CUDA_VISIBLE_DEVICES') is None:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(i) for i in gpu_ids)
        visible_devices = os.getenv('CUDA_VISIBLE_DEVICES')
        logger.warning(f'[Temporary Fix] manually set CUDA_VISIBLE_DEVICES when specifying gpus to use: {visible_devices}')
    else:
        logger.warning('[Temporary Fix] CUDA_VISIBLE_DEVICES already set by user or the main process.')
    return len(gpu_ids)


def flattenList(x):
    """Flatten one level of nesting: an iterable of iterables becomes a single list."""
    return list(chain.from_iterable(x))


@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Context manager to patch joblib to report into tqdm progress bar given as argument

    Usage:
        with tqdm_joblib(tqdm(desc="My calculation", total=10)) as progress_bar:
            Parallel(n_jobs=16)(delayed(sqrt)(i**2) for i in range(10))

    When iterating over a generator, using tqdm directly is also a solution
    (but it monitors task queuing rather than task completion):
        ret_vals = Parallel(n_jobs=args.world_size)(
                    delayed(lambda x: _compute_cov_score(pid, *x))(param)
                        for param in tqdm(combinations(image_ids, 2),
                                          desc=f'Computing cov_score of [{pid}]',
                                          total=len(image_ids)*(len(image_ids)-1)/2))
    Src: https://stackoverflow.com/a/58936697
    """
    original_callback = joblib.parallel.BatchCompletionCallBack

    class TqdmBatchCompletionCallback(original_callback):
        def __call__(self, *args, **kwargs):
            # advance the bar by the size of the batch that just completed
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        # always restore joblib's own callback and close the bar
        joblib.parallel.BatchCompletionCallBack = original_callback
        tqdm_object.close()
def draw_points(img, points, color=(0, 255, 0), radius=3):
    """Draw a circle on `img` at every (x, y) row of `points` and return `img`."""
    centers = [(int(pt[0]), int(pt[1])) for pt in points]
    for center in centers:
        cv2.circle(img, center, radius=radius, color=color)
    return img


def draw_match(img1, img2, corr1, corr2, inlier=[True], color=None, radius1=1, radius2=1, resize=None):
    """Render correspondences between two images side by side.

    Args:
        img1, img2: images to draw on.
        corr1, corr2: (N, 2) arrays of matched coordinates in img1 / img2.
        inlier: per-match flags used to pick green/red when `color` is None.
        color: list of colors; a single entry draws all matches via
            cv2.drawMatches, otherwise one line per match is drawn in color[i].
        radius1, radius2: keypoint sizes passed to cv2.KeyPoint.
        resize: optional (w, h) both images are resized to; the coordinates
            are rescaled accordingly.

    Returns:
        The composed image.
    """
    if resize is not None:
        # scale factors must be taken from the original image sizes
        scale1 = [img1.shape[1] / resize[0], img1.shape[0] / resize[1]]
        scale2 = [img2.shape[1] / resize[0], img2.shape[0] / resize[1]]
        img1 = cv2.resize(img1, resize, interpolation=cv2.INTER_AREA)
        img2 = cv2.resize(img2, resize, interpolation=cv2.INTER_AREA)
        corr1 = corr1 / np.asarray(scale1)[np.newaxis]
        corr2 = corr2 / np.asarray(scale2)[np.newaxis]
    corr1_key = [cv2.KeyPoint(corr1[i, 0], corr1[i, 1], radius1) for i in range(corr1.shape[0])]
    corr2_key = [cv2.KeyPoint(corr2[i, 0], corr2[i, 1], radius2) for i in range(corr2.shape[0])]

    assert len(corr1) == len(corr2)
    n_matches = len(corr1)

    draw_matches = [cv2.DMatch(i, i, 0) for i in range(n_matches)]
    if color is None:
        color = [(0, 255, 0) if cur_inlier else (0, 0, 255) for cur_inlier in inlier]

    if len(color) == 1:
        return cv2.drawMatches(img1, corr1_key, img2, corr2_key, draw_matches, None,
                               matchColor=color[0],
                               singlePointColor=color[0],
                               flags=4
                               )

    # per-match colors: paste both images on one canvas and draw the lines by hand
    h1, w1 = img1.shape[0], img1.shape[1]
    h2, w2 = img2.shape[0], img2.shape[1]
    display = np.zeros([max(h1, h2), w1 + w2, 3], np.uint8)
    display[:h1, :w1] = img1
    display[:h2, w1:] = img2
    for i in range(n_matches):
        left = (int(corr1[i][0]), int(corr1[i][1]))
        right = (int(corr2[i][0] + w1), int(corr2[i][1]))
        cur_color = (int(color[i][0]), int(color[i][1]), int(color[i][2]))
        cv2.line(display, left, right, cur_color, 1, lineType=cv2.LINE_AA)
    return display
def _compute_conf_thresh(data):
    """Return the error threshold used for `data['dataset_name'][0]` (case-insensitive)."""
    dataset_name = data['dataset_name'][0].lower()
    thresholds = {'scannet': 5e-4, 'megadepth': 1e-4, 'gl3d': 1e-4}
    if dataset_name not in thresholds:
        raise ValueError(f'Unknown dataset: {dataset_name}')
    return thresholds[dataset_name]


# --- VISUALIZATION --- #

def make_matching_figure(
        img0, img1, mkpts0, mkpts1, color,
        kpts0=None, kpts1=None, text=[], dpi=75, path=None):
    """Plot two images side by side with lines joining matched keypoints.

    Args:
        img0, img1: images shown with a gray colormap.
        mkpts0, mkpts1: (N, 2) matched keypoints in each image.
        color: per-match colors.
        kpts0, kpts1: optional keypoints scattered in white.
        text: lines of text written in the top-left corner.
        dpi: figure resolution.
        path: when given, the figure is saved there and closed.

    Returns:
        The figure when `path` is falsy, otherwise None.
    """
    assert mkpts0.shape[0] == mkpts1.shape[0], f'mkpts0: {mkpts0.shape[0]} v.s. mkpts1: {mkpts1.shape[0]}'
    fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
    axes[0].imshow(img0, cmap='gray')
    axes[1].imshow(img1, cmap='gray')
    for ax in axes:  # clear all frames
        ax.get_yaxis().set_ticks([])
        ax.get_xaxis().set_ticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
    plt.tight_layout(pad=1)

    if kpts0 is not None:
        assert kpts1 is not None
        axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c='w', s=2)
        axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c='w', s=2)

    # draw matches
    n_matches = len(mkpts0)
    if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0:
        fig.canvas.draw()
        # map data coordinates of both axes into figure coordinates
        to_figure = fig.transFigure.inverted()
        fkpts0 = to_figure.transform(axes[0].transData.transform(mkpts0))
        fkpts1 = to_figure.transform(axes[1].transData.transform(mkpts1))
        match_lines = []
        for i in range(n_matches):
            match_lines.append(matplotlib.lines.Line2D(
                (fkpts0[i, 0], fkpts1[i, 0]),
                (fkpts0[i, 1], fkpts1[i, 1]),
                transform=fig.transFigure, c=color[i], linewidth=1))
        fig.lines = match_lines

        axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color, s=4)
        axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color, s=4)

    # put txts: dark text on a bright corner, white text otherwise
    txt_color = 'k' if img0[:100, :200].mean() > 200 else 'w'
    fig.text(
        0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes,
        fontsize=15, va='top', ha='left', color=txt_color)

    # save or return figure
    if not path:
        return fig
    plt.savefig(str(path), bbox_inches='tight', pad_inches=0)
    plt.close()
def _make_evaluation_figure(data, b_id, alpha='dynamic'):
    """Build the matching figure of batch item `b_id`, colored by epipolar error.

    Args:
        data (dict): batch holding images, fine matches ('mkpts0_f', 'mkpts1_f'),
            their batch ids ('m_bids'), 'epi_errs' and 'conf_matrix_gt'.
        b_id (int): index of the item inside the batch.
        alpha: line opacity, or 'dynamic' to derive it from the number of matches.

    Returns:
        The figure produced by `make_matching_figure`.
    """
    b_mask = data['m_bids'] == b_id
    conf_thr = _compute_conf_thresh(data)

    def _as_gray(key):
        return (data[key][b_id][0].cpu().numpy() * 255).round().astype(np.int32)

    img0, img1 = _as_gray('image0'), _as_gray('image1')
    kpts0 = data['mkpts0_f'][b_mask].cpu().numpy()
    kpts1 = data['mkpts1_f'][b_mask].cpu().numpy()

    # for megadepth, we visualize matches on the resized image
    if 'scale0' in data:
        kpts0 = kpts0 / data['scale0'][b_id].cpu().numpy()[[1, 0]]
        kpts1 = kpts1 / data['scale1'][b_id].cpu().numpy()[[1, 0]]

    epi_errs = data['epi_errs'][b_mask].cpu().numpy()
    correct_mask = epi_errs < conf_thr
    n_matches = len(correct_mask)
    precision = np.mean(correct_mask) if n_matches > 0 else 0
    n_correct = np.sum(correct_mask)
    n_gt_matches = int(data['conf_matrix_gt'][b_id].sum().cpu())
    # recall might be larger than 1, since the calculation of conf_matrix_gt
    # uses groundtruth depths and camera poses, but epipolar distance is used here.
    recall = n_correct / (n_gt_matches) if n_gt_matches != 0 else 0

    # matching info
    if alpha == 'dynamic':
        alpha = dynamic_alpha(n_matches)
    color = error_colormap(epi_errs, conf_thr, alpha=alpha)

    text = [
        f'#Matches {len(kpts0)}',
        f'Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}',
        f'Recall({conf_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}'
    ]

    # make the figure
    return make_matching_figure(img0, img1, kpts0, kpts1,
                                color, text=text)
def _make_evaluation_figure_offset(data, b_id, alpha='dynamic', side=''):
    """Build the offset-match figure of batch item `b_id`, one layer at a time.

    A figure is drawn for every layer (the layer count is the first dimension
    of `data['predict_flow'][0]`), but only the figure of the LAST layer is
    returned, as before. Figures of earlier layers are closed so they do not
    accumulate in pyplot.

    Args:
        data (dict): batch holding the images plus the 'offset_*' + side and
            'epi_errs_offset' + side entries.
        b_id (int): index of the item inside the batch.
        alpha: line opacity, or 'dynamic' to derive it per layer from the
            number of matches in that layer.
        side (str): suffix appended to the 'offset_*' keys.

    Returns:
        The figure of the last layer, or None when there are no layers.
    """
    layer_num = data['predict_flow'][0].shape[0]

    b_mask = data['offset_bids' + side] == b_id
    conf_thr = 2e-3  # hardcode for scannet(coarse level)
    img0 = (data['image0'][b_id][0].cpu().numpy() * 255).round().astype(np.int32)
    img1 = (data['image1'][b_id][0].cpu().numpy() * 255).round().astype(np.int32)

    figure = None
    # draw offset matches in different layers
    for layer_index in range(layer_num):
        l_mask = data['offset_lids' + side] == layer_index
        mask = l_mask & b_mask
        kpts0 = data['offset_kpts0_f' + side][mask].cpu().numpy()
        kpts1 = data['offset_kpts1_f' + side][mask].cpu().numpy()

        epi_errs = data['epi_errs_offset' + side][mask].cpu().numpy()
        correct_mask = epi_errs < conf_thr

        precision = np.mean(correct_mask) if len(correct_mask) > 0 else 0
        n_correct = np.sum(correct_mask)
        n_gt_matches = int(data['conf_matrix_gt'][b_id].sum().cpu())
        recall = 0 if n_gt_matches == 0 else n_correct / (n_gt_matches)
        # recall might be larger than 1, since the calculation of conf_matrix_gt
        # uses groundtruth depths and camera poses, but epipolar distance is used here.

        # Resolve the opacity per layer. Overwriting `alpha` made every layer
        # after the first reuse the first layer's dynamic value.
        layer_alpha = dynamic_alpha(len(correct_mask)) if alpha == 'dynamic' else alpha
        color = error_colormap(epi_errs, conf_thr, alpha=layer_alpha)

        text = [
            f'#Matches {len(kpts0)}',
            f'Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}',
            f'Recall({conf_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}'
        ]

        # the previous layer's figure is superseded and never returned: release it
        if figure is not None:
            plt.close(figure)
        figure = make_matching_figure(deepcopy(img0), deepcopy(img1), kpts0, kpts1,
                                      color, text=text)
    return figure
def _make_confidence_figure(data, b_id):
    """Placeholder for the confidence figure; always raises NotImplementedError."""
    # TODO: Implement confidence figure
    raise NotImplementedError()


def make_matching_figures(data, config, mode='evaluation'):
    """ Make matching figures for a batch.

    Args:
        data (Dict): a batch updated by PL_LoFTR.
        config (Dict): matcher config
    Returns:
        figures (Dict[str, List[plt.figure]]
    """
    assert mode in ['evaluation', 'confidence']  # 'confidence'
    collected = []
    for b_id in range(data['image0'].size(0)):
        if mode == 'evaluation':
            collected.append(_make_evaluation_figure(
                data, b_id,
                alpha=config.TRAINER.PLOT_MATCHES_ALPHA))
        elif mode == 'confidence':
            collected.append(_make_confidence_figure(data, b_id))
        else:
            raise ValueError(f'Unknown plot mode: {mode}')
    return {mode: collected}


def make_matching_figures_offset(data, config, mode='evaluation', side=''):
    """ Make offset matching figures for a batch.

    Args:
        data (Dict): a batch updated by PL_LoFTR.
        config (Dict): matcher config
        side (str): key suffix forwarded to the figure builder in 'evaluation' mode
    Returns:
        figures (Dict[str, List[plt.figure]]
    """
    assert mode in ['evaluation', 'confidence']  # 'confidence'
    collected = []
    for b_id in range(data['image0'].size(0)):
        if mode == 'evaluation':
            collected.append(_make_evaluation_figure_offset(
                data, b_id,
                alpha=config.TRAINER.PLOT_MATCHES_ALPHA, side=side))
        elif mode == 'confidence':
            # 'confidence' reuses the evaluation figure with its default alpha and side
            collected.append(_make_evaluation_figure_offset(data, b_id))
        else:
            raise ValueError(f'Unknown plot mode: {mode}')
    return {mode: collected}
def dynamic_alpha(n_matches,
                  milestones=[0, 300, 1000, 2000],
                  alphas=[1.0, 0.8, 0.4, 0.2]):
    """Pick a line opacity that decreases as the number of matches grows.

    The opacity is interpolated linearly between the `alphas` of the two
    `milestones` surrounding `n_matches`; past the last milestone the last
    alpha is returned. Zero matches always yields 1.0.
    """
    if n_matches == 0:
        return 1.0
    loc = bisect.bisect_right(milestones, n_matches) - 1
    # (alpha at this milestone, alpha at the next one or None past the end)
    upper, lower = list(zip(alphas, alphas[1:] + [None]))[loc]
    if lower is None:
        return upper
    span = milestones[loc + 1] - milestones[loc]
    remaining = (milestones[loc + 1] - n_matches) / span
    return lower + remaining * (upper - lower)


def error_colormap(err, thr, alpha=1.0):
    """Map errors to RGBA colors: green at zero error, red at 2 * thr and beyond."""
    assert alpha <= 1.0 and alpha > 0, f"Invaid alpha value: {alpha}"
    x = 1 - np.clip(err / (thr * 2), 0, 1)
    rgba = np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x) * alpha], -1)
    return np.clip(rgba, 0, 1)
class InferenceProfiler(SimpleProfiler):
    """
    This profiler records duration of actions with cuda.synchronize()
    Use this in test time.
    """

    def __init__(self):
        super().__init__()
        # restrict bookkeeping and reporting to the rank-zero process
        self.start = rank_zero_only(self.start)
        self.stop = rank_zero_only(self.stop)
        self.summary = rank_zero_only(self.summary)

    @staticmethod
    def _cuda_sync():
        """Synchronize CUDA when it is available; a no-op on CPU-only hosts."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    @contextmanager
    def profile(self, action_name: str):
        """Time `action_name`, synchronizing CUDA right before start and stop.

        Yields:
            action_name, unchanged.
        """
        try:
            self._cuda_sync()
            self.start(action_name)
            yield action_name
        finally:
            self._cuda_sync()
            self.stop(action_name)


def build_profiler(name):
    """Return the profiler selected by `name`: 'inference', 'pytorch' or None.

    Raises:
        ValueError: for any other name.
    """
    if name == 'inference':
        return InferenceProfiler()
    elif name == 'pytorch':
        # imported lazily so the other profilers work without it
        from pytorch_lightning.profiler import PyTorchProfiler
        return PyTorchProfiler(use_cuda=True, profile_memory=True, row_limit=100)
    elif name is None:
        return PassThroughProfiler()
    else:
        raise ValueError(f'Invalid profiler: {name}')


def parse_args():
    """Parse the test-script arguments together with the pl.Trainer flags."""
    # init a custom parser which will be added into pl.Trainer parser
    # check documentation: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-flags
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'data_cfg_path', type=str, help='data config path')
    parser.add_argument(
        'main_cfg_path', type=str, help='main config path')
    parser.add_argument(
        '--ckpt_path', type=str, default="weights/indoor_ds.ckpt", help='path to the checkpoint')
    parser.add_argument(
        '--dump_dir', type=str, default=None, help="if set, the matching results will be dump to dump_dir")
    parser.add_argument(
        '--profiler_name', type=str, default=None, help='options: [inference, pytorch], or leave it unset')
    parser.add_argument(
        '--batch_size', type=int, default=1, help='batch_size per gpu')
    parser.add_argument(
        '--num_workers', type=int, default=2)
    parser.add_argument(
        '--thr', type=float, default=None, help='modify the coarse-level matching threshold.')
    # NOTE(review): this help text duplicates --thr's and looks copy-pasted;
    # what --mode selects is not visible from here - confirm and reword.
    parser.add_argument(
        '--mode', type=str, default='vanilla', help='modify the coarse-level matching threshold.')
    parser = pl.Trainer.add_argparse_args(parser)
    return parser.parse_args()
if __name__ == '__main__':
    # command-line arguments (echoed for the run log)
    args = parse_args()
    pprint.pprint(vars(args))

    # default config, overridden by the main config and then the data config
    config = get_cfg_defaults()
    for cfg_path in (args.main_cfg_path, args.data_cfg_path):
        config.merge_from_file(cfg_path)
    pl.seed_everything(config.TRAINER.SEED)  # reproducibility

    # optional test-time override of the coarse matching threshold
    if args.thr is not None:
        config.ASPAN.MATCH_COARSE.THR = args.thr

    loguru_logger.info("Args and config initialized!")

    # lightning module
    profiler = build_profiler(args.profiler_name)
    model = PL_ASpanFormer(config, pretrained_ckpt=args.ckpt_path, profiler=profiler, dump_dir=args.dump_dir)
    loguru_logger.info("ASpanFormer-lightning initialized!")

    # lightning data
    data_module = MultiSceneDataModule(args, config)
    loguru_logger.info("DataModule initialized!")

    # lightning trainer
    trainer = pl.Trainer.from_argparse_args(args, replace_sampler_ddp=False, logger=False)

    loguru_logger.info("Start testing!")
    trainer.test(model, datamodule=data_module, verbose=False)
import os
import struct
import zlib

import numpy as np

COMPRESSION_TYPE_COLOR = {-1:'unknown', 0:'raw', 1:'png', 2:'jpeg'}
COMPRESSION_TYPE_DEPTH = {-1:'unknown', 0:'raw_ushort', 1:'zlib_ushort', 2:'occi_ushort'}


def _read_exact(file_handle, num_bytes):
    """Read exactly ``num_bytes`` bytes or raise ``struct.error`` on a short read."""
    data = file_handle.read(num_bytes)
    if len(data) != num_bytes:
        # Same exception type the field-by-field struct.unpack calls raise on
        # truncated input, so existing error handling keeps working.
        raise struct.error('expected %d bytes, got %d' % (num_bytes, len(data)))
    return data


def _read_matrix4(file_handle):
    """Read sixteen native float32 values and return them as a 4x4 float32 array."""
    values = struct.unpack('f'*16, _read_exact(file_handle, 16*4))
    return np.asarray(values, dtype=np.float32).reshape(4, 4)


class RGBDFrame():
    """One frame record of a ``.sens`` stream.

    After ``load`` the instance carries ``camera_to_world`` (4x4 float32),
    ``timestamp_color``, ``timestamp_depth``, ``color_size_bytes``,
    ``depth_size_bytes`` and the still-compressed ``color_data`` /
    ``depth_data`` payloads as ``bytes``.
    """

    def load(self, file_handle):
        """Populate the frame from ``file_handle`` (a binary file positioned at a frame).

        Raises:
            struct.error: if the stream ends before the record is complete.
        """
        self.camera_to_world = _read_matrix4(file_handle)
        self.timestamp_color = struct.unpack('Q', _read_exact(file_handle, 8))[0]
        self.timestamp_depth = struct.unpack('Q', _read_exact(file_handle, 8))[0]
        self.color_size_bytes = struct.unpack('Q', _read_exact(file_handle, 8))[0]
        self.depth_size_bytes = struct.unpack('Q', _read_exact(file_handle, 8))[0]
        # Keep the payloads as raw bytes: unpacking them one 'c' at a time and
        # joining with '' only works on Python 2 and is needlessly slow.
        self.color_data = _read_exact(file_handle, self.color_size_bytes)
        self.depth_data = _read_exact(file_handle, self.depth_size_bytes)


    def decompress_depth(self, compression_type):
        """Return the decompressed depth payload as bytes.

        Raises:
            NotImplementedError: for any type other than ``'zlib_ushort'``
                (a ``RuntimeError`` subclass, which is what the former bare
                ``raise`` produced on Python 3).
        """
        if compression_type == 'zlib_ushort':
            return self.decompress_depth_zlib()
        raise NotImplementedError('unsupported depth compression type: %r' % (compression_type,))


    def decompress_depth_zlib(self):
        """Inflate ``depth_data`` with zlib."""
        return zlib.decompress(self.depth_data)


    def decompress_color(self, compression_type):
        """Return the decoded colour image.

        Raises:
            NotImplementedError: for any type other than ``'jpeg'``.
        """
        if compression_type == 'jpeg':
            return self.decompress_color_jpeg()
        raise NotImplementedError('unsupported color compression type: %r' % (compression_type,))


    def decompress_color_jpeg(self):
        """Decode ``color_data`` with ``imageio.imread``."""
        import imageio  # imported lazily: only colour decoding needs it
        return imageio.imread(self.color_data)


class SensorData:
    """In-memory view of a version-4 ``.sens`` file: header fields plus all frames."""

    def __init__(self, filename):
        self.version = 4
        self.load(filename)


    def load(self, filename):
        """Parse the header and every frame of ``filename``.

        Raises:
            AssertionError: if the file's version field differs from ``self.version``.
            struct.error: if the file is truncated.
            KeyError: if a compression-type code is not in the lookup tables.
        """
        with open(filename, 'rb') as f:
            version = struct.unpack('I', _read_exact(f, 4))[0]
            assert self.version == version, \
                'unsupported .sens version %d (expected %d)' % (version, self.version)
            strlen = struct.unpack('Q', _read_exact(f, 8))[0]
            self.sensor_name = _read_exact(f, strlen).decode('utf-8', errors='replace')
            self.intrinsic_color = _read_matrix4(f)
            self.extrinsic_color = _read_matrix4(f)
            self.intrinsic_depth = _read_matrix4(f)
            self.extrinsic_depth = _read_matrix4(f)
            self.color_compression_type = COMPRESSION_TYPE_COLOR[struct.unpack('i', _read_exact(f, 4))[0]]
            self.depth_compression_type = COMPRESSION_TYPE_DEPTH[struct.unpack('i', _read_exact(f, 4))[0]]
            self.color_width = struct.unpack('I', _read_exact(f, 4))[0]
            self.color_height = struct.unpack('I', _read_exact(f, 4))[0]
            self.depth_width = struct.unpack('I', _read_exact(f, 4))[0]
            self.depth_height = struct.unpack('I', _read_exact(f, 4))[0]
            self.depth_shift = struct.unpack('f', _read_exact(f, 4))[0]
            num_frames = struct.unpack('Q', _read_exact(f, 8))[0]
            self.frames = []
            for _ in range(num_frames):
                frame = RGBDFrame()
                frame.load(f)
                self.frames.append(frame)


    def export_depth_images(self, output_path, image_size=None, frame_skip=1):
        """Write every ``frame_skip``-th depth map to ``output_path/<index>.png`` (16-bit).

        ``image_size`` is ``(height, width)``; when given, frames are resized
        with nearest-neighbour interpolation before being written.
        """
        import cv2  # lazy: only the image exporters need OpenCV / pypng
        import png
        os.makedirs(output_path, exist_ok=True)
        frame_ids = range(0, len(self.frames), frame_skip)
        # len(frame_ids) is the number actually written; len(frames)//frame_skip
        # under-counts whenever frame_skip does not divide the frame count.
        print('exporting %d depth frames to %s' % (len(frame_ids), output_path))
        for idx in frame_ids:
            depth_data = self.frames[idx].decompress_depth(self.depth_compression_type)
            depth = np.frombuffer(depth_data, dtype=np.uint16).reshape(self.depth_height, self.depth_width)
            if image_size is not None:
                depth = cv2.resize(depth, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
            with open(os.path.join(output_path, str(idx) + '.png'), 'wb') as out:  # write 16-bit
                writer = png.Writer(width=depth.shape[1], height=depth.shape[0], bitdepth=16)
                writer.write(out, depth.reshape(-1, depth.shape[1]).tolist())

    def export_color_images(self, output_path, image_size=None, frame_skip=1):
        """Write every ``frame_skip``-th colour image to ``output_path/<index>.jpg``.

        ``image_size`` is ``(height, width)``; when given, frames are resized
        with nearest-neighbour interpolation before being written.
        """
        import cv2
        import imageio
        os.makedirs(output_path, exist_ok=True)
        frame_ids = range(0, len(self.frames), frame_skip)
        print('exporting %d color frames to %s' % (len(frame_ids), output_path))
        for idx in frame_ids:
            color = self.frames[idx].decompress_color(self.color_compression_type)
            if image_size is not None:
                color = cv2.resize(color, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
            imageio.imwrite(os.path.join(output_path, str(idx) + '.jpg'), color)


    def save_mat_to_file(self, matrix, filename):
        """Write ``matrix`` to ``filename`` as text, one row per line, ``%f`` formatted."""
        with open(filename, 'w') as f:
            np.savetxt(f, matrix, fmt='%f')


    def export_poses(self, output_path, frame_skip=1):
        """Write every ``frame_skip``-th ``camera_to_world`` matrix to ``output_path/<index>.txt``."""
        os.makedirs(output_path, exist_ok=True)
        frame_ids = range(0, len(self.frames), frame_skip)
        print('exporting %d camera poses to %s' % (len(frame_ids), output_path))
        for idx in frame_ids:
            self.save_mat_to_file(self.frames[idx].camera_to_world, os.path.join(output_path, str(idx) + '.txt'))


    def export_intrinsics(self, output_path):
        """Write the four 4x4 intrinsic/extrinsic matrices as text files in ``output_path``."""
        os.makedirs(output_path, exist_ok=True)
        print('exporting camera intrinsics to %s' % (output_path,))
        self.save_mat_to_file(self.intrinsic_color, os.path.join(output_path, 'intrinsic_color.txt'))
        self.save_mat_to_file(self.extrinsic_color, os.path.join(output_path, 'extrinsic_color.txt'))
        self.save_mat_to_file(self.intrinsic_depth, os.path.join(output_path, 'intrinsic_depth.txt'))
        self.save_mat_to_file(self.extrinsic_depth, os.path.join(output_path, 'extrinsic_depth.txt'))
"""Build the per-scene ``<scene_id>.npz`` index from an undistorted COLMAP text model.

Reads ``cameras.txt``, ``points3D.txt`` and ``images.txt`` from
``<base_path>/Undistorted_SfM/<scene_id>/sparse-txt`` and writes image/depth
paths, intrinsics, world-to-camera poses, pairwise overlap and scale-ratio
matrices, principal-axis angles and the per-image 3D-point tables to
``<output_path>/<scene_id>.npz``.  Exits silently (status 0) when the scene
lacks the sparse model, the depth directory or the image directory.
"""
import argparse
import os
import sys

import imagesize  # NOTE: not referenced below; kept so the script's import set is unchanged.
import numpy as np


def _per_image_array(items):
    """Pack a per-image list that may hold ``None`` placeholders for ``np.savez``.

    Without placeholders this is ``np.array(items)``.  With placeholders an
    explicit 1-D object array is built, because letting NumPy infer an array
    from a mix of ``None`` and matrices is ragged-array creation, which recent
    NumPy versions reject.
    """
    if all(item is not None for item in items):
        return np.array(items)
    packed = np.empty(len(items), dtype=object)
    for i, item in enumerate(items):
        packed[i] = item
    return packed


parser = argparse.ArgumentParser(description='MegaDepth preprocessing script')

parser.add_argument(
    '--base_path', type=str, required=True,
    help='path to MegaDepth'
)
parser.add_argument(
    '--scene_id', type=str, required=True,
    help='scene ID'
)

parser.add_argument(
    '--output_path', type=str, required=True,
    help='path to the output directory'
)

args = parser.parse_args()

base_path = args.base_path
# Remove the trailing / if need be (endswith also copes with an empty string).
if base_path.endswith(('/', '\\')):
    base_path = base_path[: -1]
scene_id = args.scene_id

base_depth_path = os.path.join(
    base_path, 'phoenix/S6/zl548/MegaDepth_v1'
)
base_undistorted_sfm_path = os.path.join(
    base_path, 'Undistorted_SfM'
)

undistorted_sparse_path = os.path.join(
    base_undistorted_sfm_path, scene_id, 'sparse-txt'
)
depths_path = os.path.join(
    base_depth_path, scene_id, 'dense0', 'depths'
)
images_path = os.path.join(
    base_undistorted_sfm_path, scene_id, 'images'
)
# A scene missing any of its inputs is skipped quietly so a batch run continues.
for required_path in (undistorted_sparse_path, depths_path, images_path):
    if not os.path.exists(required_path):
        sys.exit()

# Process cameras.txt
with open(os.path.join(undistorted_sparse_path, 'cameras.txt'), 'r') as f:
    raw = f.readlines()[3 :]  # skip the header

camera_intrinsics = {}
for camera_line in raw:
    camera_fields = camera_line.split(' ')
    camera_intrinsics[int(camera_fields[0])] = [float(elem) for elem in camera_fields[2 :]]

# Process points3D.txt
with open(os.path.join(undistorted_sparse_path, 'points3D.txt'), 'r') as f:
    raw = f.readlines()[3 :]  # skip the header

points3D = {}
for point_line in raw:
    point_fields = point_line.split(' ')
    points3D[int(point_fields[0])] = np.array([
        float(point_fields[1]), float(point_fields[2]), float(point_fields[3])
    ])

# Process images.txt
with open(os.path.join(undistorted_sparse_path, 'images.txt'), 'r') as f:
    raw = f.readlines()[4 :]  # skip the header

image_id_to_idx = {}
image_names = []
raw_pose = []
camera = []
points3D_id_to_2D = []
n_points3D = []
# Each image occupies two lines: its pose/camera/name line, then its 2D points.
for idx, (image_line, points_line) in enumerate(zip(raw[:: 2], raw[1 :: 2])):
    image_fields = image_line.split(' ')
    point_fields = points_line.split(' ')

    image_id_to_idx[int(image_fields[0])] = idx

    image_names.append(image_fields[-1].strip('\n'))

    raw_pose.append([float(elem) for elem in image_fields[1 : -2]])
    camera.append(int(image_fields[-2]))
    current_points3D_id_to_2D = {}
    for x, y, point3D_id in zip(point_fields[:: 3], point_fields[1 :: 3], point_fields[2 :: 3]):
        if int(point3D_id) == -1:
            continue  # 2D observation without a 3D point
        current_points3D_id_to_2D[int(point3D_id)] = [float(x), float(y)]
    points3D_id_to_2D.append(current_points3D_id_to_2D)
    n_points3D.append(len(current_points3D_id_to_2D))
n_images = len(image_names)

# Image and depthmaps paths
image_paths = []
depth_paths = []
for image_name in image_names:
    image_path = os.path.join(images_path, image_name)

    # Path to the depth file
    depth_path = os.path.join(
        depths_path, '%s.h5' % os.path.splitext(image_name)[0]
    )

    # Small files are treated as background / foreground masks rather than
    # depth maps.  Rough estimate - 75KB might work as well
    if os.path.exists(depth_path) and os.stat(depth_path).st_size >= 100 * 1024:
        # Stored relative to base_path.
        depth_paths.append(depth_path[len(base_path) + 1 :])
        image_paths.append(image_path[len(base_path) + 1 :])
    else:
        depth_paths.append(None)
        image_paths.append(None)

# Camera configuration
intrinsics = []
poses = []
principal_axis = []
points3D_id_to_ndepth = []
for idx, image_name in enumerate(image_names):
    if image_paths[idx] is None:
        intrinsics.append(None)
        poses.append(None)
        principal_axis.append([0, 0, 0])
        points3D_id_to_ndepth.append({})
        continue
    # Entries 2..5 of the stored camera fields fill fx, fy, cx, cy below
    # (presumably a PINHOLE camera after undistortion - confirm).
    image_intrinsics = camera_intrinsics[camera[idx]]
    K = np.zeros([3, 3])
    K[0, 0] = image_intrinsics[2]
    K[0, 2] = image_intrinsics[4]
    K[1, 1] = image_intrinsics[3]
    K[1, 2] = image_intrinsics[5]
    K[2, 2] = 1
    intrinsics.append(K)

    image_pose = raw_pose[idx]
    qvec = np.asarray(image_pose[: 4])
    qvec = qvec / np.linalg.norm(qvec)
    w, x, y, z = qvec
    R = np.array([
        [
            1 - 2 * y * y - 2 * z * z,
            2 * x * y - 2 * z * w,
            2 * x * z + 2 * y * w
        ],
        [
            2 * x * y + 2 * z * w,
            1 - 2 * x * x - 2 * z * z,
            2 * y * z - 2 * x * w
        ],
        [
            2 * x * z - 2 * y * w,
            2 * y * z + 2 * x * w,
            1 - 2 * x * x - 2 * y * y
        ]
    ])
    principal_axis.append(R[2, :])
    t = image_pose[4 : 7]
    # World-to-Camera pose
    current_pose = np.zeros([4, 4])
    current_pose[: 3, : 3] = R
    current_pose[: 3, 3] = t
    current_pose[3, 3] = 1
    poses.append(current_pose)

    # Camera-frame depth of every observed 3D point, divided by the mean focal length.
    mean_focal = .5 * (K[0, 0] + K[1, 1])
    current_points3D_id_to_ndepth = {}
    for point3D_id in points3D_id_to_2D[idx]:
        p3d = points3D[point3D_id]
        current_points3D_id_to_ndepth[point3D_id] = (np.dot(R[2, :], p3d) + t[2]) / mean_focal
    points3D_id_to_ndepth.append(current_points3D_id_to_ndepth)
principal_axis = np.array(principal_axis)
angles = np.rad2deg(np.arccos(
    np.clip(
        np.dot(principal_axis, np.transpose(principal_axis)),
        -1, 1
    )
))

# Compute overlap score
overlap_matrix = np.full([n_images, n_images], -1.)
scale_ratio_matrix = np.full([n_images, n_images], -1.)
for idx1 in range(n_images):
    if image_paths[idx1] is None or depth_paths[idx1] is None:
        continue
    for idx2 in range(idx1 + 1, n_images):
        if image_paths[idx2] is None or depth_paths[idx2] is None:
            continue
        matches = (
            points3D_id_to_2D[idx1].keys() &
            points3D_id_to_2D[idx2].keys()
        )
        n_points1 = len(points3D_id_to_2D[idx1])
        n_points2 = len(points3D_id_to_2D[idx2])
        # Fraction of each image's 3D points that the other image also sees.
        # An image without any triangulated point shares nothing, so its
        # overlap is 0 rather than a division by zero.
        overlap_matrix[idx1, idx2] = len(matches) / n_points1 if n_points1 else 0.
        overlap_matrix[idx2, idx1] = len(matches) / n_points2 if n_points2 else 0.
        if len(matches) == 0:
            continue
        points3D_id_to_ndepth1 = points3D_id_to_ndepth[idx1]
        points3D_id_to_ndepth2 = points3D_id_to_ndepth[idx2]
        nd1 = np.array([points3D_id_to_ndepth1[match] for match in matches])
        nd2 = np.array([points3D_id_to_ndepth2[match] for match in matches])
        min_scale_ratio = np.min(np.maximum(nd1 / nd2, nd2 / nd1))
        scale_ratio_matrix[idx1, idx2] = min_scale_ratio
        scale_ratio_matrix[idx2, idx1] = min_scale_ratio

np.savez(
    os.path.join(args.output_path, '%s.npz' % scene_id),
    image_paths=image_paths,
    depth_paths=depth_paths,
    intrinsics=_per_image_array(intrinsics),
    poses=_per_image_array(poses),
    overlap_matrix=overlap_matrix,
    scale_ratio_matrix=scale_ratio_matrix,
    angles=angles,
    n_points3D=n_points3D,
    points3D_id_to_2D=points3D_id_to_2D,
    points3D_id_to_ndepth=points3D_id_to_ndepth
)
+ls $dataset_path/Undistorted_SfM | xargs -P 8 -I % sh -c 'echo %; python preprocess_scene.py --base_path $dataset_path --scene_id % --output_path $output_path' \ No newline at end of file diff --git a/third_party/ASpanFormer/tools/reader.py b/third_party/ASpanFormer/tools/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..f419fbaa8a099fcfede1cea51fcf95a2c1589160 --- /dev/null +++ b/third_party/ASpanFormer/tools/reader.py @@ -0,0 +1,39 @@ +import argparse +import os, sys + +from SensorData import SensorData + +# params +parser = argparse.ArgumentParser() +# data paths +parser.add_argument('--filename', required=True, help='path to sens file to read') +parser.add_argument('--output_path', required=True, help='path to output folder') +parser.add_argument('--export_depth_images', dest='export_depth_images', action='store_true') +parser.add_argument('--export_color_images', dest='export_color_images', action='store_true') +parser.add_argument('--export_poses', dest='export_poses', action='store_true') +parser.add_argument('--export_intrinsics', dest='export_intrinsics', action='store_true') +parser.set_defaults(export_depth_images=False, export_color_images=False, export_poses=False, export_intrinsics=False) + +opt = parser.parse_args() +print(opt) + + +def main(): + if not os.path.exists(opt.output_path): + os.makedirs(opt.output_path) + # load the data + sys.stdout.write('loading %s...' 
"""Undistort every MegaDepth scene with COLMAP and export its sparse model as text.

For each scene under ``<base_path>/phoenix/S6/zl548/MegaDepth_v1`` that has a
``dense0/imgs`` directory, runs ``colmap image_undistorter`` into
``<base_path>/Undistorted_SfM/<scene>`` and then ``colmap model_converter``
into ``<scene>/sparse-txt``.  The script can be re-run: existing output
directories are reused instead of aborting the run.
"""
import argparse
import os
import subprocess
import sys

import imagesize

parser = argparse.ArgumentParser(description='MegaDepth Undistortion')

parser.add_argument(
    '--colmap_path', type=str,default='/usr/bin/',
    help='path to colmap executable'
)
parser.add_argument(
    '--base_path', type=str,default='/root/MegaDepth',
    help='path to MegaDepth'
)

args = parser.parse_args()

sfm_path = os.path.join(
    args.base_path, 'MegaDepth_v1_SfM'
)
base_depth_path = os.path.join(
    args.base_path, 'phoenix/S6/zl548/MegaDepth_v1'
)
output_path = os.path.join(
    args.base_path, 'Undistorted_SfM'
)
colmap_binary = os.path.join(args.colmap_path, 'colmap')

# exist_ok: a plain mkdir raised FileExistsError on any second run.
os.makedirs(output_path, exist_ok=True)

for scene_name in os.listdir(base_depth_path):
    current_output_path = os.path.join(output_path, scene_name)
    os.makedirs(current_output_path, exist_ok=True)

    image_path = os.path.join(
        base_depth_path, scene_name, 'dense0', 'imgs'
    )
    if not os.path.exists(image_path):
        continue

    # Find the maximum image size in scene (0 when the directory is empty).
    max_image_size = max(
        (
            max(imagesize.get(os.path.join(image_path, image_name)))
            for image_name in os.listdir(image_path)
        ),
        default=0
    )

    # Undistort the images and update the reconstruction.
    status = subprocess.call([
        colmap_binary, 'image_undistorter',
        '--image_path', os.path.join(sfm_path, scene_name, 'images'),
        '--input_path', os.path.join(sfm_path, scene_name, 'sparse', 'manhattan', '0'),
        '--output_path', current_output_path,
        '--max_image_size', str(max_image_size)
    ])
    if status != 0:
        # Converting a model that was never written only produces a second,
        # more confusing failure; report it and move on to the next scene.
        sys.stderr.write(
            'image_undistorter failed for scene %s (exit code %d); skipping\n'
            % (scene_name, status)
        )
        continue

    # Transform the reconstruction to raw text format.
    sparse_txt_path = os.path.join(current_output_path, 'sparse-txt')
    os.makedirs(sparse_txt_path, exist_ok=True)
    status = subprocess.call([
        colmap_binary, 'model_converter',
        '--input_path', os.path.join(current_output_path, 'sparse'),
        '--output_path', sparse_txt_path,
        '--output_type', 'TXT'
    ])
    if status != 0:
        sys.stderr.write(
            'model_converter failed for scene %s (exit code %d)\n'
            % (scene_name, status)
        )
def main():
    """Train ASpanFormer: parse the CLI, assemble the config, then fit with Lightning.

    Steps, in order: parse and print the arguments, merge the main and data
    config files into the defaults, seed everything, derive world size / true
    batch size / scaled learning rate / scaled warm-up steps, build the
    Lightning module, data module, TensorBoard logger and callbacks, and run
    ``trainer.fit``.
    """
    # --- arguments ---------------------------------------------------------
    cli_args = parse_args()
    rank_zero_only(pprint.pprint)(vars(cli_args))

    # --- configuration: defaults <- main cfg <- data cfg ---------------------
    cfg = get_cfg_defaults()
    for cfg_file in (cli_args.main_cfg_path, cli_args.data_cfg_path):
        cfg.merge_from_file(cfg_file)
    pl.seed_everything(cfg.TRAINER.SEED)  # reproducibility
    # TODO: Use different seeds for each dataloader workers
    # This is needed for data augmentation

    # --- scale lr and warmup-step with the effective batch size --------------
    gpu_count = setup_gpus(cli_args.gpus)
    cli_args.gpus = gpu_count
    cfg.TRAINER.WORLD_SIZE = gpu_count * cli_args.num_nodes
    cfg.TRAINER.TRUE_BATCH_SIZE = cfg.TRAINER.WORLD_SIZE * cli_args.batch_size
    lr_scaling = cfg.TRAINER.TRUE_BATCH_SIZE / cfg.TRAINER.CANONICAL_BS
    cfg.TRAINER.SCALING = lr_scaling
    cfg.TRAINER.TRUE_LR = cfg.TRAINER.CANONICAL_LR * lr_scaling
    cfg.TRAINER.WARMUP_STEP = math.floor(cfg.TRAINER.WARMUP_STEP / lr_scaling)

    # --- lightning module ----------------------------------------------------
    profiler = build_profiler(cli_args.profiler_name)
    model = PL_ASpanFormer(cfg, pretrained_ckpt=cli_args.ckpt_path, profiler=profiler)
    loguru_logger.info(f"ASpanFormer LightningModule initialized!")

    # --- lightning data ------------------------------------------------------
    data_module = MultiSceneDataModule(cli_args, cfg)
    loguru_logger.info(f"ASpanFormer DataModule initialized!")

    # --- TensorBoard logger; checkpoints live next to its log directory ------
    tb_logger = TensorBoardLogger(
        save_dir='logs/tb_logs', name=cli_args.exp_name, default_hp_metric=False)
    checkpoint_dir = Path(tb_logger.log_dir) / 'checkpoints'

    # --- callbacks -------------------------------------------------------------
    # TODO: update ModelCheckpoint to monitor multiple metrics
    checkpoint_cb = ModelCheckpoint(
        monitor='auc@10',
        verbose=True,
        save_top_k=5,
        mode='max',
        save_last=True,
        dirpath=str(checkpoint_dir),
        filename='{epoch}-{auc@5:.3f}-{auc@10:.3f}-{auc@20:.3f}')
    callbacks = [LearningRateMonitor(logging_interval='step')]
    if not cli_args.disable_ckpt:
        callbacks.append(checkpoint_cb)

    # --- trainer -----------------------------------------------------------------
    use_sync_bn = cfg.TRAINER.WORLD_SIZE > 0
    ddp_plugin = DDPPlugin(
        find_unused_parameters=False,
        num_nodes=cli_args.num_nodes,
        sync_batchnorm=use_sync_bn)
    trainer = pl.Trainer.from_argparse_args(
        cli_args,
        plugins=ddp_plugin,
        gradient_clip_val=cfg.TRAINER.GRADIENT_CLIPPING,
        callbacks=callbacks,
        logger=tb_logger,
        sync_batchnorm=use_sync_bn,
        replace_sampler_ddp=False,  # use custom sampler
        reload_dataloaders_every_epoch=False,  # avoid repeated samples!
        weights_summary='full',
        profiler=profiler)
    loguru_logger.info(f"Trainer initialized!")
    loguru_logger.info(f"Start training!")
    trainer.fit(model, datamodule=data_module)
Download the [MID](https://github.com/Wenzhengchina/Matching-in-the-Dark) dataset. + +2. Preprocess the data in the MID dataset (you can choose whether or not to enable histogram equalization): + + ```shell + python raw_preprocess.py --dataset_dir /path/to/MID/dataset + ``` + +3. Extract the keypoints and descriptors, followed by nearest-neighbor matching: + + ```shell + python export_features.py \ + --model_path /path/to/pretrained/weights \ + --dataset_dir /path/to/MID/dataset + ``` + +4. Estimate the pose through corresponding keypoint pairs: + + ```shell + python pose_estimation.py --dataset_dir /path/to/MID/dataset + ``` + +5. Finally, collect the pose estimation errors: + + ``` + python read_error.py + ``` + +### Training from scratch + +We use [GL3D](https://github.com/lzx551402/GL3D) as the source matching dataset for training. Please follow the [instructions](https://github.com/lzx551402/GL3D) to download and unzip all the data (including the GL3D group and the tourism group). + +Then use the preprocessing code provided by ASLFeat to generate the matching information: + +```shell +git clone https://github.com/lzx551402/tfmatch +# please edit the GL3D path in the shell script before executing. 
+cd tfmatch +sh train_aslfeat_base.sh +``` + +To launch the training, configure your training hyperparameters inside `./configs` and then run: + +```shell +# stage1 +python run.py --stage 1 --config ./configs/config_stage1.yaml \ + --dataset_dir /path/to/your/GL3D/dataset \ + --job_name YOUR_JOB_NAME +# stage2 +python run.py --stage 2 --config ./configs/config_stage1.yaml \ + --dataset_dir /path/to/your/GL3D/dataset \ + --job_name YOUR_JOB_NAME \ + --start_cnt 160000 +# stage3 +python run.py --stage 3 --config ./configs/config.yaml \ + --dataset_dir /path/to/your/GL3D/dataset \ + --job_name YOUR_JOB_NAME \ + --start_cnt 220000 +``` + +### Acknowledgements + +This project would not have been possible without the open-source work of [ASLFeat](https://github.com/lzx551402/ASLFeat), [R2D2](https://github.com/naver/r2d2), [MID](https://github.com/Wenzhengchina/Matching-in-the-Dark), [GL3D](https://github.com/lzx551402/GL3D), and [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork). We sincerely thank them all. 
\ No newline at end of file diff --git a/third_party/DarkFeat/checkpoints/DarkFeat.pth b/third_party/DarkFeat/checkpoints/DarkFeat.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b28a0fc38779abea7a41cfaa830cae31c4f2791 --- /dev/null +++ b/third_party/DarkFeat/checkpoints/DarkFeat.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9c832df932465a24c9849b65df04d9f33f04df3510fd8becf6bf73b28f77b2 +size 2934451 diff --git a/third_party/DarkFeat/configs/config.yaml b/third_party/DarkFeat/configs/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ffead73fc3eac520aa7aa4bf3811c5069a4c149 --- /dev/null +++ b/third_party/DarkFeat/configs/config.yaml @@ -0,0 +1,24 @@ +training: + optimizer: 'SGD' + lr: 0.01 + momentum: 0.9 + weight_decay: 0.0001 + lr_gamma: 0.1 + lr_step: 200000 +network: + input_type: 'raw-demosaic' + noise: true + noise_maxstep: 1 + model: 'Quad_L2Net' + loss_type: 'HARD_CONTRASTIVE' + photaug: true + resize: 480 + use_corr_n: 512 + det: + corr_weight: true + safe_radius: 12 + kpt_n: 512 + score_thld: -1 + edge_thld: 10 + nms_size: 3 + eof_size: 5 \ No newline at end of file diff --git a/third_party/DarkFeat/configs/config_stage1.yaml b/third_party/DarkFeat/configs/config_stage1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f94e1da377bf8f507d6fa6db394b1016227d0e25 --- /dev/null +++ b/third_party/DarkFeat/configs/config_stage1.yaml @@ -0,0 +1,24 @@ +training: + optimizer: 'SGD' + lr: 0.1 + momentum: 0.9 + weight_decay: 0.0001 + lr_gamma: 0.1 + lr_step: 200000 +network: + input_type: 'raw-demosaic' + noise: true + noise_maxstep: 1 + model: 'Quad_L2Net' + loss_type: 'HARD_CONTRASTIVE' + photaug: true + resize: 480 + use_corr_n: 512 + det: + corr_weight: true + safe_radius: 12 + kpt_n: 512 + score_thld: -1 + edge_thld: 10 + nms_size: 3 + eof_size: 5 \ No newline at end of file diff --git a/third_party/DarkFeat/darkfeat.py 
def gather_nd(params, indices):
    """Gather slices of ``params`` addressed by the last axis of ``indices``.

    Each length-``m`` vector along the last axis of ``indices`` indexes the
    first ``m`` dimensions of ``params``.

    Args:
        params: tensor of rank ``n``.
        indices: integer tensor of shape ``[..., m]`` with ``1 <= m <= n``.

    Returns:
        Contiguous tensor of shape ``indices.shape[:-1] + params.shape[m:]``.

    Raises:
        ValueError: if ``m`` exceeds the rank of ``params``.
    """
    orig_shape = list(indices.shape)
    m = orig_shape[-1]
    n = len(params.shape)

    if m > n:
        raise ValueError(
            f'the last dimension of indices must less or equal to the rank of params. Got indices:{indices.shape}, params:{params.shape}. {m} > {n}'
        )
    out_shape = orig_shape[:-1] + list(params.shape)[m:]

    num_samples = 1
    for dim in orig_shape[:-1]:
        num_samples *= dim

    # One index tensor per addressed dimension, passed as a *tuple*: indexing
    # with a plain list of lists is deprecated in PyTorch and slated to be
    # read as a single tensor index.  Staying in tensors also avoids copying
    # the indices through Python lists.
    flat = indices.reshape(num_samples, m).to(device=params.device, dtype=torch.long)
    output = params[tuple(flat.unbind(dim=1))]  # (num_samples, ...)
    return output.reshape(out_shape).contiguous()
def edge_mask(inputs, n_channel, dilation=1, edge_thld=5):
    """Return a boolean mask that is True where a response is not edge-like.

    Second-order finite differences (``dii``, ``dij``, ``djj``) are taken per
    channel with zero padding.  A location passes when the determinant of that
    2x2 matrix is positive and ``trace**2 / det`` is at most
    ``(edge_thld + 1)**2 / edge_thld``.

    Args:
        inputs: floating-point tensor of shape ``[B, C, H, W]``.
        n_channel: not used; accepted for interface compatibility.
        dilation: dilation (and padding) of the 3x3 difference filters.
        edge_thld: ratio threshold; must be non-zero.

    Returns:
        Bool tensor of shape ``[B, C, H, W]``.
    """
    b, c, h, w = inputs.size()

    # Build the three filters directly in the input's dtype and on its device,
    # so non-float32 inputs do not hit a dtype mismatch inside conv2d.
    filters = torch.tensor(
        [
            [[0, 1., 0], [0, -2., 0], [0, 1., 0]],                # d/di d/di
            [[.25, 0, -.25], [0, 0., 0], [-.25, 0, .25]],         # d/di d/dj
            [[0, 0, 0], [1., -2., 1.], [0, 0, 0]],                # d/dj d/dj
        ],
        dtype=inputs.dtype, device=inputs.device
    ).view(3, 1, 3, 3)

    # Every channel is filtered independently: fold channels into the batch and
    # produce all three derivatives in a single convolution.
    derivs = F.conv2d(
        inputs.reshape(-1, 1, h, w), filters, padding=dilation, dilation=dilation
    )
    dii = derivs[:, 0].reshape(b, c, h, w)
    dij = derivs[:, 1].reshape(b, c, h, w)
    djj = derivs[:, 2].reshape(b, c, h, w)

    det = dii * djj - dij * dij
    tr = dii + djj
    del derivs, dii, dij, djj

    threshold = (edge_thld + 1) ** 2 / edge_thld
    # det == 0 makes the ratio inf/nan, which compares False; det > 0 excludes
    # those locations anyway.
    is_not_edge = (tr * tr / det <= threshold) & (det > 0)

    return is_not_edge
mask) + if eof_size > 0: + eof_mask = torch.ones((1, 1, h - 2 * eof_size, w - 2 * eof_size), dtype=torch.float32, device=score_map.device) + eof_mask = F.pad(eof_mask, [eof_size] * 4, value=0) + eof_mask = eof_mask.bool() + mask = torch.logical_and(eof_mask, mask) + if edge_thld > 0: + non_edge_mask = edge_mask(score_map, 1, dilation=3, edge_thld=edge_thld) + mask = torch.logical_and(non_edge_mask, mask) + + bs = score_map.shape[0] + if bs is None: + indices = torch.nonzero(mask)[0] + scores = gather_nd(score_map, indices)[0] + sample = torch.sort(scores, descending=True)[1][0:k] + indices = indices[sample].unsqueeze(0) + scores = scores[sample].unsqueeze(0) + else: + indices = [] + scores = [] + for i in range(bs): + tmp_mask = mask[i][0] + tmp_score_map = score_map[i][0] + tmp_indices = torch.nonzero(tmp_mask) + tmp_scores = gather_nd(tmp_score_map, tmp_indices) + tmp_sample = torch.sort(tmp_scores, descending=True)[1][0:k] + tmp_indices = tmp_indices[tmp_sample] + tmp_scores = tmp_scores[tmp_sample] + indices.append(tmp_indices) + scores.append(tmp_scores) + try: + indices = torch.stack(indices, dim=0) + scores = torch.stack(scores, dim=0) + except: + min_num = np.min([len(i) for i in indices]) + indices = torch.stack([i[:min_num] for i in indices], dim=0) + scores = torch.stack([i[:min_num] for i in scores], dim=0) + return indices, scores + + +# input: [batch_size, C, H, W] +# output: [batch_size, C, H, W], [batch_size, C, H, W] +def peakiness_score(inputs, moving_instance_max, ksize=3, dilation=1): + inputs = inputs / moving_instance_max + + batch_size, C, H, W = inputs.shape + + pad_size = ksize // 2 + (dilation - 1) + kernel = torch.ones([C, 1, ksize, ksize], device=inputs.device) / (ksize * ksize) + + pad_inputs = F.pad(inputs, [pad_size] * 4, mode='reflect') + + avg_spatial_inputs = F.conv2d( + pad_inputs, + kernel, + stride=1, + dilation=dilation, + padding=0, + groups=C + ) + avg_channel_inputs = torch.mean(inputs, axis=1, keepdim=True) # channel 
dimension is 1 + # print(avg_spatial_inputs.shape) + + alpha = F.softplus(inputs - avg_spatial_inputs) + beta = F.softplus(inputs - avg_channel_inputs) + + return alpha, beta + + +class DarkFeat(nn.Module): + default_config = { + 'model_path': '', + 'input_type': 'raw-demosaic', + 'kpt_n': 5000, + 'kpt_refinement': True, + 'score_thld': 0.5, + 'edge_thld': 10, + 'multi_scale': False, + 'multi_level': True, + 'nms_size': 3, + 'eof_size': 5, + 'need_norm': True, + 'use_peakiness': True + } + + def __init__(self, model_path='', inchan=3, dilated=True, dilation=1, bn=True, bn_affine=False): + super(DarkFeat, self).__init__() + inchan = 3 if self.default_config['input_type'] == 'rgb' or self.default_config['input_type'] == 'raw-demosaic' else 1 + self.config = {**self.default_config} + + self.inchan = inchan + self.curchan = inchan + self.dilated = dilated + self.dilation = dilation + self.bn = bn + self.bn_affine = bn_affine + self.config['model_path'] = model_path + + dim = 128 + mchan = 4 + + self.conv0 = self._add_conv( 8*mchan) + self.conv1 = self._add_conv( 8*mchan, bn=False) + self.bn1 = self._make_bn(8*mchan) + self.conv2 = self._add_conv( 16*mchan, stride=2) + self.conv3 = self._add_conv( 16*mchan, bn=False) + self.bn3 = self._make_bn(16*mchan) + self.conv4 = self._add_conv( 32*mchan, stride=2) + self.conv5 = self._add_conv( 32*mchan) + # replace last 8x8 convolution with 3 3x3 convolutions + self.conv6_0 = self._add_conv( 32*mchan) + self.conv6_1 = self._add_conv( 32*mchan) + self.conv6_2 = self._add_conv(dim, bn=False, relu=False) + self.out_dim = dim + + self.moving_avg_params = nn.ParameterList([ + Parameter(torch.tensor(1.), requires_grad=False), + Parameter(torch.tensor(1.), requires_grad=False), + Parameter(torch.tensor(1.), requires_grad=False) + ]) + self.clf = nn.Conv2d(128, 2, kernel_size=1) + + state_dict = torch.load(self.config["model_path"]) + new_state_dict = {} + + for key in state_dict: + if 'running_mean' not in key and 'running_var' not in 
key and 'num_batches_tracked' not in key: + new_state_dict[key] = state_dict[key] + + self.load_state_dict(new_state_dict) + print('Loaded DarkFeat model') + + def _make_bn(self, outd): + return nn.BatchNorm2d(outd, affine=self.bn_affine, track_running_stats=False) + + def _add_conv(self, outd, k=3, stride=1, dilation=1, bn=True, relu=True, k_pool = 1, pool_type='max', bias=False): + d = self.dilation * dilation + conv_params = dict(padding=((k-1)*d)//2, dilation=d, stride=stride, bias=bias) + + ops = nn.ModuleList([]) + + ops.append( nn.Conv2d(self.curchan, outd, kernel_size=k, **conv_params) ) + if bn and self.bn: ops.append( self._make_bn(outd) ) + if relu: ops.append( nn.ReLU(inplace=True) ) + self.curchan = outd + + if k_pool > 1: + if pool_type == 'avg': + ops.append(torch.nn.AvgPool2d(kernel_size=k_pool)) + elif pool_type == 'max': + ops.append(torch.nn.MaxPool2d(kernel_size=k_pool)) + else: + print(f"Error, unknown pooling type {pool_type}...") + + return nn.Sequential(*ops) + + def forward(self, input): + """ Compute keypoints, scores, descriptors for image """ + data = input['image'] + H, W = data.shape[2:] + + if self.config['input_type'] == 'rgb': + # 3-channel rgb + RGB_mean = [0.485, 0.456, 0.406] + RGB_std = [0.229, 0.224, 0.225] + norm_RGB = tvf.Normalize(mean=RGB_mean, std=RGB_std) + data = norm_RGB(data) + + elif self.config['input_type'] == 'gray': + # 1-channel + data = torch.mean(data, dim=1, keepdim=True) + norm_gray0 = tvf.Normalize(mean=data.mean(), std=data.std()) + data = norm_gray0(data) + + elif self.config['input_type'] == 'raw': + # 4-channel + pass + elif self.config['input_type'] == 'raw-demosaic': + # 3-channel + pass + else: + raise NotImplementedError() + + # x: [N, C, H, W] + x0 = self.conv0(data) + x1 = self.conv1(x0) + x1_bn = self.bn1(x1) + x2 = self.conv2(x1_bn) + x3 = self.conv3(x2) + x3_bn = self.bn3(x3) + x4 = self.conv4(x3_bn) + x5 = self.conv5(x4) + x6_0 = self.conv6_0(x5) + x6_1 = self.conv6_1(x6_0) + x6_2 = 
self.conv6_2(x6_1) + + comb_weights = torch.tensor([1., 2., 3.], device=data.device) + comb_weights /= torch.sum(comb_weights) + ksize = [3, 2, 1] + det_score_maps = [] + + for idx, xx in enumerate([x1, x3, x6_2]): + alpha, beta = peakiness_score(xx, self.moving_avg_params[idx].detach(), ksize=3, dilation=ksize[idx]) + score_vol = alpha * beta + det_score_map = torch.max(score_vol, dim=1, keepdim=True)[0] + det_score_map = F.interpolate(det_score_map, size=data.shape[2:], mode='bilinear', align_corners=True) + det_score_map = comb_weights[idx] * det_score_map + det_score_maps.append(det_score_map) + + det_score_map = torch.sum(torch.stack(det_score_maps, dim=0), dim=0) + + desc = x6_2 + score_map = det_score_map + conf = F.softmax(self.clf((desc)**2), dim=1)[:,1:2] + score_map = score_map * F.interpolate(conf, size=score_map.shape[2:], mode='bilinear', align_corners=True) + + kpt_inds, kpt_score = extract_kpts( + score_map, + k=self.config['kpt_n'], + score_thld=self.config['score_thld'], + nms_size=self.config['nms_size'], + eof_size=self.config['eof_size'], + edge_thld=self.config['edge_thld'] + ) + + descs = F.normalize( + interpolate(kpt_inds.squeeze(0) / 4, desc.squeeze(0).permute(1, 2, 0)), + p=2, + dim=-1 + ).detach().cpu().numpy(), + kpts = np.squeeze(torch.stack([kpt_inds[:, :, 1], kpt_inds[:, :, 0]], dim=-1).cpu(), axis=0) \ + * np.array([W / data.shape[3], H / data.shape[2]], dtype=np.float32) + scores = np.squeeze(kpt_score.detach().cpu().numpy(), axis=0) + + idxs = np.negative(scores).argsort()[0:self.config['kpt_n']] + descs = descs[0][idxs] + kpts = kpts[idxs] + scores = scores[idxs] + + return { + 'keypoints': kpts, + 'scores': torch.from_numpy(scores), + 'descriptors': torch.from_numpy(descs.T), + } diff --git a/third_party/DarkFeat/datasets/InvISP/LICENSE b/third_party/DarkFeat/datasets/InvISP/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..0c7a7ab19788c339529ee9c85d301a582c3c8010 --- /dev/null +++ 
b/third_party/DarkFeat/datasets/InvISP/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Yazhou XING + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/DarkFeat/datasets/InvISP/README.md b/third_party/DarkFeat/datasets/InvISP/README.md new file mode 100644 index 0000000000000000000000000000000000000000..654d33dae8e00fcd61b6f38f8e2763ae87dfefa4 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/README.md @@ -0,0 +1,117 @@ +# Invertible Image Signal Processing + + +![Python 3.6](https://img.shields.io/badge/Python-3.6-green.svg?style=plastic) +![pytorch 1.4.0](https://img.shields.io/badge/PyTorch-1.4.0-green.svg?style=plastic) + +**This repository includes official codes for "[Invertible Image Signal Processing (CVPR2021)](https://arxiv.org/abs/2103.15061)".** + +![](./figures/teaser.png) +**Figure:** *Our framework* + +Unprocessed RAW data is a highly valuable image format for image editing and computer vision. 
However, since the file size of RAW data is huge, most users can only get access to processed and compressed sRGB images. To bridge this gap, we design an Invertible Image Signal Processing (InvISP) pipeline, which not only enables rendering visually appealing sRGB images but also allows recovering nearly perfect RAW data. Due to our framework's inherent reversibility, we can reconstruct realistic RAW data instead of synthesizing RAW data from sRGB images, without any memory overhead. We also integrate a differentiable JPEG compression simulator that empowers our framework to reconstruct RAW data from JPEG images. Extensive quantitative and qualitative experiments on two DSLR demonstrate that our method obtains much higher quality in both rendered sRGB images and reconstructed RAW data than alternative methods. + +> **Invertible Image Signal Processing**
+> Yazhou Xing*, Zian Qian*, Qifeng Chen (* indicates joint first authors)
+> HKUST
+ +[[Paper](https://arxiv.org/abs/2103.15061)] +[[Project Page](https://yzxing87.github.io/InvISP/index.html)] +[[Technical Video (Coming soon)](https://yzxing87.github.io/TBA)] + +![](./figures/result_01.png) +**Figure:** *Our results* + + +## Known issue (10/2021) +There are some errors in the bilinear demosaicing implementation of the Python library ``colour_demosaicing``. You can fix them by adding the 'constant' parameter to the convolve method in [this file](https://colour-demosaicing.readthedocs.io/en/latest/_modules/colour_demosaicing/bayer/demosaicing/bilinear.html#demosaicing_CFA_Bayer_bilinear) of your package. Otherwise the demosaicing results will fall outside their original range and the trained results will show incorrect colors. + +## Installation +Clone this repo. +```bash +git clone https://github.com/yzxing87/Invertible-ISP.git +cd Invertible-ISP/ +``` + +We have tested our code on Ubuntu 18.04 LTS with PyTorch 1.4.0, CUDA 10.1 and cudnn7.6.5. Please install the dependencies with +```bash +conda env create -f environment.yml +``` + +## Preparing datasets +We use [MIT-Adobe FiveK Dataset](https://data.csail.mit.edu/graphics/fivek/) for training and evaluation. To reproduce our results, you need to first download the NIKON D700 and Canon EOS 5D subsets from their website. The images (DNG) can be downloaded by +```bash +cd data/ +bash data_preprocess.sh +``` +The download may take a while. After downloading, we need to prepare the bilinearly demosaiced RAW and white balance parameters as network input, and ground truth sRGB (in JPEG format) as supervision. 
+```bash +python data_preprocess.py --camera="NIKON_D700" +python data_preprocess.py --camera="Canon_EOS_5D" +``` +The dataset will be organized into +| Path | Size | Files | Format | Description +| :--- | :--: | ----: | :----: | :---------- +| data | 585 GB | 1 | | Main folder +| ├  Canon_EOS_5D | 448 GB | 1 | | Canon sub-folder +| ├  NIKON_D700 | 137 GB | 1 | | NIKON sub-folder +|     ├  DNG | 2.9 GB | 487 | DNG | In-the-wild RAW. +|     ├  RAW | 133 GB | 487 | NPZ | Preprocessed RAW. +|     ├  RGB | 752 MB | 487 | JPG | Ground-truth RGB. +| ├  NIKON_D700_train.txt | 1 KB | 1 | TXT | Training data split. +| ├  NIKON_D700_test.txt | 5 KB | 1 | TXT | Test data split. + +## Training networks +We specify the training arguments in `train.sh`. Simply run +```bash +cd ../ +bash train.sh +``` +The checkpoints will be saved into `./exps/{exp_name}/checkpoint/`. + +## Test and evaluation +### Use your trained model +To reconstruct the RAW from JPEG RGB, we first need to save the rendered RGB to disk and then run the test to recover the RAW. +Original RAW images are too large to be directly tested on one 2080 Ti GPU. We provide two ways to test the model. + +1. Subsampling the RAW for visualization purposes: + ```bash + python test_rgb.py --task=EXPERIMENT_NAME \ + --data_path="./data/" \ + --gamma \ + --camera=CAMERA_NAME \ + --out_path=OUTPUT_PATH \ + --ckpt=CKPT_PATH + ``` + After it finishes, run + ```bash + python test_raw.py --task=EXPERIMENT_NAME \ + --data_path="./data/" \ + --gamma \ + --camera=CAMERA_NAME \ + --out_path=OUTPUT_PATH \ + --ckpt=CKPT_PATH + ``` +2. Splitting the RAW data into patches, for quantitative evaluation purposes. Turn on the `--split_to_patch` argument. See `test.sh`. The PSNR and SSIM metrics can be obtained by + ```bash + python cal_metrics.py --path=PATH_TO_SAVED_PATCHES + ``` +### Use our pretrained weights +We also provide our trained model for reference. The checkpoints are placed in the `pretrained/` folder. 
Specify the correct PATH in `test.sh`, then you can get similar results as our paper. Please note that in the context of ISP, one trained model can only be applied for a specific camera. This is due to the camera-dependent proprietary raw color space and photo-finishing steps. + + +## Citation + +``` +@inproceedings{xing21invertible, + title = {Invertible Image Signal Processing}, + author = {Xing, Yazhou and Qian, Zian and Chen, Qifeng}, + booktitle = {CVPR}, + year = {2021} +} +``` +## Acknowledgement +Part of the codes benefit from [DiffJPEG](https://github.com/mlomnitz/DiffJPEG) and [Invertible-Image-Rescaling](https://github.com/pkuxmq/Invertible-Image-Rescaling). + +## Contact +Feel free to contact me if there is any question. (Yazhou Xing, yzxing87@gmail.com) diff --git a/third_party/DarkFeat/datasets/InvISP/__init__.py b/third_party/DarkFeat/datasets/InvISP/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/DarkFeat/datasets/InvISP/cal_metrics.py b/third_party/DarkFeat/datasets/InvISP/cal_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..cc3e501664487de4c08ab8c89328dd266fba2868 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/cal_metrics.py @@ -0,0 +1,114 @@ +import cv2 +import numpy as np +import math +# from skimage.metrics import structural_similarity as ssim +from skimage.measure import compare_ssim +from scipy.misc import imread +from glob import glob + +import argparse + +parser = argparse.ArgumentParser(description="evaluation codes") + +parser.add_argument("--path", type=str, help="Path to evaluate images.") + +args = parser.parse_args() + +def psnr(img1, img2): + mse = np.mean( (img1/255. - img2/255.) 
** 2 ) + if mse < 1.0e-10: + return 100 + PIXEL_MAX = 1 + return 20 * math.log10(PIXEL_MAX / math.sqrt(mse)) + +def psnr_raw(img1, img2): + mse = np.mean( (img1 - img2) ** 2 ) + if mse < 1.0e-10: + return 100 + PIXEL_MAX = 1 + return 20 * math.log10(PIXEL_MAX / math.sqrt(mse)) + + +def my_ssim(img1, img2): + return compare_ssim(img1, img2, data_range=img1.max() - img1.min(), multichannel=True) + + +def quan_eval(path, suffix="jpg"): + # path: /disk2/yazhou/projects/IISP/exps/test_final_unet_globalEDV2/ + # ours + gt_imgs = sorted(glob(path+"tar*.%s"%suffix)) + pred_imgs = sorted(glob(path+"pred*.%s"%suffix)) + + # with open(split_path + "test_gt.txt", 'r') as f_gt, open(split_path+"test_rgb.txt","r") as f_rgb: + # gt_imgs = [line.rstrip() for line in f_gt.readlines()] + # pred_imgs = [line.rstrip() for line in f_rgb.readlines()] + + assert len(gt_imgs) == len(pred_imgs) + + psnr_avg = 0. + ssim_avg = 0. + for i in range(len(gt_imgs)): + gt = imread(gt_imgs[i]) + pred = imread(pred_imgs[i]) + psnr_temp = psnr(gt, pred) + psnr_avg += psnr_temp + ssim_temp = my_ssim(gt, pred) + ssim_avg += ssim_temp + + print("psnr: ", psnr_temp) + print("ssim: ", ssim_temp) + + psnr_avg /= float(len(gt_imgs)) + ssim_avg /= float(len(gt_imgs)) + + print("psnr_avg: ", psnr_avg) + print("ssim_avg: ", ssim_avg) + + return psnr_avg, ssim_avg + +def mse(gt, pred): + return np.mean((gt-pred)**2) + +def mse_raw(path, suffix="npy"): + gt_imgs = sorted(glob(path+"raw_tar*.%s"%suffix)) + pred_imgs = sorted(glob(path+"raw_pred*.%s"%suffix)) + + # with open(split_path + "test_gt.txt", 'r') as f_gt, open(split_path+"test_rgb.txt","r") as f_rgb: + # gt_imgs = [line.rstrip() for line in f_gt.readlines()] + # pred_imgs = [line.rstrip() for line in f_rgb.readlines()] + + assert len(gt_imgs) == len(pred_imgs) + + mse_avg = 0. + psnr_avg = 0. 
+ for i in range(len(gt_imgs)): + gt = np.load(gt_imgs[i]) + pred = np.load(pred_imgs[i]) + mse_temp = mse(gt, pred) + mse_avg += mse_temp + psnr_temp = psnr_raw(gt, pred) + psnr_avg += psnr_temp + + print("mse: ", mse_temp) + print("psnr: ", psnr_temp) + + mse_avg /= float(len(gt_imgs)) + psnr_avg /= float(len(gt_imgs)) + + print("mse_avg: ", mse_avg) + print("psnr_avg: ", psnr_avg) + + return mse_avg, psnr_avg + +test_full = False + +# if test_full: +# psnr_avg, ssim_avg = quan_eval(ROOT_PATH+"%s/vis_%s_full/"%(args.task, args.ckpt), "jpeg") +# mse_avg, psnr_avg_raw = mse_raw(ROOT_PATH+"%s/vis_%s_full/"%(args.task, args.ckpt)) +# else: +psnr_avg, ssim_avg = quan_eval(args.path, "jpg") +mse_avg, psnr_avg_raw = mse_raw(args.path) + +print("pnsr: {}, ssim: {}, mse: {}, psnr raw: {}".format(psnr_avg, ssim_avg, mse_avg, psnr_avg_raw)) + + diff --git a/third_party/DarkFeat/datasets/InvISP/config/config.py b/third_party/DarkFeat/datasets/InvISP/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..dc42182ecf7464cc85ed5c77b7aeb9ee4e3ecd74 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/config/config.py @@ -0,0 +1,21 @@ +import argparse + +BATCH_SIZE = 1 + +DATA_PATH = "./data/" + + + +def get_arguments(): + parser = argparse.ArgumentParser(description="training codes") + + parser.add_argument("--task", type=str, help="Name of this training") + parser.add_argument("--data_path", type=str, default=DATA_PATH, help="Dataset root path.") + parser.add_argument("--batch_size", type=int, default=BATCH_SIZE, help="Batch size for training. ") + parser.add_argument("--debug_mode", dest='debug_mode', action='store_true', help="If debug mode, load less data.") + parser.add_argument("--gamma", dest='gamma', action='store_true', help="Use gamma compression for raw data.") + parser.add_argument("--camera", type=str, default="NIKON_D700", choices=["NIKON_D700", "Canon_EOS_5D"], help="Choose which camera to use. 
") + parser.add_argument("--rgb_weight", type=float, default=1, help="Weight for rgb loss. ") + + + return parser diff --git a/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D.txt b/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D.txt new file mode 100644 index 0000000000000000000000000000000000000000..b2a01137c15059c99e7ad26301c7ffdafdcbe72d --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D.txt @@ -0,0 +1,777 @@ +https://data.csail.mit.edu/graphics/fivek/img/dng/a3674-jmac_MG_0392.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1902-_MG_7217.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0023-07-06-02-at-15h06m48-s_MG_1489.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0282-20060619_125715__MG_9197.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2314-20080426_111248__MG_9227.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2113-20070619_135552__MG_8411.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3057-dvf_002.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0121-jmac_MG_7813.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1416-07-10-06-at-16h48m40s-_MG_3892.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3243-07-11-11-at-11h52m02s-_MG_4558.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4814-Duggan_080114_4419.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4966-Duggan_090124_4744.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4558-Duggan_080410_5878.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2125-20080710_001754__MG_9208.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4163-MB_070908_098.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3644-jmac_MG_5959.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0704-jmac_MG_0617.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4500-Duggan_090428_8065.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4211-Duggan_090305_5296.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4592-Duggan_090331_6589.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1382-MB_070908_022.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4542-Duggan_080411_6019.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1451-07-06-28-at-12h47m34s-_MG_1828.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4715-Duggan_090503_8760.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4395-Duggan_090503_8734.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4968-Duggan_080819_1132.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4849-Duggan_090426_7764.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2182-_MG_1566.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3719-07-11-29-at-15h43m28s-_MG_8075.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0525-MB_070908_076.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0915-MB_060708_204.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4644-Duggan_090214_5136.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4086-jmac_MG_7933.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1268-jmac_MG_5989.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4227-Duggan_090504_8946.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1061-jmac_MG_0244.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0619-20081019_at_01h22m56__MG_3327.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3368-jmac_MG_0786.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3869-_MG_7067.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4517-Duggan_090406_7318.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1732-07-11-11-at-12h06m55s-_MG_4594.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1081-jmac_MG_6226.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2565-07-07-17-at-23h18m11s-_MG_2364.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1779-07-08-11-at-14h58m37s-N0000114.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4197-_MG_6428.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4579-Duggan_090212_5073.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0203-07-06-01-at-15h10m04-s_MG_1303.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1621-jmac_MG_0344.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0238-dvf_024.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3666-_MG_6404.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3658-jmac_MG_0418.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2881-20070514_162430__MG_7345.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4708-Duggan_090323_6142.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0326-jmac_MG_7785.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4862-jmac_MG_1010.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0356-07-11-26-at-16h05m54s-_MG_7171.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4063-07-11-25-at-18h26m49s-_MG_7002.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4560-Duggan_090405_7058.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0740-dvf_019.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1559-jmac_MG_0089.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0894-dvf_001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0884-MB_080329_065.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3199-20081026_at_06h13m48__MG_3460.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1205-07-06-02-at-11h36m32-s_MG_1421.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2892-MB_060708_226.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1546-MB_080329_066.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1817-07-06-30-at-12h38m43s-_MG_2006.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4058-MB_080329_056.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1952-07-12-02-at-12h24m10s-_MG_8944.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2285-07-11-29-at-17h23m11s-_MG_8171.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4704-Duggan_090503_8779.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0811-20051224_165428__MG_0953.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3751-07-11-04-at-18h05m15s-_MG_4020.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0835-MB_080329_061.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2327-dvf_032.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0454-08-05-25-at-12h33m47s-_MG_9489.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3282-_MG_6990.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3089-07-11-22-at-11h21m46s-_MG_6278.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2928-jmac_MG_0176.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0043-07-11-27-at-12h09m46s-_MG_7307.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1777-jmac_MG_0499.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1935-MB_070908_090.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3771-07-06-01-at-13h03m06-s_MG_1256.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4345-Duggan_080411_5976.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3625-07-11-11-at-10h53m52s-_MG_4480.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3242-20080623_at_15h18m22__MG_9919.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4368-Duggan_090321_5857.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0919-07-10-06-at-17h40m18s-_MG_3916.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4107-dvf_018.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4088-dvf_041.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1901-_MG_0357.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2104-07-08-11-at-16h50m03s-N0000154.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1775-dvf_006.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1317-20061213_150840__MG_3797.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1006-_MG_7950.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0535-jmac_MG_6029.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0622-jmac_MG_5852.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0754-07-11-22-at-09h58m34s-_MG_6189.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3670-jmac_MG_5917.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4928-Duggan_090127_4793.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4451-Duggan_080821_1263.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3623-20051220_201437__MG_9239.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1352-07-11-04-at-17h58m48s-_MG_4012.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4860-Duggan_090504_8801.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0997-jmac_MG_7637.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4397-Duggan_080819_1155.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1864-_MG_6384.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4271-Duggan_090227_5232.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2898-dvf_011.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2159-jmac_MG_6361.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1612-MB_070908_015.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0104-dvf_003.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1178-jmac_MG_6061.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0348-07-07-07-at-09h42m42s-_MG_2151.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4502-Duggan_090116_4368.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0980-_MG_0509.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4812-Duggan_090428_8086.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2711-MB_070908_106.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0381-20070929_134540__MG_0110.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3036-20090127_at_17h54m33__MG_4036.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1400-MB_070908_014.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0093-MB_070908_038.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0764-MB_070908_088.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1511-jmac_MG_6757.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0958-jmac_MG_0737.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2452-dvf_014.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1802-061006_014724__MG_6933.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3345-20080514_105211__MG_9917.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4357-Duggan_090124_4645.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0218-kme_181.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4881-Duggan_090405_7225.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2793-MB_070519_036.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0814-MB_070908_062.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2885-20081207_at_23h26m15__MG_3818.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3829-07-06-02-at-05h48m48-s_MG_1315.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4974-Duggan_090226_5202.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1603-MB_070908_037.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1199-jmac_MG_5873.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4831-Duggan_090406_7270.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3460-20080514_105637__MG_9928.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1491-dvf_025.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2951-jmac_MG_5613.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4714-Duggan_080613_8704.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3273-jmac_MG_0703.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2588-jmac_MG_6874.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1853-07-11-28-at-17h03m55s-_MG_7857.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4608-Duggan_080413_6147.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0020-jmac_MG_6225.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2435-_MG_8018.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1452-20080809_at_14h52m39__MG_0081.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3339-_MG_7202.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1413-07-11-21-at-16h37m24s-_MG_5983.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1399-jmac_MG_7777.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3566-07-12-01-at-12h52m44s-_MG_8540.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0601-07-11-26-at-12h45m09s-_MG_7055.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0529-jmac_MG_0267.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2599-jmac_MG_0414.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0335-jmac_MG_6437.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2710-jmac_MG_7731.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3511-jmac_MG_0542.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2546-_MG_7763.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4220-Duggan_090305_5359.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3020-07-09-16-at-11h03m47s-_MG_3425.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3591-07-11-30-at-16h19m33s-_MG_8384.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4335-Duggan_090123_4520.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2669-jmac_MG_0238.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0047-07-11-18-at-00h05m40s-_MG_4882.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4963-Duggan_090428_8067.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1523-jmac_MG_0452.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1940-jmac_MG_6206.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2363-07-11-19-at-14h03m38s-_MG_5078.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0646-20070826_182055__MG_9177.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4899-Duggan_090330_6257.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2006-07-06-02-at-06h00m56-s_MG_1324.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4399-Duggan_080410_5879.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1890-07-10-06-at-15h32m38s-_MG_3803.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1973-060914_170620__MG_6779.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2355-MB_080329_058.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1734-07-11-11-at-11h44m17s-_MG_4537.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3729-07-11-24-at-21h39m19s-_MG_6853.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0077-20080627_at_14h31m24__MG_0714.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1369-jmac_MG_5781.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2939-20080702_at_00h12m52__MG_3193.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4954-Duggan_080312_5489.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0092-jmac_MG_7673.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1760-07-06-01-at-13h01m06-s_MG_1253.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3603-MB_080329_055.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1338-_MG_1523.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0501-_MG_7370.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4052-20060620_165511__MG_9535.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0715-060812_182920__MG_6255.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2923-20060619_195834__MG_9248.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1261-07-12-01-at-16h14m01s-_MG_8746.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4565-Duggan_090504_9023.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4953-Duggan_090330_6272.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3797-jmac_MG_0496.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1483-jmac_MG_7755.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3000-_MG_7776.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4931-Duggan_090428_8054.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1125-07-11-25-at-10h33m49s-_MG_6884.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0323-07-06-27-at-13h56m27s-_MG_1782.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1471-07-07-15-at-23h51m48s-_MG_2179.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4759-Duggan_090305_5342.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4313-Duggan_080413_6158.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2362-20051223_084128__MG_0542.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4092-07-12-03-at-09h35m54s-_MG_9192.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3841-07-12-01-at-13h04m21s-_MG_8637.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0442-jmac_MG_1461.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0183-07-06-02-at-07h15m59-s_MG_1347.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4755-Duggan_090323_6173.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4129-MB_070908_033.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3474-jmac_MG_1125.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3252-07-12-01-at-16h06m04s-_MG_8716.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0944-20061213_132310__MG_3646.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2349-07-11-20-at-08h06m58s-_MG_5505.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1433-jmac_MG_0303.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0707-07-12-01-at-15h31m07s-_MG_8670.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4409-Duggan_090503_8738.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1925-_MG_7836.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1363-MB_060909_005.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4904-Duggan_081024_2201.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0638-20061008_092601__MG_0024.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1515-jmac_MG_1266.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2451-07-07-17-at-00h36m15s-_MG_2335.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3223-MB_080627_677.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4238-Duggan_090320_5609.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2725-07-11-21-at-16h55m39s-_MG_5992.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2361-07-06-01-at-13h15m17-s_MG_1259.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4494-Duggan_081010_1923.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4985-jmac_MG_7412.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4553-Duggan_090331_6590.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3720-jmac_MG_0851.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3843-20061213_150009__MG_3787.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0681-060811_183554__MG_6223.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1091-07-07-04-at-04h03m08s-_MG_2094.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3784-07-10-06-at-16h08m07s-_MG_3859.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1842-07-11-21-at-08h59m04s-_MG_5807.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4736-Duggan_090503_8761.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0981-jmac_MG_1360.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1275-20080809_at_14h45m40__MG_0065.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1855-jmac_MG_0383.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4628-Duggan_090428_8108.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2999-jmac_MG_8001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4740-Duggan_080120_4782.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4121-07-11-22-at-06h50m14s-_MG_6000.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3111-_MG_2968.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4007-_MG_7167.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0470-_MG_7801.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4819-Duggan_090330_6230.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1847-20051222_141305__MG_0341.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4779-Duggan_090323_6115.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3465-20060619_114622__MG_9153.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4742-Duggan_090331_6517.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1994-20080708_at_13h44m41__MG_4350.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3911-07-07-01-at-10h50m55s-_MG_2028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0441-jmac_MG_5386.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3039-07-06-02-at-10h16m04-s_MG_1405.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4212-Duggan_090321_5925.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2837-07-12-02-at-11h35m49s-_MG_8848.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2089-jmac_MG_1391.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4386-Duggan_090124_4632.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4482-Duggan_090503_8712.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1787-_MG_3277.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4470-Duggan_090123_4566.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0019-jmac_MG_0653.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4935-Duggan_090312_5580.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4855-Duggan_090323_6207.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0351-MB_070908_006.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3442-MB_060909_003.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1899-jmac_MG_1320.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4408-Duggan_080411_5973.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1804-MB_060909_002.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4598-Duggan_090305_5297.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0853-20070923_073247__MG_9686.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3551-MB_080627_668.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4493-Duggan_090322_6041.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1149-_MG_6531.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0708-20070210_164509__MG_6786.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0594-_MG_0406.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2471-_MG_6887.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3648-07-06-01-at-12h59m03-s_MG_1251.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1076-07-11-20-at-07h21m04s-_MG_5402.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3256-jmac_MG_0351.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3697-07-11-24-at-16h05m35s-_MG_6729.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3079-_MG_7179.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4232-Duggan_090323_6181.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3838-jmac_MG_7919.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0808-kme_147.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0083-jmac_MG_0082.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2831-_MG_3139.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4221-Duggan_080126_4855.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1758-07-07-23-at-23h39m31s-_MG_2497.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1084-jmac_MG_5972.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1498-07-06-02-at-14h08m33-s_MG_1456.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0030-_MG_7844.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4509-Duggan_090504_8967.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2273-jmac_MG_0479.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4231-Duggan_080326_5786.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4601-Duggan_090331_6495.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4443-Duggan_090503_8691.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1122-20080622_at_13h47m40__MG_9874.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1720-07-06-01-at-14h14m20-s_MG_1282.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3975-jmac_MG_5721.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1465-07-07-17-at-00h30m32s-_MG_2247.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3660-jmac_MG_8044.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4662-Duggan_080115_4605.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1259-jmac_MG_0385.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2133-20060617_140539__MG_8570.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4751-Duggan_080819_1030.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2812-07-11-30-at-11h07m15s-_MG_8208.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2848-MB_060708_292.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4906-Duggan_090210_5028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2208-_MG_6963.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4888-Duggan_081024_2295.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4468-Duggan_081122_3260.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2005-07-11-20-at-17h05m05s-_MG_5779.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3870-MB_070908_122.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3832-20060613_091536__MG_7749.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2224-MB_070908_032.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3319-MB_070908_080.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3409-20080509_070806__MG_9695.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4448-Duggan_080119_4778.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4199-jmac_MG_5003.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1424-kme_185.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4548-Duggan_080130_5029.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4584-Duggan_080309_5404.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4188-_MG_1604.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0635-20060613_112054__MG_7862.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0605-_MG_7197.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0440-MB_070520_107.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3920-jmac_MG_0682.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1131-dvf_020.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4351-Duggan_090428_8083.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3822-07-11-21-at-09h53m21s-_MG_5852.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1744-jmac_MG_0369.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4009-jmac_MG_7717.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3715-_MG_7773.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3563-07-11-30-at-15h55m08s-_MG_8326.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4760-Duggan_081024_2178.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2836-jmac_MG_0389.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3631-MB_070908_140.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1479-jmac_MG_8030.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4246-Duggan_090330_6226.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4471-Duggan_090321_5859.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0801-07-08-11-at-16h32m03s-_MG_3277.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0803-20081226_at_17h04m14__MG_3930.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0222-NKIM_MG_2635.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4636-Duggan_080216_5303.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3371-07-12-01-at-11h32m58s-_MG_8498.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3831-jmac_MG_5861.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4546-Duggan_081010_1913.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1119-MB_070908_170.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2597-060824_122554__MG_6756.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2105-jmac_MG_7930.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1697-07-12-01-at-11h12m05s-_MG_8492.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3296-20080509_071308__MG_9701.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3067-_MG_1539.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1449-MB_060909_016.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3149-20080708_at_13h43m33__MG_4340.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3650-07-06-01-at-13h48m38-s_MG_1270.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4308-Duggan_090209_4996.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4839-Duggan_090321_5908.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2102-jmac_MG_7845.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0917-07-06-01-at-14h40m08-s_MG_1293.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0411-07-11-21-at-13h12m13s-_MG_5935.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4696-Duggan_080323_5686.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1525-jmac_MG_0646.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0632-07-06-01-at-12h50m26-s_MG_1230.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4735-Duggan_090307_5553.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1980-07-11-08-at-01h16m15s-_MG_4131.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4151-dvf_026.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2067-dvf_013.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4108-MB_080329_057.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1132-20061213_164642__MG_6076.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0982-jmac_MG_1105.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0784-_MG_7693.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4886-Duggan_090503_8792.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1917-jmac_MG_5620.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0840-07-11-19-at-16h20m11s-_MG_5348.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4750-Duggan_090504_9001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2230-20060616_082451__MG_8195.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0636-07-11-27-at-10h02m30s-_MG_7226.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0825-_MG_7225.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2560-MB_070908_079.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2129-jmac_MG_1342.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0504-jmacIMG_6809.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1070-_MG_6547.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2550-_MG_3058.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4990-jmac_MG_1139.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0313-_MG_7253.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4586-Duggan_090428_8010.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3152-07-07-04-at-06h23m15s-_MG_2099.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1620-20080204_113002__MG_0583.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0242-07-06-01-at-12h55m36-s_MG_1241.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1242-07-10-27-at-16h31m23s-_MG_3949.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0869-20080629_at_19h10m02__MG_1342.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2252-jmac_MG_6404.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3018-jmac_MG_0481.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2773-jmac_MG_4982.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0004-jmac_MG_1384.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4120-_MG_7211.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3051-07-06-01-at-13h01m22-s_MG_1255.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2900-MB_070908_087.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1757-dvf_023.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4878-Duggan_080207_5155.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4540-Duggan_080411_5948.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2277-07-11-24-at-15h53m42s-_MG_6720.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1821-07-11-19-at-14h41m50s-_MG_5129.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2828-jmac_MG_0100.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3559-jmac_MG_0205.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2158-jmac_MG_7657.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1797-jmac_MG_6883.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4703-Duggan_090426_7850.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2764-07-11-19-at-13h52m09s-_MG_5054.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1423-20080624_at_19h53m25__MG_0078.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4965-Duggan_090405_7028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2085-20051009_104656__MG_0587.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4239-Duggan_080114_4429.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4511-Duggan_090504_9050.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2095-07-11-22-at-08h32m36s-_MG_6015.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4605-Duggan_090108_4208.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0042-060813_155838__MG_6361.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1656-dvf_005.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2225-jmac_MG_0540.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3647-MB_070908_094.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4524-Duggan_080326_5805.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4700-Duggan_090406_7321.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1188-MB_080329_068.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1882-07-11-23-at-17h04m28s-_MG_6574.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1265-20051225_163547__MG_1396.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2824-dvf_035.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4432-Duggan_081114_3124.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2664-20081226_at_17h48m43__MG_3997.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0032-jmac_MG_0266.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1730-20080809_at_18h39m49__MG_0130.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0358-MB_080329_074.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2731-07-12-01-at-17h40m41s-_MG_8785.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0118-20051223_103622__MG_0617.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4298-Duggan_090504_9090.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3473-jmac_MG_0161.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4898-Duggan_090212_5075.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3685-MB_060909_011.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2964-MB_070908_020.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1610-08-11-09-at-22h58m42s-_MG_3590.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3482-jmac_MG_1250.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0418-07-11-19-at-13h26m20s-_MG_5018.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3026-_MG_7180.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1861-jmac_MG_6054.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2358-jmac_MG_0546.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4411-Duggan_090131_4857.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4863-Duggan_080115_4511.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0540-jmac_MG_5988.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1263-20071122_142540__MG_0314.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1690-061202_195438__MG_9731.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2822-jmac_MG_1389.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1330-20080625_at_00h06m29__MG_0169.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2789-jmac_MG_0522.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0259-dvf_029.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3043-jmac_MG_6976.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1795-jmac_MG_0165.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2526-20061015_103622__MG_0042.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4467-Duggan_090426_7873.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2162-kme_014.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3080-jmac_MG_1235.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0038-MB_070908_135.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4564-Duggan_090406_7253.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3977-07-11-05-at-22h45m52s-_MG_4073.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4463-Duggan_081024_2100.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4421-Duggan_090214_5129.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4438-Duggan_090330_6313.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3292-jmac_MG_4914.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2926-MB_070908_110.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1790-07-06-28-at-12h47m57s-_MG_1831.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4722-Duggan_090406_7315.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3892-07-11-11-at-11h46m34s-_MG_4544.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1963-jmac_MG_1112.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0091-jmac_MG_4959.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2772-jmac_MG_7411.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2205-jmac_MG_5745.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3764-20060618_093109__MG_8792.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2180-dvf_007.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4550-Duggan_090428_8066.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1743-07-06-01-at-14h31m58-s_MG_1288.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2529-07-06-02-at-06h09m13-s_MG_1328.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0918-_MG_1507.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2338-MB_080628_696.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2245-20060508_141031__MG_6785.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1564-MB_080329_054.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1487-20081226_at_16h52m49__MG_3920.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0539-jmac_MG_0220.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4670-Duggan_080115_4464.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3029-07-11-17-at-07h41m24s-_MG_4654.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4665-Duggan_090504_8932.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3849-MB_070908_003.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1755-NKIM_MG_2646.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4096-jmac_MG_0095.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1072-jmac_MG_6892.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3316-20051225_163230__MG_1390.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4624-Duggan_090322_5962.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1912-MB_070908_028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0146-07-11-23-at-10h54m29s-_MG_6544.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2395-07-11-28-at-11h57m18s-_MG_7567.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1915-07-11-27-at-19h34m28s-_MG_7389.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4793-Duggan_090330_6227.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3123-20070930_191159__MG_0168.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2427-jmac_MG_5488.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2329-07-06-02-at-06h10m57-s_MG_1331.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0185-07-07-06-at-20h08m44s-_MG_2130.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3531-07-06-30-at-04h02m08s-_MG_1936.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1625-20081226_at_17h39m38__MG_3987.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3024-07-08-11-at-16h35m32s-N0000142.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0639-dvf_010.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4654-Duggan_090221_5150.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0322-kme_016.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0406-_MG_7943.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4998-Duggan_080210_5246.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1887-_MG_7973.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1232-07-11-04-at-18h21m34s-_MG_4038.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4053-07-09-16-at-11h25m31s-_MG_3439.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3055-20051223_105419__MG_0634.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1206-07-11-11-at-10h31m23s-_MG_4451.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4028-060810_105728__MG_6096.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4761-Duggan_090504_8960.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3320-jmac_MG_4870.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0786-MB_060708_253.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0239-_MG_1622.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4940-MB_070908_065.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3204-MB_080329_075.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3859-_MG_3076.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1771-20090127_at_18h47m42__MG_4085.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2275-07-06-02-at-14h19m38-s_MG_1471.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4865-Duggan_090331_6584.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0514-jmac_MG_7749.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4676-Duggan_090322_5973.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3888-07-11-26-at-15h06m23s-_MG_7098.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3007-07-11-28-at-10h38m19s-_MG_7488.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2575-jmac_MG_7650.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0488-jmac_MG_1405.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1998-20080426_112951__MG_9254.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0275-07-11-24-at-16h27m12s-_MG_6758.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4918-Duggan_080324_5694.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4461-_MG_7166.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2884-jmac_MG_0586.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2026-dvf_008.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2465-20051009_143101__MG_0625.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2882-060805_172412__MG_5993.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2084-jmac_MG_5592.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3279-20060620_171222__MG_9575.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2203-kme_146.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0354-07-07-17-at-23h28m36s-_MG_2372.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4265-Duggan_080411_5930.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1906-jmac_MG_4886.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2678-07-11-30-at-15h00m07s-_MG_8238.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0865-20080515_075226__MG_9983.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3354-MB_070908_069.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4763-Duggan_080203_5123.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4416-Duggan_090428_8159.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1290-_MG_7809.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0486-jmac_MG_0791.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0709-07-12-01-at-17h01m35s-_MG_8762.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2212-jmac_MG_6333.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0656-20070505_100410__MG_6820.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1320-MB_060708_069.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3264-jmac_MG_5785.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4658-Duggan_090201_4929.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0620-jmac_MG_6253.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2965-07-07-16-at-00h22m25s-_MG_2198.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3713-07-11-20-at-07h38m43s-_MG_5448.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1818-07-06-28-at-13h38m34s-_MG_1888.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3125-07-06-02-at-14h20m02-s_MG_1472.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1301-07-11-24-at-14h40m51s-_MG_6711.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4394-Duggan_090127_4837.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1388-jmac_MG_6009.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1009-jmac_MG_7831.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4249-Duggan_090322_6001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0765-07-06-02-at-14h28m55-s_MG_1477.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3421-20080630_at_16h14m34__MG_1769.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0076-jmac_MG_5736.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1183-07-07-01-at-11h01m48s-_MG_2035.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2971-jmac_MG_1092.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4826-Duggan_080821_1199.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1118-jmac_MG_1307.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3002-MB_060708_203.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2808-20080516_072208__MG_0018.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1103-jmac_MG_0296.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2379-07-12-01-at-11h06m10s-_MG_8476.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3376-MB_060909_057.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2184-07-06-30-at-05h41m51s-_MG_1954.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1568-_MG_6479.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0148-07-07-16-at-23h50m49s-_MG_2214.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4791-Duggan_090131_4873.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2723-07-07-23-at-22h40m05s-_MG_2491.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4455-Duggan_080106_4325.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0797-07-10-06-at-08h42m41s-_MG_3745.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1364-20060209_113655__MG_2902.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0892-jmac_MG_0130.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0423-07-06-02-at-07h35m36-s_MG_1355.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4105-07-11-26-at-16h02m57s-_MG_7151.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3693-07-09-22-at-20h22m54s-_MG_3623.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1346-20061213_142422__MG_3757.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1870-jmac_MG_6385.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4645-Duggan_090426_7758.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4806-Duggan_090207_4948.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0386-jmac_MG_0520.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4124-20080709_at_10h04m23__MG_4561.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4768-Duggan_090330_6266.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1277-dvf_022.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4225-Duggan_081109_3031.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3540-07-12-02-at-14h05m14s-_MG_8949.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1984-MB_060909_014.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0719-jmac_MG_5118.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2850-jmac_MG_5803.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4969-Duggan_080819_1109.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2616-07-12-01-at-11h09m15s-_MG_8482.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1955-07-11-22-at-10h50m10s-_MG_6213.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3710-07-11-20-at-16h52m05s-_MG_5742.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0383-MB_060909_028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0021-07-11-28-at-09h22m57s-_MG_7427.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1708-_MG_7164.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1768-07-08-11-at-17h54m02s-_MG_3365.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2927-jmac_MG_5844.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4126-_MG_1739.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0920-dvf_012.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1266-20060206_145139__MG_2286.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0336-07-08-11-at-16h57m13s-_MG_3305.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4510-Duggan_090305_5511.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4528-Duggan_090209_4971.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4685-Duggan_080411_5945.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0617-20060619_094244__MG_9140.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3688-jmac_MG_1424.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3882-20051225_165429__MG_1427.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0900-jmac_MG_7376.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0781-20080627_at_18h09m45__MG_0793.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1328-20080630_at_22h44m56__MG_1921.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4184-jmac_MG_5507.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4562-_MG_7033.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3085-jmac_MG_8019.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4642-Duggan_080324_5701.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4442-Duggan_080629_9284.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3094-jmac_MG_0621.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4835-Duggan_090426_7891.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3755-07-11-19-at-15h49m11s-_MG_5217.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1588-MB_080329_053.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3773-jmac_MG_0380.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4861-Duggan_090123_4543.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4339-Duggan_090111_4244.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0263-07-11-20-at-16h57m56s-_MG_5753.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1700-07-11-22-at-13h30m23s-_MG_6305.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2152-jmac_MG_7721.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3745-jmac_MG_5066.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3552-MB_080629_691.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1647-MB_060909_078.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3389-dvf_004.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1593-_MG_3087.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3377-_MG_7893.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1577-07-06-28-at-12h42m19s-_MG_1822.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0752-20061213_134314__MG_3708.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4244-Duggan_090504_8959.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1054-07-06-27-at-13h59m14s-_MG_1801.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3386-jmac_MG_7601.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2334-jmac_MG_0701.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1592-07-06-01-at-14h20m21-s_MG_1284.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1688-MB_070908_012.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4591-Duggan_080411_5940.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2637-060814_062852__MG_6415.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2969-MB_060909_061.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1485-dvf_042.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3177-07-11-17-at-08h19m16s-_MG_4757.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4433-Duggan_090504_8957.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3119-07-11-05-at-23h49m11s-_MG_4105.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4480-Duggan_090201_4896.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3687-07-06-30-at-13h15m14s-_MG_2022.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4447-Duggan_090321_5856.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0725-07-12-02-at-10h25m22s-_MG_8796.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4933-Duggan_090428_8040.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0809-jmac_MG_5754.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0941-MB_071013_001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0480-jmac_MG_0549.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0347-07-08-11-at-18h17m09s-N0000221.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4589-Duggan_090426_7840.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0192-_MG_7063.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0144-07-11-20-at-16h38m08s-_MG_5725.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3307-jmac_MG_1001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4631-Duggan_080811_0493.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3180-07-08-11-at-18h19m52s-N0000238.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1833-kme_138.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1996-07-10-06-at-15h02m12s-_MG_3767.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2570-jmac_MG_5734.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4597-Duggan_090226_5190.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3671-jmac_MG_6191.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3735-_MG_7825.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4745-Duggan_090330_6275.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3434-jmac_MG_5831.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0854-MB_080329_060.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4392-Duggan_090331_6554.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2692-060824_103042__MG_6710.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2380-20060208_203256__MG_2849.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2278-20080508_074100__MG_9540.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4487-Duggan_090322_5971.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1392-08-05-25-at-15h08m39s-_MG_9578.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3400-07-11-04-at-17h36m14s-_MG_4004.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3454-07-11-28-at-15h56m18s-_MG_7736.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2847-dvf_040.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1826-jmac_MG_1122.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0084-_MG_1610.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4306-Duggan_090127_4836.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3889-jmac_MG_1181.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1565-dvf_015.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4064-07-12-02-at-16h23m18s-_MG_9020.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0621-20080514_110501__MG_9940.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1175-kme_007.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4230-Duggan_090426_7798.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0016-jmac_MG_0795.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1335-07-11-26-at-14h48m48s-_MG_7086.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3156-20080514_101818__MG_9892.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0871-07-09-22-at-20h08m29s-_MG_3610.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4996-Duggan_090426_7783.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1989-MB_070908_016.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3791-_MG_1498.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4186-dvf_039.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2299-20060617_172354__MG_8709.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4431-Duggan_090330_6282.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0626-20070618_190911__MG_8400.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3935-07-11-19-at-10h53m45s-_MG_4961.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2511-_MG_3149.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3185-07-11-30-at-15h00m26s-_MG_8241.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0727-07-11-11-at-11h53m38s-_MG_4569.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1367-07-11-11-at-11h49m06s-_MG_4547.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1509-dvf_034.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1816-07-12-02-at-16h13m34s-_MG_8986.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4462-Duggan_090331_6525.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2207-jmac_MG_6896.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3202-07-06-02-at-13h18m43-s_MG_1425.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3212-_MG_1504.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0311-jmac_MG_0128.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1486-07-11-25-at-10h58m01s-_MG_6923.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0879-jmac_MG_0200.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3763-07-11-23-at-19h43m03s-_MG_6657.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4097-20080623_at_14h52m36__MG_9904.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3691-_MG_6475.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4769-Duggan_090320_5608.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1406-jmac_MG_5303.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3947-jmac_MG_1444.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1043-_MG_0366.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2417-20060207_192034__MG_2638.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2193-20090128_at_16h44m24__MG_4134.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2144-jmac_MG_0288.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4595-Duggan_090503_8713.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2459-_MG_7774.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2572-MB_080329_064.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2128-07-11-21-at-09h26m45s-_MG_5827.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2622-jmac_MG_5763.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2013-MB_060909_009.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0993-jmac_MG_0770.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4724-Duggan_090319_5593.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0690-_MG_6397.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4580-Duggan_081024_2311.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3756-jmac_MG_5949.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4102-07-06-30-at-11h38m56s-_MG_1997.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0459-jmac_MG_0866.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0207-jmac_MG_7695.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2912-20051006_200556__MG_0421.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0556-07-08-10-at-19h09m19s-N0000107.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4327-Duggan_080127_4972.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0623-dvf_031.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3233-MB_070908_021.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1430-07-11-23-at-21h05m16s-_MG_6685.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4472-Duggan_090504_9026.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1269-jmac_MG_5885.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2989-jmac_MG_5969.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3686-jmac_MG_0353.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0609-_MG_3231.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0103-jmac_MG_1394.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2732-20051225_162540__MG_1358.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4348-Duggan_080412_6029.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4264-Duggan_090428_8025.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4318-Duggan_090321_5920.dng diff --git a/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D_test.txt b/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..fec5026fe56e3fccd2439245f50f5a5f0c26b9ec --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D_test.txt @@ -0,0 +1,127 @@ +a3552-MB_080629_691 +a1647-MB_060909_078 +a3389-dvf_004 +a1593-_MG_3087 +a3377-_MG_7893 +a1577-07-06-28-at-12h42m19s-_MG_1822 +a0752-20061213_134314__MG_3708 +a4244-Duggan_090504_8959 +a1054-07-06-27-at-13h59m14s-_MG_1801 +a3386-jmac_MG_7601 +a2334-jmac_MG_0701 +a1592-07-06-01-at-14h20m21-s_MG_1284 +a1688-MB_070908_012 +a4591-Duggan_080411_5940 +a2637-060814_062852__MG_6415 +a2969-MB_060909_061 +a1485-dvf_042 +a3177-07-11-17-at-08h19m16s-_MG_4757 +a4433-Duggan_090504_8957 +a3119-07-11-05-at-23h49m11s-_MG_4105 +a4480-Duggan_090201_4896 +a3687-07-06-30-at-13h15m14s-_MG_2022 +a4447-Duggan_090321_5856 +a0725-07-12-02-at-10h25m22s-_MG_8796 +a4933-Duggan_090428_8040 +a0809-jmac_MG_5754 +a0941-MB_071013_001 +a0480-jmac_MG_0549 +a0347-07-08-11-at-18h17m09s-N0000221 +a4589-Duggan_090426_7840 +a0192-_MG_7063 +a0144-07-11-20-at-16h38m08s-_MG_5725 +a3307-jmac_MG_1001 +a4631-Duggan_080811_0493 +a3180-07-08-11-at-18h19m52s-N0000238 +a1833-kme_138 +a1996-07-10-06-at-15h02m12s-_MG_3767 +a2570-jmac_MG_5734 +a4597-Duggan_090226_5190 +a3671-jmac_MG_6191 +a3735-_MG_7825 +a4745-Duggan_090330_6275 +a3434-jmac_MG_5831 +a0854-MB_080329_060 +a4392-Duggan_090331_6554 +a2692-060824_103042__MG_6710 +a2380-20060208_203256__MG_2849 +a2278-20080508_074100__MG_9540 +a4487-Duggan_090322_5971 +a1392-08-05-25-at-15h08m39s-_MG_9578 +a3400-07-11-04-at-17h36m14s-_MG_4004 +a3454-07-11-28-at-15h56m18s-_MG_7736 
+a2847-dvf_040 +a1826-jmac_MG_1122 +a0084-_MG_1610 +a4306-Duggan_090127_4836 +a3889-jmac_MG_1181 +a1565-dvf_015 +a4064-07-12-02-at-16h23m18s-_MG_9020 +a0621-20080514_110501__MG_9940 +a1175-kme_007 +a4230-Duggan_090426_7798 +a0016-jmac_MG_0795 +a1335-07-11-26-at-14h48m48s-_MG_7086 +a3156-20080514_101818__MG_9892 +a0871-07-09-22-at-20h08m29s-_MG_3610 +a4996-Duggan_090426_7783 +a1989-MB_070908_016 +a3791-_MG_1498 +a4186-dvf_039 +a2299-20060617_172354__MG_8709 +a4431-Duggan_090330_6282 +a0626-20070618_190911__MG_8400 +a3935-07-11-19-at-10h53m45s-_MG_4961 +a2511-_MG_3149 +a3185-07-11-30-at-15h00m26s-_MG_8241 +a0727-07-11-11-at-11h53m38s-_MG_4569 +a1367-07-11-11-at-11h49m06s-_MG_4547 +a1509-dvf_034 +a1816-07-12-02-at-16h13m34s-_MG_8986 +a4462-Duggan_090331_6525 +a2207-jmac_MG_6896 +a3202-07-06-02-at-13h18m43-s_MG_1425 +a3212-_MG_1504 +a0311-jmac_MG_0128 +a1486-07-11-25-at-10h58m01s-_MG_6923 +a0879-jmac_MG_0200 +a3763-07-11-23-at-19h43m03s-_MG_6657 +a4097-20080623_at_14h52m36__MG_9904 +a3691-_MG_6475 +a4769-Duggan_090320_5608 +a1406-jmac_MG_5303 +a3947-jmac_MG_1444 +a1043-_MG_0366 +a2417-20060207_192034__MG_2638 +a2193-20090128_at_16h44m24__MG_4134 +a2144-jmac_MG_0288 +a4595-Duggan_090503_8713 +a2459-_MG_7774 +a2572-MB_080329_064 +a2128-07-11-21-at-09h26m45s-_MG_5827 +a2622-jmac_MG_5763 +a2013-MB_060909_009 +a0993-jmac_MG_0770 +a4724-Duggan_090319_5593 +a0690-_MG_6397 +a4580-Duggan_081024_2311 +a3756-jmac_MG_5949 +a4102-07-06-30-at-11h38m56s-_MG_1997 +a0459-jmac_MG_0866 +a0207-jmac_MG_7695 +a2912-20051006_200556__MG_0421 +a0556-07-08-10-at-19h09m19s-N0000107 +a4327-Duggan_080127_4972 +a0623-dvf_031 +a3233-MB_070908_021 +a1430-07-11-23-at-21h05m16s-_MG_6685 +a4472-Duggan_090504_9026 +a1269-jmac_MG_5885 +a2989-jmac_MG_5969 +a3686-jmac_MG_0353 +a0609-_MG_3231 +a0103-jmac_MG_1394 +a2732-20051225_162540__MG_1358 +a4348-Duggan_080412_6029 +a4264-Duggan_090428_8025 +a4318-Duggan_090321_5920 diff --git a/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D_train.txt 
b/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D_train.txt new file mode 100644 index 0000000000000000000000000000000000000000..3d9e9f12058e136ff2d3416c92be29ba41689206 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/data/Canon_EOS_5D_train.txt @@ -0,0 +1,650 @@ +a3674-jmac_MG_0392 +a1902-_MG_7217 +a0023-07-06-02-at-15h06m48-s_MG_1489 +a0282-20060619_125715__MG_9197 +a2314-20080426_111248__MG_9227 +a2113-20070619_135552__MG_8411 +a3057-dvf_002 +a0121-jmac_MG_7813 +a1416-07-10-06-at-16h48m40s-_MG_3892 +a3243-07-11-11-at-11h52m02s-_MG_4558 +a4814-Duggan_080114_4419 +a4966-Duggan_090124_4744 +a4558-Duggan_080410_5878 +a2125-20080710_001754__MG_9208 +a4163-MB_070908_098 +a3644-jmac_MG_5959 +a0704-jmac_MG_0617 +a4500-Duggan_090428_8065 +a4211-Duggan_090305_5296 +a4592-Duggan_090331_6589 +a1382-MB_070908_022 +a4542-Duggan_080411_6019 +a1451-07-06-28-at-12h47m34s-_MG_1828 +a4715-Duggan_090503_8760 +a4395-Duggan_090503_8734 +a4968-Duggan_080819_1132 +a4849-Duggan_090426_7764 +a2182-_MG_1566 +a3719-07-11-29-at-15h43m28s-_MG_8075 +a0525-MB_070908_076 +a0915-MB_060708_204 +a4644-Duggan_090214_5136 +a4086-jmac_MG_7933 +a1268-jmac_MG_5989 +a4227-Duggan_090504_8946 +a1061-jmac_MG_0244 +a0619-20081019_at_01h22m56__MG_3327 +a3368-jmac_MG_0786 +a3869-_MG_7067 +a4517-Duggan_090406_7318 +a1732-07-11-11-at-12h06m55s-_MG_4594 +a1081-jmac_MG_6226 +a2565-07-07-17-at-23h18m11s-_MG_2364 +a1779-07-08-11-at-14h58m37s-N0000114 +a4197-_MG_6428 +a4579-Duggan_090212_5073 +a0203-07-06-01-at-15h10m04-s_MG_1303 +a1621-jmac_MG_0344 +a0238-dvf_024 +a3666-_MG_6404 +a3658-jmac_MG_0418 +a2881-20070514_162430__MG_7345 +a4708-Duggan_090323_6142 +a0326-jmac_MG_7785 +a4862-jmac_MG_1010 +a0356-07-11-26-at-16h05m54s-_MG_7171 +a4063-07-11-25-at-18h26m49s-_MG_7002 +a4560-Duggan_090405_7058 +a0740-dvf_019 +a1559-jmac_MG_0089 +a0894-dvf_001 +a0884-MB_080329_065 +a3199-20081026_at_06h13m48__MG_3460 +a1205-07-06-02-at-11h36m32-s_MG_1421 +a2892-MB_060708_226 +a1546-MB_080329_066 
+a1817-07-06-30-at-12h38m43s-_MG_2006 +a4058-MB_080329_056 +a1952-07-12-02-at-12h24m10s-_MG_8944 +a2285-07-11-29-at-17h23m11s-_MG_8171 +a4704-Duggan_090503_8779 +a0811-20051224_165428__MG_0953 +a3751-07-11-04-at-18h05m15s-_MG_4020 +a0835-MB_080329_061 +a2327-dvf_032 +a0454-08-05-25-at-12h33m47s-_MG_9489 +a3282-_MG_6990 +a3089-07-11-22-at-11h21m46s-_MG_6278 +a2928-jmac_MG_0176 +a0043-07-11-27-at-12h09m46s-_MG_7307 +a1777-jmac_MG_0499 +a1935-MB_070908_090 +a3771-07-06-01-at-13h03m06-s_MG_1256 +a4345-Duggan_080411_5976 +a3625-07-11-11-at-10h53m52s-_MG_4480 +a3242-20080623_at_15h18m22__MG_9919 +a4368-Duggan_090321_5857 +a0919-07-10-06-at-17h40m18s-_MG_3916 +a4107-dvf_018 +a4088-dvf_041 +a1901-_MG_0357 +a2104-07-08-11-at-16h50m03s-N0000154 +a1775-dvf_006 +a1317-20061213_150840__MG_3797 +a1006-_MG_7950 +a0535-jmac_MG_6029 +a0622-jmac_MG_5852 +a0754-07-11-22-at-09h58m34s-_MG_6189 +a3670-jmac_MG_5917 +a4928-Duggan_090127_4793 +a4451-Duggan_080821_1263 +a3623-20051220_201437__MG_9239 +a1352-07-11-04-at-17h58m48s-_MG_4012 +a4860-Duggan_090504_8801 +a0997-jmac_MG_7637 +a4397-Duggan_080819_1155 +a1864-_MG_6384 +a4271-Duggan_090227_5232 +a2898-dvf_011 +a2159-jmac_MG_6361 +a1612-MB_070908_015 +a0104-dvf_003 +a1178-jmac_MG_6061 +a0348-07-07-07-at-09h42m42s-_MG_2151 +a4502-Duggan_090116_4368 +a0980-_MG_0509 +a4812-Duggan_090428_8086 +a2711-MB_070908_106 +a0381-20070929_134540__MG_0110 +a3036-20090127_at_17h54m33__MG_4036 +a1400-MB_070908_014 +a0093-MB_070908_038 +a0764-MB_070908_088 +a1511-jmac_MG_6757 +a0958-jmac_MG_0737 +a2452-dvf_014 +a1802-061006_014724__MG_6933 +a3345-20080514_105211__MG_9917 +a4357-Duggan_090124_4645 +a0218-kme_181 +a4881-Duggan_090405_7225 +a2793-MB_070519_036 +a0814-MB_070908_062 +a2885-20081207_at_23h26m15__MG_3818 +a3829-07-06-02-at-05h48m48-s_MG_1315 +a4974-Duggan_090226_5202 +a1603-MB_070908_037 +a1199-jmac_MG_5873 +a4831-Duggan_090406_7270 +a3460-20080514_105637__MG_9928 +a1491-dvf_025 +a2951-jmac_MG_5613 +a4714-Duggan_080613_8704 +a3273-jmac_MG_0703 
+a2588-jmac_MG_6874 +a1853-07-11-28-at-17h03m55s-_MG_7857 +a4608-Duggan_080413_6147 +a0020-jmac_MG_6225 +a2435-_MG_8018 +a1452-20080809_at_14h52m39__MG_0081 +a3339-_MG_7202 +a1413-07-11-21-at-16h37m24s-_MG_5983 +a1399-jmac_MG_7777 +a3566-07-12-01-at-12h52m44s-_MG_8540 +a0601-07-11-26-at-12h45m09s-_MG_7055 +a0529-jmac_MG_0267 +a2599-jmac_MG_0414 +a0335-jmac_MG_6437 +a2710-jmac_MG_7731 +a3511-jmac_MG_0542 +a2546-_MG_7763 +a4220-Duggan_090305_5359 +a3020-07-09-16-at-11h03m47s-_MG_3425 +a3591-07-11-30-at-16h19m33s-_MG_8384 +a4335-Duggan_090123_4520 +a2669-jmac_MG_0238 +a0047-07-11-18-at-00h05m40s-_MG_4882 +a4963-Duggan_090428_8067 +a1523-jmac_MG_0452 +a1940-jmac_MG_6206 +a2363-07-11-19-at-14h03m38s-_MG_5078 +a0646-20070826_182055__MG_9177 +a4899-Duggan_090330_6257 +a2006-07-06-02-at-06h00m56-s_MG_1324 +a4399-Duggan_080410_5879 +a1890-07-10-06-at-15h32m38s-_MG_3803 +a1973-060914_170620__MG_6779 +a2355-MB_080329_058 +a1734-07-11-11-at-11h44m17s-_MG_4537 +a3729-07-11-24-at-21h39m19s-_MG_6853 +a0077-20080627_at_14h31m24__MG_0714 +a1369-jmac_MG_5781 +a2939-20080702_at_00h12m52__MG_3193 +a4954-Duggan_080312_5489 +a0092-jmac_MG_7673 +a1760-07-06-01-at-13h01m06-s_MG_1253 +a3603-MB_080329_055 +a1338-_MG_1523 +a0501-_MG_7370 +a4052-20060620_165511__MG_9535 +a0715-060812_182920__MG_6255 +a2923-20060619_195834__MG_9248 +a1261-07-12-01-at-16h14m01s-_MG_8746 +a4565-Duggan_090504_9023 +a4953-Duggan_090330_6272 +a3797-jmac_MG_0496 +a1483-jmac_MG_7755 +a3000-_MG_7776 +a4931-Duggan_090428_8054 +a1125-07-11-25-at-10h33m49s-_MG_6884 +a0323-07-06-27-at-13h56m27s-_MG_1782 +a1471-07-07-15-at-23h51m48s-_MG_2179 +a4759-Duggan_090305_5342 +a4313-Duggan_080413_6158 +a2362-20051223_084128__MG_0542 +a4092-07-12-03-at-09h35m54s-_MG_9192 +a3841-07-12-01-at-13h04m21s-_MG_8637 +a0442-jmac_MG_1461 +a0183-07-06-02-at-07h15m59-s_MG_1347 +a4755-Duggan_090323_6173 +a4129-MB_070908_033 +a3474-jmac_MG_1125 +a3252-07-12-01-at-16h06m04s-_MG_8716 +a0944-20061213_132310__MG_3646 
+a2349-07-11-20-at-08h06m58s-_MG_5505 +a1433-jmac_MG_0303 +a0707-07-12-01-at-15h31m07s-_MG_8670 +a4409-Duggan_090503_8738 +a1925-_MG_7836 +a1363-MB_060909_005 +a4904-Duggan_081024_2201 +a0638-20061008_092601__MG_0024 +a1515-jmac_MG_1266 +a2451-07-07-17-at-00h36m15s-_MG_2335 +a3223-MB_080627_677 +a4238-Duggan_090320_5609 +a2725-07-11-21-at-16h55m39s-_MG_5992 +a2361-07-06-01-at-13h15m17-s_MG_1259 +a4494-Duggan_081010_1923 +a4985-jmac_MG_7412 +a4553-Duggan_090331_6590 +a3720-jmac_MG_0851 +a3843-20061213_150009__MG_3787 +a0681-060811_183554__MG_6223 +a1091-07-07-04-at-04h03m08s-_MG_2094 +a3784-07-10-06-at-16h08m07s-_MG_3859 +a1842-07-11-21-at-08h59m04s-_MG_5807 +a4736-Duggan_090503_8761 +a0981-jmac_MG_1360 +a1275-20080809_at_14h45m40__MG_0065 +a1855-jmac_MG_0383 +a4628-Duggan_090428_8108 +a2999-jmac_MG_8001 +a4740-Duggan_080120_4782 +a4121-07-11-22-at-06h50m14s-_MG_6000 +a3111-_MG_2968 +a4007-_MG_7167 +a0470-_MG_7801 +a4819-Duggan_090330_6230 +a1847-20051222_141305__MG_0341 +a4779-Duggan_090323_6115 +a3465-20060619_114622__MG_9153 +a4742-Duggan_090331_6517 +a1994-20080708_at_13h44m41__MG_4350 +a3911-07-07-01-at-10h50m55s-_MG_2028 +a0441-jmac_MG_5386 +a3039-07-06-02-at-10h16m04-s_MG_1405 +a4212-Duggan_090321_5925 +a2837-07-12-02-at-11h35m49s-_MG_8848 +a2089-jmac_MG_1391 +a4386-Duggan_090124_4632 +a4482-Duggan_090503_8712 +a1787-_MG_3277 +a4470-Duggan_090123_4566 +a0019-jmac_MG_0653 +a4935-Duggan_090312_5580 +a4855-Duggan_090323_6207 +a0351-MB_070908_006 +a3442-MB_060909_003 +a1899-jmac_MG_1320 +a4408-Duggan_080411_5973 +a1804-MB_060909_002 +a4598-Duggan_090305_5297 +a0853-20070923_073247__MG_9686 +a3551-MB_080627_668 +a4493-Duggan_090322_6041 +a1149-_MG_6531 +a0708-20070210_164509__MG_6786 +a0594-_MG_0406 +a2471-_MG_6887 +a3648-07-06-01-at-12h59m03-s_MG_1251 +a1076-07-11-20-at-07h21m04s-_MG_5402 +a3256-jmac_MG_0351 +a3697-07-11-24-at-16h05m35s-_MG_6729 +a3079-_MG_7179 +a4232-Duggan_090323_6181 +a3838-jmac_MG_7919 +a0808-kme_147 +a0083-jmac_MG_0082 +a2831-_MG_3139 
+a4221-Duggan_080126_4855 +a1758-07-07-23-at-23h39m31s-_MG_2497 +a1084-jmac_MG_5972 +a1498-07-06-02-at-14h08m33-s_MG_1456 +a0030-_MG_7844 +a4509-Duggan_090504_8967 +a2273-jmac_MG_0479 +a4231-Duggan_080326_5786 +a4601-Duggan_090331_6495 +a4443-Duggan_090503_8691 +a1122-20080622_at_13h47m40__MG_9874 +a1720-07-06-01-at-14h14m20-s_MG_1282 +a3975-jmac_MG_5721 +a1465-07-07-17-at-00h30m32s-_MG_2247 +a3660-jmac_MG_8044 +a4662-Duggan_080115_4605 +a1259-jmac_MG_0385 +a2133-20060617_140539__MG_8570 +a4751-Duggan_080819_1030 +a2812-07-11-30-at-11h07m15s-_MG_8208 +a2848-MB_060708_292 +a4906-Duggan_090210_5028 +a2208-_MG_6963 +a4888-Duggan_081024_2295 +a4468-Duggan_081122_3260 +a2005-07-11-20-at-17h05m05s-_MG_5779 +a3870-MB_070908_122 +a3832-20060613_091536__MG_7749 +a2224-MB_070908_032 +a3319-MB_070908_080 +a3409-20080509_070806__MG_9695 +a4448-Duggan_080119_4778 +a4199-jmac_MG_5003 +a1424-kme_185 +a4548-Duggan_080130_5029 +a4584-Duggan_080309_5404 +a4188-_MG_1604 +a0635-20060613_112054__MG_7862 +a0605-_MG_7197 +a0440-MB_070520_107 +a3920-jmac_MG_0682 +a1131-dvf_020 +a4351-Duggan_090428_8083 +a3822-07-11-21-at-09h53m21s-_MG_5852 +a1744-jmac_MG_0369 +a4009-jmac_MG_7717 +a3715-_MG_7773 +a3563-07-11-30-at-15h55m08s-_MG_8326 +a4760-Duggan_081024_2178 +a2836-jmac_MG_0389 +a3631-MB_070908_140 +a1479-jmac_MG_8030 +a4246-Duggan_090330_6226 +a4471-Duggan_090321_5859 +a0801-07-08-11-at-16h32m03s-_MG_3277 +a0803-20081226_at_17h04m14__MG_3930 +a0222-NKIM_MG_2635 +a4636-Duggan_080216_5303 +a3371-07-12-01-at-11h32m58s-_MG_8498 +a3831-jmac_MG_5861 +a4546-Duggan_081010_1913 +a1119-MB_070908_170 +a2597-060824_122554__MG_6756 +a2105-jmac_MG_7930 +a1697-07-12-01-at-11h12m05s-_MG_8492 +a3296-20080509_071308__MG_9701 +a3067-_MG_1539 +a1449-MB_060909_016 +a3149-20080708_at_13h43m33__MG_4340 +a3650-07-06-01-at-13h48m38-s_MG_1270 +a4308-Duggan_090209_4996 +a4839-Duggan_090321_5908 +a2102-jmac_MG_7845 +a0917-07-06-01-at-14h40m08-s_MG_1293 +a0411-07-11-21-at-13h12m13s-_MG_5935 +a4696-Duggan_080323_5686 
+a1525-jmac_MG_0646 +a0632-07-06-01-at-12h50m26-s_MG_1230 +a4735-Duggan_090307_5553 +a1980-07-11-08-at-01h16m15s-_MG_4131 +a4151-dvf_026 +a2067-dvf_013 +a4108-MB_080329_057 +a1132-20061213_164642__MG_6076 +a0982-jmac_MG_1105 +a0784-_MG_7693 +a4886-Duggan_090503_8792 +a1917-jmac_MG_5620 +a0840-07-11-19-at-16h20m11s-_MG_5348 +a4750-Duggan_090504_9001 +a2230-20060616_082451__MG_8195 +a0636-07-11-27-at-10h02m30s-_MG_7226 +a0825-_MG_7225 +a2560-MB_070908_079 +a2129-jmac_MG_1342 +a0504-jmacIMG_6809 +a1070-_MG_6547 +a2550-_MG_3058 +a4990-jmac_MG_1139 +a0313-_MG_7253 +a4586-Duggan_090428_8010 +a3152-07-07-04-at-06h23m15s-_MG_2099 +a1620-20080204_113002__MG_0583 +a0242-07-06-01-at-12h55m36-s_MG_1241 +a1242-07-10-27-at-16h31m23s-_MG_3949 +a0869-20080629_at_19h10m02__MG_1342 +a2252-jmac_MG_6404 +a3018-jmac_MG_0481 +a2773-jmac_MG_4982 +a0004-jmac_MG_1384 +a4120-_MG_7211 +a3051-07-06-01-at-13h01m22-s_MG_1255 +a2900-MB_070908_087 +a1757-dvf_023 +a4878-Duggan_080207_5155 +a4540-Duggan_080411_5948 +a2277-07-11-24-at-15h53m42s-_MG_6720 +a1821-07-11-19-at-14h41m50s-_MG_5129 +a2828-jmac_MG_0100 +a3559-jmac_MG_0205 +a2158-jmac_MG_7657 +a1797-jmac_MG_6883 +a4703-Duggan_090426_7850 +a2764-07-11-19-at-13h52m09s-_MG_5054 +a1423-20080624_at_19h53m25__MG_0078 +a4965-Duggan_090405_7028 +a2085-20051009_104656__MG_0587 +a4239-Duggan_080114_4429 +a4511-Duggan_090504_9050 +a2095-07-11-22-at-08h32m36s-_MG_6015 +a4605-Duggan_090108_4208 +a0042-060813_155838__MG_6361 +a1656-dvf_005 +a2225-jmac_MG_0540 +a3647-MB_070908_094 +a4524-Duggan_080326_5805 +a4700-Duggan_090406_7321 +a1188-MB_080329_068 +a1882-07-11-23-at-17h04m28s-_MG_6574 +a1265-20051225_163547__MG_1396 +a2824-dvf_035 +a4432-Duggan_081114_3124 +a2664-20081226_at_17h48m43__MG_3997 +a0032-jmac_MG_0266 +a1730-20080809_at_18h39m49__MG_0130 +a0358-MB_080329_074 +a2731-07-12-01-at-17h40m41s-_MG_8785 +a0118-20051223_103622__MG_0617 +a4298-Duggan_090504_9090 +a3473-jmac_MG_0161 +a4898-Duggan_090212_5075 +a3685-MB_060909_011 +a2964-MB_070908_020 
+a1610-08-11-09-at-22h58m42s-_MG_3590 +a3482-jmac_MG_1250 +a0418-07-11-19-at-13h26m20s-_MG_5018 +a3026-_MG_7180 +a1861-jmac_MG_6054 +a2358-jmac_MG_0546 +a4411-Duggan_090131_4857 +a4863-Duggan_080115_4511 +a0540-jmac_MG_5988 +a1263-20071122_142540__MG_0314 +a1690-061202_195438__MG_9731 +a2822-jmac_MG_1389 +a1330-20080625_at_00h06m29__MG_0169 +a2789-jmac_MG_0522 +a0259-dvf_029 +a3043-jmac_MG_6976 +a1795-jmac_MG_0165 +a2526-20061015_103622__MG_0042 +a4467-Duggan_090426_7873 +a2162-kme_014 +a3080-jmac_MG_1235 +a0038-MB_070908_135 +a4564-Duggan_090406_7253 +a3977-07-11-05-at-22h45m52s-_MG_4073 +a4463-Duggan_081024_2100 +a4421-Duggan_090214_5129 +a4438-Duggan_090330_6313 +a3292-jmac_MG_4914 +a2926-MB_070908_110 +a1790-07-06-28-at-12h47m57s-_MG_1831 +a4722-Duggan_090406_7315 +a3892-07-11-11-at-11h46m34s-_MG_4544 +a1963-jmac_MG_1112 +a0091-jmac_MG_4959 +a2772-jmac_MG_7411 +a2205-jmac_MG_5745 +a3764-20060618_093109__MG_8792 +a2180-dvf_007 +a4550-Duggan_090428_8066 +a1743-07-06-01-at-14h31m58-s_MG_1288 +a2529-07-06-02-at-06h09m13-s_MG_1328 +a0918-_MG_1507 +a2338-MB_080628_696 +a2245-20060508_141031__MG_6785 +a1564-MB_080329_054 +a1487-20081226_at_16h52m49__MG_3920 +a0539-jmac_MG_0220 +a4670-Duggan_080115_4464 +a3029-07-11-17-at-07h41m24s-_MG_4654 +a4665-Duggan_090504_8932 +a3849-MB_070908_003 +a1755-NKIM_MG_2646 +a4096-jmac_MG_0095 +a1072-jmac_MG_6892 +a3316-20051225_163230__MG_1390 +a4624-Duggan_090322_5962 +a1912-MB_070908_028 +a0146-07-11-23-at-10h54m29s-_MG_6544 +a2395-07-11-28-at-11h57m18s-_MG_7567 +a1915-07-11-27-at-19h34m28s-_MG_7389 +a4793-Duggan_090330_6227 +a3123-20070930_191159__MG_0168 +a2427-jmac_MG_5488 +a2329-07-06-02-at-06h10m57-s_MG_1331 +a0185-07-07-06-at-20h08m44s-_MG_2130 +a3531-07-06-30-at-04h02m08s-_MG_1936 +a1625-20081226_at_17h39m38__MG_3987 +a3024-07-08-11-at-16h35m32s-N0000142 +a0639-dvf_010 +a4654-Duggan_090221_5150 +a0322-kme_016 +a0406-_MG_7943 +a4998-Duggan_080210_5246 +a1887-_MG_7973 +a1232-07-11-04-at-18h21m34s-_MG_4038 
+a4053-07-09-16-at-11h25m31s-_MG_3439 +a3055-20051223_105419__MG_0634 +a1206-07-11-11-at-10h31m23s-_MG_4451 +a4028-060810_105728__MG_6096 +a4761-Duggan_090504_8960 +a3320-jmac_MG_4870 +a0786-MB_060708_253 +a0239-_MG_1622 +a4940-MB_070908_065 +a3204-MB_080329_075 +a3859-_MG_3076 +a1771-20090127_at_18h47m42__MG_4085 +a2275-07-06-02-at-14h19m38-s_MG_1471 +a4865-Duggan_090331_6584 +a0514-jmac_MG_7749 +a4676-Duggan_090322_5973 +a3888-07-11-26-at-15h06m23s-_MG_7098 +a3007-07-11-28-at-10h38m19s-_MG_7488 +a2575-jmac_MG_7650 +a0488-jmac_MG_1405 +a1998-20080426_112951__MG_9254 +a0275-07-11-24-at-16h27m12s-_MG_6758 +a4918-Duggan_080324_5694 +a4461-_MG_7166 +a2884-jmac_MG_0586 +a2026-dvf_008 +a2465-20051009_143101__MG_0625 +a2882-060805_172412__MG_5993 +a2084-jmac_MG_5592 +a3279-20060620_171222__MG_9575 +a2203-kme_146 +a0354-07-07-17-at-23h28m36s-_MG_2372 +a4265-Duggan_080411_5930 +a1906-jmac_MG_4886 +a2678-07-11-30-at-15h00m07s-_MG_8238 +a0865-20080515_075226__MG_9983 +a3354-MB_070908_069 +a4763-Duggan_080203_5123 +a4416-Duggan_090428_8159 +a1290-_MG_7809 +a0486-jmac_MG_0791 +a0709-07-12-01-at-17h01m35s-_MG_8762 +a2212-jmac_MG_6333 +a0656-20070505_100410__MG_6820 +a1320-MB_060708_069 +a3264-jmac_MG_5785 +a4658-Duggan_090201_4929 +a0620-jmac_MG_6253 +a2965-07-07-16-at-00h22m25s-_MG_2198 +a3713-07-11-20-at-07h38m43s-_MG_5448 +a1818-07-06-28-at-13h38m34s-_MG_1888 +a3125-07-06-02-at-14h20m02-s_MG_1472 +a1301-07-11-24-at-14h40m51s-_MG_6711 +a4394-Duggan_090127_4837 +a1388-jmac_MG_6009 +a1009-jmac_MG_7831 +a4249-Duggan_090322_6001 +a0765-07-06-02-at-14h28m55-s_MG_1477 +a3421-20080630_at_16h14m34__MG_1769 +a0076-jmac_MG_5736 +a1183-07-07-01-at-11h01m48s-_MG_2035 +a2971-jmac_MG_1092 +a4826-Duggan_080821_1199 +a1118-jmac_MG_1307 +a3002-MB_060708_203 +a2808-20080516_072208__MG_0018 +a1103-jmac_MG_0296 +a2379-07-12-01-at-11h06m10s-_MG_8476 +a3376-MB_060909_057 +a2184-07-06-30-at-05h41m51s-_MG_1954 +a1568-_MG_6479 +a0148-07-07-16-at-23h50m49s-_MG_2214 +a4791-Duggan_090131_4873 
+a2723-07-07-23-at-22h40m05s-_MG_2491 +a4455-Duggan_080106_4325 +a0797-07-10-06-at-08h42m41s-_MG_3745 +a1364-20060209_113655__MG_2902 +a0892-jmac_MG_0130 +a0423-07-06-02-at-07h35m36-s_MG_1355 +a4105-07-11-26-at-16h02m57s-_MG_7151 +a3693-07-09-22-at-20h22m54s-_MG_3623 +a1346-20061213_142422__MG_3757 +a1870-jmac_MG_6385 +a4645-Duggan_090426_7758 +a4806-Duggan_090207_4948 +a0386-jmac_MG_0520 +a4124-20080709_at_10h04m23__MG_4561 +a4768-Duggan_090330_6266 +a1277-dvf_022 +a4225-Duggan_081109_3031 +a3540-07-12-02-at-14h05m14s-_MG_8949 +a1984-MB_060909_014 +a0719-jmac_MG_5118 +a2850-jmac_MG_5803 +a4969-Duggan_080819_1109 +a2616-07-12-01-at-11h09m15s-_MG_8482 +a1955-07-11-22-at-10h50m10s-_MG_6213 +a3710-07-11-20-at-16h52m05s-_MG_5742 +a0383-MB_060909_028 +a0021-07-11-28-at-09h22m57s-_MG_7427 +a1708-_MG_7164 +a1768-07-08-11-at-17h54m02s-_MG_3365 +a2927-jmac_MG_5844 +a4126-_MG_1739 +a0920-dvf_012 +a1266-20060206_145139__MG_2286 +a0336-07-08-11-at-16h57m13s-_MG_3305 +a4510-Duggan_090305_5511 +a4528-Duggan_090209_4971 +a4685-Duggan_080411_5945 +a0617-20060619_094244__MG_9140 +a3688-jmac_MG_1424 +a3882-20051225_165429__MG_1427 +a0900-jmac_MG_7376 +a0781-20080627_at_18h09m45__MG_0793 +a1328-20080630_at_22h44m56__MG_1921 +a4184-jmac_MG_5507 +a4562-_MG_7033 +a3085-jmac_MG_8019 +a4642-Duggan_080324_5701 +a4442-Duggan_080629_9284 +a3094-jmac_MG_0621 +a4835-Duggan_090426_7891 +a3755-07-11-19-at-15h49m11s-_MG_5217 +a1588-MB_080329_053 +a3773-jmac_MG_0380 +a4861-Duggan_090123_4543 +a4339-Duggan_090111_4244 +a0263-07-11-20-at-16h57m56s-_MG_5753 +a1700-07-11-22-at-13h30m23s-_MG_6305 +a2152-jmac_MG_7721 +a3745-jmac_MG_5066 diff --git a/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700.txt b/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700.txt new file mode 100644 index 0000000000000000000000000000000000000000..b1a0943ce8be3767c5059e6179aa5a7fc3b0b727 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700.txt @@ -0,0 +1,487 @@ 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2754-_DSC7455.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3390-dgw_070.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4801-_DGW0327.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1085-_DSC6188.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3706-dgw_065.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3837-dgw_100.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2686-dgw_072.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1747-dgw_046.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3800-dgw_090.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4389-_DGW7865.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3582-dgw_015.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3925-_DSC6409.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4110-dgw_069.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4925-_DGW7848.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2189-dgw_087.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1807-_DGW6310.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3810-_DGW6236.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1969-_DGW6290.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0821-dgw_037.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0743-_DSC6146.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3886-_DGW6415.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2791-_DGW6374.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3183-_DSC5701.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4453-_DGW0267.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0510-_DGW6409.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4381-_DGW9028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1015-_DSC5571.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1872-_DSC5412.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0195-_DGW6246.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0455-_DSC4605.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0822-dgw_028.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2651-dgw_017.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3355-_DGW6412.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2766-_DGW6347.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4829-_DGW7882.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3068-dgw_040.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4948-_DGW7855.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0909-_DGW6284.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2234-_DGW6319.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4218-_DGW6302.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0412-_DGW6297.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0597-dgw_012.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4333-_DGW0255.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4076-_DGW6244.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0928-_DSC3894.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0938-_DGW6281.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2403-dgw_095.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3235-dgw_117.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3006-_DGW6223.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0190-dgw_034.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4850-_DGW9453.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4955-_DGW0261.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3048-_DGW6350.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3066-_DGW6324.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2166-dgw_122.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2485-_DGW6336.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a3362-dgw_110.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0991-_DSC5400.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2016-_DSC9836.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1390-_DGW6414.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0177-dgw_078.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4388-_DGW0257.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2111-_DSC5607.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0887-_DSC5906.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2915-_DSC7402.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3099-_DGW6276.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1282-_DGW6370.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3480-dgw_151.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1337-_DGW6225.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0035-dgw_048.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1224-_DGW6318.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4483-_DGW0262.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0761-_DGW6343.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0910-_DGW6379.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1287-dgw_063.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0392-_DGW6346.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3041-_DGW6232.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1481-_DGW6386.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1088-dgw_155.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0487-_DSC5455.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2140-dgw_021.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0064-_DSC7889.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4029-_DGW6245.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4459-_DGW0329.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1501-_DSC7449.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4190-dgw_050.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3907-_DGW6354.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4902-_DGW0251.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4950-_DGW0249.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3836-dgw_044.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1504-dgw_018.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0304-dgw_137.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4939-_DGW0287.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3423-_DGW6316.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1062-_DGW6315.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0543-_DGW6252.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2612-dgw_115.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3200-dgw_133.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2200-dgw_031.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3130-_DGW6351.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4684-_DGW0286.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3893-_DGW6301.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1033-_DSC4500.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4353-_DGW0322.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3500-dgw_099.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2444-dgw_032.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0225-dgw_127.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3556-_DGW6389.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3894-_DGW6435.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0046-dgw_101.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2557-_DGW6396.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4987-_DGW0297.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1241-_DSC6418.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2961-_DSC9017.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0860-dgw_049.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2119-dgw_009.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0675-_DGW6371.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4243-_DGW9580.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1560-dgw_013.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4378-_DGW0272.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3232-_DGW6397.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3356-_DSC9981.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4469-_DGW0243.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2739-_DGW6416.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2366-_DGW6298.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4581-_DGW0256.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3998-dgw_041.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2484-dgw_011.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3168-_DGW6358.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0024-_DSC8932.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1297-_DGW6304.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3699-_DGW6404.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0766-_DGW6227.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4385-_DGW9650.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1142-_DGW6357.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0634-_DGW6340.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0608-_DGW6367.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1383-_DGW6387.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2698-dgw_106.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0574-_DSC6152.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4400-_DGW9653.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4039-dgw_076.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0524-_DGW6317.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3276-dgw_159.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4545-_DGW9669.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4979-_DGW0341.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4362-_DGW7864.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3411-_DGW6385.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4837-_DGW7872.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4200-_DGW6341.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3690-_DGW6402.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2211-dgw_047.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4142-_DGW6275.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4245-_DGW9109.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1856-_DGW6328.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4022-_DGW6330.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3572-_DGW6384.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1976-_DSC4492.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0932-dgw_088.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0702-dgw_091.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4383-_DGW9644.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1711-_DGW6251.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3811-_DGW6261.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4648-_DGW0260.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4419-_DGW0269.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1484-_DSC4591.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2017-dgw_045.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3805-_DGW6339.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2520-dgw_143.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3034-_DGW6331.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3215-dgw_121.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4478-_DSC9389.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3148-dgw_107.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0217-_DGW6260.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2621-_DSC5468.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4233-_DGW9491.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0650-dgw_060.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3958-_DSC3890.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1829-_DGW6334.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2390-_DSC5419.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1248-dgw_081.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2369-_DGW6352.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0478-dgw_014.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3140-dgw_096.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1378-dgw_039.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1130-dgw_128.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4119-_DSC9047.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3820-dgw_025.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4556-_DGW0305.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4919-_DGW9626.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0421-_DGW6279.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4705-_DGW0343.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4115-dgw_029.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3496-dgw_160.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1898-dgw_144.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0949-dgw_030.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4273-_DGW0250.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0096-_DGW6249.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2794-dgw_102.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3602-_DSC9759.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4426-_DGW9439.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0546-dgw_153.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3757-_DGW6345.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4133-dgw_020.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2431-_DSC9974.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0933-dgw_007.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0651-dgw_129.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4952-_DGW9464.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1140-dgw_059.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2986-_DGW6325.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2191-dgw_003.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4049-_DSC3858.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2262-_DGW6400.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0785-dgw_058.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4615-_DGW0334.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4666-_DGW0244.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4535-_DGW0309.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3162-dgw_140.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4526-_DGW7879.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4059-_DSC6414.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0274-_DSC6439.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3926-dgw_077.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2154-_DSC6417.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3106-dgw_052.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4198-_DSC6401.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4859-_DGW0248.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4570-_DGW0236.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4274-dgw_068.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4112-_DGW6344.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2288-_DGW6237.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3593-_DSC5689.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0052-dgw_131.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2393-_DSC6398.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2468-_DSC9195.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0040-_DSC5693.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0572-_DGW6424.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3287-_DGW6308.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0431-_DSC9183.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2197-_DSC6374.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2103-dgw_054.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0292-dgw_086.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2323-dgw_109.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2722-dgw_158.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2257-dgw_061.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4531-_DGW7866.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3322-_DGW6269.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2769-_DSC9755.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1913-_DSC5474.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1168-dgw_057.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3182-_DGW6265.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2213-dgw_150.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3115-dgw_016.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2676-dgw_055.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1379-_DSC5348 (original).dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1595-_DGW6311.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0531-dgw_067.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1767-_DGW6401.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4824-_DGW0282.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2210-dgw_149.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3337-dgw_112.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1636-_DSC6280.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1852-_DSC8964.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1811-_DSC6315.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2077-_DSC6928.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4853-_DGW0247.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2004-_DGW6393.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2780-_DSC5637.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3205-dgw_042.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2827-dgw_085.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0959-_DGW6327.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4927-_DGW0242.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3250-dgw_113.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0736-_DGW6293.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1153-dgw_053.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4361-_DGW9031.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3867-_DGW6243.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3656-_DGW6254.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3458-_DSC4587.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0378-_DGW6391.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1441-dgw_132.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a4718-_DGW9472.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4833-_DGW7868.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1945-_DSC5903.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0824-_DGW6283.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3394-_DGW6419.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1928-dgw_135.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3761-_DGW6383.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0627-_DSC5388.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4355-_DGW0332.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1276-_DSC6183.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4743-_DGW0316.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3753-dgw_073.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0591-_DGW6381.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4229-_DGW0240.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3173-dgw_043.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3532-_DGW6305.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1705-_DGW6349.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4054-dgw_093.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1671-_DSC6426.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1762-_DGW6326.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2938-_DGW6271.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2559-dgw_136.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3397-_DSC5572.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2809-dgw_023.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2385-_DSC4276.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4711-_DGW0312.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0279-_DSC4586.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3213-_DSC4851.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0527-_DGW6270.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0588-dgw_118.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2367-dgw_098.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2950-_DSC4397.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2268-_DGW6411.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1475-dgw_146.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3737-dgw_022.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3501-dgw_154.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1602-_DSC3915.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0883-_DGW6253.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2942-_DGW6332.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3777-dgw_024.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0969-dgw_056.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3340-_DGW6366.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3462-dgw_051.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3122-_DGW6312.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3628-_DSC9996.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3509-_DGW6337.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4300-_DGW0239.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2441-dgw_071.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1929-dgw_084.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3758-dgw_141.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4866-_DGW9039.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0747-dgw_033.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0065-_DSC6405.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2036-_DGW6338.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3419-_DSC3931.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2491-_DGW6342.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0237-_DSC9985.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4204-_DGW7870.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2030-_DSC7496.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2352-_DGW6398.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2476-_DSC6421.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3865-_DGW6257.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3972-dgw_010.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1731-dgw_130.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2360-_DGW6395.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3732-_DGW6272.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1914-dgw_080.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2909-dgw_092.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0562-dgw_082.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4008-dgw_019.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0595-_DGW6264.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1052-_DGW6238.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2041-_DGW6267.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1643-_DGW6323.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4481-_DGW6369.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2330-_DSC9771.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2439-_DGW6364.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2972-_DSC6416.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1172-_DGW6413.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2975-dgw_134.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4651-_DGW0292.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1421-_DGW6229.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1193-_DSC6404.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3028-_DSC7427.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a0466-_DSC5415.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0476-_DSC6400.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3664-dgw_097.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2633-_DGW6226.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2416-_DGW6256.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0953-dgw_026.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2430-_DGW6240.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4060-_DSC5597.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2797-_DGW6280.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4729-_DGW0345.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1954-_DGW6380.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1617-dgw_124.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4774-_DGW0330.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4136-_DSC6412.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1633-_DSC5879.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0712-_DSC8911.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3012-dgw_074.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3435-dgw_001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3076-dgw_036.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3091-_DGW6408.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1106-_DSC0010.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2460-_DSC3950.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0877-_DGW6231.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4261-_DGW9448.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1865-dgw_120.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4519-_DGW7869.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4709-_DGW0275.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3032-dgw_139.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1323-dgw_156.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0658-dgw_105.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2955-_DGW6306.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4256-_DGW0339.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2907-dgw_108.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4203-_DGW0246.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2035-_DGW6313.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3885-_DGW6320.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1234-_DGW6333.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0312-_DSC5579.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4610-_DGW0346.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3441-dgw_064.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4391-_DGW0277.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1769-_DGW6405.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1652-dgw_004.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3657-_DSC5954.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1977-_DGW6239.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1880-_DGW6418.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2984-_DGW6399.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1418-dgw_066.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1583-dgw_079.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4914-_DGW0237.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4331-_DGW0241.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0433-dgw_008.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3928-_DSC6415.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1251-_DGW6263.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4622-_DGW9528.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4132-_DSC6164.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a1272-_DGW6377.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1776-dgw_142.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4441-_DGW0274.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2683-_DSC9001.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0950-_DGW6335.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3641-_DSC4628.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0002-dgw_005.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2536-_DGW6266.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1618-dgw_062.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1171-_DGW6372.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2869-dgw_111.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3924-_DSC6358.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3554-dgw_103.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4150-_DGW6309.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2014-_DSC5436.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2332-_DGW6258.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0484-_DGW6359.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1687-_DSC4299.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1563-_DGW6307.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1231-_DGW6291.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1028-_DSC6440.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0208-_DGW6392.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3789-_DSC5595.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2479-_DGW6373.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2741-dgw_152.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1975-dgw_075.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2748-_DGW6282.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3772-dgw_123.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2256-_DSC5654.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3876-dgw_114.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4682-_DGW0319.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a2042-dgw_038.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4640-_DGW9747.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3709-_DGW6314.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4746-_DGW9510.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1336-_DSC8917.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0088-_DGW6376.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0672-_DSC8842.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1100-_DGW6248.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1041-_DSC4339.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4951-_DGW0252.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3821-_DGW6390.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4352-_DGW6241.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4475-_DGW7819.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0341-dgw_002.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3271-dgw_125.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1045-_DSC4480.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3931-_DGW6259.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3467-dgw_035.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4723-_DGW7894.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3878-_DSC6428.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a3375-_DSC6420.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1616-_DGW6356.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0209-_DGW6273.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1891-dgw_119.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4633-_DGW8845.dng 
+https://data.csail.mit.edu/graphics/fivek/img/dng/a2183-dgw_126.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a0567-_DGW6268.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4872-_DGW0314.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1431-dgw_089.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1262-_DGW6230.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4504-_DGW7893.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1340-_DSC7451.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a1875-_DGW6410.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4174-dgw_083.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4450-_DGW0270.dng +https://data.csail.mit.edu/graphics/fivek/img/dng/a4613-_DGW9045.dng diff --git a/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700_test.txt b/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..d05e49023d03828cacb7d07ca19177ba1521153f --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700_test.txt @@ -0,0 +1,73 @@ +a4331-_DGW0241 +a0433-dgw_008 +a3928-_DSC6415 +a1251-_DGW6263 +a4622-_DGW9528 +a4132-_DSC6164 +a1272-_DGW6377 +a1776-dgw_142 +a4441-_DGW0274 +a2683-_DSC9001 +a0950-_DGW6335 +a3641-_DSC4628 +a0002-dgw_005 +a2536-_DGW6266 +a1618-dgw_062 +a1171-_DGW6372 +a2869-dgw_111 +a3924-_DSC6358 +a3554-dgw_103 +a4150-_DGW6309 +a2014-_DSC5436 +a2332-_DGW6258 +a0484-_DGW6359 +a1687-_DSC4299 +a1563-_DGW6307 +a1231-_DGW6291 +a1028-_DSC6440 +a0208-_DGW6392 +a3789-_DSC5595 +a2479-_DGW6373 +a2741-dgw_152 +a1975-dgw_075 +a2748-_DGW6282 +a3772-dgw_123 +a2256-_DSC5654 +a3876-dgw_114 +a4682-_DGW0319 +a2042-dgw_038 +a4640-_DGW9747 +a3709-_DGW6314 +a4746-_DGW9510 +a1336-_DSC8917 +a0088-_DGW6376 +a0672-_DSC8842 +a1100-_DGW6248 +a1041-_DSC4339 +a4951-_DGW0252 +a3821-_DGW6390 +a4352-_DGW6241 +a4475-_DGW7819 +a0341-dgw_002 +a3271-dgw_125 +a1045-_DSC4480 +a3931-_DGW6259 +a3467-dgw_035 +a4723-_DGW7894 
+a3878-_DSC6428 +a3375-_DSC6420 +a1616-_DGW6356 +a0209-_DGW6273 +a1891-dgw_119 +a4633-_DGW8845 +a2183-dgw_126 +a0567-_DGW6268 +a4872-_DGW0314 +a1431-dgw_089 +a1262-_DGW6230 +a4504-_DGW7893 +a1340-_DSC7451 +a1875-_DGW6410 +a4174-dgw_083 +a4450-_DGW0270 +a4613-_DGW9045 diff --git a/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700_train.txt b/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700_train.txt new file mode 100644 index 0000000000000000000000000000000000000000..674b86ecbb56e4c970b342a1359862f2e010111d --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/data/NIKON_D700_train.txt @@ -0,0 +1,414 @@ +a2754-_DSC7455 +a3390-dgw_070 +a4801-_DGW0327 +a1085-_DSC6188 +a3706-dgw_065 +a3837-dgw_100 +a2686-dgw_072 +a1747-dgw_046 +a3800-dgw_090 +a4389-_DGW7865 +a3582-dgw_015 +a3925-_DSC6409 +a4110-dgw_069 +a4925-_DGW7848 +a2189-dgw_087 +a1807-_DGW6310 +a3810-_DGW6236 +a1969-_DGW6290 +a0821-dgw_037 +a0743-_DSC6146 +a3886-_DGW6415 +a2791-_DGW6374 +a3183-_DSC5701 +a4453-_DGW0267 +a0510-_DGW6409 +a4381-_DGW9028 +a1015-_DSC5571 +a1872-_DSC5412 +a0195-_DGW6246 +a0455-_DSC4605 +a0822-dgw_028 +a2651-dgw_017 +a3355-_DGW6412 +a2766-_DGW6347 +a4829-_DGW7882 +a3068-dgw_040 +a4948-_DGW7855 +a0909-_DGW6284 +a2234-_DGW6319 +a4218-_DGW6302 +a0412-_DGW6297 +a0597-dgw_012 +a4333-_DGW0255 +a4076-_DGW6244 +a0928-_DSC3894 +a0938-_DGW6281 +a2403-dgw_095 +a3235-dgw_117 +a3006-_DGW6223 +a0190-dgw_034 +a4850-_DGW9453 +a4955-_DGW0261 +a3048-_DGW6350 +a3066-_DGW6324 +a2166-dgw_122 +a2485-_DGW6336 +a3362-dgw_110 +a0991-_DSC5400 +a2016-_DSC9836 +a1390-_DGW6414 +a0177-dgw_078 +a4388-_DGW0257 +a2111-_DSC5607 +a0887-_DSC5906 +a2915-_DSC7402 +a3099-_DGW6276 +a1282-_DGW6370 +a3480-dgw_151 +a1337-_DGW6225 +a0035-dgw_048 +a1224-_DGW6318 +a4483-_DGW0262 +a0761-_DGW6343 +a0910-_DGW6379 +a1287-dgw_063 +a0392-_DGW6346 +a3041-_DGW6232 +a1481-_DGW6386 +a1088-dgw_155 +a0487-_DSC5455 +a2140-dgw_021 +a0064-_DSC7889 +a4029-_DGW6245 +a4459-_DGW0329 +a1501-_DSC7449 +a4190-dgw_050 +a3907-_DGW6354 
+a4902-_DGW0251 +a4950-_DGW0249 +a3836-dgw_044 +a1504-dgw_018 +a0304-dgw_137 +a4939-_DGW0287 +a3423-_DGW6316 +a1062-_DGW6315 +a0543-_DGW6252 +a2612-dgw_115 +a3200-dgw_133 +a2200-dgw_031 +a3130-_DGW6351 +a4684-_DGW0286 +a3893-_DGW6301 +a1033-_DSC4500 +a4353-_DGW0322 +a3500-dgw_099 +a2444-dgw_032 +a0225-dgw_127 +a3556-_DGW6389 +a3894-_DGW6435 +a0046-dgw_101 +a2557-_DGW6396 +a4987-_DGW0297 +a1241-_DSC6418 +a2961-_DSC9017 +a0860-dgw_049 +a2119-dgw_009 +a0675-_DGW6371 +a4243-_DGW9580 +a1560-dgw_013 +a4378-_DGW0272 +a3232-_DGW6397 +a3356-_DSC9981 +a4469-_DGW0243 +a2739-_DGW6416 +a2366-_DGW6298 +a4581-_DGW0256 +a3998-dgw_041 +a2484-dgw_011 +a3168-_DGW6358 +a0024-_DSC8932 +a1297-_DGW6304 +a3699-_DGW6404 +a0766-_DGW6227 +a4385-_DGW9650 +a1142-_DGW6357 +a0634-_DGW6340 +a0608-_DGW6367 +a1383-_DGW6387 +a2698-dgw_106 +a0574-_DSC6152 +a4400-_DGW9653 +a4039-dgw_076 +a0524-_DGW6317 +a3276-dgw_159 +a4545-_DGW9669 +a4979-_DGW0341 +a4362-_DGW7864 +a3411-_DGW6385 +a4837-_DGW7872 +a4200-_DGW6341 +a3690-_DGW6402 +a2211-dgw_047 +a4142-_DGW6275 +a4245-_DGW9109 +a1856-_DGW6328 +a4022-_DGW6330 +a3572-_DGW6384 +a1976-_DSC4492 +a0932-dgw_088 +a0702-dgw_091 +a4383-_DGW9644 +a1711-_DGW6251 +a3811-_DGW6261 +a4648-_DGW0260 +a4419-_DGW0269 +a1484-_DSC4591 +a2017-dgw_045 +a3805-_DGW6339 +a2520-dgw_143 +a3034-_DGW6331 +a3215-dgw_121 +a4478-_DSC9389 +a3148-dgw_107 +a0217-_DGW6260 +a2621-_DSC5468 +a4233-_DGW9491 +a0650-dgw_060 +a3958-_DSC3890 +a1829-_DGW6334 +a2390-_DSC5419 +a1248-dgw_081 +a2369-_DGW6352 +a0478-dgw_014 +a3140-dgw_096 +a1378-dgw_039 +a1130-dgw_128 +a4119-_DSC9047 +a3820-dgw_025 +a4556-_DGW0305 +a4919-_DGW9626 +a0421-_DGW6279 +a4705-_DGW0343 +a4115-dgw_029 +a3496-dgw_160 +a1898-dgw_144 +a0949-dgw_030 +a4273-_DGW0250 +a0096-_DGW6249 +a2794-dgw_102 +a3602-_DSC9759 +a4426-_DGW9439 +a0546-dgw_153 +a3757-_DGW6345 +a4133-dgw_020 +a2431-_DSC9974 +a0933-dgw_007 +a0651-dgw_129 +a4952-_DGW9464 +a1140-dgw_059 +a2986-_DGW6325 +a2191-dgw_003 +a4049-_DSC3858 +a2262-_DGW6400 +a0785-dgw_058 
+a4615-_DGW0334 +a4666-_DGW0244 +a4535-_DGW0309 +a3162-dgw_140 +a4526-_DGW7879 +a4059-_DSC6414 +a0274-_DSC6439 +a3926-dgw_077 +a2154-_DSC6417 +a3106-dgw_052 +a4198-_DSC6401 +a4859-_DGW0248 +a4570-_DGW0236 +a4274-dgw_068 +a4112-_DGW6344 +a2288-_DGW6237 +a3593-_DSC5689 +a0052-dgw_131 +a2393-_DSC6398 +a2468-_DSC9195 +a0040-_DSC5693 +a0572-_DGW6424 +a3287-_DGW6308 +a0431-_DSC9183 +a2197-_DSC6374 +a2103-dgw_054 +a0292-dgw_086 +a2323-dgw_109 +a2722-dgw_158 +a2257-dgw_061 +a4531-_DGW7866 +a3322-_DGW6269 +a2769-_DSC9755 +a1913-_DSC5474 +a1168-dgw_057 +a3182-_DGW6265 +a2213-dgw_150 +a3115-dgw_016 +a2676-dgw_055 +a1379-_DSC5348 (original) +a1595-_DGW6311 +a0531-dgw_067 +a1767-_DGW6401 +a4824-_DGW0282 +a2210-dgw_149 +a3337-dgw_112 +a1636-_DSC6280 +a1852-_DSC8964 +a1811-_DSC6315 +a2077-_DSC6928 +a4853-_DGW0247 +a2004-_DGW6393 +a2780-_DSC5637 +a3205-dgw_042 +a2827-dgw_085 +a0959-_DGW6327 +a4927-_DGW0242 +a3250-dgw_113 +a0736-_DGW6293 +a1153-dgw_053 +a4361-_DGW9031 +a3867-_DGW6243 +a3656-_DGW6254 +a3458-_DSC4587 +a0378-_DGW6391 +a1441-dgw_132 +a4718-_DGW9472 +a4833-_DGW7868 +a1945-_DSC5903 +a0824-_DGW6283 +a3394-_DGW6419 +a1928-dgw_135 +a3761-_DGW6383 +a0627-_DSC5388 +a4355-_DGW0332 +a1276-_DSC6183 +a4743-_DGW0316 +a3753-dgw_073 +a0591-_DGW6381 +a4229-_DGW0240 +a3173-dgw_043 +a3532-_DGW6305 +a1705-_DGW6349 +a4054-dgw_093 +a1671-_DSC6426 +a1762-_DGW6326 +a2938-_DGW6271 +a2559-dgw_136 +a3397-_DSC5572 +a2809-dgw_023 +a2385-_DSC4276 +a4711-_DGW0312 +a0279-_DSC4586 +a3213-_DSC4851 +a0527-_DGW6270 +a0588-dgw_118 +a2367-dgw_098 +a2950-_DSC4397 +a2268-_DGW6411 +a1475-dgw_146 +a3737-dgw_022 +a3501-dgw_154 +a1602-_DSC3915 +a0883-_DGW6253 +a2942-_DGW6332 +a3777-dgw_024 +a0969-dgw_056 +a3340-_DGW6366 +a3462-dgw_051 +a3122-_DGW6312 +a3628-_DSC9996 +a3509-_DGW6337 +a4300-_DGW0239 +a2441-dgw_071 +a1929-dgw_084 +a3758-dgw_141 +a4866-_DGW9039 +a0747-dgw_033 +a0065-_DSC6405 +a2036-_DGW6338 +a3419-_DSC3931 +a2491-_DGW6342 +a0237-_DSC9985 +a4204-_DGW7870 +a2030-_DSC7496 +a2352-_DGW6398 
# Pre-processing script: for every RAW file of one camera, write
#   * <root>/<camera>/RGB/<name>.jpg  - camera-rendered sRGB ground truth
#   * <root>/<camera>/RAW/<name>.npz  - bilinearly demosaiced RAW + white balance
parser = argparse.ArgumentParser(description="data preprocess")

parser.add_argument("--camera", type=str, default="NIKON_D700", help="Camera Name")
parser.add_argument("--Bayer_Pattern", type=str, default="RGGB", help="Bayer Pattern of RAW")
parser.add_argument("--JPEG_Quality", type=int, default=90, help="Jpeg Quality of the ground truth.")

args = parser.parse_args()
camera_name = args.camera
Bayer_Pattern = args.Bayer_Pattern
JPEG_Quality = args.JPEG_Quality

# NOTE(review): the input folder is named DNG but only *.cr2 files are matched;
# confirm the extension agrees with the files actually stored there.
dng_path = sorted(glob.glob('/mnt/nvme2n1/hyz/data/' + camera_name + '/DNG/*.cr2'))
rgb_target_path = '/mnt/nvme2n1/hyz/data/' + camera_name + '/RGB/'
raw_input_path = '/mnt/nvme2n1/hyz/data/' + camera_name + '/RAW/'
# makedirs creates missing parents and tolerates an existing folder, where a
# bare os.mkdir raises as soon as the camera folder itself is absent.
os.makedirs(rgb_target_path, exist_ok=True)
os.makedirs(raw_input_path, exist_ok=True)

# Orientation code (as reported by raw.sizes.flip) -> number of 90-degree
# turns handed to np.rot90.
_ROT90_TURNS = {3: 2, 5: 1, 6: 3}


def flip(raw_img, flip):
    """Rotate ``raw_img`` according to the orientation code ``flip``.

    Codes 3, 5 and 6 map to 2, 1 and 3 quarter turns of ``np.rot90``; any
    other code returns the image unchanged.
    """
    turns = _ROT90_TURNS.get(flip)
    if turns is None:
        return raw_img
    return np.rot90(raw_img, k=turns)


for path in dng_path:
    print("Start Processing %s" % os.path.basename(path))
    file_name = path.split('/')[-1].split('.')[0]
    # The context manager releases the decoder even when a step below raises.
    # Everything that reads from `raw` (including the raw_image_visible view)
    # therefore has to happen inside this block.
    with rawpy.imread(path) as raw:
        im = raw.postprocess(use_camera_wb=True, no_auto_bright=True)
        flip_val = raw.sizes.flip
        cwb = raw.camera_whitebalance
        raw_img = raw.raw_image_visible
        if camera_name == 'Canon_EOS_5D':
            # Subtract a constant offset of 127 and clip at zero
            # (presumably the sensor black level - confirm).
            raw_img = np.maximum(raw_img - 127.0, 0)
        de_raw = colour_demosaicing.demosaicing_CFA_Bayer_bilinear(raw_img, Bayer_Pattern)
    de_raw = flip(de_raw, flip_val)
    # Image.save() returns None, so its result is not kept.
    PILImage.fromarray(im).save(rgb_target_path + file_name + '.jpg', quality=JPEG_Quality, subsampling=1)
    np.savez(raw_input_path + file_name + '.npz', raw=de_raw, wb=cwb)
def _read_rgb(path):
    """Decode the image at ``path`` into a numpy array.

    Uses Pillow directly so the datasets do not depend on
    ``scipy.misc.imread``, which recent SciPy releases no longer ship.
    The file handle is closed before returning.
    """
    with PILImage.open(path) as img:
        return np.array(img)


def _read_balanced_raw(path):
    """Load a ``.npz`` holding ``raw`` and ``wb`` and apply the white balance.

    The gains are scaled so the largest equals 1, the last gain is dropped and
    the remaining ones are broadcast over the trailing axis of ``raw``.
    """
    # NpzFile keeps the archive open until closed; read both arrays eagerly.
    with np.load(path) as archive:
        raw = archive['raw']
        wb = archive['wb']
    wb = wb / wb.max()
    return raw * wb[:-1]


def _make_sample(dataset, input_raw_img, target_rgb_img, input_raw_wb_path):
    """Normalise a RAW/RGB pair and pack it into the sample dict.

    ``dataset`` supplies ``camera_name``, ``gamma``, ``norm_img`` and
    ``np2tensor``.
    """
    # Per-camera maximum used for normalisation
    # (presumably 12-bit vs 14-bit sensor data - confirm).
    raw_max = 4095 if dataset.camera_name == 'Canon_EOS_5D' else 16383
    if dataset.gamma:
        # Apply the same 1/2.2 power to data and maximum so the result
        # is still divided by its own upper bound.
        norm_value = np.power(raw_max, 1/2.2)
        input_raw_img = np.power(input_raw_img, 1/2.2)
    else:
        norm_value = raw_max

    target_rgb_img = dataset.norm_img(target_rgb_img, max_value=255)
    input_raw_img = dataset.norm_img(input_raw_img, max_value=norm_value)
    target_raw_img = input_raw_img.copy()

    return {'input_raw': dataset.np2tensor(input_raw_img).float(),
            'target_rgb': dataset.np2tensor(target_rgb_img).float(),
            'target_raw': dataset.np2tensor(target_raw_img).float(),
            'file_name': input_raw_wb_path.split("/")[-1].split(".")[0]}


class FiveKDatasetTrain(BaseDataset):
    """Training split: random crops with rotation/flip augmentation."""

    def __init__(self, opt):
        super().__init__(opt=opt)
        self.patch_size = 256
        input_RAWs_WBs, target_RGBs = self.load(is_train=True)
        assert len(input_RAWs_WBs) == len(target_RGBs)
        self.data = {'input_RAWs_WBs': input_RAWs_WBs, 'target_RGBs': target_RGBs}

    def random_flip(self, input_raw, target_rgb):
        """Flip both images along the same randomly chosen axis (0 or 1).

        A flip is always applied; only the axis is random. The results are
        copied so they are contiguous arrays rather than strided views.
        """
        idx = np.random.randint(2)
        input_raw = np.flip(input_raw, axis=idx).copy()
        target_rgb = np.flip(target_rgb, axis=idx).copy()

        return input_raw, target_rgb

    def random_rotate(self, input_raw, target_rgb):
        """Rotate both images by the same random multiple of 90 degrees."""
        idx = np.random.randint(4)
        input_raw = np.rot90(input_raw, k=idx)
        target_rgb = np.rot90(target_rgb, k=idx)

        return input_raw, target_rgb

    def random_crop(self, patch_size, input_raw, target_rgb, flow=False, demos=False):
        """Cut matching random patches out of ``input_raw`` and ``target_rgb``.

        With ``flow`` or ``demos`` set, both images are cropped with the same
        window. Otherwise the RGB window is taken at twice the offset and twice
        the size of the RAW window.
        """
        H, W, _ = input_raw.shape
        rnd_h = random.randint(0, max(0, H - patch_size))
        rnd_w = random.randint(0, max(0, W - patch_size))

        patch_input_raw = input_raw[rnd_h:rnd_h + patch_size, rnd_w:rnd_w + patch_size, :]
        if flow or demos:
            patch_target_rgb = target_rgb[rnd_h:rnd_h + patch_size, rnd_w:rnd_w + patch_size, :]
        else:
            patch_target_rgb = target_rgb[rnd_h*2:rnd_h*2 + patch_size*2, rnd_w*2:rnd_w*2 + patch_size*2, :]

        return patch_input_raw, patch_target_rgb

    def aug(self, patch_size, input_raw, target_rgb, flow=False, demos=False):
        """Apply crop, then rotation, then flip to the image pair."""
        input_raw, target_rgb = self.random_crop(patch_size, input_raw, target_rgb, flow=flow, demos=demos)
        input_raw, target_rgb = self.random_rotate(input_raw, target_rgb)
        input_raw, target_rgb = self.random_flip(input_raw, target_rgb)

        return input_raw, target_rgb

    def __len__(self):
        return len(self.data['input_RAWs_WBs'])

    def __getitem__(self, idx):
        """Return one augmented training sample.

        The dict holds float tensors ``input_raw``, ``target_rgb`` and
        ``target_raw`` plus the string ``file_name``.
        """
        input_raw_wb_path = self.data['input_RAWs_WBs'][idx]
        target_rgb_path = self.data['target_RGBs'][idx]

        target_rgb_img = _read_rgb(target_rgb_path)
        input_raw_img = _read_balanced_raw(input_raw_wb_path)

        # patch_size is set once in __init__; it is no longer reset here, so a
        # value assigned on the instance is honoured.
        input_raw_img, target_rgb_img = self.aug(self.patch_size, input_raw_img, target_rgb_img, flow=True, demos=True)

        return _make_sample(self, input_raw_img, target_rgb_img, input_raw_wb_path)


class FiveKDatasetTest(BaseDataset):
    """Test split: full-size images, no cropping or augmentation."""

    def __init__(self, opt):
        super().__init__(opt=opt)
        self.patch_size = 256

        input_RAWs_WBs, target_RGBs = self.load(is_train=False)
        assert len(input_RAWs_WBs) == len(target_RGBs)
        self.data = {'input_RAWs_WBs': input_RAWs_WBs, 'target_RGBs': target_RGBs}

    def __len__(self):
        return len(self.data['input_RAWs_WBs'])

    def __getitem__(self, idx):
        """Return one un-augmented sample with the same keys as the train split."""
        input_raw_wb_path = self.data['input_RAWs_WBs'][idx]
        target_rgb_path = self.data['target_RGBs'][idx]

        target_rgb_img = _read_rgb(target_rgb_path)
        input_raw_img = _read_balanced_raw(input_raw_wb_path)

        return _make_sample(self, input_raw_img, target_rgb_img, input_raw_wb_path)
class BaseDataset(Dataset):
    """Shared helpers for the RAW/RGB datasets.

    Reads its configuration from ``opt`` (``debug_mode``, ``data_path``,
    ``camera``, ``gamma``). Subclasses override ``__len__`` and
    ``__getitem__``; the versions here are empty placeholders.
    """

    def __init__(self, opt):
        self.crop_size = 512
        self.debug_mode = opt.debug_mode
        self.data_path = opt.data_path  # dataset path. e.g., ./data/
        self.camera_name = opt.camera
        self.gamma = opt.gamma

    def norm_img(self, img, max_value):
        """Divide ``img`` by ``max_value`` (as a float)."""
        return img / float(max_value)

    def pack_raw(self, raw):
        """Pack a single-plane Bayer image into 4 half-resolution channels.

        Channel order follows the 2x2 cell positions (0,0), (0,1), (1,1),
        (1,0) - annotated "RGBG" in the original code.
        """
        im = np.expand_dims(raw, axis=2)
        H, W = raw.shape[0], raw.shape[1]
        out = np.concatenate((im[0:H:2, 0:W:2, :],
                              im[0:H:2, 1:W:2, :],
                              im[1:H:2, 1:W:2, :],
                              im[1:H:2, 0:W:2, :]), axis=2)
        return out

    def np2tensor(self, array):
        """Convert an array to a ``torch.Tensor`` with its last axis moved first.

        The array is made contiguous first so strided views (e.g. the result
        of ``np.flip`` / ``np.rot90``) are always accepted.
        """
        return torch.Tensor(np.ascontiguousarray(array)).permute(2, 0, 1)

    def center_crop(self, img, crop_size=None):
        """Return the central ``crop_size`` window of ``img``.

        Args:
            img: array with at least two axes (rows, columns).
            crop_size: ``(height, width)``; defaults to a square of
                ``self.crop_size``.

        When the image is smaller than the window along an axis, the whole
        axis is returned. The offset is clamped at 0, because a negative
        offset would make the slice wrap around and pick the wrong region.
        """
        H = img.shape[0]
        W = img.shape[1]

        if crop_size is not None:
            th, tw = crop_size[0], crop_size[1]
        else:
            th, tw = self.crop_size, self.crop_size
        x1_img = max(0, int(round((W - tw) / 2.)))
        y1_img = max(0, int(round((H - th) / 2.)))

        # Slicing only the first two axes leaves any trailing axes intact.
        return img[y1_img:y1_img + th, x1_img:x1_img + tw]

    def load(self, is_train=True):
        """Build the lists of RAW (``.npz``) and RGB (``.jpg``) file paths.

        Expected layout under ``data_path`` (``<cam>`` is ``camera_name``):
            <cam>_train.txt, <cam>_test.txt   one sample name per line
            <cam>/RAW/<name>.npz
            <cam>/RGB/<name>.jpg

        Returns:
            ``(input_RAWs_WBs, target_RGBs)``, two parallel lists of paths.
            In debug mode only the first 10 samples are kept.
        """
        # Accept data_path with or without a trailing separator.
        root = self.data_path
        if root and not root.endswith(('/', '\\')):
            root += '/'

        split = "_train.txt" if is_train else "_test.txt"
        txt_path = root + self.camera_name + split

        with open(txt_path, "r") as f_read:
            # Blank lines would otherwise become paths like ".../RAW/.npz".
            stripped = (line.strip() for line in f_read)
            valid_camera_list = [name for name in stripped if name]

        if self.debug_mode:
            valid_camera_list = valid_camera_list[:10]

        full_name = root + self.camera_name
        input_RAWs_WBs = [full_name + "/RAW/" + name + ".npz" for name in valid_camera_list]
        target_RGBs = [full_name + "/RGB/" + name + ".jpg" for name in valid_camera_list]

        return input_RAWs_WBs, target_RGBs

    def __len__(self):
        return 0

    def __getitem__(self, idx):
        return None
def _weighted_recon_loss(criterion, output, target_rgb, target_raw, weight):
    """Apply ``criterion`` to both reconstructions and combine the results.

    Returns ``(raw_loss + weight * rgb_loss, raw_loss, rgb_loss)``.
    """
    raw_loss = criterion(output['reconstruct_raw'], target_raw)
    rgb_loss = criterion(output['reconstruct_rgb'], target_rgb)
    total_loss = raw_loss + weight * rgb_loss
    return total_loss, raw_loss, rgb_loss


def l1_loss(output, target_rgb, target_raw, weight=1.):
    """Mean-absolute-error reconstruction loss.

    Args:
        output: dict with tensors under ``'reconstruct_raw'`` and
            ``'reconstruct_rgb'``.
        target_rgb: reference for ``output['reconstruct_rgb']``.
        target_raw: reference for ``output['reconstruct_raw']``.
        weight: factor applied to the RGB term in the total.

    Returns:
        ``(total_loss, raw_loss, rgb_loss)``.
    """
    return _weighted_recon_loss(F.l1_loss, output, target_rgb, target_raw, weight)


def l2_loss(output, target_rgb, target_raw, weight=1.):
    """Mean-squared-error reconstruction loss; same contract as ``l1_loss``."""
    return _weighted_recon_loss(F.mse_loss, output, target_rgb, target_raw, weight)
0000000000000000000000000000000000000000..9dd0e33cee8ebb26d621ece84622bd2611b33a60 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/model/model.py @@ -0,0 +1,179 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import torch.nn.init as init + +from .modules import InvertibleConv1x1 + + +def initialize_weights(net_l, scale=1): + if not isinstance(net_l, list): + net_l = [net_l] + for net in net_l: + for m in net.modules(): + if isinstance(m, nn.Conv2d): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale # for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + init.kaiming_normal_(m.weight, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + init.constant_(m.weight, 1) + init.constant_(m.bias.data, 0.0) + + +def initialize_weights_xavier(net_l, scale=1): + if not isinstance(net_l, list): + net_l = [net_l] + for net in net_l: + for m in net.modules(): + if isinstance(m, nn.Conv2d): + init.xavier_normal_(m.weight) + m.weight.data *= scale # for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + init.xavier_normal_(m.weight) + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + init.constant_(m.weight, 1) + init.constant_(m.bias.data, 0.0) + + +class DenseBlock(nn.Module): + def __init__(self, channel_in, channel_out, init='xavier', gc=32, bias=True): + super(DenseBlock, self).__init__() + self.conv1 = nn.Conv2d(channel_in, gc, 3, 1, 1, bias=bias) + self.conv2 = nn.Conv2d(channel_in + gc, gc, 3, 1, 1, bias=bias) + self.conv3 = nn.Conv2d(channel_in + 2 * gc, gc, 3, 1, 1, bias=bias) + self.conv4 = nn.Conv2d(channel_in + 3 * gc, gc, 3, 1, 1, bias=bias) + self.conv5 = nn.Conv2d(channel_in + 4 * gc, channel_out, 3, 1, 1, bias=bias) + self.lrelu = 
nn.LeakyReLU(negative_slope=0.2, inplace=True) + + if init == 'xavier': + initialize_weights_xavier([self.conv1, self.conv2, self.conv3, self.conv4], 0.1) + else: + initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4], 0.1) + initialize_weights(self.conv5, 0) + + def forward(self, x): + x1 = self.lrelu(self.conv1(x)) + x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1))) + x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1))) + x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1))) + x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1)) + + return x5 + +def subnet(net_structure, init='xavier'): + def constructor(channel_in, channel_out): + if net_structure == 'DBNet': + if init == 'xavier': + return DenseBlock(channel_in, channel_out, init) + else: + return DenseBlock(channel_in, channel_out) + # return UNetBlock(channel_in, channel_out) + else: + return None + + return constructor + + +class InvBlock(nn.Module): + def __init__(self, subnet_constructor, channel_num, channel_split_num, clamp=0.8): + super(InvBlock, self).__init__() + # channel_num: 3 + # channel_split_num: 1 + + self.split_len1 = channel_split_num # 1 + self.split_len2 = channel_num - channel_split_num # 2 + + self.clamp = clamp + + self.F = subnet_constructor(self.split_len2, self.split_len1) + self.G = subnet_constructor(self.split_len1, self.split_len2) + self.H = subnet_constructor(self.split_len1, self.split_len2) + + in_channels = 3 + self.invconv = InvertibleConv1x1(in_channels, LU_decomposed=True) + self.flow_permutation = lambda z, logdet, rev: self.invconv(z, logdet, rev) + + def forward(self, x, rev=False): + if not rev: + # invert1x1conv + x, logdet = self.flow_permutation(x, logdet=0, rev=False) + + # split to 1 channel and 2 channel. 
+ x1, x2 = (x.narrow(1, 0, self.split_len1), x.narrow(1, self.split_len1, self.split_len2)) + + y1 = x1 + self.F(x2) # 1 channel + self.s = self.clamp * (torch.sigmoid(self.H(y1)) * 2 - 1) + y2 = x2.mul(torch.exp(self.s)) + self.G(y1) # 2 channel + out = torch.cat((y1, y2), 1) + else: + # split. + x1, x2 = (x.narrow(1, 0, self.split_len1), x.narrow(1, self.split_len1, self.split_len2)) + self.s = self.clamp * (torch.sigmoid(self.H(x1)) * 2 - 1) + y2 = (x2 - self.G(x1)).div(torch.exp(self.s)) + y1 = x1 - self.F(y2) + + x = torch.cat((y1, y2), 1) + + # inv permutation + out, logdet = self.flow_permutation(x, logdet=0, rev=True) + + return out + +class InvISPNet(nn.Module): + def __init__(self, channel_in=3, channel_out=3, subnet_constructor=subnet('DBNet'), block_num=8): + super(InvISPNet, self).__init__() + operations = [] + + current_channel = channel_in + channel_num = channel_in + channel_split_num = 1 + + for j in range(block_num): + b = InvBlock(subnet_constructor, channel_num, channel_split_num) # one block is one flow step. + operations.append(b) + + self.operations = nn.ModuleList(operations) + + self.initialize() + + def initialize(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + init.xavier_normal_(m.weight) + m.weight.data *= 1. # for residual block + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + init.xavier_normal_(m.weight) + m.weight.data *= 1. 
def unsqueeze2d(input, factor):
    """Move channel blocks back into space (inverse of ``squeeze2d``).

    A ``(B, C, H, W)`` tensor becomes ``(B, C / factor**2, H * factor,
    W * factor)``.  With ``factor == 1`` the input object itself is returned.

    Raises:
        AssertionError: if ``C`` is not divisible by ``factor ** 2``.
    """
    if factor == 1:
        return input

    block = factor ** 2
    batch, channels, height, width = input.size()

    assert channels % (block) == 0, "C module factor squared is not 0"

    out_channels = channels // block
    # Split the channel axis into (out_channels, factor, factor) ...
    expanded = input.view(batch, out_channels, factor, factor, height, width)
    # ... and interleave the two factor axes with H and W respectively.
    interleaved = expanded.permute(0, 1, 4, 2, 5, 3).contiguous()
    return interleaved.view(batch, out_channels, height * factor, width * factor)
class ActNorm2d(_ActNorm):
    """Activation normalization for 4-D (``BCHW``) inputs.

    Supplies the input-shape check that the ``_ActNorm`` base class calls at
    the start of its forward pass.
    """

    def __init__(self, num_features, scale=1.0):
        super().__init__(num_features, scale)

    def _check_input_dim(self, input):
        """Assert that ``input`` is 4-D with ``num_features`` channels."""
        assert len(input.size()) == 4, (
            "[ActNorm]: input should be in shape as `BCHW`"
        )
        # Report the offending channel count; the message used to format the
        # whole size tuple where it claims to show the number of channels.
        assert input.size(1) == self.num_features, (
            "[ActNorm]: input should be in shape as `BCHW`,"
            " channels should be {} rather than {}".format(
                self.num_features, input.size(1)
            )
        )
class Split2d(nn.Module):
    """Split layer with a learned Gaussian prior on the dropped part.

    Forward keeps the first part ``z1`` of the input and adds the Gaussian
    log-likelihood of the dropped part ``z2`` (under a prior predicted from
    ``z1``) to ``logdet``.  Reverse samples ``z2`` from that prior and
    concatenates it back onto ``z1``.
    """

    def __init__(self, num_channels):
        super().__init__()
        self.conv = Conv2dZeros(num_channels // 2, num_channels)

    def split2d_prior(self, z):
        """Return ``(mean, logs)`` of the prior, predicted from ``z``."""
        h = self.conv(z)
        return split_feature(h, "cross")

    def forward(self, input, logdet=0.0, reverse=False, temperature=None):
        """Split (forward) or re-sample and merge (reverse).

        Args:
            input: full tensor in forward mode, kept part ``z1`` in reverse.
            logdet: running log-determinant / log-likelihood accumulator.
            reverse: run the layer backwards when true.
            temperature: sampling temperature for reverse mode; ``None``
                means the unscaled prior (temperature 1).

        Returns:
            ``(z, logdet)``.
        """
        if reverse:
            z1 = input
            mean, logs = self.split2d_prior(z1)
            # The default used to be forwarded as None, which made the
            # sampler multiply a tensor by None and raise TypeError.
            if temperature is None:
                temperature = 1
            z2 = gaussian_sample(mean, logs, temperature)
            z = torch.cat((z1, z2), dim=1)
            return z, logdet
        else:
            z1, z2 = split_feature(input, "split")
            mean, logs = self.split2d_prior(z1)
            logdet = gaussian_likelihood(mean, logs, z2) + logdet
            return z1, logdet
class InvertibleConv1x1(nn.Module):
    """Invertible 1x1 convolution over the channel axis.

    The ``num_channels x num_channels`` weight is initialised to a random
    orthogonal matrix.  With ``LU_decomposed`` it is stored as ``P @ L @ U``:
    the permutation ``p`` and the signs of the diagonal are fixed buffers,
    while ``lower``, ``upper`` and ``log_s`` are learned.

    Args:
        num_channels: number of input (and output) channels.
        LU_decomposed: parameterise the weight through its LU factors.
    """

    def __init__(self, num_channels, LU_decomposed):
        super().__init__()
        w_shape = [num_channels, num_channels]
        w_init = torch.linalg.qr(torch.randn(*w_shape))[0]

        if not LU_decomposed:
            self.weight = nn.Parameter(torch.Tensor(w_init))
        else:
            p, lower, upper = torch.lu_unpack(*torch.lu(w_init))
            s = torch.diag(upper)
            sign_s = torch.sign(s)
            log_s = torch.log(torch.abs(s))
            upper = torch.triu(upper, 1)
            l_mask = torch.tril(torch.ones(w_shape), -1)
            eye = torch.eye(*w_shape)

            self.register_buffer("p", p)
            self.register_buffer("sign_s", sign_s)
            self.lower = nn.Parameter(lower)
            self.log_s = nn.Parameter(log_s)
            self.upper = nn.Parameter(upper)
            # Constant helpers: non-persistent buffers follow the module
            # across .to()/.cuda()/.double() without being re-assigned on
            # every forward call, and they stay out of the state_dict so
            # existing checkpoints load unchanged.
            self.register_buffer("l_mask", l_mask, persistent=False)
            self.register_buffer("eye", eye, persistent=False)

        self.w_shape = w_shape
        self.LU_decomposed = LU_decomposed

    def get_weight(self, input, reverse):
        """Build the conv kernel (or its inverse) and the log-det term.

        Returns:
            ``(weight, dlogdet)`` where ``weight`` has shape
            ``(C, C, 1, 1)`` and ``dlogdet`` is ``log|det W|`` multiplied by
            the number of spatial positions of ``input``.
        """
        _, _, h, w = input.shape

        if not self.LU_decomposed:
            dlogdet = torch.slogdet(self.weight)[1] * h * w
            weight = torch.inverse(self.weight) if reverse else self.weight
        else:
            # Unit-diagonal lower factor and upper factor with the learned
            # diagonal sign_s * exp(log_s).
            lower = self.lower * self.l_mask + self.eye

            u = self.upper * self.l_mask.transpose(0, 1).contiguous()
            u = u + torch.diag(self.sign_s * torch.exp(self.log_s))

            dlogdet = torch.sum(self.log_s) * h * w

            if reverse:
                u_inv = torch.inverse(u)
                l_inv = torch.inverse(lower)
                p_inv = torch.inverse(self.p)

                weight = torch.matmul(u_inv, torch.matmul(l_inv, p_inv))
            else:
                weight = torch.matmul(self.p, torch.matmul(lower, u))

        return weight.view(self.w_shape[0], self.w_shape[1], 1, 1), dlogdet

    def forward(self, input, logdet=None, reverse=False):
        """Apply the convolution, or its inverse when ``reverse`` is true.

        log-det = log|abs(|W|)| * pixels; it is added to ``logdet`` in the
        forward direction and subtracted in reverse.  ``logdet`` is passed
        through untouched when it is ``None``.
        """
        weight, dlogdet = self.get_weight(input, reverse)
        z = F.conv2d(input, weight)

        if logdet is not None:
            logdet = logdet - dlogdet if reverse else logdet + dlogdet
        return z, logdet
+ + + + diff --git a/third_party/DarkFeat/datasets/InvISP/pretrained/canon.pth b/third_party/DarkFeat/datasets/InvISP/pretrained/canon.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7a126d418459dba22fcb60b9906104fb59d8296 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/pretrained/canon.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e620bd152f0f8a1db5266ed1219fe3c608c478d543f899495ef2a6b16261fa1b +size 5750545 diff --git a/third_party/DarkFeat/datasets/InvISP/test.sh b/third_party/DarkFeat/datasets/InvISP/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..dc71a15aef80302525ed8cba5a8e29f1e28db05d --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/test.sh @@ -0,0 +1,15 @@ +# python test_rgb.py --task=pretrained \ +# --data_path="./data/" \ +# --gamma \ +# --camera="Canon_EOS_5D" \ +# --out_path="./exps/" \ +# --ckpt="./pretrained/canon.pth" \ +# # --split_to_patch + +python test_raw.py --task=pretrained \ + --data_path="./data/" \ + --gamma \ + --camera="Canon_EOS_5D" \ + --out_path="./exps/" \ + --ckpt="./pretrained/canon.pth" \ + --split_to_patch diff --git a/third_party/DarkFeat/datasets/InvISP/test_raw.py b/third_party/DarkFeat/datasets/InvISP/test_raw.py new file mode 100644 index 0000000000000000000000000000000000000000..37610f8268e4586864e0275236c5bb1932f894df --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/test_raw.py @@ -0,0 +1,118 @@ +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import torch +import numpy as np +import os, time, random +import argparse +from torch.utils.data import Dataset, DataLoader +from PIL import Image as PILImage +from glob import glob +from tqdm import tqdm + +from model.model import InvISPNet +from dataset.FiveK_dataset import FiveKDatasetTest +from config.config import get_arguments + +from utils.JPEG import DiffJPEG +from utils.commons import denorm, preprocess_test_patch + + 
def main(args):
    """Reconstruct RAW images from saved RGB predictions and write them out.

    Builds an ``InvISPNet``, optionally loads ``args.ckpt``, then for every
    test sample (and every patch of it) reads the matching ``pred*jpg`` file
    from the module-level ``out_path``, runs the network in reverse
    (``rev=True``) and saves the predicted RAW, the target RAW and a
    side-by-side image as ``.jpg`` plus ``.npy`` arrays into ``out_path``.

    Args:
        args: parsed command-line namespace; this function reads ``ckpt`` and
            ``split_to_patch`` and forwards the whole namespace to
            ``FiveKDatasetTest``.
    """
    # ======================================define the model============================================
    net = InvISPNet(channel_in=3, channel_out=3, block_num=8)
    device = torch.device("cuda:0")

    net.to(device)
    net.eval()
    # load the pretrained weight if there exists one
    # NOTE(review): when the checkpoint file is missing the run silently
    # continues with the freshly initialised weights.
    if os.path.isfile(args.ckpt):
        net.load_state_dict(torch.load(args.ckpt), strict=False)
        print("[INFO] Loaded checkpoint: {}".format(args.ckpt))

    print("[INFO] Start data load and preprocessing")
    RAWDataset = FiveKDatasetTest(opt=args)
    dataloader = DataLoader(RAWDataset, batch_size=1, shuffle=False, num_workers=0, drop_last=True)

    # Previously written RGB predictions; the name list drops the directory,
    # the extension and the first five characters of each file name
    # (presumably the "pred_" prefix -- confirm against the RGB test script).
    input_RGBs = sorted(glob(out_path+"pred*jpg"))
    input_RGBs_names = [path.split("/")[-1].split(".")[0][5:] for path in input_RGBs]

    print("[INFO] Start test...")
    for i_batch, sample_batched in enumerate(tqdm(dataloader)):
        step_time = time.time()

        input, target_rgb, target_raw = sample_batched['input_raw'].to(device), sample_batched['target_rgb'].to(device), \
                            sample_batched['target_raw'].to(device)
        file_name = sample_batched['file_name'][0]

        if args.split_to_patch:
            input_list, target_rgb_list, target_raw_list = preprocess_test_patch(input, target_rgb, target_raw)
        else:
            # remove [:,:,::2,::2] if you have enough GPU memory to test the full resolution
            input_list, target_rgb_list, target_raw_list = [input[:,:,::2,::2]], [target_rgb[:,:,::2,::2]], [target_raw[:,:,::2,::2]]

        for i_patch in range(len(input_list)):
            # Look up the saved RGB prediction for this sample/patch;
            # list.index raises ValueError when no such file was found.
            file_name_patch = file_name + "_%05d"%i_patch
            idx = input_RGBs_names.index(file_name_patch)
            input_RGB_path = input_RGBs[idx]
            # HWC image in [0, 255] -> 1xCxHxW float tensor in [0, 1].
            input_RGB = torch.from_numpy(np.array(PILImage.open(input_RGB_path))/255.0).unsqueeze(0).permute(0,3,1,2).float().to(device)

            target_raw_patch = target_raw_list[i_patch]

            with torch.no_grad():
                reconstruct_raw = net(input_RGB, rev=True)

            pred_raw = reconstruct_raw.detach().permute(0,2,3,1)
            pred_raw = torch.clamp(pred_raw, 0, 1)

            target_raw_patch = target_raw_patch.permute(0,2,3,1)
            pred_raw = denorm(pred_raw, 255)
            target_raw_patch = denorm(target_raw_patch, 255)

            pred_raw = pred_raw.cpu().numpy()
            target_raw_patch = target_raw_patch.cpu().numpy().astype(np.float32)

            # Only channel 0 is written to the preview images.
            raw_pred = PILImage.fromarray(np.uint8(pred_raw[0,:,:,0]))
            raw_tar_pred = PILImage.fromarray(np.hstack((np.uint8(target_raw_patch[0,:,:,0]), np.uint8(pred_raw[0,:,:,0]))))

            raw_tar = PILImage.fromarray(np.uint8(target_raw_patch[0,:,:,0]))

            raw_pred.save(out_path+"raw_pred_%s_%05d.jpg"%(file_name, i_patch))
            raw_tar.save(out_path+"raw_tar_%s_%05d.jpg"%(file_name, i_patch))
            raw_tar_pred.save(out_path+"raw_gt_pred_%s_%05d.jpg"%(file_name, i_patch))

            # The .npy files keep all channels, rescaled back to [0, 1].
            np.save(out_path+"raw_pred_%s_%05d.npy"%(file_name, i_patch), pred_raw[0,:,:,:]/255.0)
            np.save(out_path+"raw_tar_%s_%05d.npy"%(file_name, i_patch), target_raw_patch[0,:,:,:]/255.0)

            del reconstruct_raw
__name__ == '__main__': + + torch.set_num_threads(4) + main(args) + diff --git a/third_party/DarkFeat/datasets/InvISP/test_rgb.py b/third_party/DarkFeat/datasets/InvISP/test_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..d1e054b899d9142609e3f90f4a12d367a45aeac0 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/test_rgb.py @@ -0,0 +1,105 @@ +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import torch +import numpy as np +import os, time, random +import argparse +from torch.utils.data import Dataset, DataLoader +from PIL import Image as PILImage + +from model.model import InvISPNet +from dataset.FiveK_dataset import FiveKDatasetTest +from config.config import get_arguments + +from utils.JPEG import DiffJPEG +from utils.commons import denorm, preprocess_test_patch +from tqdm import tqdm + +os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp') +os.environ['CUDA_VISIBLE_DEVICES'] = str(np.argmax([int(x.split()[2]) for x in open('tmp', 'r').readlines()])) +# os.environ['CUDA_VISIBLE_DEVICES'] = '7' +os.system('rm tmp') + +DiffJPEG = DiffJPEG(differentiable=True, quality=90).cuda() + +parser = get_arguments() +parser.add_argument("--ckpt", type=str, help="Checkpoint path.") +parser.add_argument("--out_path", type=str, default="./exps/", help="Path to save results. ") +parser.add_argument("--split_to_patch", dest='split_to_patch', action='store_true', help="Test on patch. 
") +args = parser.parse_args() +print("Parsed arguments: {}".format(args)) + + +ckpt_name = args.ckpt.split("/")[-1].split(".")[0] +if args.split_to_patch: + os.makedirs(args.out_path+"%s/results_metric_%s/"%(args.task, ckpt_name), exist_ok=True) + out_path = args.out_path+"%s/results_metric_%s/"%(args.task, ckpt_name) +else: + os.makedirs(args.out_path+"%s/results_%s/"%(args.task, ckpt_name), exist_ok=True) + out_path = args.out_path+"%s/results_%s/"%(args.task, ckpt_name) + + +def main(args): + # ======================================define the model============================================ + net = InvISPNet(channel_in=3, channel_out=3, block_num=8) + device = torch.device("cuda:0") + + net.to(device) + net.eval() + # load the pretrained weight if there exists one + if os.path.isfile(args.ckpt): + net.load_state_dict(torch.load(args.ckpt), strict=False) + print("[INFO] Loaded checkpoint: {}".format(args.ckpt)) + + print("[INFO] Start data load and preprocessing") + RAWDataset = FiveKDatasetTest(opt=args) + dataloader = DataLoader(RAWDataset, batch_size=1, shuffle=False, num_workers=0, drop_last=True) + + print("[INFO] Start test...") + for i_batch, sample_batched in enumerate(tqdm(dataloader)): + step_time = time.time() + + input, target_rgb, target_raw = sample_batched['input_raw'].to(device), sample_batched['target_rgb'].to(device), \ + sample_batched['target_raw'].to(device) + file_name = sample_batched['file_name'][0] + + if args.split_to_patch: + input_list, target_rgb_list, target_raw_list = preprocess_test_patch(input, target_rgb, target_raw) + else: + # remove [:,:,::2,::2] if you have enough GPU memory to test the full resolution + input_list, target_rgb_list, target_raw_list = [input[:,:,::2,::2]], [target_rgb[:,:,::2,::2]], [target_raw[:,:,::2,::2]] + + for i_patch in range(len(input_list)): + input_patch = input_list[i_patch] + target_rgb_patch = target_rgb_list[i_patch] + target_raw_patch = target_raw_list[i_patch] + + with torch.no_grad(): + 
reconstruct_rgb = net(input_patch) + reconstruct_rgb = torch.clamp(reconstruct_rgb, 0, 1) + + pred_rgb = reconstruct_rgb.detach().permute(0,2,3,1) + target_rgb_patch = target_rgb_patch.permute(0,2,3,1) + + pred_rgb = denorm(pred_rgb, 255) + target_rgb_patch = denorm(target_rgb_patch, 255) + pred_rgb = pred_rgb.cpu().numpy() + target_rgb_patch = target_rgb_patch.cpu().numpy().astype(np.float32) + + # print(type(pred_rgb)) + pred = PILImage.fromarray(np.uint8(pred_rgb[0,:,:,:])) + tar_pred = PILImage.fromarray(np.hstack((np.uint8(target_rgb_patch[0,:,:,:]), np.uint8(pred_rgb[0,:,:,:])))) + + tar = PILImage.fromarray(np.uint8(target_rgb_patch[0,:,:,:])) + + pred.save(out_path+"pred_%s_%05d.jpg"%(file_name, i_patch), quality=90, subsampling=1) + tar.save(out_path+"tar_%s_%05d.jpg"%(file_name, i_patch), quality=90, subsampling=1) + tar_pred.save(out_path+"gt_pred_%s_%05d.jpg"%(file_name, i_patch), quality=90, subsampling=1) + + del reconstruct_rgb + +if __name__ == '__main__': + torch.set_num_threads(4) + main(args) + diff --git a/third_party/DarkFeat/datasets/InvISP/train.py b/third_party/DarkFeat/datasets/InvISP/train.py new file mode 100644 index 0000000000000000000000000000000000000000..16186cb38d825ac1299e5c4164799d35bfa79907 --- /dev/null +++ b/third_party/DarkFeat/datasets/InvISP/train.py @@ -0,0 +1,98 @@ +import numpy as np +import os, time, random +import argparse +import json + +import torch.nn.functional as F +import torch +from torch.utils.data import Dataset, DataLoader +from torch.optim import lr_scheduler + +from model.model import InvISPNet +from dataset.FiveK_dataset import FiveKDatasetTrain +from config.config import get_arguments + +from utils.JPEG import DiffJPEG + +os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp') +os.environ['CUDA_VISIBLE_DEVICES'] = str(np.argmax([int(x.split()[2]) for x in open('tmp', 'r').readlines()])) +# os.environ['CUDA_VISIBLE_DEVICES'] = "1" +os.system('rm tmp') + +DiffJPEG = DiffJPEG(differentiable=True, 
def main(args):
    """Train ``InvISPNet`` on RAW/RGB pairs with a JPEG step in the loop.

    Each step renders RGB from RAW, scores it against the target RGB, pushes
    the clamped rendering through the module-level ``DiffJPEG`` layer, runs
    the network in reverse and scores the recovered RAW.  The latest weights
    are saved after every epoch and a numbered snapshot every 10 epochs.

    Args:
        args: parsed command-line namespace; this function reads ``resume``,
            ``out_path``, ``task``, ``lr``, ``batch_size`` and ``rgb_weight``
            and forwards the whole namespace to ``FiveKDatasetTrain``.
    """
    # ======================================define the model======================================
    net = InvISPNet(channel_in=3, channel_out=3, block_num=8)
    net.cuda()
    # load the pretrained weight if there exists one
    # Only the model weights are restored; optimizer/scheduler start fresh.
    if args.resume:
        net.load_state_dict(torch.load(args.out_path+"%s/checkpoint/latest.pth"%args.task))
        print("[INFO] loaded " + args.out_path+"%s/checkpoint/latest.pth"%args.task)

    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    # Halve the learning rate after epochs 50 and 80.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[50, 80], gamma=0.5)

    print("[INFO] Start data loading and preprocessing")
    RAWDataset = FiveKDatasetTrain(opt=args)
    dataloader = DataLoader(RAWDataset, batch_size=args.batch_size, shuffle=True, num_workers=0, drop_last=True)

    print("[INFO] Start to train")
    step = 0
    for epoch in range(0, 300):
        epoch_time = time.time()

        for i_batch, sample_batched in enumerate(dataloader):
            step_time = time.time()

            input, target_rgb, target_raw = sample_batched['input_raw'].cuda(), sample_batched['target_rgb'].cuda(), \
                                        sample_batched['target_raw'].cuda()

            # RAW -> RGB, clamped to [0, 1] before the loss and the JPEG layer.
            reconstruct_rgb = net(input)
            reconstruct_rgb = torch.clamp(reconstruct_rgb, 0, 1)
            # NOTE(review): L1 is used unconditionally here, regardless of
            # the --loss command-line choice.
            rgb_loss = F.l1_loss(reconstruct_rgb, target_rgb)
            # JPEG round trip, then RGB -> RAW through the reverse pass.
            reconstruct_rgb = DiffJPEG(reconstruct_rgb)
            reconstruct_raw = net(reconstruct_rgb, rev=True)
            raw_loss = F.l1_loss(reconstruct_raw, target_raw)

            loss = args.rgb_weight * rgb_loss + raw_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print("task: %s Epoch: %d Step: %d || loss: %.5f raw_loss: %.5f rgb_loss: %.5f || lr: %f time: %f"%(
                args.task, epoch, step, loss.detach().cpu().numpy(), raw_loss.detach().cpu().numpy(),
                rgb_loss.detach().cpu().numpy(), optimizer.param_groups[0]['lr'], time.time()-step_time
            ))
            step += 1

        # Overwrite the rolling checkpoint every epoch.
        torch.save(net.state_dict(), args.out_path+"%s/checkpoint/latest.pth"%args.task)
        if (epoch+1) % 10 == 0:
            # os.makedirs(args.out_path+"%s/checkpoint/%04d"%(args.task,epoch), exist_ok=True)
            torch.save(net.state_dict(), args.out_path+"%s/checkpoint/%04d.pth"%(args.task,epoch))
            print("[INFO] Successfully saved "+args.out_path+"%s/checkpoint/%04d.pth"%(args.task,epoch))
        scheduler.step()

        print("[INFO] Epoch time: ", time.time()-epoch_time, "task: ", args.task)
class DiffJPEG(nn.Module):
    """JPEG compress/decompress round trip wrapped as a module.

    Args:
        differentiable(bool): if true, rounding uses the custom
            ``diff_round`` function; otherwise ``torch.round``.
        quality(float): quality setting of the JPEG scheme, converted to a
            factor with ``quality_to_factor``.
    """

    def __init__(self, differentiable=True, quality=75):
        super().__init__()
        rounding = diff_round if differentiable else torch.round
        factor = quality_to_factor(quality)
        # Both stages share the same rounding function and factor.
        self.compress = compress_jpeg(rounding=rounding, factor=factor)
        self.decompress = decompress_jpeg(rounding=rounding, factor=factor)

    def forward(self, x):
        """Compress ``x`` and decompress it back to its original height/width.

        The spatial size is read from dimensions 2 and 3 of ``x``.
        """
        height, width = x.shape[2], x.shape[3]
        y, cb, cr = self.compress(x)
        return self.decompress(y, cb, cr, height, width)
def quality_to_factor(quality):
    """Map a JPEG quality setting to a compression factor.

    Input:
        quality(float): quality for jpeg compression
    Output:
        factor(float): ``50 / quality`` below quality 50, otherwise
            ``(200 - 2 * quality) / 100``
    """
    scaled = 5000. / quality if quality < 50 else 200. - quality*2
    return scaled / 100.
def preprocess_test_patch(input_image, target_image, gt_image):
    """Cut three aligned images into a 3 x 3 grid of patches.

    The grid boundaries come from the height (dim 2) and width (dim 3) of
    ``input_image`` and are applied unchanged to all three inputs.  Patches
    are returned row by row.

    Returns:
        Three lists of nine patches each: input, target and gt patches.
    """
    height = input_image.shape[2]
    width = input_image.shape[3]
    # Four cut positions per axis; truncation matches the per-patch bounds.
    row_edges = [int(k * height / 3) for k in range(4)]
    col_edges = [int(k * width / 3) for k in range(4)]

    input_patches, target_patches, gt_patches = [], [], []
    for r in range(3):
        rows = slice(row_edges[r], row_edges[r + 1])
        for c in range(3):
            cols = slice(col_edges[c], col_edges[c + 1])
            input_patches.append(input_image[:, :, rows, cols])
            target_patches.append(target_image[:, :, rows, cols])
            gt_patches.append(gt_image[:, :, rows, cols])

    return input_patches, target_patches, gt_patches
class rgb_to_ycbcr_jpeg(nn.Module):
    """ Converts RGB image to YCbCr
    Input:
        image(tensor): batch x 3 x height x width
    Output:
        result(tensor): batch x height x width x 3
    """
    def __init__(self):
        super(rgb_to_ycbcr_jpeg, self).__init__()
        matrix = np.array(
            [[0.299, 0.587, 0.114], [-0.168736, -0.331264, 0.5],
             [0.5, -0.418688, -0.081312]], dtype=np.float32).T
        self.shift = nn.Parameter(torch.tensor([0., 128., 128.]))
        self.matrix = nn.Parameter(torch.from_numpy(matrix))

    def forward(self, image):
        # Channels-last so the 3 x 3 matrix contracts over the colour axis.
        image = image.permute(0, 2, 3, 1)
        # The former `result.view(image.shape)` discarded its return value and
        # had no effect, so it is dropped.
        return torch.tensordot(image, self.matrix, dims=1) + self.shift


class chroma_subsampling(nn.Module):
    """ Chroma subsampling on CbCr channels
    Input:
        image(tensor): batch x height x width x 3
    Output:
        y(tensor): batch x height x width
        cb(tensor): batch x height/2 x width/2
        cr(tensor): batch x height/2 x width/2
    """
    def __init__(self):
        super(chroma_subsampling, self).__init__()

    def forward(self, image):
        channels_first = image.permute(0, 3, 1, 2)

        def halve(plane):
            # Functional 2 x 2 average pooling: no module is built per call
            # and no defensive copy is needed because nothing is written.
            pooled = nn.functional.avg_pool2d(
                plane, kernel_size=2, stride=(2, 2), count_include_pad=False)
            return pooled.squeeze(1)

        cb = halve(channels_first[:, 1:2])
        cr = halve(channels_first[:, 2:3])
        return image[:, :, :, 0], cb, cr
class dct_8x8(nn.Module):
    """ Discrete Cosine Transformation
    Input:
        image(tensor): tensor whose last two dimensions are 8 x 8
    Output:
        dcp(tensor): same shape as the input
    """
    def __init__(self):
        super(dct_8x8, self).__init__()
        # tensor[x, y, u, v]: cosine basis value for pixel (x, y) and
        # frequency (u, v).
        tensor = np.zeros((8, 8, 8, 8), dtype=np.float32)
        for x, y, u, v in itertools.product(range(8), repeat=4):
            tensor[x, y, u, v] = np.cos((2 * x + 1) * u * np.pi / 16) * np.cos(
                (2 * y + 1) * v * np.pi / 16)
        alpha = np.array([1. / np.sqrt(2)] + [1] * 7)
        self.tensor = nn.Parameter(torch.from_numpy(tensor).float())
        self.scale = nn.Parameter(torch.from_numpy(np.outer(alpha, alpha) * 0.25).float())

    def forward(self, image):
        image = image - 128
        # The former `result.view(image.shape)` discarded its return value and
        # had no effect, so it is dropped.
        return self.scale * torch.tensordot(image, self.tensor, dims=2)


class y_quantize(nn.Module):
    """ JPEG Quantization for Y channel
    Input:
        image(tensor): batch x height x width
        rounding(function): rounding function to use
        factor(float): Degree of compression
    Output:
        image(tensor): batch x height x width
    """
    def __init__(self, rounding, factor=1):
        super(y_quantize, self).__init__()
        self.rounding = rounding
        self.factor = factor
        self.y_table = JPEG_utils.y_table

    def forward(self, image):
        # Divide by the scaled table, then apply the supplied rounding.
        image = image.float() / (self.y_table * self.factor)
        image = self.rounding(image)
        return image


class c_quantize(nn.Module):
    """ JPEG Quantization for CrCb channels
    Input:
        image(tensor): batch x height x width
        rounding(function): rounding function to use
        factor(float): Degree of compression
    Output:
        image(tensor): batch x height x width
    """
    def __init__(self, rounding, factor=1):
        super(c_quantize, self).__init__()
        self.rounding = rounding
        self.factor = factor
        self.c_table = JPEG_utils.c_table

    def forward(self, image):
        # Divide by the scaled table, then apply the supplied rounding.
        image = image.float() / (self.c_table * self.factor)
        image = self.rounding(image)
        return image
class compress_jpeg(nn.Module):
    """ Full JPEG compression algorithm
    Input:
        imgs(tensor): batch x 3 x height x width
        rounding(function): rounding function to use
        factor(float): Compression factor
    Output:
        (y, cb, cr): quantized DCT blocks for each component
    """
    def __init__(self, rounding=torch.round, factor=1):
        super().__init__()
        colour_stage = [
            rgb_to_ycbcr_jpeg(),
            # comment this line if no subsampling
            chroma_subsampling(),
        ]
        self.l1 = nn.Sequential(*colour_stage)
        self.l2 = nn.Sequential(block_splitting(), dct_8x8())
        self.c_quantize = c_quantize(rounding=rounding, factor=factor)
        self.y_quantize = y_quantize(rounding=rounding, factor=factor)

    def forward(self, image):
        # Input is scaled by 255 before the colour conversion.
        y, cb, cr = self.l1(image * 255)

        # Each component is split into blocks and transformed, then quantized
        # with its own table: luma for y, chroma for cb and cr.
        y = self.y_quantize(self.l2(y))
        cb = self.c_quantize(self.l2(cb))
        cr = self.c_quantize(self.l2(cr))
        return y, cb, cr
class y_dequantize(nn.Module):
    """ Dequantize Y channel
    Inputs:
        image(tensor): batch x height x width
        factor(float): compression factor
    Outputs:
        image(tensor): batch x height x width
    """
    def __init__(self, factor=1):
        super().__init__()
        self.y_table = utils.y_table
        self.factor = factor

    def forward(self, image):
        step = self.y_table * self.factor
        return image * step


class c_dequantize(nn.Module):
    """ Dequantize CbCr channel
    Inputs:
        image(tensor): batch x height x width
        factor(float): compression factor
    Outputs:
        image(tensor): batch x height x width
    """
    def __init__(self, factor=1):
        super().__init__()
        self.factor = factor
        self.c_table = utils.c_table

    def forward(self, image):
        step = self.c_table * self.factor
        return image * step
class block_merging(nn.Module):
    """ Merge patches into image
    Inputs:
        patches(tensor): batch x height*width/64 x 8 x 8
        height(int)
        width(int)
    Output:
        image(tensor): batch x height x width
    """
    def __init__(self):
        super().__init__()

    def forward(self, patches, height, width):
        block = 8
        n = patches.shape[0]
        # (batch, block row, block col, y, x) -> (batch, block row, y, block col, x)
        grid = patches.view(n, height // block, width // block, block, block)
        rows_first = grid.permute(0, 1, 3, 2, 4).contiguous()
        return rows_first.view(n, height, width)


class chroma_upsampling(nn.Module):
    """ Upsample chroma layers
    Input:
        y(tensor): y channel image
        cb(tensor): cb channel
        cr(tensor): cr channel
    Output:
        image(tensor): batch x height x width x 3
    """
    def __init__(self):
        super().__init__()

    def forward(self, y, cb, cr):
        def double(plane, k=2):
            # Each sample becomes a k x k block of identical values.
            return plane.repeat_interleave(k, dim=1).repeat_interleave(k, dim=2)

        return torch.stack([y, double(cb), double(cr)], dim=3)
self).__init__() + + matrix = np.array( + [[1., 0., 1.402], [1, -0.344136, -0.714136], [1, 1.772, 0]], + dtype=np.float32).T + self.shift = nn.Parameter(torch.tensor([0, -128., -128.])) + self.matrix = nn.Parameter(torch.from_numpy(matrix)) + + def forward(self, image): + result = torch.tensordot(image + self.shift, self.matrix, dims=1) + #result = torch.from_numpy(result) + result.view(image.shape) + return result.permute(0, 3, 1, 2) + + +class decompress_jpeg(nn.Module): + """ Full JPEG decompression algortihm + Input: + compressed(dict(tensor)): batch x h*w/64 x 8 x 8 + rounding(function): rounding function to use + factor(float): Compression factor + Ouput: + image(tensor): batch x 3 x height x width + """ + # def __init__(self, height, width, rounding=torch.round, factor=1): + def __init__(self, rounding=torch.round, factor=1): + super(decompress_jpeg, self).__init__() + self.c_dequantize = c_dequantize(factor=factor) + self.y_dequantize = y_dequantize(factor=factor) + self.idct = idct_8x8() + self.merging = block_merging() + # comment this line if no subsampling + self.chroma = chroma_upsampling() + self.colors = ycbcr_to_rgb_jpeg() + + # self.height, self.width = height, width + + def forward(self, y, cb, cr, height, width): + components = {'y': y, 'cb': cb, 'cr': cr} + # height = y.shape[0] + # width = y.shape[1] + self.height = height + self.width = width + for k in components.keys(): + if k in ('cb', 'cr'): + comp = self.c_dequantize(components[k]) + # comment this line if no subsampling + height, width = int(self.height/2), int(self.width/2) + # height, width = int(self.height), int(self.width) + + else: + comp = self.y_dequantize(components[k]) + # comment this line if no subsampling + height, width = self.height, self.width + comp = self.idct(comp) + components[k] = self.merging(comp, height, width) + # + # comment this line if no subsampling + image = self.chroma(components['y'], components['cb'], components['cr']) + # image = 
def read_list(list_path):
    """Read a text file and return its lines without line endings.

    Prints a failure notice and exits the process with status -1 when
    `list_path` is None or does not exist.
    """
    if list_path is None or not os.path.exists(list_path):
        print(Notify.FAIL, 'Not exist', list_path, Notify.ENDC)
        exit(-1)
    # Context manager so the handle is closed deterministically; the
    # previous `open(...).read()` never closed it.
    with open(list_path) as list_file:
        return list_file.read().splitlines()
class GL3DDataset(Dataset):
    """Dataset of image pairs described by `.match_set` files."""

    def __init__(self, dataset_dir, config, data_split, is_training):
        self.dataset_dir = dataset_dir
        self.config = config
        self.is_training = is_training
        self.data_split = data_split

        (self.match_set_list,
         self.global_img_list,
         self.global_depth_list) = self.prepare_match_sets()

    def __len__(self):
        return len(self.match_set_list)

    def __getitem__(self, idx):
        """Decode one match set and load its two images and depth maps."""
        decoded = np.fromfile(self.match_set_list[idx], dtype=np.float32)

        # Flat float32 record: fixed offsets select each field.
        idx0 = int(decoded[0])
        idx1 = int(decoded[1])
        inlier_num = int(decoded[2])
        ori_img_size0 = decoded[3:5].reshape(2)
        ori_img_size1 = decoded[5:7].reshape(2)
        K0 = decoded[7:16].reshape(3, 3)
        K1 = decoded[16:25].reshape(3, 3)
        rel_pose = decoded[34:46].reshape(3, 4)

        # parse images.
        img0 = _parse_img(self.global_img_list, idx0, self.config)
        img1 = _parse_img(self.global_img_list, idx1, self.config)
        # parse depths
        depth0 = _parse_depth(self.global_depth_list, idx0, self.config)
        depth1 = _parse_depth(self.global_depth_list, idx1, self.config)

        # photometric augmentation
        img0 = photaug(img0)
        img1 = photaug(img1)

        sample = {
            'img0': img0 / 255.,
            'img1': img1 / 255.,
            'depth0': depth0,
            'depth1': depth1,
            'ori_img_size0': ori_img_size0,
            'ori_img_size1': ori_img_size1,
            'K0': K0,
            'K1': K1,
            'rel_pose': rel_pose,
            'inlier_num': inlier_num
        }
        return sample

    def points_to_2D(self, pnts, H, W):
        """Return an H x W array with 1 at each (x, y) point and 0 elsewhere."""
        labels = np.zeros((H, W))
        xy = pnts.astype(int)
        # Column 0 is x (array column), column 1 is y (array row).
        labels[xy[:, 1], xy[:, 0]] = 1
        return labels

    def prepare_match_sets(self, q_diff_thld=3, rot_diff_thld=60):
        """Get match sets.
        Args:
            q_diff_thld: Lower bound passed on to `get_match_set_list`.
            rot_diff_thld: Upper bound passed on to `get_match_set_list`.
        Returns:
            match_set_list: List of match sets path.
            global_img_list: List of global image path.
            global_depth_list: List of global depth path.
        """
        # get necessary lists.
        list_dir = os.path.join(self.dataset_dir, 'list', self.data_split)
        # The content is not used, but the call is kept: read_list exits when
        # the file is missing.
        read_list(os.path.join(list_dir, 'image_index_offset.txt'))
        global_img_list = [
            os.path.join(self.dataset_dir, rel)
            for rel in read_list(os.path.join(list_dir, 'image_list.txt'))
        ]
        global_depth_list = [
            os.path.join(self.dataset_dir, rel)
            for rel in read_list(os.path.join(list_dir, 'depth_list.txt'))
        ]

        if self.is_training:
            imageset_list_name = 'imageset_train.txt'
        else:
            imageset_list_name = 'imageset_test.txt'
        match_set_list = self.get_match_set_list(
            os.path.join(list_dir, imageset_list_name), q_diff_thld, rot_diff_thld)
        return match_set_list, global_img_list, global_depth_list

    def get_match_set_list(self, imageset_list_path, q_diff_thld, rot_diff_thld):
        """Get the path list of match sets.
        Args:
            imageset_list_path: Path to imageset list.
            q_diff_thld: Minimum value of the third `_`-separated file name field.
            rot_diff_thld: Maximum value of the fourth `_`-separated file name field.
        Returns:
            match_set_list: List of match set path.
        """
        imageset_list = [os.path.join(self.dataset_dir, 'data', entry)
                         for entry in read_list(imageset_list_path)]
        print(Notify.INFO, 'Use # imageset', len(imageset_list), Notify.ENDC)
        match_set_list = []
        for imageset in imageset_list:
            match_set_folder = os.path.join(imageset, 'match_sets')
            if not os.path.exists(match_set_folder):
                continue
            for filename in os.listdir(match_set_folder):
                name, ext = os.path.splitext(filename)
                if ext != '.match_set':
                    continue
                splits = name.split('_')
                q_diff = int(splits[2])
                rot_diff = int(splits[3])
                # keep only pairs inside both thresholds.
                if q_diff >= q_diff_thld and rot_diff <= rot_diff_thld:
                    match_set_list.append(
                        os.path.join(match_set_folder, filename))

        print(Notify.INFO, 'Get # match sets', len(match_set_list), Notify.ENDC)
        return match_set_list
# photon shot noise
def addPStarNoise(img, K):
    """Replace `img` by Poisson samples with mean `img / K`, scaled back by `K`."""
    return np.random.poisson(img / K).astype(np.float32) * K


# read noise
# tukey lambda distribution
def addGStarNoise(img, K, G_shape, G_scale_param):
    """Add Tukey-lambda distributed noise to `img`.

    Args:
        img: array to perturb.
        K: gain; only its logarithm enters the scale model.
        G_shape: samples of the shape parameter; a 10-bin histogram over
            (-0.25, 0.25) of these is sampled to pick lambda.
        G_scale_param: dict with 'slope', 'bias' and 'sigma' of the
            log-linear scale model.
    """
    # sample a shape parameter [lambda] from histogram of samples
    a, b = np.histogram(G_shape, bins=10, range=(-0.25, 0.25))
    a, b = np.array(a), np.array(b)
    a = a / a.sum()

    rand_num = random.uniform(0, 1)
    # Clamp to the last bin: if rounding leaves the final cumulative value
    # below `rand_num`, the raw count equals len(a) and b[idx + 1] would be
    # out of range.
    idx = min(int(np.sum(np.cumsum(a) < rand_num)), len(a) - 1)
    lam = random.uniform(b[idx], b[idx+1])

    # calculate scale parameter [G_scale]
    log_K = np.log(K)
    log_G_scale = np.random.standard_normal() * G_scale_param['sigma'] * 1 +\
        G_scale_param['slope'] * log_K + G_scale_param['bias']
    G_scale = np.exp(log_G_scale)

    return img + tukeylambda.rvs(lam, scale=G_scale, size=img.shape).astype(np.float32)


# row noise
# one Gaussian offset per row, shared by every column of that row
def addRowNoise(img, K, R_scale_param):
    """Add a per-row offset drawn from a scaled standard normal.

    Args:
        img: 2-D array; one offset is drawn per row (axis 0).
        K: gain; only its logarithm enters the scale model.
        R_scale_param: dict with 'slope', 'bias' and 'sigma' of the
            log-linear scale model.
    """
    # calculate scale parameter [R_scale]
    log_K = np.log(K)
    log_R_scale = np.random.standard_normal() * R_scale_param['sigma'] * 1 +\
        R_scale_param['slope'] * log_K + R_scale_param['bias']
    R_scale = np.exp(log_R_scale)

    row_noise = np.random.randn(img.shape[0], 1).astype(np.float32) * R_scale
    return img + np.tile(row_noise, (1, img.shape[1]))


# quantization noise
# uniform distribution
def addQuantNoise(img, q):
    """Add uniform noise from [-q/2, q/2) to every element of `img`."""
    return img + np.random.uniform(low=-0.5*q, high=0.5*q, size=img.shape)


def sampleK(Kmin, Kmax):
    """Draw a value between `Kmin` and `Kmax`, uniform in log space."""
    return np.exp(np.random.uniform(low=np.log(Kmin), high=np.log(Kmax)))
class NoiseSimulator:
    """Convert RGB images to raw via an invertible ISP network, add sensor
    noise, and convert back.

    Raw arrays use an RGGB 2 x 2 layout (R at [0, 0], G at [0, 1] and
    [1, 0], B at [1, 1]), as written by `invDemosaic`.
    """

    def __init__(self, device, ckpt_path='./datasets/InvISP/pretrained/canon.pth'):
        self.device = device

        # load Invertible ISP Network
        self.net = InvISPNet(channel_in=3, channel_out=3, block_num=8).to(self.device).eval()
        # map_location lets a checkpoint saved from another device (e.g. a GPU)
        # load on whatever device was requested here.
        state = torch.load(ckpt_path, map_location=self.device)
        self.net.load_state_dict(state, strict=False)
        print(Notify.INFO, "Loaded ISPNet checkpoint: {}".format(ckpt_path), Notify.ENDC)

        # white balance parameters
        self.wb = np.array([2020.0, 1024.0, 1458.0, 1024.0])

        # use Canon EOS 5D4 noise parameters provided by ELD
        self.camera_params = camera_params

        # random specify exposure time ratio from 50 to 150
        self.ratio_min = 50
        self.ratio_max = 150

    # inverse demosaic
    # input: [H, W, 3]
    # output: [H, W]
    def invDemosaic(self, img):
        """Sample one colour per pixel from an RGB image (RGGB layout)."""
        raw_img = np.ones(img.shape[:2])
        raw_img[::2, ::2] = img[::2, ::2, 0]
        raw_img[::2, 1::2] = img[::2, 1::2, 1]
        raw_img[1::2, ::2] = img[1::2, ::2, 1]
        raw_img[1::2, 1::2] = img[1::2, 1::2, 2]
        return raw_img

    # demosaic - nearest ver
    # input: [H, W]
    # output: [H, W, 3]
    def demosaicNearest(self, img):
        """Fill each 2 x 2 cell with the samples found inside that cell."""
        raw = np.ones((img.shape[0], img.shape[1], 3))
        for dy in (0, 1):
            for dx in (0, 1):
                # red and blue: one sample per cell, copied to all four pixels
                raw[dy::2, dx::2, 0] = img[::2, ::2]
                raw[dy::2, dx::2, 2] = img[1::2, 1::2]
        for dx in (0, 1):
            # green: the top row of a cell takes G1, the bottom row takes G2
            raw[::2, dx::2, 1] = img[::2, 1::2]
            raw[1::2, dx::2, 1] = img[1::2, ::2]
        return raw

    # demosaic
    # input: [H, W]
    # output: [H, W, 3]
    def demosaic(self, img):
        return colour_demosaicing.demosaicing_CFA_Bayer_bilinear(img, 'RGGB')

    # load rgb image
    def path2rgb(self, path):
        return torch.from_numpy(np.array(PILImage.open(path))/255.0)

    # InvISP
    # input: rgb image [H, W, 3]
    # output: raw image [H, W]
    def rgb2raw(self, rgb, batched=False):
        """Run the network in reverse and undo gamma, white balance and
        black-level subtraction, then mosaic the result."""
        # 1. rgb -> invnet
        if not batched:
            rgb = rgb.unsqueeze(0)

        rgb = rgb.permute(0, 3, 1, 2).float().to(self.device)
        with torch.no_grad():
            reconstruct_raw = self.net(rgb, rev=True)

        pred_raw = reconstruct_raw.detach().permute(0, 2, 3, 1)
        pred_raw = torch.clamp(pred_raw, 0, 1)

        if not batched:
            pred_raw = pred_raw[0, ...]

        pred_raw = pred_raw.cpu().numpy()

        # 2. -> inv gamma
        norm_value = np.power(16383, 1/2.2)
        pred_raw *= norm_value
        pred_raw = np.power(pred_raw, 2.2)

        # 3. -> inv white balance
        wb = self.wb / self.wb.max()
        pred_raw = pred_raw / wb[:-1]

        # 4. -> add black level
        pred_raw += self.camera_params['black_level']

        # 5. -> inv demosaic
        if not batched:
            pred_raw = self.invDemosaic(pred_raw)
        else:
            pred_raw = np.stack(
                [self.invDemosaic(pred_raw[i]) for i in range(pred_raw.shape[0])],
                axis=0)

        return pred_raw

    def raw2noisyRaw(self, raw, ratio_dec=1, batched=False):
        """Darken `raw` by a random exposure ratio, add shot, read, row and
        quantization noise, then scale back up by the same ratio.

        `ratio_dec` shrinks the sampled ratio towards 1; it is now applied in
        the batched branch as well (it used to be ignored there).
        """
        if not batched:
            ratio = (random.uniform(self.ratio_min, self.ratio_max) - 1) * ratio_dec + 1
            raw = raw.copy() / ratio

            K = sampleK(self.camera_params['Kmin'], self.camera_params['Kmax'])
            q = 1 / (self.camera_params['max_value'] - self.camera_params['black_level'])

            raw = addPStarNoise(raw, K)
            raw = addGStarNoise(raw, K, self.camera_params['G_shape'], self.camera_params['Profile-1']['G_scale'])
            raw = addRowNoise(raw, K, self.camera_params['Profile-1']['R_scale'])
            raw = addQuantNoise(raw, q)
            raw *= ratio
            return raw

        else:
            raw = raw.copy()
            for i in range(raw.shape[0]):
                # Same ratio formula as the single-image branch.
                ratio = (random.uniform(self.ratio_min, self.ratio_max) - 1) * ratio_dec + 1
                raw[i] /= ratio

                K = sampleK(self.camera_params['Kmin'], self.camera_params['Kmax'])
                q = 1 / (self.camera_params['max_value'] - self.camera_params['black_level'])

                raw[i] = addPStarNoise(raw[i], K)
                raw[i] = addGStarNoise(raw[i], K, self.camera_params['G_shape'], self.camera_params['Profile-1']['G_scale'])
                raw[i] = addRowNoise(raw[i], K, self.camera_params['Profile-1']['R_scale'])
                raw[i] = addQuantNoise(raw[i], q)
                raw[i] *= ratio
            return raw

    def raw2rgb(self, raw, batched=False):
        """Demosaic, normalise and run the network forward to get RGB in [0, 1]."""
        # 1. -> demosaic
        if not batched:
            raw = self.demosaic(raw)
        else:
            raw = np.stack([self.demosaic(raw[i]) for i in range(raw.shape[0])], axis=0)

        # 2. -> substract black level
        raw -= self.camera_params['black_level']
        raw = np.clip(raw, 0, self.camera_params['max_value'] - self.camera_params['black_level'])

        # 3. -> white balance
        wb = self.wb / self.wb.max()
        raw = raw * wb[:-1]

        # 4. -> gamma
        norm_value = np.power(16383, 1/2.2)
        raw = np.power(raw, 1/2.2)
        raw /= norm_value

        # 5. -> ispnet
        if not batched:
            input_raw_img = torch.Tensor(raw).permute(2, 0, 1).float().to(self.device)[np.newaxis, ...]
        else:
            input_raw_img = torch.Tensor(raw).permute(0, 3, 1, 2).float().to(self.device)

        with torch.no_grad():
            reconstruct_rgb = self.net(input_raw_img)
            reconstruct_rgb = torch.clamp(reconstruct_rgb, 0, 1)

        pred_rgb = reconstruct_rgb.detach().permute(0, 2, 3, 1)

        if not batched:
            pred_rgb = pred_rgb[0, ...]
        pred_rgb = pred_rgb.cpu().numpy()

        return pred_rgb

    def raw2packedRaw(self, raw, batched=False):
        """Normalise a raw mosaic and pack each 2 x 2 cell into 4 channels.

        Channel order is [0, 0], [0, 1], [1, 1], [1, 0] of each cell. The
        input array is left untouched (it used to be modified in place).
        """
        # 1. -> substract black level
        raw = raw - self.camera_params['black_level']
        raw = np.clip(raw, 0, self.camera_params['max_value'] - self.camera_params['black_level'])
        raw = raw / self.camera_params['max_value']

        # 2. pack
        if not batched:
            im = np.expand_dims(raw, axis=2)
            img_shape = im.shape
            H = img_shape[0]
            W = img_shape[1]

            out = np.concatenate((im[0:H:2, 0:W:2, :],
                                  im[0:H:2, 1:W:2, :],
                                  im[1:H:2, 1:W:2, :],
                                  im[1:H:2, 0:W:2, :]), axis=2)
        else:
            im = np.expand_dims(raw, axis=3)
            img_shape = im.shape
            H = img_shape[1]
            W = img_shape[2]

            out = np.concatenate((im[:, 0:H:2, 0:W:2, :],
                                  im[:, 0:H:2, 1:W:2, :],
                                  im[:, 1:H:2, 1:W:2, :],
                                  im[:, 1:H:2, 0:W:2, :]), axis=3)
        return out

    def raw2demosaicRaw(self, raw, batched=False):
        """Demosaic `raw`, subtract the black level and divide by max_value."""
        # 1. -> demosaic
        if not batched:
            raw = self.demosaic(raw)
        else:
            raw = np.stack([self.demosaic(raw[i]) for i in range(raw.shape[0])], axis=0)

        # 2. -> substract black level
        raw -= self.camera_params['black_level']
        raw = np.clip(raw, 0, self.camera_params['max_value'] - self.camera_params['black_level'])
        raw /= self.camera_params['max_value']
        return raw
def random_brightness_np(image, max_abs_change=50):
    """Add one random offset from [-max_abs_change, max_abs_change] and clip to [0, 255]."""
    delta = random.uniform(-max_abs_change, max_abs_change)
    return np.clip(image + delta, 0, 255)


def random_contrast_np(image, strength_range=(0.3, 1.5)):
    """Scale deviations from the image mean by a random factor and clip to [0, 255].

    `strength_range` is any (low, high) pair; the default is a tuple so it
    cannot be mutated between calls.
    """
    delta = random.uniform(*strength_range)
    mean = image.mean()
    return np.clip((image - mean) * delta + mean, 0, 255)


def motion_blur_np(img, max_kernel_size=3):
    """Blur with a random line kernel weighted by a Gaussian, clipped to [0, 255]."""
    # Either vertical, horizontal or diagonal blur
    mode = np.random.choice(['h', 'v', 'diag_down', 'diag_up'])
    # Integer upper bound (the former float bound depended on numpy
    # truncating it); the result is always odd.
    ksize = np.random.randint(0, (max_kernel_size + 1) // 2) * 2 + 1
    center = int(ksize) // 2
    kernel = np.zeros((ksize, ksize))
    if mode == 'h':
        kernel[center, :] = 1.
    elif mode == 'v':
        kernel[:, center] = 1.
    elif mode == 'diag_down':
        kernel = np.eye(ksize)
    elif mode == 'diag_up':
        kernel = np.flip(np.eye(ksize), 0)
    var = ksize * ksize / 16.
    grid = np.repeat(np.arange(ksize)[:, np.newaxis], ksize, axis=-1)
    gaussian = np.exp(-(np.square(grid-center) +
                        np.square(grid.T-center))/(2.*var))
    kernel *= gaussian
    kernel /= np.sum(kernel)
    img = cv2.filter2D(img, -1, kernel)
    return np.clip(img, 0, 255)


def additive_gaussian_noise(image, stddev_range=(5, 95)):
    """Add zero-mean Gaussian noise with a random standard deviation, clipped to [0, 255].

    `stddev_range` is any (low, high) pair; the default is a tuple so it
    cannot be mutated between calls.
    """
    stddev = random.uniform(*stddev_range)
    noise = np.random.normal(size=image.shape, scale=stddev)
    noisy_image = np.clip(image + noise, 0, 255)
    return noisy_image


def photaug(img):
    """Apply random brightness, random contrast and motion blur, in that order."""
    img = random_brightness_np(img)
    img = random_contrast_np(img)
    # Gaussian noise augmentation is intentionally not applied here.
    img = motion_blur_np(img)
    return img
4.513311442889859e+03/(4502/sizer[1]) +K = np.eye(3) +K[0,0] = focallength_x +K[1,1] = focallength_y +K[0,2] = 3.363322177533149e+03/(6744/sizer[0])# * 0.5 +K[1,2] = 2.291824660547715e+03/(4502/sizer[1])# * 0.5 + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='DarkFeat demo', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input', type=str, + help='path to an image directory') + parser.add_argument( + '--output_dir', type=str, default=None, + help='Directory where to write output frames (If None, no output)') + + parser.add_argument( + '--image_glob', type=str, nargs='+', default=['*.ARW'], + help='Glob if a directory of images is specified') + parser.add_argument( + '--resize', type=int, nargs='+', default=[640, 480], + help='Resize the input image before running inference. If two numbers, ' + 'resize to the exact dimensions, if one number, resize the max ' + 'dimension, if -1, do not resize') + parser.add_argument( + '--force_cpu', action='store_true', + help='Force pytorch to run in CPU mode.') + parser.add_argument('--model_path', type=str, + help='Path to the pretrained model') + + opt = parser.parse_args() + print(opt) + + assert len(opt.resize) == 2 + print('Will resize to {}x{} (WxH)'.format(opt.resize[0], opt.resize[1])) + + device = 'cuda' if torch.cuda.is_available() and not opt.force_cpu else 'cpu' + print('Running inference on device \"{}\"'.format(device)) + matching = NNMatching(opt.model_path).eval().to(device) + keys = ['keypoints', 'scores', 'descriptors'] + + vs = VideoStreamer(opt.input, opt.resize, opt.image_glob) + frame, ret = vs.next_frame() + assert ret, 'Error when reading the first frame (try different --input?)' + + frame_tensor = frame2tensor(frame, device) + last_data = matching.darkfeat({'image': frame_tensor}) + last_data = {k+'0': [last_data[k]] for k in keys} + last_data['image0'] = frame_tensor + last_frame = frame + last_image_id = 0 + + if opt.output_dir is 
not None: + print('==> Will write outputs to {}'.format(opt.output_dir)) + Path(opt.output_dir).mkdir(exist_ok=True) + + timer = AverageTimer() + + while True: + frame, ret = vs.next_frame() + if not ret: + print('Finished demo_darkfeat.py') + break + timer.update('data') + stem0, stem1 = last_image_id, vs.i - 1 + + frame_tensor = frame2tensor(frame, device) + pred = matching({**last_data, 'image1': frame_tensor}) + kpts0 = last_data['keypoints0'][0].cpu().numpy() + kpts1 = pred['keypoints1'][0].cpu().numpy() + matches = pred['matches0'][0].cpu().numpy() + confidence = pred['matching_scores0'][0].cpu().numpy() + timer.update('forward') + + valid = matches > -1 + mkpts0 = kpts0[valid] + mkpts1 = kpts1[matches[valid]] + + E, inliers, pts1, pts2 = compute_essential(mkpts0, mkpts1, K) + color = cm.jet(np.clip(confidence[valid][inliers[:, 0].astype('bool')] * 2 - 1, -1, 1)) + + text = [ + 'DarkFeat', + 'Matches: {}'.format(inliers.sum()) + ] + + out = make_matching_plot_fast( + last_frame, frame, mkpts0[inliers[:, 0].astype('bool')], mkpts1[inliers[:, 0].astype('bool')], color, text, + path=None, small_text=' ') + + if opt.output_dir is not None: + stem = 'matches_{:06}_{:06}'.format(stem0, stem1) + out_file = str(Path(opt.output_dir, stem + '.png')) + print('Writing image to {}'.format(out_file)) + cv2.imwrite(out_file, out) diff --git a/third_party/DarkFeat/export_features.py b/third_party/DarkFeat/export_features.py new file mode 100644 index 0000000000000000000000000000000000000000..c7caea5e57890948728f84cbb7e68e59d455e171 --- /dev/null +++ b/third_party/DarkFeat/export_features.py @@ -0,0 +1,128 @@ +import argparse +import glob +import math +import subprocess +import numpy as np +import os +import tqdm +import torch +import torch.nn as nn +import cv2 +from darkfeat import DarkFeat +from utils import matching + +def darkfeat_pre(img, cuda): + H, W = img.shape[0], img.shape[1] + inp = img.copy() + inp = inp.transpose(2, 0, 1) + inp = torch.from_numpy(inp) + inp = 
torch.autograd.Variable(inp).view(1, 3, H, W) + if cuda: + inp = inp.cuda() + return inp + +if __name__ == '__main__': + # Parse command line arguments. + parser = argparse.ArgumentParser() + parser.add_argument('--H', type=int, default=int(640)) + parser.add_argument('--W', type=int, default=int(960)) + parser.add_argument('--histeq', action='store_true') + parser.add_argument('--model_path', type=str) + parser.add_argument('--dataset_dir', type=str, default='/data/hyz/MID/') + opt = parser.parse_args() + + sizer = (opt.W, opt.H) + focallength_x = 4.504986436499113e+03/(6744/sizer[0]) + focallength_y = 4.513311442889859e+03/(4502/sizer[1]) + K = np.eye(3) + K[0,0] = focallength_x + K[1,1] = focallength_y + K[0,2] = 3.363322177533149e+03/(6744/sizer[0])# * 0.5 + K[1,2] = 2.291824660547715e+03/(4502/sizer[1])# * 0.5 + Kinv = np.linalg.inv(K) + Kinvt = np.transpose(Kinv) + + cuda = True + if cuda: + darkfeat = DarkFeat(opt.model_path).cuda().eval() + + for scene in ['Indoor', 'Outdoor']: + base_save = './result/' + scene + '/' + dir_base = opt.dataset_dir + '/' + scene + '/' + pair_list = sorted(os.listdir(dir_base)) + + for pair in tqdm.tqdm(pair_list): + opention = 1 + if scene == 'Outdoor': + pass + else: + if int(pair[4::]) <= 17: + opention = 0 + else: + pass + name=[] + files = sorted(os.listdir(dir_base+pair)) + for file_ in files: + if file_.endswith('.cr2'): + name.append(file_[0:9]) + ISO = ['00100', '00200', '00400', '00800', '01600', '03200', '06400', '12800'] + if opention == 1: + Shutter_speed = ['0.005','0.01','0.025','0.05','0.17','0.5'] + else: + Shutter_speed = ['0.01','0.02','0.05','0.1','0.3','1'] + + E_GT = np.load(dir_base+pair+'/GT_Correspondence/'+'E_estimated.npy') + F_GT = np.dot(np.dot(Kinvt,E_GT),Kinv) + R_GT = np.load(dir_base+pair+'/GT_Correspondence/'+'R_GT.npy') + t_GT = np.load(dir_base+pair+'/GT_Correspondence/'+'T_GT.npy') + + id0, id1 = sorted([ int(i.split('/')[-1]) for i in glob.glob(f'{dir_base+pair}/?????') ]) + + cnt = 0 + + 
for iso in ISO: + for ex in Shutter_speed: + dark_name1 = name[0] + iso+'_'+ex+'_'+scene+'.npy' + dark_name2 = name[1] + iso+'_'+ex+'_'+scene+'.npy' + + if not opt.histeq: + dst_T1_None = f'{dir_base}{pair}/{id0:05d}-npy-nohisteq/{dark_name1}' + dst_T2_None = f'{dir_base}{pair}/{id1:05d}-npy-nohisteq/{dark_name2}' + + img1_orig_None = np.load(dst_T1_None) + img2_orig_None = np.load(dst_T2_None) + + dir_save = base_save + pair + '/None/' + + img_input1 = darkfeat_pre(img1_orig_None.astype('float32')/255.0, cuda) + img_input2 = darkfeat_pre(img2_orig_None.astype('float32')/255.0, cuda) + + else: + dst_T1_histeq = f'{dir_base}{pair}/{id0:05d}-npy/{dark_name1}' + dst_T2_histeq = f'{dir_base}{pair}/{id1:05d}-npy/{dark_name2}' + + img1_orig_histeq = np.load(dst_T1_histeq) + img2_orig_histeq = np.load(dst_T2_histeq) + + dir_save = base_save + pair + '/HistEQ/' + + img_input1 = darkfeat_pre(img1_orig_histeq.astype('float32')/255.0, cuda) + img_input2 = darkfeat_pre(img2_orig_histeq.astype('float32')/255.0, cuda) + + result1 = darkfeat({'image': img_input1}) + result2 = darkfeat({'image': img_input2}) + + mkpts0, mkpts1, _ = matching.match_descriptors( + cv2.KeyPoint_convert(result1['keypoints'].detach().cpu().float().numpy()), result1['descriptors'].detach().cpu().numpy(), + cv2.KeyPoint_convert(result2['keypoints'].detach().cpu().float().numpy()), result2['descriptors'].detach().cpu().numpy(), + ORB=False + ) + + POINT_1_dir = dir_save+f'DarkFeat/POINT_1/' + POINT_2_dir = dir_save+f'DarkFeat/POINT_2/' + + subprocess.check_output(['mkdir', '-p', POINT_1_dir]) + subprocess.check_output(['mkdir', '-p', POINT_2_dir]) + np.save(POINT_1_dir+dark_name1[0:-3]+'npy',mkpts0) + np.save(POINT_2_dir+dark_name2[0:-3]+'npy',mkpts1) + diff --git a/third_party/DarkFeat/fig/fig.gif b/third_party/DarkFeat/fig/fig.gif new file mode 100644 index 0000000000000000000000000000000000000000..e82c0007c93e18e05cf67e767e0bfe861eafb680 --- /dev/null +++ b/third_party/DarkFeat/fig/fig.gif @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526d2b455e852b323e6864b2e24b57cdf2482dd0f63dca1139c898e6b1b0f126 +size 15429829 diff --git a/third_party/DarkFeat/nets/__init__.py b/third_party/DarkFeat/nets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/DarkFeat/nets/geom.py b/third_party/DarkFeat/nets/geom.py new file mode 100644 index 0000000000000000000000000000000000000000..043ca6e8f5917c56defd6aa17c1ff236a431f8c0 --- /dev/null +++ b/third_party/DarkFeat/nets/geom.py @@ -0,0 +1,323 @@ +import time +import numpy as np +import torch +import torch.nn.functional as F + + +def rnd_sample(inputs, n_sample): + cur_size = inputs[0].shape[0] + rnd_idx = torch.randperm(cur_size)[0:n_sample] + outputs = [i[rnd_idx] for i in inputs] + return outputs + + +def _grid_positions(h, w, bs): + x_rng = torch.arange(0, w.int()) + y_rng = torch.arange(0, h.int()) + xv, yv = torch.meshgrid(x_rng, y_rng, indexing='xy') + return torch.reshape( + torch.stack((yv, xv), axis=-1), + (1, -1, 2) + ).repeat(bs, 1, 1).float() + + +def getK(ori_img_size, cur_feat_size, K): + # WARNING: cur_feat_size's order is [h, w] + r = ori_img_size / cur_feat_size[[1, 0]] + r_K0 = torch.stack([K[:, 0] / r[:, 0][..., None], K[:, 1] / + r[:, 1][..., None], K[:, 2]], axis=1) + return r_K0 + + +def gather_nd(params, indices): + """ The same as tf.gather_nd but batched gather is not supported yet. + indices is an k-dimensional integer tensor, best thought of as a (k-1)-dimensional tensor of indices into params, where each element defines a slice of params: + + output[\\(i_0, ..., i_{k-2}\\)] = params[indices[\\(i_0, ..., i_{k-2}\\)]] + + Args: + params (Tensor): "n" dimensions. shape: [x_0, x_1, x_2, ..., x_{n-1}] + indices (Tensor): "k" dimensions. shape: [y_0,y_2,...,y_{k-2}, m]. m <= n. + + Returns: gathered Tensor. 
+ shape [y_0,y_2,...y_{k-2}] + params.shape[m:] + + """ + orig_shape = list(indices.shape) + num_samples = np.prod(orig_shape[:-1]) + m = orig_shape[-1] + n = len(params.shape) + + if m <= n: + out_shape = orig_shape[:-1] + list(params.shape)[m:] + else: + raise ValueError( + f'the last dimension of indices must less or equal to the rank of params. Got indices:{indices.shape}, params:{params.shape}. {m} > {n}' + ) + + indices = indices.reshape((num_samples, m)).transpose(0, 1).tolist() + output = params[indices] # (num_samples, ...) + return output.reshape(out_shape).contiguous() + +# input: pos [kpt_n, 2]; inputs [H, W, 128] / [H, W] +# output: [kpt_n, 128] / [kpt_n] +def interpolate(pos, inputs, nd=True): + h = inputs.shape[0] + w = inputs.shape[1] + + i = pos[:, 0] + j = pos[:, 1] + + i_top_left = torch.clamp(torch.floor(i).int(), 0, h - 1) + j_top_left = torch.clamp(torch.floor(j).int(), 0, w - 1) + + i_top_right = torch.clamp(torch.floor(i).int(), 0, h - 1) + j_top_right = torch.clamp(torch.ceil(j).int(), 0, w - 1) + + i_bottom_left = torch.clamp(torch.ceil(i).int(), 0, h - 1) + j_bottom_left = torch.clamp(torch.floor(j).int(), 0, w - 1) + + i_bottom_right = torch.clamp(torch.ceil(i).int(), 0, h - 1) + j_bottom_right = torch.clamp(torch.ceil(j).int(), 0, w - 1) + + dist_i_top_left = i - i_top_left.float() + dist_j_top_left = j - j_top_left.float() + w_top_left = (1 - dist_i_top_left) * (1 - dist_j_top_left) + w_top_right = (1 - dist_i_top_left) * dist_j_top_left + w_bottom_left = dist_i_top_left * (1 - dist_j_top_left) + w_bottom_right = dist_i_top_left * dist_j_top_left + + if nd: + w_top_left = w_top_left[..., None] + w_top_right = w_top_right[..., None] + w_bottom_left = w_bottom_left[..., None] + w_bottom_right = w_bottom_right[..., None] + + interpolated_val = ( + w_top_left * gather_nd(inputs, torch.stack([i_top_left, j_top_left], axis=-1)) + + w_top_right * gather_nd(inputs, torch.stack([i_top_right, j_top_right], axis=-1)) + + w_bottom_left * 
gather_nd(inputs, torch.stack([i_bottom_left, j_bottom_left], axis=-1)) + + w_bottom_right * + gather_nd(inputs, torch.stack([i_bottom_right, j_bottom_right], axis=-1)) + ) + + return interpolated_val + + +def validate_and_interpolate(pos, inputs, validate_corner=True, validate_val=None, nd=False): + if nd: + h, w, c = inputs.shape + else: + h, w = inputs.shape + ids = torch.arange(0, pos.shape[0]) + + i = pos[:, 0] + j = pos[:, 1] + + i_top_left = torch.floor(i).int() + j_top_left = torch.floor(j).int() + + i_top_right = torch.floor(i).int() + j_top_right = torch.ceil(j).int() + + i_bottom_left = torch.ceil(i).int() + j_bottom_left = torch.floor(j).int() + + i_bottom_right = torch.ceil(i).int() + j_bottom_right = torch.ceil(j).int() + + if validate_corner: + # Valid corner + valid_top_left = torch.logical_and(i_top_left >= 0, j_top_left >= 0) + valid_top_right = torch.logical_and(i_top_right >= 0, j_top_right < w) + valid_bottom_left = torch.logical_and(i_bottom_left < h, j_bottom_left >= 0) + valid_bottom_right = torch.logical_and(i_bottom_right < h, j_bottom_right < w) + + valid_corner = torch.logical_and( + torch.logical_and(valid_top_left, valid_top_right), + torch.logical_and(valid_bottom_left, valid_bottom_right) + ) + + i_top_left = i_top_left[valid_corner] + j_top_left = j_top_left[valid_corner] + + i_top_right = i_top_right[valid_corner] + j_top_right = j_top_right[valid_corner] + + i_bottom_left = i_bottom_left[valid_corner] + j_bottom_left = j_bottom_left[valid_corner] + + i_bottom_right = i_bottom_right[valid_corner] + j_bottom_right = j_bottom_right[valid_corner] + + ids = ids[valid_corner] + + if validate_val is not None: + # Valid depth + valid_depth = torch.logical_and( + torch.logical_and( + gather_nd(inputs, torch.stack([i_top_left, j_top_left], axis=-1)) > 0, + gather_nd(inputs, torch.stack([i_top_right, j_top_right], axis=-1)) > 0 + ), + torch.logical_and( + gather_nd(inputs, torch.stack([i_bottom_left, j_bottom_left], axis=-1)) > 0, + 
gather_nd(inputs, torch.stack([i_bottom_right, j_bottom_right], axis=-1)) > 0 + ) + ) + + i_top_left = i_top_left[valid_depth] + j_top_left = j_top_left[valid_depth] + + i_top_right = i_top_right[valid_depth] + j_top_right = j_top_right[valid_depth] + + i_bottom_left = i_bottom_left[valid_depth] + j_bottom_left = j_bottom_left[valid_depth] + + i_bottom_right = i_bottom_right[valid_depth] + j_bottom_right = j_bottom_right[valid_depth] + + ids = ids[valid_depth] + + # Interpolation + i = i[ids] + j = j[ids] + dist_i_top_left = i - i_top_left.float() + dist_j_top_left = j - j_top_left.float() + w_top_left = (1 - dist_i_top_left) * (1 - dist_j_top_left) + w_top_right = (1 - dist_i_top_left) * dist_j_top_left + w_bottom_left = dist_i_top_left * (1 - dist_j_top_left) + w_bottom_right = dist_i_top_left * dist_j_top_left + + if nd: + w_top_left = w_top_left[..., None] + w_top_right = w_top_right[..., None] + w_bottom_left = w_bottom_left[..., None] + w_bottom_right = w_bottom_right[..., None] + + interpolated_val = ( + w_top_left * gather_nd(inputs, torch.stack([i_top_left, j_top_left], axis=-1)) + + w_top_right * gather_nd(inputs, torch.stack([i_top_right, j_top_right], axis=-1)) + + w_bottom_left * gather_nd(inputs, torch.stack([i_bottom_left, j_bottom_left], axis=-1)) + + w_bottom_right * gather_nd(inputs, torch.stack([i_bottom_right, j_bottom_right], axis=-1)) + ) + + pos = torch.stack([i, j], axis=1) + return [interpolated_val, pos, ids] + + +# pos0: [2, 230400, 2] +# depth0: [2, 480, 480] +def getWarp(pos0, rel_pose, depth0, K0, depth1, K1, bs): + def swap_axis(data): + return torch.stack([data[:, 1], data[:, 0]], axis=-1) + + all_pos0 = [] + all_pos1 = [] + all_ids = [] + for i in range(bs): + z0, new_pos0, ids = validate_and_interpolate(pos0[i], depth0[i], validate_val=0) + + uv0_homo = torch.cat([swap_axis(new_pos0), torch.ones((new_pos0.shape[0], 1)).to(new_pos0.device)], axis=-1) + xy0_homo = torch.matmul(torch.linalg.inv(K0[i]), uv0_homo.t()) + xyz0_homo = 
torch.cat([torch.unsqueeze(z0, 0) * xy0_homo, + torch.ones((1, new_pos0.shape[0])).to(z0.device)], axis=0) + + xyz1 = torch.matmul(rel_pose[i], xyz0_homo) + xy1_homo = xyz1 / torch.unsqueeze(xyz1[-1, :], axis=0) + uv1 = torch.matmul(K1[i], xy1_homo).t()[:, 0:2] + + new_pos1 = swap_axis(uv1) + annotated_depth, new_pos1, new_ids = validate_and_interpolate( + new_pos1, depth1[i], validate_val=0) + + ids = ids[new_ids] + new_pos0 = new_pos0[new_ids] + estimated_depth = xyz1.t()[new_ids][:, -1] + + inlier_mask = torch.abs(estimated_depth - annotated_depth) < 0.05 + + all_ids.append(ids[inlier_mask]) + all_pos0.append(new_pos0[inlier_mask]) + all_pos1.append(new_pos1[inlier_mask]) + # all_pos0 & all_pose1: [inlier_num, 2] * batch_size + return all_pos0, all_pos1, all_ids + + +# pos0: [2, 230400, 2] +# depth0: [2, 480, 480] +def getWarpNoValidate(pos0, rel_pose, depth0, K0, depth1, K1, bs): + def swap_axis(data): + return torch.stack([data[:, 1], data[:, 0]], axis=-1) + + all_pos0 = [] + all_pos1 = [] + all_ids = [] + for i in range(bs): + z0, new_pos0, ids = validate_and_interpolate(pos0[i], depth0[i], validate_val=0) + + uv0_homo = torch.cat([swap_axis(new_pos0), torch.ones((new_pos0.shape[0], 1)).to(new_pos0.device)], axis=-1) + xy0_homo = torch.matmul(torch.linalg.inv(K0[i]), uv0_homo.t()) + xyz0_homo = torch.cat([torch.unsqueeze(z0, 0) * xy0_homo, + torch.ones((1, new_pos0.shape[0])).to(z0.device)], axis=0) + + xyz1 = torch.matmul(rel_pose[i], xyz0_homo) + xy1_homo = xyz1 / torch.unsqueeze(xyz1[-1, :], axis=0) + uv1 = torch.matmul(K1[i], xy1_homo).t()[:, 0:2] + + new_pos1 = swap_axis(uv1) + _, new_pos1, new_ids = validate_and_interpolate( + new_pos1, depth1[i], validate_val=0) + + ids = ids[new_ids] + new_pos0 = new_pos0[new_ids] + + all_ids.append(ids) + all_pos0.append(new_pos0) + all_pos1.append(new_pos1) + # all_pos0 & all_pose1: [inlier_num, 2] * batch_size + return all_pos0, all_pos1, all_ids + + +# pos0: [2, 230400, 2] +# depth0: [2, 480, 480] +def 
getWarpNoValidate2(pos0, rel_pose, depth0, K0, depth1, K1): + def swap_axis(data): + return torch.stack([data[:, 1], data[:, 0]], axis=-1) + + z0 = interpolate(pos0, depth0, nd=False) + + uv0_homo = torch.cat([swap_axis(pos0), torch.ones((pos0.shape[0], 1)).to(pos0.device)], axis=-1) + xy0_homo = torch.matmul(torch.linalg.inv(K0), uv0_homo.t()) + xyz0_homo = torch.cat([torch.unsqueeze(z0, 0) * xy0_homo, + torch.ones((1, pos0.shape[0])).to(z0.device)], axis=0) + + xyz1 = torch.matmul(rel_pose, xyz0_homo) + xy1_homo = xyz1 / torch.unsqueeze(xyz1[-1, :], axis=0) + uv1 = torch.matmul(K1, xy1_homo).t()[:, 0:2] + + new_pos1 = swap_axis(uv1) + + return new_pos1 + + + +def get_dist_mat(feat1, feat2, dist_type): + eps = 1e-6 + cos_dist_mat = torch.matmul(feat1, feat2.t()) + if dist_type == 'cosine_dist': + dist_mat = torch.clamp(cos_dist_mat, -1, 1) + elif dist_type == 'euclidean_dist': + dist_mat = torch.sqrt(torch.clamp(2 - 2 * cos_dist_mat, min=eps)) + elif dist_type == 'euclidean_dist_no_norm': + norm1 = torch.sum(feat1 * feat1, axis=-1, keepdims=True) + norm2 = torch.sum(feat2 * feat2, axis=-1, keepdims=True) + dist_mat = torch.sqrt( + torch.clamp( + norm1 - 2 * cos_dist_mat + norm2.t(), + min=0. + ) + eps + ) + else: + raise NotImplementedError() + return dist_mat diff --git a/third_party/DarkFeat/nets/l2net.py b/third_party/DarkFeat/nets/l2net.py new file mode 100644 index 0000000000000000000000000000000000000000..e1ddfe8919bd4d5fe75215d253525123e1402952 --- /dev/null +++ b/third_party/DarkFeat/nets/l2net.py @@ -0,0 +1,116 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.parameter import Parameter + +from .score import peakiness_score + + +class BaseNet(nn.Module): + """ Helper class to construct a fully-convolutional network that + extract a l2-normalized patch descriptor. 
+ """ + def __init__(self, inchan=3, dilated=True, dilation=1, bn=True, bn_affine=False): + super(BaseNet, self).__init__() + self.inchan = inchan + self.curchan = inchan + self.dilated = dilated + self.dilation = dilation + self.bn = bn + self.bn_affine = bn_affine + + def _make_bn(self, outd): + return nn.BatchNorm2d(outd, affine=self.bn_affine) + + def _add_conv(self, outd, k=3, stride=1, dilation=1, bn=True, relu=True, k_pool = 1, pool_type='max', bias=False): + # as in the original implementation, dilation is applied at the end of layer, so it will have impact only from next layer + d = self.dilation * dilation + # if self.dilated: + # conv_params = dict(padding=((k-1)*d)//2, dilation=d, stride=1) + # self.dilation *= stride + # else: + # conv_params = dict(padding=((k-1)*d)//2, dilation=d, stride=stride) + conv_params = dict(padding=((k-1)*d)//2, dilation=d, stride=stride, bias=bias) + + ops = nn.ModuleList([]) + + ops.append( nn.Conv2d(self.curchan, outd, kernel_size=k, **conv_params) ) + if bn and self.bn: ops.append( self._make_bn(outd) ) + if relu: ops.append( nn.ReLU(inplace=True) ) + self.curchan = outd + + if k_pool > 1: + if pool_type == 'avg': + ops.append(torch.nn.AvgPool2d(kernel_size=k_pool)) + elif pool_type == 'max': + ops.append(torch.nn.MaxPool2d(kernel_size=k_pool)) + else: + print(f"Error, unknown pooling type {pool_type}...") + + return nn.Sequential(*ops) + + +class Quad_L2Net(BaseNet): + """ Same than L2_Net, but replace the final 8x8 conv by 3 successive 2x2 convs. 
+ """ + def __init__(self, dim=128, mchan=4, relu22=False, **kw): + BaseNet.__init__(self, **kw) + self.conv0 = self._add_conv( 8*mchan) + self.conv1 = self._add_conv( 8*mchan, bn=False) + self.bn1 = self._make_bn(8*mchan) + self.conv2 = self._add_conv( 16*mchan, stride=2) + self.conv3 = self._add_conv( 16*mchan, bn=False) + self.bn3 = self._make_bn(16*mchan) + self.conv4 = self._add_conv( 32*mchan, stride=2) + self.conv5 = self._add_conv( 32*mchan) + # replace last 8x8 convolution with 3 3x3 convolutions + self.conv6_0 = self._add_conv( 32*mchan) + self.conv6_1 = self._add_conv( 32*mchan) + self.conv6_2 = self._add_conv(dim, bn=False, relu=False) + self.out_dim = dim + + self.moving_avg_params = nn.ParameterList([ + Parameter(torch.tensor(1.), requires_grad=False), + Parameter(torch.tensor(1.), requires_grad=False), + Parameter(torch.tensor(1.), requires_grad=False) + ]) + + def forward(self, x): + # x: [N, C, H, W] + x0 = self.conv0(x) + x1 = self.conv1(x0) + x1_bn = self.bn1(x1) + x2 = self.conv2(x1_bn) + x3 = self.conv3(x2) + x3_bn = self.bn3(x3) + x4 = self.conv4(x3_bn) + x5 = self.conv5(x4) + x6_0 = self.conv6_0(x5) + x6_1 = self.conv6_1(x6_0) + x6_2 = self.conv6_2(x6_1) + + # calculate score map + comb_weights = torch.tensor([1., 2., 3.], device=x.device) + comb_weights /= torch.sum(comb_weights) + ksize = [3, 2, 1] + det_score_maps = [] + + for idx, xx in enumerate([x1, x3, x6_2]): + if self.training: + instance_max = torch.max(xx) + self.moving_avg_params[idx].data = self.moving_avg_params[idx] * 0.99 + instance_max.detach() * 0.01 + else: + pass + + alpha, beta = peakiness_score(xx, self.moving_avg_params[idx].detach(), ksize=3, dilation=ksize[idx]) + + score_vol = alpha * beta + det_score_map = torch.max(score_vol, dim=1, keepdim=True)[0] + det_score_map = F.interpolate(det_score_map, size=x.shape[2:], mode='bilinear', align_corners=True) + det_score_map = comb_weights[idx] * det_score_map + det_score_maps.append(det_score_map) + + det_score_map = 
torch.sum(torch.stack(det_score_maps, dim=0), dim=0) + # print([param.data for param in self.moving_avg_params]) + + return x6_2, det_score_map, x1, x3 diff --git a/third_party/DarkFeat/nets/loss.py b/third_party/DarkFeat/nets/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0dd42b4214d021137ddfe72771ccad0264d2321f --- /dev/null +++ b/third_party/DarkFeat/nets/loss.py @@ -0,0 +1,260 @@ +import torch +import torch.nn.functional as F + +from .geom import rnd_sample, interpolate, get_dist_mat + + +def make_detector_loss(pos0, pos1, dense_feat_map0, dense_feat_map1, + score_map0, score_map1, batch_size, num_corr, loss_type, config): + joint_loss = 0. + accuracy = 0. + all_valid_pos0 = [] + all_valid_pos1 = [] + all_valid_match = [] + for i in range(batch_size): + # random sample + valid_pos0, valid_pos1 = rnd_sample([pos0[i], pos1[i]], num_corr) + valid_num = valid_pos0.shape[0] + + valid_feat0 = interpolate(valid_pos0 / 4, dense_feat_map0[i]) + valid_feat1 = interpolate(valid_pos1 / 4, dense_feat_map1[i]) + + valid_feat0 = F.normalize(valid_feat0, p=2, dim=-1) + valid_feat1 = F.normalize(valid_feat1, p=2, dim=-1) + + valid_score0 = interpolate(valid_pos0, torch.squeeze(score_map0[i], dim=-1), nd=False) + valid_score1 = interpolate(valid_pos1, torch.squeeze(score_map1[i], dim=-1), nd=False) + + if config['network']['det']['corr_weight']: + corr_weight = valid_score0 * valid_score1 + else: + corr_weight = None + + safe_radius = config['network']['det']['safe_radius'] + if safe_radius > 0: + radius_mask_row = get_dist_mat( + valid_pos1, valid_pos1, "euclidean_dist_no_norm") + radius_mask_row = torch.le(radius_mask_row, safe_radius) + radius_mask_col = get_dist_mat( + valid_pos0, valid_pos0, "euclidean_dist_no_norm") + radius_mask_col = torch.le(radius_mask_col, safe_radius) + radius_mask_row = radius_mask_row.float() - torch.eye(valid_num, device=radius_mask_row.device) + radius_mask_col = radius_mask_col.float() - torch.eye(valid_num, 
device=radius_mask_col.device) + else: + radius_mask_row = None + radius_mask_col = None + + if valid_num < 32: + si_loss, si_accuracy, matched_mask = 0., 1., torch.zeros((1, valid_num)).bool() + else: + si_loss, si_accuracy, matched_mask = make_structured_loss( + torch.unsqueeze(valid_feat0, 0), torch.unsqueeze(valid_feat1, 0), + loss_type=loss_type, + radius_mask_row=radius_mask_row, radius_mask_col=radius_mask_col, + corr_weight=torch.unsqueeze(corr_weight, 0) if corr_weight is not None else None + ) + + joint_loss += si_loss / batch_size + accuracy += si_accuracy / batch_size + all_valid_match.append(torch.squeeze(matched_mask, dim=0)) + all_valid_pos0.append(valid_pos0) + all_valid_pos1.append(valid_pos1) + + return joint_loss, accuracy + + +def make_structured_loss(feat_anc, feat_pos, + loss_type='RATIO', inlier_mask=None, + radius_mask_row=None, radius_mask_col=None, + corr_weight=None, dist_mat=None): + """ + Structured loss construction. + Args: + feat_anc, feat_pos: Feature matrix. + loss_type: Loss type. 
+ inlier_mask: + Returns: + + """ + batch_size = feat_anc.shape[0] + num_corr = feat_anc.shape[1] + if inlier_mask is None: + inlier_mask = torch.ones((batch_size, num_corr), device=feat_anc.device).bool() + inlier_num = torch.count_nonzero(inlier_mask.float(), dim=-1) + + if loss_type == 'L2NET' or loss_type == 'CIRCLE': + dist_type = 'cosine_dist' + elif loss_type.find('HARD') >= 0: + dist_type = 'euclidean_dist' + else: + raise NotImplementedError() + + if dist_mat is None: + dist_mat = get_dist_mat(feat_anc.squeeze(0), feat_pos.squeeze(0), dist_type).unsqueeze(0) + pos_vec = dist_mat[0].diag().unsqueeze(0) + + if loss_type.find('HARD') >= 0: + neg_margin = 1 + dist_mat_without_min_on_diag = dist_mat + \ + 10 * torch.unsqueeze(torch.eye(num_corr, device=dist_mat.device), dim=0) + mask = torch.le(dist_mat_without_min_on_diag, 0.008).float() + dist_mat_without_min_on_diag += mask*10 + + if radius_mask_row is not None: + hard_neg_dist_row = dist_mat_without_min_on_diag + 10 * radius_mask_row + else: + hard_neg_dist_row = dist_mat_without_min_on_diag + if radius_mask_col is not None: + hard_neg_dist_col = dist_mat_without_min_on_diag + 10 * radius_mask_col + else: + hard_neg_dist_col = dist_mat_without_min_on_diag + + hard_neg_dist_row = torch.min(hard_neg_dist_row, dim=-1)[0] + hard_neg_dist_col = torch.min(hard_neg_dist_col, dim=-2)[0] + + if loss_type == 'HARD_TRIPLET': + loss_row = torch.clamp(neg_margin + pos_vec - hard_neg_dist_row, min=0) + loss_col = torch.clamp(neg_margin + pos_vec - hard_neg_dist_col, min=0) + elif loss_type == 'HARD_CONTRASTIVE': + pos_margin = 0.2 + pos_loss = torch.clamp(pos_vec - pos_margin, min=0) + loss_row = pos_loss + torch.clamp(neg_margin - hard_neg_dist_row, min=0) + loss_col = pos_loss + torch.clamp(neg_margin - hard_neg_dist_col, min=0) + else: + raise NotImplementedError() + + elif loss_type == 'CIRCLE': + log_scale = 512 + m = 0.1 + neg_mask_row = torch.unsqueeze(torch.eye(num_corr, device=feat_anc.device), 0) + if 
radius_mask_row is not None: + neg_mask_row += radius_mask_row + neg_mask_col = torch.unsqueeze(torch.eye(num_corr, device=feat_anc.device), 0) + if radius_mask_col is not None: + neg_mask_col += radius_mask_col + + pos_margin = 1 - m + neg_margin = m + pos_optimal = 1 + m + neg_optimal = -m + + neg_mat_row = dist_mat - 128 * neg_mask_row + neg_mat_col = dist_mat - 128 * neg_mask_col + + lse_positive = torch.logsumexp(-log_scale * (pos_vec[..., None] - pos_margin) * \ + torch.clamp(pos_optimal - pos_vec[..., None], min=0).detach(), dim=-1) + + lse_negative_row = torch.logsumexp(log_scale * (neg_mat_row - neg_margin) * \ + torch.clamp(neg_mat_row - neg_optimal, min=0).detach(), dim=-1) + + lse_negative_col = torch.logsumexp(log_scale * (neg_mat_col - neg_margin) * \ + torch.clamp(neg_mat_col - neg_optimal, min=0).detach(), dim=-2) + + loss_row = F.softplus(lse_positive + lse_negative_row) / log_scale + loss_col = F.softplus(lse_positive + lse_negative_col) / log_scale + + else: + raise NotImplementedError() + + if dist_type == 'cosine_dist': + err_row = dist_mat - torch.unsqueeze(pos_vec, -1) + err_col = dist_mat - torch.unsqueeze(pos_vec, -2) + elif dist_type == 'euclidean_dist' or dist_type == 'euclidean_dist_no_norm': + err_row = torch.unsqueeze(pos_vec, -1) - dist_mat + err_col = torch.unsqueeze(pos_vec, -2) - dist_mat + else: + raise NotImplementedError() + if radius_mask_row is not None: + err_row = err_row - 10 * radius_mask_row + if radius_mask_col is not None: + err_col = err_col - 10 * radius_mask_col + err_row = torch.sum(torch.clamp(err_row, min=0), dim=-1) + err_col = torch.sum(torch.clamp(err_col, min=0), dim=-2) + + loss = 0 + accuracy = 0 + + tot_loss = (loss_row + loss_col) / 2 + if corr_weight is not None: + tot_loss = tot_loss * corr_weight + + for i in range(batch_size): + if corr_weight is not None: + loss += torch.sum(tot_loss[i][inlier_mask[i]]) / \ + (torch.sum(corr_weight[i][inlier_mask[i]]) + 1e-6) + else: + loss += 
torch.mean(tot_loss[i][inlier_mask[i]]) + cnt_err_row = torch.count_nonzero(err_row[i][inlier_mask[i]]).float() + cnt_err_col = torch.count_nonzero(err_col[i][inlier_mask[i]]).float() + tot_err = cnt_err_row + cnt_err_col + if inlier_num[i] != 0: + accuracy += 1. - tot_err / inlier_num[i] / batch_size / 2. + else: + accuracy += 1. + + matched_mask = torch.logical_and(torch.eq(err_row, 0), torch.eq(err_col, 0)) + matched_mask = torch.logical_and(matched_mask, inlier_mask) + + loss /= batch_size + accuracy /= batch_size + + return loss, accuracy, matched_mask + + +# for the neighborhood areas of keypoints extracted from normal image, the score from noise_score_map should be close +# for the rest, the noise image's score should less than normal image +# input: score_map [batch_size, H, W, 1]; indices [2, k, 2] +# output: loss [scalar] +def make_noise_score_map_loss(score_map, noise_score_map, indices, batch_size, thld=0.): + H, W = score_map.shape[1:3] + loss = 0 + for i in range(batch_size): + kpts_coords = indices[i].T # (2, num_kpts) + mask = torch.zeros([H, W], device=score_map.device) + mask[kpts_coords.cpu().numpy()] = 1 + + # using 3x3 kernel to put kpts' neightborhood area into the mask + kernel = torch.ones([1, 1, 3, 3], device=score_map.device) + mask = F.conv2d(mask.unsqueeze(0).unsqueeze(0), kernel, padding=1)[0, 0] > 0 + + loss1 = torch.sum(torch.abs(score_map[i] - noise_score_map[i]).squeeze() * mask) / torch.sum(mask) + loss2 = torch.sum(torch.clamp(noise_score_map[i] - score_map[i] - thld, min=0).squeeze() * torch.logical_not(mask)) / (H * W - torch.sum(mask)) + + loss += loss1 + loss += loss2 + + if i == 0: + first_mask = mask + + return loss, first_mask + + +def make_noise_score_map_loss_labelmap(score_map, noise_score_map, labelmap, batch_size, thld=0.): + H, W = score_map.shape[1:3] + loss = 0 + for i in range(batch_size): + # using 3x3 kernel to put kpts' neightborhood area into the mask + kernel = torch.ones([1, 1, 3, 3], device=score_map.device) 
class MultiSampler (nn.Module):
    """ Neighbourhood sampler scoring a clean and a noisy feature pair side by side.

    Similar to NghSampler, but doesnt warp the 2nd image.
    Distance to GT  =>  0 ... pos_d ... neg_d ... ngh
    Pixel label     =>  + + + + + + 0 0 - - - - - - -

    Subsample on query side: if > 0, regular grid
                                < 0, random points
    In both cases, the number of query points is = W*H/subq**2
    NOTE(review): `subq` and `maxpool_pos` are only stored by this class;
    `forward` draws its queries with `rnd_sample` -- confirm whether the grid
    modes described above are still meant to apply.
    """

    # Keypoint coordinates are divided by this factor before the feature and
    # confidence maps are sampled -- presumably the maps are at 1/4 of the
    # image resolution; TODO confirm against the network definition.
    _FEAT_STRIDE = 4

    def __init__(self, ngh, subq=1, subd=1, pos_d=0, neg_d=2, border=None,
                 maxpool_pos=True, subd_neg=0):
        super().__init__()
        assert 0 <= pos_d < neg_d <= (ngh if ngh else 99)
        self.ngh = ngh
        self.pos_d = pos_d
        self.neg_d = neg_d
        assert subd <= ngh or ngh == 0
        assert subq != 0
        self.sub_q = subq
        self.sub_d = subd
        self.sub_d_neg = subd_neg
        if border is None:
            border = ngh
        assert border >= ngh, 'border has to be larger than ngh'
        self.border = border
        self.maxpool_pos = maxpool_pos
        self.precompute_offsets()

    def precompute_offsets(self):
        """Register the integer (i, j) offsets labelled positive / negative.

        Offsets lie on a grid of step `sub_d`; those with squared norm
        <= pos_d**2 are positives, those with neg_d**2 <= norm <= ngh**2 are
        negatives. They are stored as `pos_offsets` / `neg_offsets` buffers of
        shape (2, n).
        """
        pos_d2 = self.pos_d**2
        neg_d2 = self.neg_d**2
        rad2 = self.ngh**2
        # Largest multiple of sub_d that is <= ngh, so the grid is centred on
        # (0, 0). The previous `(ngh // sub_d) * ngh` is not a multiple of
        # sub_d in general (ngh=7, sub_d=2 gave only odd offsets and never
        # (0, 0)) and scanned a needlessly large range.
        rad = (self.ngh // self.sub_d) * self.sub_d
        pos = []
        neg = []
        for j in range(-rad, rad + 1, self.sub_d):
            for i in range(-rad, rad + 1, self.sub_d):
                d2 = i*i + j*j
                if d2 <= pos_d2:
                    pos.append((i, j))
                elif neg_d2 <= d2 <= rad2:
                    neg.append((i, j))

        self.register_buffer('pos_offsets', torch.LongTensor(pos).view(-1, 2).t())
        self.register_buffer('neg_offsets', torch.LongTensor(neg).view(-1, 2).t())

    def forward(self, feat0, feat1, noise_feat0, noise_feat1, conf0, conf1,
                noise_conf0, noise_conf1, pos0, pos1, B, H, W, N=2500):
        """Build similarity scores and labels for the clean and the noisy branch.

        Args:
            feat0, feat1: per-image descriptor maps of the clean pair (indexed by batch).
            noise_feat0, noise_feat1: descriptor maps of the noisy pair.
            conf0, conf1, noise_conf0, noise_conf1: matching confidence maps.
            pos0, pos1: per-image corresponding coordinates, (n, 2); column 0
                is compared against H and column 1 against W.
            B, H, W: batch size and image height / width.
            N: number of correspondences requested from `rnd_sample` per image.

        Returns:
            (scores, noise_scores, gt, mask, qconf, noise_qconf). `scores` and
            `noise_scores` concatenate [positive | negatives | distractors]
            along dim 1 (the distractor part is present only when `subd_neg`
            is truthy); `gt` is a uint8 tensor marking the positive column;
            `mask` flags queries whose ground-truth match lies inside the
            second image.
        """
        stride = self._FEAT_STRIDE
        use_distractors = bool(self.sub_d_neg)

        def clamp(xy):
            # In-place clamp of a freshly built (2, ...) coordinate tensor.
            xy[0].clamp_(0, H - 1)
            xy[1].clamp_(0, W - 1)
            return xy

        def descriptors(pos, feat_map):
            # L2-normalised descriptors sampled at `pos`.
            return F.normalize(interpolate(pos / stride, feat_map), p=2, dim=-1)

        def ngh_descriptors(pos, feat_map, n_offsets):
            # Same, for (n_offsets * n, 2) positions -> (n_offsets, n, C).
            raw = interpolate(pos / stride, feat_map)
            return F.normalize(raw.reshape(n_offsets, -1, raw.shape[-1]), p=2, dim=-1)

        def best_match(query, candidates):
            # Best similarity (and its offset index) per query, each (n, 1).
            sims = (query[None, :, :] * candidates).sum(dim=-1).t()
            return sims.max(dim=1, keepdim=True)

        pscores_ls, nscores_ls, feat0_ls = [], [], []
        noise_pscores_ls, noise_nscores_ls, noise_feat0_ls = [], [], []
        distractors_ls, noise_distractors_ls = [], []
        pos1_ls, pos2_ls = [], []
        qconf_ls, noise_qconf_ls, mask_ls = [], [], []

        for i in range(B):
            # keep queries at least `border` pixels away from the image edge
            inside = (pos0[i][:, 1] >= self.border) * (pos0[i][:, 1] < W - self.border) \
                * (pos0[i][:, 0] >= self.border) * (pos0[i][:, 0] < H - self.border)

            selected_pos0 = pos0[i][inside]
            selected_pos1 = pos1[i][inside]
            valid_pos0, valid_pos1 = rnd_sample([selected_pos0, selected_pos1], N)

            pos_offsets = self.pos_offsets.to(valid_pos1.device)
            neg_offsets = self.neg_offsets.to(valid_pos1.device)

            # query descriptors / confidences from the first image
            valid_feat0 = descriptors(valid_pos0, feat0[i])
            qconf = interpolate(valid_pos0 / stride, conf0[i])
            valid_noise_feat0 = descriptors(valid_pos0, noise_feat0[i])
            noise_qconf = interpolate(valid_pos0 / stride, noise_conf0[i])

            # ground-truth matches that fall inside the second image
            mask = (valid_pos1[:, 1] >= 0) * (valid_pos1[:, 1] < W) \
                * (valid_pos1[:, 0] >= 0) * (valid_pos1[:, 0] < H)

            # positive scores: best match within the positive neighbourhood
            valid_pos1p = clamp(valid_pos1.t()[:, None, :] + pos_offsets[:, :, None])
            valid_pos1p = valid_pos1p.permute(1, 2, 0).reshape(-1, 2)
            n_pos = pos_offsets.shape[-1]
            valid_feat1p = ngh_descriptors(valid_pos1p, feat1[i], n_pos)
            # The noisy branch must sample the noisy second image; it used to
            # read `feat1[i]`, unlike the negative / distractor branches.
            valid_noise_feat1p = ngh_descriptors(valid_pos1p, noise_feat1[i], n_pos)

            pscores, best = best_match(valid_feat0, valid_feat1p)
            sel = clamp(valid_pos1.t() + pos_offsets[:, best.view(-1)])
            qconf = (qconf + interpolate(sel.t() / stride, conf1[i])) / 2

            noise_pscores, noise_best = best_match(valid_noise_feat0, valid_noise_feat1p)
            noise_sel = clamp(valid_pos1.t() + pos_offsets[:, noise_best.view(-1)])
            noise_qconf = (noise_qconf + interpolate(noise_sel.t() / stride, noise_conf1[i])) / 2

            # negative scores: every offset of the negative ring
            valid_pos1n = clamp(valid_pos1.t()[:, None, :] + neg_offsets[:, :, None])
            valid_pos1n = valid_pos1n.permute(1, 2, 0).reshape(-1, 2)
            n_neg = neg_offsets.shape[-1]
            valid_feat1n = ngh_descriptors(valid_pos1n, feat1[i], n_neg)
            nscores = (valid_feat0[None, :, :] * valid_feat1n).sum(dim=-1).t()
            valid_noise_feat1n = ngh_descriptors(valid_pos1n, noise_feat1[i], n_neg)
            noise_nscores = (valid_noise_feat0[None, :, :] * valid_noise_feat1n).sum(dim=-1).t()

            if use_distractors:
                # extra negatives drawn anywhere among the second-image points
                valid_pos2 = rnd_sample([selected_pos1], N)[0]
                distractors_ls.append(descriptors(valid_pos2, feat1[i]))
                noise_distractors_ls.append(descriptors(valid_pos2, noise_feat1[i]))
                pos2_ls.append(valid_pos2)

            pscores_ls.append(pscores)
            nscores_ls.append(nscores)
            feat0_ls.append(valid_feat0)
            noise_pscores_ls.append(noise_pscores)
            noise_nscores_ls.append(noise_nscores)
            noise_feat0_ls.append(valid_noise_feat0)
            pos1_ls.append(valid_pos1)
            qconf_ls.append(qconf)
            noise_qconf_ls.append(noise_qconf)
            mask_ls.append(mask)

        # images may yield different numbers of samples: truncate to the smallest
        n_kept = min(len(q) for q in qconf_ls)

        def stack(tensors):
            return torch.stack([t[:n_kept] for t in tensors], dim=0)

        def cat(tensors):
            return torch.cat([t[:n_kept] for t in tensors], dim=0)

        qconf = stack(qconf_ls).squeeze(-1)
        noise_qconf = stack(noise_qconf_ls).squeeze(-1)
        mask = stack(mask_ls)
        pscores = cat(pscores_ls)
        score_parts = [pscores, cat(nscores_ls)]
        noise_score_parts = [cat(noise_pscores_ls), cat(noise_nscores_ls)]

        if use_distractors:
            valid_pos1 = cat(pos1_ls)
            valid_pos2 = cat(pos2_ls)
            dscores = torch.matmul(cat(feat0_ls), cat(distractors_ls).t())
            noise_dscores = torch.matmul(cat(noise_feat0_ls), cat(noise_distractors_ls).t())

            # remove scores that corresponds to positives or nulls: a distractor
            # closer than neg_d to the query's ground truth is zeroed.
            # Cross-image pairs get +neg_d**2 so they are never zeroed.
            dis2 = (valid_pos2[:, 1] - valid_pos1[:, 1][:, None])**2 \
                + (valid_pos2[:, 0] - valid_pos1[:, 0][:, None])**2
            b = torch.arange(B, device=dscores.device)[:, None].expand(B, n_kept).reshape(-1)
            dis2 += (b != b[:, None]).long() * self.neg_d**2
            too_close = dis2 < self.neg_d**2
            dscores[too_close] = 0
            noise_dscores[too_close] = 0
            score_parts.append(dscores)
            noise_score_parts.append(noise_dscores)

        scores = torch.cat(score_parts, dim=1)
        noise_scores = torch.cat(noise_score_parts, dim=1)

        gt = scores.new_zeros(scores.shape, dtype=torch.uint8)
        gt[:, :pscores.shape[1]] = 1

        return scores, noise_scores, gt, mask, qconf, noise_qconf
class APLoss (nn.Module):
    """ differentiable AP loss, through quantization.

        Input: (N, M)   values in [min, max]
        label: (N, M)   values in {0, 1}

        Returns: list of query AP (for each n in {1..N})
                 Note: typically, you want to minimize 1 - mean(AP)
    """
    def __init__(self, nq=25, min=0, max=1, euc=False):
        super().__init__()
        assert isinstance(nq, int) and 2 <= nq <= 100
        self.nq = nq
        self.min = min
        self.max = max
        self.euc = euc
        gap = max - min
        assert gap > 0

        # init quantizer = non-learnable (fixed) convolution
        self.quantizer = q = nn.Conv1d(1, 2*nq, kernel_size=1, bias=True)
        a = (nq-1) / gap
        with torch.no_grad():
            # 1st half = lines passing to (min+x,1) and (min+x+1/a,0) with x = {nq-1..0}*gap/(nq-1)
            q.weight[:nq] = -a
            q.bias[:nq] = torch.from_numpy(a*min + np.arange(nq, 0, -1))  # b = 1 + a*(min+x)
            # 2nd half = lines passing to (min+x,1) and (min+x-1/a,0) with x = {nq-1..0}*gap/(nq-1)
            q.weight[nq:] = a
            q.bias[nq:] = torch.from_numpy(np.arange(2-nq, 2, 1) - a*min)  # b = 1 - a*(min+x)
            # first and last one are special: just horizontal straight line
            q.weight[0] = q.weight[-1] = 0
            q.bias[0] = q.bias[-1] = 1
        # The bins are fixed by construction; freeze them so an optimizer
        # handed this module's parameters cannot move them. Gradients still
        # flow to the input scores.
        for param in q.parameters():
            param.requires_grad_(False)

    def compute_AP(self, x, label):
        """Return the quantised average precision of each row of `x` (shape (N,))."""
        N, M = x.shape
        if self.euc:  # euclidean distance in same range than similarities
            x = 1 - torch.sqrt(2.001 - 2*x)

        # quantize all predictions: soft assignment of each score to nq bins
        q = self.quantizer(x.unsqueeze(1))
        q = torch.min(q[:, :self.nq], q[:, self.nq:]).clamp(min=0)  # N x Q x M

        nbs = q.sum(dim=-1)  # number of samples  N x Q = c
        rec = (q * label.view(N, 1, M).float()).sum(dim=-1)  # nb of correct samples = c+  N x Q
        prec = rec.cumsum(dim=-1) / (1e-16 + nbs.cumsum(dim=-1))  # precision
        # norm in [0,1]; the clamp only matters for a row without any positive
        # label, whose AP then becomes 0 instead of NaN
        rec = rec / rec.sum(dim=-1, keepdim=True).clamp(min=1e-16)

        ap = (prec * rec).sum(dim=-1)  # per-image AP
        return ap

    def forward(self, x, label):
        assert x.shape == label.shape  # N x M
        return self.compute_AP(x, label)


class PixelAPLoss (nn.Module):
    """ Computes the pixel-wise AP loss:
        Given two images and ground-truth optical flow, computes the AP per pixel.

        feat1:  (B, C, H, W)   pixel-wise features extracted from img1
        feat2:  (B, C, H, W)   pixel-wise features extracted from img2
        aflow:  (B, 2, H, W)   absolute flow: aflow[...,y1,x1] = x2,y2
    """
    def __init__(self, sampler, nq=20):
        super().__init__()
        self.aploss = APLoss(nq, min=0, max=1, euc=False)
        self.name = 'pixAP'
        self.sampler = sampler

    def loss_from_ap(self, ap, rel):
        """Per-query loss; `rel` is unused here and exists for subclasses."""
        return 1 - ap

    def forward(self, feat0, feat1, conf0, conf1, pos0, pos1, B, H, W, N=1200):
        """Sample scores with `self.sampler` and return the mean masked AP loss.

        `N` is forwarded to the sampler; it used to be ignored in favour of a
        hard-coded 1200, which is still the default. Returns the int 0 when
        the sampler yields no query at all.
        """
        # subsample things
        scores, gt, msk, qconf = self.sampler(feat0, feat1, conf0, conf1, pos0, pos1, B, H, W, N=N)

        # compute pixel-wise AP
        n = qconf.numel()
        if n == 0:
            return 0
        scores, gt = scores.view(n, -1), gt.view(n, -1)
        ap = self.aploss(scores, gt).view(msk.shape)

        pixel_loss = self.loss_from_ap(ap, qconf)

        # only queries whose ground-truth match is valid contribute
        loss = pixel_loss[msk].mean()
        return loss


class ReliabilityLoss (PixelAPLoss):
    """ same than PixelAPLoss, but also train a pixel-wise confidence
        that this pixel is going to have a good AP.
    """
    def __init__(self, sampler, base=0.5, **kw):
        super().__init__(sampler, **kw)
        assert 0 <= base < 1
        self.base = base

    def loss_from_ap(self, ap, rel):
        # confident pixels (rel -> 1) pay 1 - ap, unsure ones pay 1 - base
        return 1 - ap*rel - (1-rel)*self.base
def precompute_offsets(self):
    """Register the integer (i, j) offsets labelled positive / negative.

    Offsets lie on a grid of step `self.sub_d`. Those with squared norm
    <= pos_d**2 are positives; those with neg_d**2 <= norm <= ngh**2 are
    negatives. Both sets are stored as LongTensor buffers of shape (2, n)
    named `pos_offsets` and `neg_offsets`.
    """
    pos_d2 = self.pos_d**2
    neg_d2 = self.neg_d**2
    rad2 = self.ngh**2
    # Largest multiple of sub_d that is <= ngh, so the grid is centred on
    # (0, 0). The previous `(ngh // sub_d) * ngh` is not a multiple of sub_d
    # in general (ngh=7, sub_d=2 produced only odd offsets, never (0, 0)) and
    # made the double loop scan a range about ngh times wider than needed.
    rad = (self.ngh // self.sub_d) * self.sub_d
    pos = []
    neg = []
    for j in range(-rad, rad + 1, self.sub_d):
        for i in range(-rad, rad + 1, self.sub_d):
            d2 = i*i + j*j
            if d2 <= pos_d2:
                pos.append((i, j))
            elif neg_d2 <= d2 <= rad2:
                neg.append((i, j))

    self.register_buffer('pos_offsets', torch.LongTensor(pos).view(-1, 2).t())
    self.register_buffer('neg_offsets', torch.LongTensor(neg).view(-1, 2).t())
y1[None,:].expand(B,n).reshape(-1) + b1 = b1[:,None].expand(B,n).reshape(-1) + shape = (B, n) + return b1, y1, x1, shape + + def forward(self, feat0, feat1, conf0, conf1, pos0, pos1, B, H, W, N=2500): + pscores_ls, nscores_ls, distractors_ls = [], [], [] + valid_feat0_ls = [] + valid_pos1_ls, valid_pos2_ls = [], [] + qconf_ls = [] + mask_ls = [] + + for i in range(B): + # positions in the first image + tmp_mask = (pos0[i][:, 1] >= self.border) * (pos0[i][:, 1] < W-self.border) \ + * (pos0[i][:, 0] >= self.border) * (pos0[i][:, 0] < H-self.border) + + selected_pos0 = pos0[i][tmp_mask] + selected_pos1 = pos1[i][tmp_mask] + valid_pos0, valid_pos1 = rnd_sample([selected_pos0, selected_pos1], N) + + # sample features from first image + valid_feat0 = interpolate(valid_pos0 / 4, feat0[i]) # [N, 128] + valid_feat0 = F.normalize(valid_feat0, p=2, dim=-1) # [N, 128] + qconf = interpolate(valid_pos0 / 4, conf0[i]) + + # sample GT from second image + mask = (valid_pos1[:, 1] >= 0) * (valid_pos1[:, 1] < W) \ + * (valid_pos1[:, 0] >= 0) * (valid_pos1[:, 0] < H) + + def clamp(xy): + xy = xy + torch.clamp(xy[0], 0, H-1, out=xy[0]) + torch.clamp(xy[1], 0, W-1, out=xy[1]) + return xy + + # compute positive scores + valid_pos1p = clamp(valid_pos1.t()[:,None,:] + self.pos_offsets[:,:,None].to(valid_pos1.device)) # [2, 29, N] + valid_pos1p = valid_pos1p.permute(1, 2, 0).reshape(-1, 2) # [29, N, 2] -> [29*N, 2] + valid_feat1p = interpolate(valid_pos1p / 4, feat1[i]).reshape(self.pos_offsets.shape[-1], -1, 128) # [29, N, 128] + valid_feat1p = F.normalize(valid_feat1p, p=2, dim=-1) # [29, N, 128] + + pscores = (valid_feat0[None,:,:] * valid_feat1p).sum(dim=-1).t() # [N, 29] + pscores, pos = pscores.max(dim=1, keepdim=True) + sel = clamp(valid_pos1.t() + self.pos_offsets[:,pos.view(-1)].to(valid_pos1.device)) + qconf = (qconf + interpolate(sel.t() / 4, conf1[i]))/2 + + # compute negative scores + valid_pos1n = clamp(valid_pos1.t()[:,None,:] + 
self.neg_offsets[:,:,None].to(valid_pos1.device)) # [2, 29, N] + valid_pos1n = valid_pos1n.permute(1, 2, 0).reshape(-1, 2) # [29, N, 2] -> [29*N, 2] + valid_feat1n = interpolate(valid_pos1n / 4, feat1[i]).reshape(self.neg_offsets.shape[-1], -1, 128) # [29, N, 128] + valid_feat1n = F.normalize(valid_feat1n, p=2, dim=-1) # [29, N, 128] + nscores = (valid_feat0[None,:,:] * valid_feat1n).sum(dim=-1).t() # [N, 29] + + if self.sub_d_neg: + valid_pos2 = rnd_sample([selected_pos1], N)[0] + distractors = interpolate(valid_pos2 / 4, feat1[i]) + distractors = F.normalize(distractors, p=2, dim=-1) + + pscores_ls.append(pscores) + nscores_ls.append(nscores) + distractors_ls.append(distractors) + valid_feat0_ls.append(valid_feat0) + valid_pos1_ls.append(valid_pos1) + valid_pos2_ls.append(valid_pos2) + qconf_ls.append(qconf) + mask_ls.append(mask) + + N = np.min([len(i) for i in qconf_ls]) + + # merge batches + qconf = torch.stack([i[:N] for i in qconf_ls], dim=0).squeeze(-1) + mask = torch.stack([i[:N] for i in mask_ls], dim=0) + pscores = torch.cat([i[:N] for i in pscores_ls], dim=0) + nscores = torch.cat([i[:N] for i in nscores_ls], dim=0) + distractors = torch.cat([i[:N] for i in distractors_ls], dim=0) + valid_feat0 = torch.cat([i[:N] for i in valid_feat0_ls], dim=0) + valid_pos1 = torch.cat([i[:N] for i in valid_pos1_ls], dim=0) + valid_pos2 = torch.cat([i[:N] for i in valid_pos2_ls], dim=0) + + dscores = torch.matmul(valid_feat0, distractors.t()) + dis2 = (valid_pos2[:, 1] - valid_pos1[:, 1][:,None])**2 + (valid_pos2[:, 0] - valid_pos1[:, 0][:,None])**2 + b = torch.arange(B, device=dscores.device)[:,None].expand(B, N).reshape(-1) + dis2 += (b != b[:,None]).long() * self.neg_d**2 + dscores[dis2 < self.neg_d**2] = 0 + scores = torch.cat((pscores, nscores, dscores), dim=1) + + gt = scores.new_zeros(scores.shape, dtype=torch.uint8) + gt[:, :pscores.shape[1]] = 1 + + return scores, gt, mask, qconf diff --git a/third_party/DarkFeat/nets/score.py 
# input: [batch_size, C, H, W]
# output: [batch_size, C, H, W], [batch_size, C, H, W]
def peakiness_score(inputs, moving_instance_max, ksize=3, dilation=1):
    """Spatial and channel-wise peakiness of a feature map.

    Args:
        inputs: feature map of shape (batch_size, C, H, W).
        moving_instance_max: value (or broadcastable tensor) the features are
            divided by before scoring.
        ksize: side of the square averaging window; must be odd for the
            averaged map to keep the input size.
        dilation: dilation of the averaging window.

    Returns:
        (alpha, beta), both shaped like `inputs`: softplus of the feature minus
        its local spatial average, and softplus of the feature minus its mean
        over channels.
    """
    inputs = inputs / moving_instance_max

    batch_size, C, H, W = inputs.shape

    # A dilated k x k window reaches dilation * (k // 2) pixels on each side.
    # The previous `k // 2 + (dilation - 1)` equals this only for k == 3 or
    # dilation == 1 and otherwise produced a smaller map that could not be
    # subtracted from `inputs`.
    pad_size = dilation * (ksize // 2)
    # Box filter in the input's dtype so half / double inputs work too.
    kernel = torch.ones([C, 1, ksize, ksize], dtype=inputs.dtype,
                        device=inputs.device) / (ksize * ksize)

    pad_inputs = F.pad(inputs, [pad_size] * 4, mode='reflect')

    # depthwise convolution (groups=C): every channel is averaged on its own
    avg_spatial_inputs = F.conv2d(
        pad_inputs,
        kernel,
        stride=1,
        dilation=dilation,
        padding=0,
        groups=C
    )
    avg_channel_inputs = inputs.mean(dim=1, keepdim=True)  # channel dimension is 1

    alpha = F.softplus(inputs - avg_spatial_inputs)
    beta = F.softplus(inputs - avg_channel_inputs)

    return alpha, beta
def edge_mask(inputs, n_channel, dilation=1, edge_thld=5):
    """Flag pixels whose local second-order structure is not edge-like.

    Finite-difference second derivatives (dii, dij, djj) are computed per
    channel. A pixel passes when det > 0 and
    trace**2 / det <= (edge_thld + 1)**2 / edge_thld.

    Args:
        inputs: tensor of shape (b, c, h, w).
        n_channel: unused; kept so existing callers keep working.
        dilation: dilation (and zero padding) of the 3x3 derivative stencils.
        edge_thld: ratio threshold; larger values accept more elongated structures.

    Returns:
        Bool tensor of shape (b, c, h, w), True where the pixel is NOT an edge.
    """
    b, c, h, w = inputs.size()
    # reshape (not view) so non-contiguous inputs are accepted as well
    planes = inputs.reshape(-1, 1, h, w)

    def second_derivative(stencil, scale=1.0):
        # Build the stencil directly in the input's dtype and on its device.
        kernel = scale * torch.tensor(
            stencil, dtype=inputs.dtype, device=inputs.device
        ).view(1, 1, 3, 3)
        return F.conv2d(
            planes, kernel, padding=dilation, dilation=dilation
        ).view(b, c, h, w)

    dii = second_derivative([[0, 1., 0], [0, -2., 0], [0, 1., 0]])
    dij = second_derivative([[1., 0, -1.], [0, 0., 0], [-1., 0, 1.]], scale=0.25)
    djj = second_derivative([[0, 0, 0], [1., -2., 1.], [0, 0, 0]])

    det = dii * djj - dij * dij
    tr = dii + djj
    del dii, dij, djj  # release the derivative maps before the comparisons

    threshold = (edge_thld + 1) ** 2 / edge_thld
    # Where det == 0 the ratio is inf / NaN; the `det > 0` term rejects those
    # pixels regardless.
    is_not_edge = torch.logical_and(tr * tr / det <= threshold, det > 0)

    return is_not_edge
def compute_essential(matched_kp1, matched_kp2, K):
    """Estimate the essential matrix relating two sets of matched keypoints.

    Both point sets are undistorted with intrinsics `K` and a fixed set of
    distortion coefficients, then an essential matrix is fitted with RANSAC on
    the resulting points (hence the identity camera matrix).

    Returns:
        (E, inliers, pts1, pts2). When no inlier mask is returned or the model
        is not a single 3x3 matrix, E is None and inliers is an empty array.
    """
    # distortion coefficients hard-coded for the camera used by this evaluation
    dist_coeffs = (-0.117918271740560, 0.075246403574314, 0, 0)
    pts1, pts2 = (
        cv2.undistortPoints(kpts, cameraMatrix=K, distCoeffs=dist_coeffs)
        for kpts in (matched_kp1, matched_kp2)
    )

    # points were undistorted with K above, so the identity is passed here
    identity = np.eye(3)
    # Estimate the essential matrix between the matches using RANSAC
    model, inliers = cv2.findEssentialMat(
        pts1, pts2, identity,
        method=cv2.RANSAC, prob=0.999, threshold=0.001, maxIters=10000,
    )

    if inliers is None or model.shape != (3, 3):
        return None, np.array([]), pts1, pts2
    return model, inliers, pts1, pts2
def pose_evaluation(result_base_dir, dark_name1, dark_name2, enhancer, K, R_GT, t_GT):
    """Return the (rotation, translation) angular errors in degrees for one image pair.

    Matched keypoints are loaded from
    `<result_base_dir><enhancer>/DarkFeat/POINT_{1,2}/<dark_name>`. If either
    file cannot be loaded the pair is scored with the worst error
    (180.0, 180.0). If essential-matrix estimation fails, a zero matrix with no
    inliers is handed to `compute_error`.
    """
    point_dir = result_base_dir + enhancer + '/DarkFeat/'
    # `except Exception` rather than a bare `except:` so that Ctrl-C and
    # SystemExit still stop a long evaluation run instead of being recorded
    # as a 180 degree error.
    try:
        m_kp1 = np.load(point_dir + 'POINT_1/' + dark_name1)
        m_kp2 = np.load(point_dir + 'POINT_2/' + dark_name2)
    except Exception:
        return 180.0, 180.0

    try:
        E, inliers, pts1, pts2 = compute_essential(m_kp1, m_kp2, K)
    except Exception:
        E, inliers, pts1, pts2 = np.zeros((3, 3)), np.array([]), None, None

    dR, dT = compute_error(R_GT, t_GT, E, pts1, pts2, inliers)
    return dR, dT
['0.005','0.01','0.025','0.05','0.17','0.5'] + else: + Shutter_speed = ['0.01','0.02','0.05','0.1','0.3','1'] + + E_GT = np.load(dir_base+pair+'/GT_Correspondence/'+'E_estimated.npy') + F_GT = np.dot(np.dot(Kinvt,E_GT),Kinv) + R_GT = np.load(dir_base+pair+'/GT_Correspondence/'+'R_GT.npy') + t_GT = np.load(dir_base+pair+'/GT_Correspondence/'+'T_GT.npy') + result_base_dir ='result/' +scene+'/'+pair+'/' + for iso in ISO: + for ex in Shutter_speed: + dark_name1 = name[0]+iso+'_'+ex+'_'+scene+'.npy' + dark_name2 = name[1]+iso+'_'+ex+'_'+scene+'.npy' + + dr, dt = pose_evaluation(result_base_dir,dark_name1,dark_name2,enhancer,K,R_GT,t_GT) + PE_MT[Shutter_speed.index(ex),ISO.index(iso)] = max(dr, dt) + + subprocess.check_output(['mkdir', '-p', base_save + pair + f'/{enhancer}/']) + np.save(base_save + pair + f'/{enhancer}/Pose_error_DarkFeat.npy', PE_MT) + \ No newline at end of file diff --git a/third_party/DarkFeat/raw_preprocess.py b/third_party/DarkFeat/raw_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..226155a84e97f15782d3650f4ef6b3fa1880e07b --- /dev/null +++ b/third_party/DarkFeat/raw_preprocess.py @@ -0,0 +1,62 @@ +import glob +import rawpy +import cv2 +import os +import numpy as np +import colour_demosaicing +from tqdm import tqdm + + +def process_raw(args, path, w_new, h_new): + raw = rawpy.imread(str(path)).raw_image_visible + if '_00200_' in str(path) or '_00100_' in str(path): + raw = np.clip(raw.astype('float32') - 512, 0, 65535) + else: + raw = np.clip(raw.astype('float32') - 2048, 0, 65535) + img = colour_demosaicing.demosaicing_CFA_Bayer_bilinear(raw, 'RGGB').astype('float32') + img = np.clip(img, 0, 16383) + + # HistEQ start + if args.histeq: + img2 = np.zeros_like(img) + for i in range(3): + hist,bins = np.histogram(img[..., i].flatten(),16384,[0,16384]) + cdf = hist.cumsum() + cdf_normalized = cdf * float(hist.max()) / cdf.max() + cdf_m = np.ma.masked_equal(cdf,0) + cdf_m = (cdf_m - 
def ratio(losses, thresholds=(5, 10)):
    """Return the fraction of errors strictly below each threshold.

    Args:
        losses: Array-like of error values. It is converted with
            ``np.asarray`` so plain Python sequences work as well as arrays.
        thresholds: Iterable of cut-off values. The default ``(5, 10)``
            matches the ``5deg 10deg`` header written by the script below;
            pass e.g. ``range(1, 11)`` for a finer sweep.

    Returns:
        A list of strings, one per threshold, each the fraction of entries
        in ``losses`` that are smaller than the threshold, formatted with
        three decimals.
    """
    losses = np.asarray(losses)
    return ['{:.3f}'.format(np.mean(losses < threshold)) for threshold in thresholds]


def _write_scene_ratios(scene, skip_unreadable):
    """Aggregate the per-pair pose errors of one scene into a ratio table.

    Reads ``result_errors/<scene>/<pair>/<method>/<file>.npy`` for every
    pair directory and writes one line per (method, error file) to
    ``resultfinal_errors/<scene>/ratio_methods_<scene>.txt``.

    Args:
        scene: Scene name used in the input/output paths.
        skip_unreadable: When true, an error file that cannot be loaded is
            reported and skipped; when false the exception propagates.
    """
    dir_base = 'result_errors/' + scene + '/'
    save_pt = 'resultfinal_errors/' + scene + '/'

    os.makedirs(save_pt, exist_ok=True)

    with open(save_pt + 'ratio_methods_' + scene + '.txt', 'w') as f:
        f.write('5deg 10deg' + '\n')
        pair_list = os.listdir(dir_base)
        # 'pair9' serves as the reference listing of methods and error files.
        enhancer = os.listdir(dir_base + '/pair9/')
        for method in enhancer:
            pose_error_list = sorted(os.listdir(dir_base + '/pair9/' + method))
            for pose_error in pose_error_list:
                errors = []
                for pair in pair_list:
                    error_path = dir_base + '/' + pair + '/' + method + '/' + pose_error
                    try:
                        errors.append(np.load(error_path))
                    except (OSError, ValueError, EOFError):
                        if not skip_unreadable:
                            raise
                        print('error in', error_path)
                # Stack once instead of growing an array pair by pair.
                stacked = np.stack(errors, axis=2) if errors else np.empty(0)
                ratio_result = ratio(stacked.flatten())
                # pose_error[11:-4] drops the first 11 characters and the
                # 4-character extension of the file name.
                f.write(method + '_' + pose_error[11:-4] + ' ' + ' '.join(ratio_result) + "\n")


if __name__ == '__main__':
    # Indoor tolerates missing/unreadable per-pair files; Outdoor has always
    # been strict, so that difference is kept.
    _write_scene_ratios('Indoor', skip_unreadable=True)
    _write_scene_ratios('Outdoor', skip_unreadable=False)
def str2bool(value):
    """Parse a command-line boolean.

    ``argparse`` with ``type=bool`` turns every non-empty string, including
    ``"False"``, into ``True``; this converter interprets the text instead.

    Args:
        value: A ``bool`` (returned unchanged) or a string such as
            ``true/false``, ``yes/no``, ``1/0`` (case-insensitive). The
            empty string is treated as false, as ``bool('')`` was.

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


if __name__ == '__main__':
    # add argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='./configs/config.yaml')
    parser.add_argument('--dataset_dir', type=str, default='/mnt/nvme2n1/hyz/data/GL3D')
    parser.add_argument('--data_split', type=str, default='comb')
    parser.add_argument('--is_training', type=str2bool, default=True)
    parser.add_argument('--job_name', type=str, default='')
    parser.add_argument('--gpu', type=str, default='0')
    parser.add_argument('--start_cnt', type=int, default=0)
    parser.add_argument('--stage', type=int, default=1)
    args = parser.parse_args()

    # Restrict the visible GPUs before anything can create a CUDA context;
    # the trainers below then always address the selected card as cuda:0.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # load global config
    with open(args.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # setup dataloader
    dataset = GL3DDataset(args.dataset_dir, config['network'], args.data_split, is_training=args.is_training)
    data_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4)

    # One trainer class per training stage.
    trainer_by_stage = {
        1: SingleTrainerNoRel,
        2: SingleTrainer,
        3: Trainer,
    }
    if args.stage not in trainer_by_stage:
        raise NotImplementedError()

    trainer = trainer_by_stage[args.stage](config, 'cuda:0', data_loader, args.job_name, args.start_cnt)
    trainer.train()
class Trainer:
    """Trainer that optimises the model on paired clean and noisy images.

    Every iteration runs the network on the clean and the noise-corrupted
    version of an image pair and sums the detector losses of both branches,
    the noise score-map losses and the relative reliability loss. Scalars
    (and, every 100 iterations, images) are written to a tensorboardX log
    directory under ``runs/``; checkpoints are stored there every 5000
    iterations.
    """

    def __init__(self, config, device, loader, job_name, start_cnt):
        """Build the model, losses, optimizer and LR scheduler.

        Args:
            config: Configuration mapping; the ``network`` and ``training``
                sections are read.
            device: Device the model, sampler and losses are moved to.
            loader: Iterable of batch dictionaries consumed by :meth:`train`.
            job_name: Run name; when empty, a timestamped directory under
                ``runs/`` is used instead.
            start_cnt: Iteration to resume from. ``0`` starts from scratch;
                otherwise ``model_{start_cnt:06d}.pth`` is loaded from the
                log directory.

        Raises:
            NotImplementedError: For an unsupported ``input_type`` or
                ``optimizer`` setting.
        """
        self.config = config
        self.device = device
        self.loader = loader

        # tensorboard writer construction
        os.makedirs('./runs/', exist_ok=True)
        if job_name != '':
            self.log_dir = f'runs/{job_name}'
        else:
            self.log_dir = f'runs/{datetime.datetime.now().strftime("%m-%d-%H%M%S")}'

        self.writer = SummaryWriter(self.log_dir)
        with open(f'{self.log_dir}/config.yaml', 'w') as f:
            yaml.dump(config, f)

        # Number of input channels for each supported input type.
        channels_by_input = {'gray': 1, 'rgb': 3, 'raw-demosaic': 3, 'raw': 4}
        input_type = config['network']['input_type']
        if input_type not in channels_by_input:
            raise NotImplementedError()
        # NOTE(security): the model class is resolved by eval() on a config
        # string; only use configuration files from trusted sources.
        self.model = eval(
            f'{config["network"]["model"]}(inchan={channels_by_input[input_type]})'
        ).to(device)

        # noise maker
        self.noise_maker = NoiseSimulator(device)

        # reliability map conv; placed on the requested device rather than
        # on whatever the current default CUDA device happens to be.
        self.model.clf = nn.Conv2d(128, 2, kernel_size=1).to(device)

        # load model
        self.cnt = 0
        if start_cnt != 0:
            self.model.load_state_dict(
                torch.load(f'{self.log_dir}/model_{start_cnt:06d}.pth', map_location=device))
            self.cnt = start_cnt + 1

        # sampler
        sampler = MultiSampler(ngh=7, subq=-8, subd=1, pos_d=3, neg_d=5, border=16,
                               subd_neg=-8, maxpool_pos=True).to(device)
        self.reliability_relitive_loss = MultiPixelAPLoss(sampler, nq=20).to(device)

        # optimizer and scheduler
        training_cfg = self.config['training']
        # 'initial_lr' is supplied up front because the scheduler below is
        # created with an explicit last_epoch.
        param_groups = [{'params': self.model.parameters(), 'initial_lr': training_cfg['lr']}]
        if training_cfg['optimizer'] == 'SGD':
            self.optimizer = torch.optim.SGD(
                param_groups,
                lr=training_cfg['lr'],
                momentum=training_cfg['momentum'],
                weight_decay=training_cfg['weight_decay'],
            )
        elif training_cfg['optimizer'] == 'Adam':
            self.optimizer = torch.optim.Adam(
                param_groups,
                lr=training_cfg['lr'],
                weight_decay=training_cfg['weight_decay'],
            )
        else:
            raise NotImplementedError()

        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=training_cfg['lr_step'],
            gamma=training_cfg['lr_gamma'],
            last_epoch=start_cnt,
        )
        for name, tensor in self.model.state_dict().items():
            print(name, "\t", tensor.size())

    def save(self, iter_num):
        """Write the model weights to ``model_{iter_num:06d}.pth`` in the log directory."""
        torch.save(self.model.state_dict(), f'{self.log_dir}/model_{iter_num:06d}.pth')

    def load(self, path):
        """Load model weights from ``path``, mapping them onto ``self.device``."""
        self.model.load_state_dict(torch.load(path, map_location=self.device))

    def _extract_kpts(self, score_map):
        """Run ``extract_kpts`` on a channel-last score map with the configured detector settings."""
        det_cfg = self.config['network']['det']
        return extract_kpts(
            score_map.permute(0, 3, 1, 2),
            k=det_cfg['kpt_n'],
            score_thld=det_cfg['score_thld'],
            nms_size=det_cfg['nms_size'],
            eof_size=det_cfg['eof_size'],
            edge_thld=det_cfg['edge_thld'],
        )

    def _normalize_batch(self, img0, img1, noise_img0, noise_img1):
        """Normalise the clean and noisy channel-first batches for the configured input type.

        Raises:
            NotImplementedError: For an unsupported ``input_type``.
        """
        input_type = self.config['network']['input_type']

        if input_type == 'rgb':
            # 3-channel rgb, fixed per-channel statistics
            norm_RGB = tvf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            return norm_RGB(img0), norm_RGB(img1), norm_RGB(noise_img0), norm_RGB(noise_img1)

        if input_type == 'gray':
            # 1-channel: average the channels, then standardise each image
            # with the statistics of its *clean* version.
            img0 = torch.mean(img0, dim=1, keepdim=True)
            img1 = torch.mean(img1, dim=1, keepdim=True)
            noise_img0 = torch.mean(noise_img0, dim=1, keepdim=True)
            noise_img1 = torch.mean(noise_img1, dim=1, keepdim=True)
            norm_gray0 = tvf.Normalize(mean=img0.mean(), std=img0.std())
            norm_gray1 = tvf.Normalize(mean=img1.mean(), std=img1.std())
            return norm_gray0(img0), norm_gray1(img1), norm_gray0(noise_img0), norm_gray1(noise_img1)

        if input_type in ('raw', 'raw-demosaic'):
            # 4-channel packed raw / 3-channel demosaiced raw: used as is
            return img0, img1, noise_img0, noise_img1

        raise NotImplementedError()

    def train(self):
        """Run two passes over the loader, updating the model once per batch."""
        self.model.train()
        use_corr_n = self.config['network']['use_corr_n']
        loss_type = self.config['network']['loss_type']

        for _ in range(2):
            for inputs in self.loader:
                self.optimizer.zero_grad()
                t = time.time()

                # preprocess and add noise
                img0_ori, noise_img0_ori = self.preprocess_noise_pair(inputs['img0'], self.cnt)
                img1_ori, noise_img1_ori = self.preprocess_noise_pair(inputs['img1'], self.cnt)

                # channel-last -> channel-first
                img0 = img0_ori.permute(0, 3, 1, 2).float().to(self.device)
                img1 = img1_ori.permute(0, 3, 1, 2).float().to(self.device)
                noise_img0 = noise_img0_ori.permute(0, 3, 1, 2).float().to(self.device)
                noise_img1 = noise_img1_ori.permute(0, 3, 1, 2).float().to(self.device)

                img0, img1, noise_img0, noise_img1 = self._normalize_batch(
                    img0, img1, noise_img0, noise_img1)

                desc0, score_map0, _, _ = self.model(img0)
                desc1, score_map1, _, _ = self.model(img1)

                conf0 = F.softmax(self.model.clf(torch.abs(desc0)**2.0), dim=1)[:, 1:2]
                conf1 = F.softmax(self.model.clf(torch.abs(desc1)**2.0), dim=1)[:, 1:2]

                noise_desc0, noise_score_map0, _, _ = self.model(noise_img0)
                noise_desc1, noise_score_map1, _, _ = self.model(noise_img1)

                noise_conf0 = F.softmax(self.model.clf(torch.abs(noise_desc0)**2.0), dim=1)[:, 1:2]
                noise_conf1 = F.softmax(self.model.clf(torch.abs(noise_desc1)**2.0), dim=1)[:, 1:2]

                cur_feat_size0 = torch.tensor(score_map0.shape[2:])
                cur_feat_size1 = torch.tensor(score_map1.shape[2:])

                # channel-first -> channel-last for the loss functions
                desc0 = desc0.permute(0, 2, 3, 1)
                desc1 = desc1.permute(0, 2, 3, 1)
                score_map0 = score_map0.permute(0, 2, 3, 1)
                score_map1 = score_map1.permute(0, 2, 3, 1)
                noise_desc0 = noise_desc0.permute(0, 2, 3, 1)
                noise_desc1 = noise_desc1.permute(0, 2, 3, 1)
                noise_score_map0 = noise_score_map0.permute(0, 2, 3, 1)
                noise_score_map1 = noise_score_map1.permute(0, 2, 3, 1)
                conf0 = conf0.permute(0, 2, 3, 1)
                conf1 = conf1.permute(0, 2, 3, 1)
                noise_conf0 = noise_conf0.permute(0, 2, 3, 1)
                noise_conf1 = noise_conf1.permute(0, 2, 3, 1)

                batch_size = img0.shape[0]
                rel_pose = inputs['rel_pose'].to(self.device)
                depth0 = inputs['depth0'].to(self.device)
                depth1 = inputs['depth1'].to(self.device)

                r_K0 = getK(inputs['ori_img_size0'], cur_feat_size0, inputs['K0']).to(self.device)
                r_K1 = getK(inputs['ori_img_size1'], cur_feat_size1, inputs['K1']).to(self.device)

                pos0 = _grid_positions(
                    cur_feat_size0[0], cur_feat_size0[1], batch_size).to(self.device)

                pos0_for_rel, pos1_for_rel, _ = getWarpNoValidate(
                    pos0, rel_pose, depth0, r_K0, depth1, r_K1, batch_size)

                pos0, pos1, _ = getWarp(
                    pos0, rel_pose, depth0, r_K0, depth1, r_K1, batch_size)

                reliab_loss_relative = self.reliability_relitive_loss(
                    desc0, desc1, noise_desc0, noise_desc1,
                    conf0, conf1, noise_conf0, noise_conf1,
                    pos0_for_rel, pos1_for_rel,
                    batch_size, img0.shape[2], img0.shape[3])

                det_structured_loss, det_accuracy = make_detector_loss(
                    pos0, pos1, desc0, desc1,
                    score_map0, score_map1, batch_size,
                    use_corr_n, loss_type, self.config,
                )

                det_structured_loss_noise, det_accuracy_noise = make_detector_loss(
                    pos0, pos1, noise_desc0, noise_desc1,
                    noise_score_map0, noise_score_map1, batch_size,
                    use_corr_n, loss_type, self.config,
                )

                indices0, _ = self._extract_kpts(score_map0)
                indices1, _ = self._extract_kpts(score_map1)

                noise_score_loss0, mask0 = make_noise_score_map_loss(
                    score_map0, noise_score_map0, indices0, img0.shape[0], thld=0.1)
                noise_score_loss1, mask1 = make_noise_score_map_loss(
                    score_map1, noise_score_map1, indices1, img1.shape[0], thld=0.1)

                total_loss = det_structured_loss + det_structured_loss_noise
                total_loss += noise_score_loss0 / 2. * 1.
                total_loss += noise_score_loss1 / 2. * 1.
                total_loss += reliab_loss_relative[0] / 2. * 0.5
                total_loss += reliab_loss_relative[1] / 2. * 0.5

                self.writer.add_scalar("acc/normal_acc", det_accuracy, self.cnt)
                self.writer.add_scalar("acc/noise_acc", det_accuracy_noise, self.cnt)
                self.writer.add_scalar("loss/total_loss", total_loss, self.cnt)
                self.writer.add_scalar("loss/noise_score_loss", (noise_score_loss0 + noise_score_loss1) / 2., self.cnt)
                self.writer.add_scalar("loss/det_loss_normal", det_structured_loss, self.cnt)
                self.writer.add_scalar("loss/det_loss_noise", det_structured_loss_noise, self.cnt)
                print('iter={},\tloss={:.4f},\tacc={:.4f},\t{:.4f}s/iter'.format(self.cnt, total_loss, det_accuracy, time.time()-t))

                # Skip the parameter update when the clean detector loss is zero.
                if det_structured_loss != 0:
                    total_loss.backward()
                    self.optimizer.step()
                # NOTE(review): the scheduler is stepped on every iteration so
                # the LR schedule follows self.cnt; confirm this placement.
                self.lr_scheduler.step()

                if self.cnt % 100 == 0:
                    noise_indices0, _ = self._extract_kpts(noise_score_map0)
                    noise_indices1, _ = self._extract_kpts(noise_score_map1)

                    if self.config['network']['input_type'] == 'raw':
                        # only the first three channels are drawn
                        kpt_img0 = self.showKeyPoints(img0_ori[0][..., :3] * 255., indices0[0])
                        kpt_img1 = self.showKeyPoints(img1_ori[0][..., :3] * 255., indices1[0])
                        noise_kpt_img0 = self.showKeyPoints(noise_img0_ori[0][..., :3] * 255., noise_indices0[0])
                        noise_kpt_img1 = self.showKeyPoints(noise_img1_ori[0][..., :3] * 255., noise_indices1[0])
                    else:
                        kpt_img0 = self.showKeyPoints(img0_ori[0] * 255., indices0[0])
                        kpt_img1 = self.showKeyPoints(img1_ori[0] * 255., indices1[0])
                        noise_kpt_img0 = self.showKeyPoints(noise_img0_ori[0] * 255., noise_indices0[0])
                        noise_kpt_img1 = self.showKeyPoints(noise_img1_ori[0] * 255., noise_indices1[0])

                    self.writer.add_image('img0/kpts', kpt_img0, self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/kpts', kpt_img1, self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/noise_kpts', noise_kpt_img0, self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/noise_kpts', noise_kpt_img1, self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/score_map', score_map0[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/score_map', score_map1[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/noise_score_map', noise_score_map0[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/noise_score_map', noise_score_map1[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/kpt_mask', mask0.unsqueeze(2), self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/kpt_mask', mask1.unsqueeze(2), self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/conf', conf0[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/conf', conf1[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/noise_conf', noise_conf0[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/noise_conf', noise_conf1[0], self.cnt, dataformats='HWC')

                if self.cnt % 5000 == 0:
                    self.save(self.cnt)

                self.cnt += 1

    def showKeyPoints(self, img, indices):
        """Return ``img`` as a uint8 array with the given keypoints drawn in green.

        Args:
            img: CPU tensor holding one image, already scaled to 0..255.
            indices: Tensor of keypoint coordinates; its two columns are
                swapped before conversion (presumably (row, col) -> (x, y);
                verify against ``extract_kpts``).
        """
        key_points = cv2.KeyPoint_convert(indices.cpu().float().numpy()[:, ::-1])
        img = img.numpy().astype('uint8')
        img = cv2.drawKeypoints(img, key_points, None, color=(0, 255, 0))
        return img

    def preprocess(self, img, iter_idx):
        """Convert a batch to the configured input type, optionally adding noise.

        Args:
            img: Batched RGB input as delivered by the loader.
            iter_idx: Current iteration; scales the noise ratio up to
                ``noise_maxstep``.

        Returns:
            ``img`` unchanged when noise is disabled and the input type is
            not raw-based; otherwise a tensor in the configured format.

        Raises:
            NotImplementedError: For an unsupported ``input_type``.
        """
        network_cfg = self.config['network']
        if not network_cfg['noise'] and 'raw' not in network_cfg['input_type']:
            return img

        raw = self.noise_maker.rgb2raw(img, batched=True)

        if network_cfg['noise']:
            ratio_dec = min(network_cfg['noise_maxstep'], iter_idx) / network_cfg['noise_maxstep']
            raw = self.noise_maker.raw2noisyRaw(raw, ratio_dec=ratio_dec, batched=True)

        if network_cfg['input_type'] == 'raw':
            return torch.tensor(self.noise_maker.raw2packedRaw(raw, batched=True))

        if network_cfg['input_type'] == 'raw-demosaic':
            return torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True))

        rgb = self.noise_maker.raw2rgb(raw, batched=True)
        if network_cfg['input_type'] == 'rgb' or network_cfg['input_type'] == 'gray':
            return torch.tensor(rgb)

        raise NotImplementedError()

    def preprocess_noise_pair(self, img, iter_idx):
        """Return the ``(clean, noisy)`` versions of a batch in the configured input type.

        Requires ``config['network']['noise']`` to be truthy. For 'rgb' and
        'gray' the clean element is ``img`` itself.

        Raises:
            AssertionError: If noise is disabled in the configuration.
            NotImplementedError: For an unsupported ``input_type``.
        """
        network_cfg = self.config['network']
        assert network_cfg['noise']

        raw = self.noise_maker.rgb2raw(img, batched=True)

        ratio_dec = min(network_cfg['noise_maxstep'], iter_idx) / network_cfg['noise_maxstep']
        noise_raw = self.noise_maker.raw2noisyRaw(raw, ratio_dec=ratio_dec, batched=True)

        if network_cfg['input_type'] == 'raw':
            return torch.tensor(self.noise_maker.raw2packedRaw(raw, batched=True)), \
                   torch.tensor(self.noise_maker.raw2packedRaw(noise_raw, batched=True))

        if network_cfg['input_type'] == 'raw-demosaic':
            return torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True)), \
                   torch.tensor(self.noise_maker.raw2demosaicRaw(noise_raw, batched=True))

        noise_rgb = self.noise_maker.raw2rgb(noise_raw, batched=True)
        if network_cfg['input_type'] == 'rgb' or network_cfg['input_type'] == 'gray':
            return img, torch.tensor(noise_rgb)

        raise NotImplementedError()
class SingleTrainer:
    """Trainer that optimises the detector loss plus a reliability loss on clean images.

    Scalars (and, every 100 iterations, images) are written to a
    tensorboardX log directory under ``runs/``; checkpoints are stored there
    every 10000 iterations.
    """

    def __init__(self, config, device, loader, job_name, start_cnt):
        """Build the model, losses, optimizer and LR scheduler.

        Args:
            config: Configuration mapping; the ``network`` and ``training``
                sections are read.
            device: Device the model, sampler and loss are moved to.
            loader: Iterable of batch dictionaries consumed by :meth:`train`.
            job_name: Run name; when empty, a timestamped directory under
                ``runs/`` is used instead.
            start_cnt: Iteration to resume from. ``0`` starts from scratch;
                otherwise ``model_{start_cnt:06d}.pth`` is loaded from the
                log directory.

        Raises:
            NotImplementedError: For an unsupported ``input_type`` or
                ``optimizer`` setting.
        """
        self.config = config
        self.device = device
        self.loader = loader

        # tensorboard writer construction
        os.makedirs('./runs/', exist_ok=True)
        if job_name != '':
            self.log_dir = f'runs/{job_name}'
        else:
            self.log_dir = f'runs/{datetime.datetime.now().strftime("%m-%d-%H%M%S")}'

        self.writer = SummaryWriter(self.log_dir)
        with open(f'{self.log_dir}/config.yaml', 'w') as f:
            yaml.dump(config, f)

        # Number of input channels for each supported input type.
        channels_by_input = {'gray': 1, 'raw-gray': 1, 'rgb': 3, 'raw-demosaic': 3, 'raw': 4}
        input_type = config['network']['input_type']
        if input_type not in channels_by_input:
            raise NotImplementedError()
        # NOTE(security): the model class is resolved by eval() on a config
        # string; only use configuration files from trusted sources.
        self.model = eval(
            f'{config["network"]["model"]}(inchan={channels_by_input[input_type]})'
        ).to(device)

        # noise maker
        self.noise_maker = NoiseSimulator(device)

        # load model
        # The checkpoint is loaded *before* the clf head is attached below,
        # so the loaded state dict must not contain clf.* entries.
        self.cnt = 0
        if start_cnt != 0:
            self.model.load_state_dict(
                torch.load(f'{self.log_dir}/model_{start_cnt:06d}.pth', map_location=device))
            self.cnt = start_cnt + 1

        # sampler
        sampler = NghSampler2(ngh=7, subq=-8, subd=1, pos_d=3, neg_d=5, border=16,
                              subd_neg=-8, maxpool_pos=True).to(device)
        self.reliability_loss = ReliabilityLoss(sampler, base=0.3, nq=20).to(device)
        # reliability map conv; placed on the requested device rather than
        # on whatever the current default CUDA device happens to be.
        self.model.clf = nn.Conv2d(128, 2, kernel_size=1).to(device)

        # optimizer and scheduler
        training_cfg = self.config['training']
        # 'initial_lr' is supplied up front because the scheduler below is
        # created with an explicit last_epoch.
        param_groups = [{'params': self.model.parameters(), 'initial_lr': training_cfg['lr']}]
        if training_cfg['optimizer'] == 'SGD':
            self.optimizer = torch.optim.SGD(
                param_groups,
                lr=training_cfg['lr'],
                momentum=training_cfg['momentum'],
                weight_decay=training_cfg['weight_decay'],
            )
        elif training_cfg['optimizer'] == 'Adam':
            self.optimizer = torch.optim.Adam(
                param_groups,
                lr=training_cfg['lr'],
                weight_decay=training_cfg['weight_decay'],
            )
        else:
            raise NotImplementedError()

        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=training_cfg['lr_step'],
            gamma=training_cfg['lr_gamma'],
            last_epoch=start_cnt,
        )
        for name, tensor in self.model.state_dict().items():
            print(name, "\t", tensor.size())

    def save(self, iter_num):
        """Write the model weights to ``model_{iter_num:06d}.pth`` in the log directory."""
        torch.save(self.model.state_dict(), f'{self.log_dir}/model_{iter_num:06d}.pth')

    def load(self, path):
        """Load model weights from ``path``, mapping them onto ``self.device``."""
        self.model.load_state_dict(torch.load(path, map_location=self.device))

    def _extract_kpts(self, score_map):
        """Run ``extract_kpts`` on a channel-last score map with the configured detector settings."""
        det_cfg = self.config['network']['det']
        return extract_kpts(
            score_map.permute(0, 3, 1, 2),
            k=det_cfg['kpt_n'],
            score_thld=det_cfg['score_thld'],
            nms_size=det_cfg['nms_size'],
            eof_size=det_cfg['eof_size'],
            edge_thld=det_cfg['edge_thld'],
        )

    def _normalize_batch(self, img0, img1):
        """Normalise the two channel-first clean batches for the configured input type.

        Only the clean images are handled: this trainer never feeds the
        noisy images to the model, so they are not normalised here.

        Raises:
            NotImplementedError: For an unsupported ``input_type``.
        """
        input_type = self.config['network']['input_type']

        if input_type == 'rgb':
            # 3-channel rgb, fixed per-channel statistics
            norm_RGB = tvf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            return norm_RGB(img0), norm_RGB(img1)

        if input_type == 'gray':
            # 1-channel: average the channels, then standardise per batch
            img0 = torch.mean(img0, dim=1, keepdim=True)
            img1 = torch.mean(img1, dim=1, keepdim=True)
            norm_gray0 = tvf.Normalize(mean=img0.mean(), std=img0.std())
            norm_gray1 = tvf.Normalize(mean=img1.mean(), std=img1.std())
            return norm_gray0(img0), norm_gray1(img1)

        if input_type in ('raw', 'raw-demosaic', 'raw-gray'):
            # 4-channel packed raw / 3-channel demosaiced raw / 1-channel
            # raw luminance: used as is
            return img0, img1

        raise NotImplementedError()

    def train(self):
        """Run two passes over the loader, updating the model once per batch."""
        self.model.train()

        for _ in range(2):
            for inputs in self.loader:
                self.optimizer.zero_grad()
                t = time.time()

                # preprocess; the noisy half of each pair is not used by this trainer
                img0_ori, _ = self.preprocess_noise_pair(inputs['img0'], self.cnt)
                img1_ori, _ = self.preprocess_noise_pair(inputs['img1'], self.cnt)

                # channel-last -> channel-first
                img0 = img0_ori.permute(0, 3, 1, 2).float().to(self.device)
                img1 = img1_ori.permute(0, 3, 1, 2).float().to(self.device)

                img0, img1 = self._normalize_batch(img0, img1)

                desc0, score_map0, _, _ = self.model(img0)
                desc1, score_map1, _, _ = self.model(img1)

                cur_feat_size0 = torch.tensor(score_map0.shape[2:])
                cur_feat_size1 = torch.tensor(score_map1.shape[2:])

                conf0 = F.softmax(self.model.clf(torch.abs(desc0)**2.0), dim=1)[:, 1:2]
                conf1 = F.softmax(self.model.clf(torch.abs(desc1)**2.0), dim=1)[:, 1:2]

                # channel-first -> channel-last for the loss functions
                desc0 = desc0.permute(0, 2, 3, 1)
                desc1 = desc1.permute(0, 2, 3, 1)
                score_map0 = score_map0.permute(0, 2, 3, 1)
                score_map1 = score_map1.permute(0, 2, 3, 1)
                conf0 = conf0.permute(0, 2, 3, 1)
                conf1 = conf1.permute(0, 2, 3, 1)

                batch_size = img0.shape[0]
                rel_pose = inputs['rel_pose'].to(self.device)
                depth0 = inputs['depth0'].to(self.device)
                depth1 = inputs['depth1'].to(self.device)

                r_K0 = getK(inputs['ori_img_size0'], cur_feat_size0, inputs['K0']).to(self.device)
                r_K1 = getK(inputs['ori_img_size1'], cur_feat_size1, inputs['K1']).to(self.device)

                pos0 = _grid_positions(
                    cur_feat_size0[0], cur_feat_size0[1], batch_size).to(self.device)

                pos0_for_rel, pos1_for_rel, _ = getWarpNoValidate(
                    pos0, rel_pose, depth0, r_K0, depth1, r_K1, batch_size)

                pos0, pos1, _ = getWarp(
                    pos0, rel_pose, depth0, r_K0, depth1, r_K1, batch_size)

                reliab_loss = self.reliability_loss(
                    desc0, desc1, conf0, conf1, pos0_for_rel, pos1_for_rel,
                    batch_size, img0.shape[2], img0.shape[3])

                det_structured_loss, det_accuracy = make_detector_loss(
                    pos0, pos1, desc0, desc1,
                    score_map0, score_map1, batch_size,
                    self.config['network']['use_corr_n'],
                    self.config['network']['loss_type'],
                    self.config
                )

                self.writer.add_scalar("loss/det_loss_normal", det_structured_loss, self.cnt)

                # Out-of-place sum: an in-place `+=` on an alias would also
                # modify det_structured_loss and defeat the zero check below.
                total_loss = det_structured_loss + reliab_loss

                self.writer.add_scalar("acc/normal_acc", det_accuracy, self.cnt)
                self.writer.add_scalar("loss/total_loss", total_loss, self.cnt)
                self.writer.add_scalar("loss/reliab_loss", reliab_loss, self.cnt)
                print('iter={},\tloss={:.4f},\tacc={:.4f},\t{:.4f}s/iter'.format(self.cnt, total_loss, det_accuracy, time.time()-t))

                # Skip the parameter update when the detector loss is zero.
                if det_structured_loss != 0:
                    total_loss.backward()
                    self.optimizer.step()
                # NOTE(review): the scheduler is stepped on every iteration so
                # the LR schedule follows self.cnt; confirm this placement.
                self.lr_scheduler.step()

                if self.cnt % 100 == 0:
                    indices0, _ = self._extract_kpts(score_map0)
                    indices1, _ = self._extract_kpts(score_map1)

                    if self.config['network']['input_type'] == 'raw':
                        # only the first three channels are drawn
                        kpt_img0 = self.showKeyPoints(img0_ori[0][..., :3] * 255., indices0[0])
                        kpt_img1 = self.showKeyPoints(img1_ori[0][..., :3] * 255., indices1[0])
                    else:
                        kpt_img0 = self.showKeyPoints(img0_ori[0] * 255., indices0[0])
                        kpt_img1 = self.showKeyPoints(img1_ori[0] * 255., indices1[0])

                    self.writer.add_image('img0/kpts', kpt_img0, self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/kpts', kpt_img1, self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/score_map', score_map0[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/score_map', score_map1[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img0/conf', conf0[0], self.cnt, dataformats='HWC')
                    self.writer.add_image('img1/conf', conf1[0], self.cnt, dataformats='HWC')

                if self.cnt % 10000 == 0:
                    self.save(self.cnt)

                self.cnt += 1

    def showKeyPoints(self, img, indices):
        """Return ``img`` as a uint8 array with the given keypoints drawn in green.

        Args:
            img: CPU tensor holding one image, already scaled to 0..255.
            indices: Tensor of keypoint coordinates; its two columns are
                swapped before conversion (presumably (row, col) -> (x, y);
                verify against ``extract_kpts``).
        """
        key_points = cv2.KeyPoint_convert(indices.cpu().float().numpy()[:, ::-1])
        img = img.numpy().astype('uint8')
        img = cv2.drawKeypoints(img, key_points, None, color=(0, 255, 0))
        return img

    def preprocess(self, img, iter_idx):
        """Convert a batch to the configured input type, optionally adding noise.

        Args:
            img: Batched RGB input as delivered by the loader.
            iter_idx: Current iteration; scales the noise ratio up to
                ``noise_maxstep``.

        Returns:
            ``img`` unchanged when noise is disabled and the input type is
            not raw-based; otherwise a tensor in the configured format.

        Raises:
            NotImplementedError: For an unsupported ``input_type``.
        """
        network_cfg = self.config['network']
        if not network_cfg['noise'] and 'raw' not in network_cfg['input_type']:
            return img

        raw = self.noise_maker.rgb2raw(img, batched=True)

        if network_cfg['noise']:
            ratio_dec = min(network_cfg['noise_maxstep'], iter_idx) / network_cfg['noise_maxstep']
            raw = self.noise_maker.raw2noisyRaw(raw, ratio_dec=ratio_dec, batched=True)

        if network_cfg['input_type'] == 'raw':
            return torch.tensor(self.noise_maker.raw2packedRaw(raw, batched=True))

        if network_cfg['input_type'] == 'raw-demosaic':
            return torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True))

        rgb = self.noise_maker.raw2rgb(raw, batched=True)
        if network_cfg['input_type'] == 'rgb' or network_cfg['input_type'] == 'gray':
            return torch.tensor(rgb)

        raise NotImplementedError()

    def preprocess_noise_pair(self, img, iter_idx):
        """Return the ``(clean, noisy)`` versions of a batch in the configured input type.

        Requires ``config['network']['noise']`` to be truthy. For 'rgb' and
        'gray' the clean element is ``img`` itself; for 'raw-gray' both
        elements are the demosaiced raw image reduced to one channel with
        fixed weights.

        Raises:
            AssertionError: If noise is disabled in the configuration.
            NotImplementedError: For an unsupported ``input_type``.
        """
        network_cfg = self.config['network']
        assert network_cfg['noise']

        raw = self.noise_maker.rgb2raw(img, batched=True)

        ratio_dec = min(network_cfg['noise_maxstep'], iter_idx) / network_cfg['noise_maxstep']
        noise_raw = self.noise_maker.raw2noisyRaw(raw, ratio_dec=ratio_dec, batched=True)

        if network_cfg['input_type'] == 'raw':
            return torch.tensor(self.noise_maker.raw2packedRaw(raw, batched=True)), \
                   torch.tensor(self.noise_maker.raw2packedRaw(noise_raw, batched=True))

        if network_cfg['input_type'] == 'raw-demosaic':
            return torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True)), \
                   torch.tensor(self.noise_maker.raw2demosaicRaw(noise_raw, batched=True))

        if network_cfg['input_type'] == 'raw-gray':
            # weighted channel sum, kept as a trailing singleton channel
            factor = torch.tensor([0.299, 0.587, 0.114]).double()
            return torch.matmul(torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True)), factor).unsqueeze(-1), \
                   torch.matmul(torch.tensor(self.noise_maker.raw2demosaicRaw(noise_raw, batched=True)), factor).unsqueeze(-1)

        noise_rgb = self.noise_maker.raw2rgb(noise_raw, batched=True)
        if network_cfg['input_type'] == 'rgb' or network_cfg['input_type'] == 'gray':
            return img, torch.tensor(noise_rgb)

        raise NotImplementedError()
def __init__(self, config, device, loader, job_name, start_cnt): + self.config = config + self.device = device + self.loader = loader + + # tensorboard writer construction + os.makedirs('./runs/', exist_ok=True) + if job_name != '': + self.log_dir = f'runs/{job_name}' + else: + self.log_dir = f'runs/{datetime.datetime.now().strftime("%m-%d-%H%M%S")}' + + self.writer = SummaryWriter(self.log_dir) + with open(f'{self.log_dir}/config.yaml', 'w') as f: + yaml.dump(config, f) + + if config['network']['input_type'] == 'gray' or config['network']['input_type'] == 'raw-gray': + self.model = eval(f'{config["network"]["model"]}(inchan=1)').to(device) + elif config['network']['input_type'] == 'rgb' or config['network']['input_type'] == 'raw-demosaic': + self.model = eval(f'{config["network"]["model"]}(inchan=3)').to(device) + elif config['network']['input_type'] == 'raw': + self.model = eval(f'{config["network"]["model"]}(inchan=4)').to(device) + else: + raise NotImplementedError() + + # noise maker + self.noise_maker = NoiseSimulator(device) + + # load model + self.cnt = 0 + if start_cnt != 0: + self.model.load_state_dict(torch.load(f'{self.log_dir}/model_{start_cnt:06d}.pth')) + self.cnt = start_cnt + 1 + + # optimizer and scheduler + if self.config['training']['optimizer'] == 'SGD': + self.optimizer = torch.optim.SGD( + [{'params': self.model.parameters(), 'initial_lr': self.config['training']['lr']}], + lr=self.config['training']['lr'], + momentum=self.config['training']['momentum'], + weight_decay=self.config['training']['weight_decay'], + ) + elif self.config['training']['optimizer'] == 'Adam': + self.optimizer = torch.optim.Adam( + [{'params': self.model.parameters(), 'initial_lr': self.config['training']['lr']}], + lr=self.config['training']['lr'], + weight_decay=self.config['training']['weight_decay'] + ) + else: + raise NotImplementedError() + + self.lr_scheduler = torch.optim.lr_scheduler.StepLR( + self.optimizer, + step_size=self.config['training']['lr_step'], + 
gamma=self.config['training']['lr_gamma'], + last_epoch=start_cnt + ) + for param_tensor in self.model.state_dict(): + print(param_tensor, "\t", self.model.state_dict()[param_tensor].size()) + + + def save(self, iter_num): + torch.save(self.model.state_dict(), f'{self.log_dir}/model_{iter_num:06d}.pth') + + def load(self, path): + self.model.load_state_dict(torch.load(path)) + + def train(self): + self.model.train() + + for epoch in range(2): + for batch_idx, inputs in enumerate(self.loader): + self.optimizer.zero_grad() + t = time.time() + + # preprocess and add noise + img0_ori, noise_img0_ori = self.preprocess_noise_pair(inputs['img0'], self.cnt) + img1_ori, noise_img1_ori = self.preprocess_noise_pair(inputs['img1'], self.cnt) + + img0 = img0_ori.permute(0, 3, 1, 2).float().to(self.device) + img1 = img1_ori.permute(0, 3, 1, 2).float().to(self.device) + + if self.config['network']['input_type'] == 'rgb': + # 3-channel rgb + RGB_mean = [0.485, 0.456, 0.406] + RGB_std = [0.229, 0.224, 0.225] + norm_RGB = tvf.Normalize(mean=RGB_mean, std=RGB_std) + img0 = norm_RGB(img0) + img1 = norm_RGB(img1) + noise_img0 = norm_RGB(noise_img0) + noise_img1 = norm_RGB(noise_img1) + + elif self.config['network']['input_type'] == 'gray': + # 1-channel + img0 = torch.mean(img0, dim=1, keepdim=True) + img1 = torch.mean(img1, dim=1, keepdim=True) + noise_img0 = torch.mean(noise_img0, dim=1, keepdim=True) + noise_img1 = torch.mean(noise_img1, dim=1, keepdim=True) + norm_gray0 = tvf.Normalize(mean=img0.mean(), std=img0.std()) + norm_gray1 = tvf.Normalize(mean=img1.mean(), std=img1.std()) + img0 = norm_gray0(img0) + img1 = norm_gray1(img1) + noise_img0 = norm_gray0(noise_img0) + noise_img1 = norm_gray1(noise_img1) + + elif self.config['network']['input_type'] == 'raw': + # 4-channel + pass + + elif self.config['network']['input_type'] == 'raw-demosaic': + # 3-channel + pass + + else: + raise NotImplementedError() + + desc0, score_map0, _, _ = self.model(img0) + desc1, score_map1, _, _ = 
self.model(img1) + + cur_feat_size0 = torch.tensor(score_map0.shape[2:]) + cur_feat_size1 = torch.tensor(score_map1.shape[2:]) + + desc0 = desc0.permute(0, 2, 3, 1) + desc1 = desc1.permute(0, 2, 3, 1) + score_map0 = score_map0.permute(0, 2, 3, 1) + score_map1 = score_map1.permute(0, 2, 3, 1) + + r_K0 = getK(inputs['ori_img_size0'], cur_feat_size0, inputs['K0']).to(self.device) + r_K1 = getK(inputs['ori_img_size1'], cur_feat_size1, inputs['K1']).to(self.device) + + pos0 = _grid_positions( + cur_feat_size0[0], cur_feat_size0[1], img0.shape[0]).to(self.device) + + pos0, pos1, _ = getWarp( + pos0, inputs['rel_pose'].to(self.device), inputs['depth0'].to(self.device), + r_K0, inputs['depth1'].to(self.device), r_K1, img0.shape[0]) + + det_structured_loss, det_accuracy = make_detector_loss( + pos0, pos1, desc0, desc1, + score_map0, score_map1, img0.shape[0], + self.config['network']['use_corr_n'], + self.config['network']['loss_type'], + self.config + ) + + total_loss = det_structured_loss + + self.writer.add_scalar("acc/normal_acc", det_accuracy, self.cnt) + self.writer.add_scalar("loss/total_loss", total_loss, self.cnt) + self.writer.add_scalar("loss/det_loss_normal", det_structured_loss, self.cnt) + print('iter={},\tloss={:.4f},\tacc={:.4f},\t{:.4f}s/iter'.format(self.cnt, total_loss, det_accuracy, time.time()-t)) + + if det_structured_loss != 0: + total_loss.backward() + self.optimizer.step() + self.lr_scheduler.step() + + if self.cnt % 100 == 0: + indices0, scores0 = extract_kpts( + score_map0.permute(0, 3, 1, 2), + k=self.config['network']['det']['kpt_n'], + score_thld=self.config['network']['det']['score_thld'], + nms_size=self.config['network']['det']['nms_size'], + eof_size=self.config['network']['det']['eof_size'], + edge_thld=self.config['network']['det']['edge_thld'] + ) + indices1, scores1 = extract_kpts( + score_map1.permute(0, 3, 1, 2), + k=self.config['network']['det']['kpt_n'], + score_thld=self.config['network']['det']['score_thld'], + 
nms_size=self.config['network']['det']['nms_size'], + eof_size=self.config['network']['det']['eof_size'], + edge_thld=self.config['network']['det']['edge_thld'] + ) + + if self.config['network']['input_type'] == 'raw': + kpt_img0 = self.showKeyPoints(img0_ori[0][..., :3] * 255., indices0[0]) + kpt_img1 = self.showKeyPoints(img1_ori[0][..., :3] * 255., indices1[0]) + else: + kpt_img0 = self.showKeyPoints(img0_ori[0] * 255., indices0[0]) + kpt_img1 = self.showKeyPoints(img1_ori[0] * 255., indices1[0]) + + self.writer.add_image('img0/kpts', kpt_img0, self.cnt, dataformats='HWC') + self.writer.add_image('img1/kpts', kpt_img1, self.cnt, dataformats='HWC') + self.writer.add_image('img0/score_map', score_map0[0], self.cnt, dataformats='HWC') + self.writer.add_image('img1/score_map', score_map1[0], self.cnt, dataformats='HWC') + + if self.cnt % 10000 == 0: + self.save(self.cnt) + + self.cnt += 1 + + + def showKeyPoints(self, img, indices): + key_points = cv2.KeyPoint_convert(indices.cpu().float().numpy()[:, ::-1]) + img = img.numpy().astype('uint8') + img = cv2.drawKeypoints(img, key_points, None, color=(0, 255, 0)) + return img + + + def preprocess(self, img, iter_idx): + if not self.config['network']['noise'] and 'raw' not in self.config['network']['input_type']: + return img + + raw = self.noise_maker.rgb2raw(img, batched=True) + + if self.config['network']['noise']: + ratio_dec = min(self.config['network']['noise_maxstep'], iter_idx) / self.config['network']['noise_maxstep'] + raw = self.noise_maker.raw2noisyRaw(raw, ratio_dec=ratio_dec, batched=True) + + if self.config['network']['input_type'] == 'raw': + return torch.tensor(self.noise_maker.raw2packedRaw(raw, batched=True)) + + if self.config['network']['input_type'] == 'raw-demosaic': + return torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True)) + + rgb = self.noise_maker.raw2rgb(raw, batched=True) + if self.config['network']['input_type'] == 'rgb' or self.config['network']['input_type'] == 'gray': + 
return torch.tensor(rgb) + + raise NotImplementedError() + + + def preprocess_noise_pair(self, img, iter_idx): + assert self.config['network']['noise'] + + raw = self.noise_maker.rgb2raw(img, batched=True) + + ratio_dec = min(self.config['network']['noise_maxstep'], iter_idx) / self.config['network']['noise_maxstep'] + noise_raw = self.noise_maker.raw2noisyRaw(raw, ratio_dec=ratio_dec, batched=True) + + if self.config['network']['input_type'] == 'raw': + return torch.tensor(self.noise_maker.raw2packedRaw(raw, batched=True)), \ + torch.tensor(self.noise_maker.raw2packedRaw(noise_raw, batched=True)) + + if self.config['network']['input_type'] == 'raw-demosaic': + return torch.tensor(self.noise_maker.raw2demosaicRaw(raw, batched=True)), \ + torch.tensor(self.noise_maker.raw2demosaicRaw(noise_raw, batched=True)) + + noise_rgb = self.noise_maker.raw2rgb(noise_raw, batched=True) + if self.config['network']['input_type'] == 'rgb' or self.config['network']['input_type'] == 'gray': + return img, torch.tensor(noise_rgb) + + raise NotImplementedError() diff --git a/third_party/DarkFeat/utils/__init__.py b/third_party/DarkFeat/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/DarkFeat/utils/matching.py b/third_party/DarkFeat/utils/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..ca091f418bb4dc4d278611e5126a930aa51e7f3f --- /dev/null +++ b/third_party/DarkFeat/utils/matching.py @@ -0,0 +1,128 @@ +import math +import numpy as np +import cv2 + +def extract_ORB_keypoints_and_descriptors(img): + # gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + detector = cv2.ORB_create(nfeatures=1000) + kp, desc = detector.detectAndCompute(img, None) + return kp, desc + +def match_descriptors_NG(kp1, desc1, kp2, desc2): + bf = cv2.BFMatcher() + try: + matches = bf.knnMatch(desc1, desc2,k=2) + except: + matches = [] + good_matches=[] + image1_kp = [] + image2_kp 
= [] + ratios = [] + try: + for (m1,m2) in matches: + if m1.distance < 0.8 * m2.distance: + good_matches.append(m1) + image2_kp.append(kp2[m1.trainIdx].pt) + image1_kp.append(kp1[m1.queryIdx].pt) + ratios.append(m1.distance / m2.distance) + except: + pass + image1_kp = np.array([image1_kp]) + image2_kp = np.array([image2_kp]) + ratios = np.array([ratios]) + ratios = np.expand_dims(ratios, 2) + return image1_kp, image2_kp, good_matches, ratios + +def match_descriptors(kp1, desc1, kp2, desc2, ORB): + if ORB: + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + try: + matches = bf.match(desc1,desc2) + matches = sorted(matches, key = lambda x:x.distance) + except: + matches = [] + good_matches=[] + image1_kp = [] + image2_kp = [] + count = 0 + try: + for m in matches: + count+=1 + if count < 1000: + good_matches.append(m) + image2_kp.append(kp2[m.trainIdx].pt) + image1_kp.append(kp1[m.queryIdx].pt) + except: + pass + else: + # Match the keypoints with the warped_keypoints with nearest neighbor search + bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + try: + matches = bf.match(desc1.transpose(1,0), desc2.transpose(1,0)) + matches = sorted(matches, key = lambda x:x.distance) + except: + matches = [] + good_matches=[] + image1_kp = [] + image2_kp = [] + try: + for m in matches: + good_matches.append(m) + image2_kp.append(kp2[m.trainIdx].pt) + image1_kp.append(kp1[m.queryIdx].pt) + except: + pass + + image1_kp = np.array([image1_kp]) + image2_kp = np.array([image2_kp]) + return image1_kp, image2_kp, good_matches + + +def compute_essential(matched_kp1, matched_kp2, K): + pts1 = cv2.undistortPoints(matched_kp1,cameraMatrix=K, distCoeffs = (-0.117918271740560,0.075246403574314,0,0)) + pts2 = cv2.undistortPoints(matched_kp2,cameraMatrix=K, distCoeffs = (-0.117918271740560,0.075246403574314,0,0)) + K_1 = np.eye(3) + # Estimate the homography between the matches using RANSAC + ransac_model, ransac_inliers = cv2.findEssentialMat(pts1, pts2, K_1, method=cv2.FM_RANSAC, 
prob=0.999, threshold=0.001) + if ransac_inliers is None or ransac_model.shape != (3,3): + ransac_inliers = np.array([]) + ransac_model = None + return ransac_model, ransac_inliers, pts1, pts2 + + +def compute_error(R_GT,t_GT,E,pts1_norm, pts2_norm, inliers): + """Compute the angular error between two rotation matrices and two translation vectors. + Keyword arguments: + R -- 2D numpy array containing an estimated rotation + gt_R -- 2D numpy array containing the corresponding ground truth rotation + t -- 2D numpy array containing an estimated translation as column + gt_t -- 2D numpy array containing the corresponding ground truth translation + """ + + inliers = inliers.ravel() + R = np.eye(3) + t = np.zeros((3,1)) + sst = True + try: + cv2.recoverPose(E, pts1_norm, pts2_norm, np.eye(3), R, t, inliers) + except: + sst = False + # calculate angle between provided rotations + # + if sst: + dR = np.matmul(R, np.transpose(R_GT)) + dR = cv2.Rodrigues(dR)[0] + dR = np.linalg.norm(dR) * 180 / math.pi + + # calculate angle between provided translations + dT = float(np.dot(t_GT.T, t)) + dT /= float(np.linalg.norm(t_GT)) + + if dT > 1 or dT < -1: + print("Domain warning! dT:",dT) + dT = max(-1,min(1,dT)) + dT = math.acos(dT) * 180 / math.pi + dT = np.minimum(dT, 180 - dT) # ambiguity of E estimation + else: + dR,dT = 180.0, 180.0 + return dR, dT diff --git a/third_party/DarkFeat/utils/misc.py b/third_party/DarkFeat/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..1df6fdec97121486dbb94e0b32a2f66c85c48f7d --- /dev/null +++ b/third_party/DarkFeat/utils/misc.py @@ -0,0 +1,158 @@ +from pathlib import Path +import time +from collections import OrderedDict +import numpy as np +import cv2 +import rawpy +import torch +import colour_demosaicing + + +class AverageTimer: + """ Class to help manage printing simple timing of code execution. 
""" + + def __init__(self, smoothing=0.3, newline=False): + self.smoothing = smoothing + self.newline = newline + self.times = OrderedDict() + self.will_print = OrderedDict() + self.reset() + + def reset(self): + now = time.time() + self.start = now + self.last_time = now + for name in self.will_print: + self.will_print[name] = False + + def update(self, name='default'): + now = time.time() + dt = now - self.last_time + if name in self.times: + dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name] + self.times[name] = dt + self.will_print[name] = True + self.last_time = now + + def print(self, text='Timer'): + total = 0. + print('[{}]'.format(text), end=' ') + for key in self.times: + val = self.times[key] + if self.will_print[key]: + print('%s=%.3f' % (key, val), end=' ') + total += val + print('total=%.3f sec {%.1f FPS}' % (total, 1./total), end=' ') + if self.newline: + print(flush=True) + else: + print(end='\r', flush=True) + self.reset() + + +class VideoStreamer: + def __init__(self, basedir, resize, image_glob): + self.listing = [] + self.resize = resize + self.i = 0 + if Path(basedir).is_dir(): + print('==> Processing image directory input: {}'.format(basedir)) + self.listing = list(Path(basedir).glob(image_glob[0])) + for j in range(1, len(image_glob)): + image_path = list(Path(basedir).glob(image_glob[j])) + self.listing = self.listing + image_path + self.listing.sort() + if len(self.listing) == 0: + raise IOError('No images found (maybe bad \'image_glob\' ?)') + self.max_length = len(self.listing) + else: + raise ValueError('VideoStreamer input \"{}\" not recognized.'.format(basedir)) + + def load_image(self, impath): + raw = rawpy.imread(str(impath)).raw_image_visible + raw = np.clip(raw.astype('float32') - 512, 0, 65535) + img = colour_demosaicing.demosaicing_CFA_Bayer_bilinear(raw, 'RGGB').astype('float32') + img = np.clip(img, 0, 16383) + + m = img.mean() + d = np.abs(img - img.mean()).mean() + img = (img - m + 2*d) / 4/d * 255 + image = 
np.clip(img, 0, 255) + + w_new, h_new = self.resize[0], self.resize[1] + + im = cv2.resize(image.astype('float32'), (w_new, h_new), interpolation=cv2.INTER_AREA) + return im + + def next_frame(self): + if self.i == self.max_length: + return (None, False) + image_file = str(self.listing[self.i]) + image = self.load_image(image_file) + self.i = self.i + 1 + return (image, True) + + +def frame2tensor(frame, device): + if len(frame.shape) == 2: + return torch.from_numpy(frame/255.).float()[None, None].to(device) + else: + return torch.from_numpy(frame/255.).float().permute(2, 0, 1)[None].to(device) + + +def make_matching_plot_fast(image0, image1, mkpts0, mkpts1, + color, text, path=None, margin=10, + opencv_display=False, opencv_title='', + small_text=[]): + H0, W0 = image0.shape[:2] + H1, W1 = image1.shape[:2] + H, W = max(H0, H1), W0 + W1 + margin + + out = 255*np.ones((H, W, 3), np.uint8) + out[:H0, :W0, :] = image0 + out[:H1, W0+margin:, :] = image1 + + # Scale factor for consistent visualization across scales. + sc = min(H / 640., 2.0) + + # Big text. + Ht = int(30 * sc) # text height + txt_color_fg = (255, 255, 255) + txt_color_bg = (0, 0, 0) + + for i, t in enumerate(text): + cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, + 1.0*sc, txt_color_bg, 2, cv2.LINE_AA) + cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, + 1.0*sc, txt_color_fg, 1, cv2.LINE_AA) + + out_backup = out.copy() + + mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int) + color = (np.array(color[:, :3])*255).astype(int)[:, ::-1] + for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, color): + c = c.tolist() + cv2.line(out, (x0, y0), (x1 + margin + W0, y1), + color=c, thickness=1, lineType=cv2.LINE_AA) + # display line end-points as circles + cv2.circle(out, (x0, y0), 2, c, -1, lineType=cv2.LINE_AA) + cv2.circle(out, (x1 + margin + W0, y1), 2, c, -1, + lineType=cv2.LINE_AA) + + # Small text. 
+ Ht = int(18 * sc) # text height + for i, t in enumerate(reversed(small_text)): + cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, + 0.5*sc, txt_color_bg, 2, cv2.LINE_AA) + cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, + 0.5*sc, txt_color_fg, 1, cv2.LINE_AA) + + if path is not None: + cv2.imwrite(str(path), out) + + if opencv_display: + cv2.imshow(opencv_title, out) + cv2.waitKey(1) + + return out / 2 + out_backup / 2 + diff --git a/third_party/DarkFeat/utils/nn.py b/third_party/DarkFeat/utils/nn.py new file mode 100644 index 0000000000000000000000000000000000000000..8a80631d6e12d848cceee3b636baf49deaa7647a --- /dev/null +++ b/third_party/DarkFeat/utils/nn.py @@ -0,0 +1,50 @@ +import torch +from torch import nn + + +class NN2(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, data): + desc1, desc2 = data['descriptors0'].cuda(), data['descriptors1'].cuda() + kpts1, kpts2 = data['keypoints0'].cuda(), data['keypoints1'].cuda() + + # torch.cuda.synchronize() + # t = time.time() + + if kpts1.shape[1] <= 1 or kpts2.shape[1] <= 1: # no keypoints + shape0, shape1 = kpts1.shape[:-1], kpts2.shape[:-1] + return { + 'matches0': kpts1.new_full(shape0, -1, dtype=torch.int), + 'matches1': kpts2.new_full(shape1, -1, dtype=torch.int), + 'matching_scores0': kpts1.new_zeros(shape0), + 'matching_scores1': kpts2.new_zeros(shape1), + } + + sim = torch.matmul(desc1.squeeze().T, desc2.squeeze()) + ids1 = torch.arange(0, sim.shape[0], device=desc1.device) + nn12 = torch.argmax(sim, dim=1) + + nn21 = torch.argmax(sim, dim=0) + mask = torch.eq(ids1, nn21[nn12]) + matches = torch.stack([torch.masked_select(ids1, mask), torch.masked_select(nn12, mask)]) + # matches = torch.stack([ids1, nn12]) + indices0 = torch.ones((1, desc1.shape[-1]), dtype=int) * -1 + mscores0 = torch.ones((1, desc1.shape[-1]), dtype=float) * -1 + + # torch.cuda.synchronize() + # print(time.time() - t) + + matches_0 = 
matches[0].cpu().int().numpy() + matches_1 = matches[1].cpu().int() + for i in range(matches.shape[-1]): + indices0[0, matches_0[i]] = matches_1[i].int() + mscores0[0, matches_0[i]] = sim[matches_0[i], matches_1[i]] + + return { + 'matches0': indices0, # use -1 for invalid match + 'matches1': indices0, # use -1 for invalid match + 'matching_scores0': mscores0, + 'matching_scores1': mscores0, + } diff --git a/third_party/DarkFeat/utils/nnmatching.py b/third_party/DarkFeat/utils/nnmatching.py new file mode 100644 index 0000000000000000000000000000000000000000..7be6f98c050fc2e416ef48e25ca0f293106c1082 --- /dev/null +++ b/third_party/DarkFeat/utils/nnmatching.py @@ -0,0 +1,41 @@ +import torch + +from .nn import NN2 +from darkfeat import DarkFeat + +class NNMatching(torch.nn.Module): + def __init__(self, model_path=''): + super().__init__() + self.nn = NN2().eval() + self.darkfeat = DarkFeat(model_path).eval() + + def forward(self, data): + """ Run DarkFeat and nearest neighborhood matching + Args: + data: dictionary with minimal keys: ['image0', 'image1'] + """ + pred = {} + + # Extract DarkFeat (keypoints, scores, descriptors) + if 'keypoints0' not in data: + pred0 = self.darkfeat({'image': data['image0']}) + # print({k+'0': v[0].shape for k, v in pred0.items()}) + pred = {**pred, **{k+'0': [v] for k, v in pred0.items()}} + if 'keypoints1' not in data: + pred1 = self.darkfeat({'image': data['image1']}) + pred = {**pred, **{k+'1': [v] for k, v in pred1.items()}} + + + # Batch all features + # We should either have i) one image per batch, or + # ii) the same number of local features for all images in the batch. 
+ data = {**data, **pred} + + for k in data: + if isinstance(data[k], (list, tuple)): + data[k] = torch.stack(data[k]) + + # Perform the matching + pred = {**pred, **self.nn(data)} + + return pred diff --git a/third_party/GlueStick/.gitignore b/third_party/GlueStick/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c246e14ed9611a54be01334d4c2e734dca731e4b --- /dev/null +++ b/third_party/GlueStick/.gitignore @@ -0,0 +1,132 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +.idea/* +*events.out.tfevents.* +/outputs \ No newline at end of file diff --git a/third_party/GlueStick/LICENSE b/third_party/GlueStick/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..866f33543245c285b350696b00be76bc278ca4a7 --- /dev/null +++ b/third_party/GlueStick/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Computer Vision and Geometry Lab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/third_party/GlueStick/README.md b/third_party/GlueStick/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3145f02d47f4c60dd7d9a7d04e10f87b8f55dad7 --- /dev/null +++ b/third_party/GlueStick/README.md @@ -0,0 +1,48 @@ +# GlueStick +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cvg/GlueStick/blob/main/gluestick_matching_demo.ipynb) [![arXiv](https://img.shields.io/badge/arXiv-2304.02008-b31b1b.svg?style=flat)](https://arxiv.org/abs/2304.02008) [![Project Page](https://badgen.net/badge/color/project/green?icon=awesome&label)](https://iago-suarez.com/gluestick) + +Joint deep matcher for points and lines 🖼️💥🖼️ + +![Visualization of point and line matches](resources/demo_seq1.gif) + +This repository contains the official implementation of +[GlueStick: Robust Image Matching by Sticking Points and Lines Together](https://arxiv.org/abs/2304.02008). + +## Install 🛠️ + +To install the software on Ubuntu 22.04, follow these instructions: +```bash +sudo apt-get install build-essential cmake libopencv-dev libopencv-contrib-dev +git clone --recursive https://github.com/cvg/GlueStick.git +cd GlueStick +# Create and activate a virtual environment +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +pip install -e . +``` + +## Running GlueStick 🏃 +Download the weights of the model: +``` +wget https://github.com/cvg/GlueStick/releases/download/v0.1_arxiv/checkpoint_GlueStick_MD.tar -P resources/weights +``` + +You can then run inference with the downloaded weights: +``` +python -m gluestick.run -img1 resources/img1.jpg -img2 resources/img2.jpg +``` + +## Training 🏋️ +We want to provide you with high-quality and flexible code for training. Stay tuned, we will release it soon!
+ +## Citation 📝 +If you use this code in your project, please consider citing the following paper: +```bibtex +@article{pautrat_suarez_2023_gluestick, + title={{GlueStick}: Robust Image Matching by Sticking Points and Lines Together}, + author={Pautrat, R{\'e}mi* and Su{\'a}rez, Iago* and Yu, Yifan and Pollefeys, Marc and Larsson, Viktor}, + journal={ArXiv}, + year={2023} +} +``` diff --git a/third_party/GlueStick/gluestick/__init__.py b/third_party/GlueStick/gluestick/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d3051821ecfb2e18f4b9b4dfb50f35064106eb57 --- /dev/null +++ b/third_party/GlueStick/gluestick/__init__.py @@ -0,0 +1,53 @@ +import collections.abc as collections +from pathlib import Path + +import torch + +GLUESTICK_ROOT = Path(__file__).parent.parent + + +def get_class(mod_name, base_path, BaseClass): + """Get the class object which inherits from BaseClass and is defined in + the module named mod_name, child of base_path. + """ + import inspect + mod_path = '{}.{}'.format(base_path, mod_name) + mod = __import__(mod_path, fromlist=['']) + classes = inspect.getmembers(mod, inspect.isclass) + # Filter classes defined in the module + classes = [c for c in classes if c[1].__module__ == mod_path] + # Filter classes inherited from BaseModel + classes = [c for c in classes if issubclass(c[1], BaseClass)] + assert len(classes) == 1, classes + return classes[0][1] + + +def get_model(name): + from .models.base_model import BaseModel + return get_class('models.' 
+ name, __name__, BaseModel) + + +def numpy_image_to_torch(image): + """Normalize the image tensor and reorder the dimensions.""" + if image.ndim == 3: + image = image.transpose((2, 0, 1)) # HxWxC to CxHxW + elif image.ndim == 2: + image = image[None] # add channel axis + else: + raise ValueError(f'Not an image: {image.shape}') + return torch.from_numpy(image / 255.).float() + + +def map_tensor(input_, func): + if isinstance(input_, (str, bytes)): + return input_ + elif isinstance(input_, collections.Mapping): + return {k: map_tensor(sample, func) for k, sample in input_.items()} + elif isinstance(input_, collections.Sequence): + return [map_tensor(sample, func) for sample in input_] + else: + return func(input_) + + +def batch_to_np(batch): + return map_tensor(batch, lambda t: t.detach().cpu().numpy()[0]) diff --git a/third_party/GlueStick/gluestick/drawing.py b/third_party/GlueStick/gluestick/drawing.py new file mode 100644 index 0000000000000000000000000000000000000000..8e6d24b6bfedc93449142647410057d978d733ef --- /dev/null +++ b/third_party/GlueStick/gluestick/drawing.py @@ -0,0 +1,166 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns + + +def plot_images(imgs, titles=None, cmaps='gray', dpi=100, pad=.5, + adaptive=True): + """Plot a set of images horizontally. + Args: + imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W). + titles: a list of strings, as titles for each image. + cmaps: colormaps for monochrome images. + adaptive: whether the figure size should fit the image aspect ratios. 
+ """ + n = len(imgs) + if not isinstance(cmaps, (list, tuple)): + cmaps = [cmaps] * n + + if adaptive: + ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H + else: + ratios = [4 / 3] * n + figsize = [sum(ratios) * 4.5, 4.5] + fig, ax = plt.subplots( + 1, n, figsize=figsize, dpi=dpi, gridspec_kw={'width_ratios': ratios}) + if n == 1: + ax = [ax] + for i in range(n): + ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) + ax[i].get_yaxis().set_ticks([]) + ax[i].get_xaxis().set_ticks([]) + ax[i].set_axis_off() + for spine in ax[i].spines.values(): # remove frame + spine.set_visible(False) + if titles: + ax[i].set_title(titles[i]) + fig.tight_layout(pad=pad) + return ax + + +def plot_keypoints(kpts, colors='lime', ps=4, alpha=1): + """Plot keypoints for existing images. + Args: + kpts: list of ndarrays of size (N, 2). + colors: string, or list of list of tuples (one for each keypoints). + ps: size of the keypoints as float. + """ + if not isinstance(colors, list): + colors = [colors] * len(kpts) + axes = plt.gcf().axes + for a, k, c in zip(axes, kpts, colors): + a.scatter(k[:, 0], k[:, 1], c=c, s=ps, alpha=alpha, linewidths=0) + + +def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.): + """Plot matches for a pair of existing images. + Args: + kpts0, kpts1: corresponding keypoints of size (N, 2). + color: color of each match, string or RGB tuple. Random if not given. + lw: width of the lines. + ps: size of the end points (no endpoint if ps=0) + indices: indices of the images to draw the matches on. + a: alpha opacity of the match lines. 
def plot_lines(lines, line_colors='orange', point_colors='cyan',
               ps=4, lw=2, alpha=1., indices=(0, 1)):
    """Draw line segments and their endpoints on existing image axes.

    Args:
        lines: list of arrays of size (N, 2, 2), one per image; each
            segment is stored as ((x0, y0), (x1, y1)).
        line_colors: one color for every image, or a list with one
            entry per image, used for the segments.
        point_colors: one color for every image, or a list with one
            entry per image, used for the endpoints.
        ps: size of the endpoint markers.
        lw: width of the segments.
        alpha: opacity of both the segments and the endpoints.
        indices: indices of the axes (in the current figure) to draw on.
    """
    if not isinstance(line_colors, list):
        line_colors = [line_colors] * len(lines)
    if not isinstance(point_colors, list):
        point_colors = [point_colors] * len(lines)

    figure = plt.gcf()
    all_axes = figure.axes
    assert len(all_axes) > max(indices)
    target_axes = [all_axes[idx] for idx in indices]
    figure.canvas.draw()

    # One Line2D artist per segment, then a single scatter for all endpoints.
    for axis, segments, seg_color, pt_color in zip(
            target_axes, lines, line_colors, point_colors):
        for (x0, y0), (x1, y1) in segments:
            axis.add_line(matplotlib.lines.Line2D(
                (x0, x1), (y0, y1),
                zorder=1, c=seg_color, linewidth=lw, alpha=alpha))
        endpoints = segments.reshape(-1, 2)
        axis.scatter(endpoints[:, 0], endpoints[:, 1],
                     c=pt_color, s=ps, linewidths=0, zorder=2, alpha=alpha)
def to_homogeneous(points):
    """Convert N-dimensional points to homogeneous coordinates.

    A trailing coordinate equal to 1 is appended along the last axis. The
    padding is created with the dtype of ``points`` (and, for tensors, on
    the same device through ``new_ones``).

    Args:
        points: torch.Tensor or numpy.ndarray with size (..., N).
    Returns:
        A torch.Tensor or numpy.ndarray with size (..., N+1).
    Raises:
        ValueError: if ``points`` is neither a tensor nor an ndarray.
    """
    if isinstance(points, torch.Tensor):
        pad = points.new_ones(points.shape[:-1] + (1,))
        return torch.cat([points, pad], dim=-1)
    if isinstance(points, np.ndarray):
        pad = np.ones(points.shape[:-1] + (1,), dtype=points.dtype)
        return np.concatenate([points, pad], axis=-1)
    # Keep ValueError for backward compatibility, but say what went wrong.
    raise ValueError(
        'to_homogeneous expects a torch.Tensor or numpy.ndarray, '
        f'got {type(points).__name__}')
def skew_symmetric(v):
    """Build the cross-product (skew-symmetric) matrix of a batched vector.

    Args:
        v: torch.Tensor of size (..., 3).
    Returns:
        A torch.Tensor of size (..., 3, 3) laid out as
        [[0, -z, y], [z, 0, -x], [-y, x, 0]].
    """
    vx, vy, vz = v[..., 0], v[..., 1], v[..., 2]
    zero = torch.zeros_like(vx)
    # Entries listed row by row, then folded into a trailing 3x3 block.
    entries = [
        zero, -vz, vy,
        vz, zero, -vx,
        -vy, vx, zero,
    ]
    return torch.stack(entries, dim=-1).reshape(v.shape[:-1] + (3, 3))


def T_to_E(T):
    """Convert batched poses (..., 4, 4) to batched essential matrices.

    The result is ``skew_symmetric(t) @ R`` with ``R`` the top-left 3x3
    block of ``T`` and ``t`` the first three entries of its last column.
    """
    rotation = T[..., :3, :3]
    translation = T[..., :3, 3]
    return skew_symmetric(translation) @ rotation
def seg_equation(segs):
    """Compute the normalized line equation of each segment.

    Each segment (two 2D endpoints along the second-to-last axis) is
    mapped to (a, b, c) with ax + by + c = 0, scaled so a^2 + b^2 = 1.

    Args:
        segs: torch.Tensor of size (..., 2, 2).
    Returns:
        A torch.Tensor of size (..., 3).
    Raises:
        AssertionError: if a segment has a zero (a, b) normal, e.g. when
            both endpoints coincide.
    """
    # The line through two points is the cross product of their
    # homogeneous coordinates.
    first = to_homogeneous(segs[..., 0, :])
    second = to_homogeneous(segs[..., 1, :])
    equations = torch.cross(first, second, dim=-1)
    a, b = equations[..., 0], equations[..., 1]
    normal_length = torch.sqrt(a ** 2 + b ** 2)[..., None]
    assert torch.all(normal_length > 0), 'Error: trying to compute the equation of a line with a single point'
    return equations / normal_length


def is_inside_img(pts: torch.Tensor, img_shape: Tuple[int, int]):
    """Tell which points lie inside an image.

    A point is inside when both coordinates are non-negative, x < W,
    y < H and no coordinate is infinite.

    Args:
        pts: torch.Tensor of size (..., 2) holding (x, y) coordinates.
        img_shape: the image shape in format (H, W).
    Returns:
        A boolean torch.Tensor of size (...).
    """
    height, width = img_shape
    x, y = pts[..., 0], pts[..., 1]
    non_negative = (pts >= 0).all(dim=-1)
    finite = ~torch.isinf(pts).any(dim=-1)
    return non_negative & (x < width) & (y < height) & finite
+ :param segs: The tensor of segments with shape (N, 2, 2) + :param img_shape: The image shape in format (H, W) + """ + EPS = 1e-4 + device = segs.device + w, h = img_shape[1], img_shape[0] + # Project the segments to the reference image + segs = segs.clone() + eqs = seg_equation(segs) + x0, y0 = torch.tensor([1., 0, 0.], device=device), torch.tensor([0., 1, 0], device=device) + x0 = x0.repeat(eqs.shape[:-1] + (1,)) + y0 = y0.repeat(eqs.shape[:-1] + (1,)) + pt_x0s = torch.cross(eqs, x0, dim=-1) + pt_x0s = pt_x0s[..., :-1] / pt_x0s[..., None, -1] + pt_x0s_valid = is_inside_img(pt_x0s, img_shape) + pt_y0s = torch.cross(eqs, y0, dim=-1) + pt_y0s = pt_y0s[..., :-1] / pt_y0s[..., None, -1] + pt_y0s_valid = is_inside_img(pt_y0s, img_shape) + + xW, yH = torch.tensor([1., 0, EPS - w], device=device), torch.tensor([0., 1, EPS - h], device=device) + xW = xW.repeat(eqs.shape[:-1] + (1,)) + yH = yH.repeat(eqs.shape[:-1] + (1,)) + pt_xWs = torch.cross(eqs, xW, dim=-1) + pt_xWs = pt_xWs[..., :-1] / pt_xWs[..., None, -1] + pt_xWs_valid = is_inside_img(pt_xWs, img_shape) + pt_yHs = torch.cross(eqs, yH, dim=-1) + pt_yHs = pt_yHs[..., :-1] / pt_yHs[..., None, -1] + pt_yHs_valid = is_inside_img(pt_yHs, img_shape) + + # If the X coordinate of the first endpoint is out + mask = (segs[..., 0, 0] < 0) & pt_x0s_valid + segs[mask, 0, :] = pt_x0s[mask] + mask = (segs[..., 0, 0] > (w - 1)) & pt_xWs_valid + segs[mask, 0, :] = pt_xWs[mask] + # If the X coordinate of the second endpoint is out + mask = (segs[..., 1, 0] < 0) & pt_x0s_valid + segs[mask, 1, :] = pt_x0s[mask] + mask = (segs[:, 1, 0] > (w - 1)) & pt_xWs_valid + segs[mask, 1, :] = pt_xWs[mask] + # If the Y coordinate of the first endpoint is out + mask = (segs[..., 0, 1] < 0) & pt_y0s_valid + segs[mask, 0, :] = pt_y0s[mask] + mask = (segs[..., 0, 1] > (h - 1)) & pt_yHs_valid + segs[mask, 0, :] = pt_yHs[mask] + # If the Y coordinate of the second endpoint is out + mask = (segs[..., 1, 1] < 0) & pt_y0s_valid + segs[mask, 1, :] = 
def warp_lines_torch(lines, H, inverse=True, dst_shape: Tuple[int, int] = None) -> Tuple[torch.Tensor, torch.Tensor]:
    """Warp batched line segments with a homography and optionally trim them.

    :param lines: A tensor of shape (B, N, 2, 2) where B is the batch size, N the number of lines.
    :param H: The homography used to convert the lines. batched or not (shapes (B, 8) and (8,) respectively).
    :param inverse: Whether to apply H or the inverse of H
    :param dst_shape: If provided (as (H, W)), lines partially outside the image are trimmed to fit inside it
    :return: The warped lines, with the same shape as the input, and a boolean mask of shape (B, N).
        The mask is all True when dst_shape is None; otherwise it is False for the lines whose
        two endpoints are both outside the image.
    """
    device = lines.device
    batch_size, n = lines.shape[:2]
    flat_endpoints = lines.reshape(batch_size, -1, 2)
    lines = warp_points_torch(flat_endpoints, H, inverse).reshape(lines.shape)

    if dst_shape is None:
        everything_valid = torch.ones(
            lines.shape[:-2], dtype=torch.bool, device=device)
        return lines, everything_valid

    # Per-endpoint test against the (W, H) bounds of the target image.
    bounds = torch.tensor(dst_shape[::-1], device=device)
    endpoint_out = torch.any((lines < 0) | (lines >= bounds), -1)
    valid = ~endpoint_out.all(-1)
    needs_trim = valid & endpoint_out.any(-1)

    # Only lines with exactly one endpoint outside are shrunk.
    for b in range(batch_size):
        mask_b = needs_trim[b]
        lines[b, mask_b] = shrink_segs_to_img(lines[b][mask_b], dst_shape)

    return lines, valid
class MetaModel(ABCMeta):
    """Metaclass that pre-merges the default configurations of the bases.

    The namespace of every class created with this metaclass starts with
    ``base_default_conf``: the merge, in base order, of the
    ``base_default_conf`` and ``default_conf`` attributes of each base.
    """

    # NOTE: written without ``cls``/``@classmethod``; it is looked up on the
    # metaclass itself and therefore called as a plain function.
    def __prepare__(name, bases, **kwds):
        merged = OmegaConf.create()
        conf_attributes = ('base_default_conf', 'default_conf')
        for parent in bases:
            for attribute in conf_attributes:
                parent_conf = getattr(parent, attribute, {})
                if isinstance(parent_conf, dict):
                    # Plain dicts are promoted so that they can be merged.
                    parent_conf = OmegaConf.create(parent_conf)
                merged = OmegaConf.merge(merged, parent_conf)
        return {'base_default_conf': merged}
+ """ + default_conf = { + 'name': None, + 'trainable': True, # if false: do not optimize this model parameters + 'freeze_batch_normalization': False, # use test-time statistics + } + required_data_keys = [] + strict_conf = True + + def __init__(self, conf): + """Perform some logic and call the _init method of the child model.""" + super().__init__() + default_conf = OmegaConf.merge( + self.base_default_conf, OmegaConf.create(self.default_conf)) + if self.strict_conf: + OmegaConf.set_struct(default_conf, True) + + # fixme: backward compatibility + if 'pad' in conf and 'pad' not in default_conf: # backward compat. + with omegaconf.read_write(conf): + with omegaconf.open_dict(conf): + conf['interpolation'] = {'pad': conf.pop('pad')} + + if isinstance(conf, dict): + conf = OmegaConf.create(conf) + self.conf = conf = OmegaConf.merge(default_conf, conf) + OmegaConf.set_readonly(conf, True) + OmegaConf.set_struct(conf, True) + self.required_data_keys = copy(self.required_data_keys) + self._init(conf) + + if not conf.trainable: + for p in self.parameters(): + p.requires_grad = False + + def train(self, mode=True): + super().train(mode) + + def freeze_bn(module): + if isinstance(module, nn.modules.batchnorm._BatchNorm): + module.eval() + if self.conf.freeze_batch_normalization: + self.apply(freeze_bn) + + return self + + def forward(self, data): + """Check the data and call the _forward method of the child model.""" + def recursive_key_check(expected, given): + for key in expected: + assert key in given, f'Missing key {key} in data' + if isinstance(expected, dict): + recursive_key_check(expected[key], given[key]) + + recursive_key_check(self.required_data_keys, data) + return self._forward(data) + + @abstractmethod + def _init(self, conf): + """To be implemented by the child class.""" + raise NotImplementedError + + @abstractmethod + def _forward(self, data): + """To be implemented by the child class.""" + raise NotImplementedError + + @abstractmethod + def loss(self, 
class GlueStick(BaseModel):
    """Attentional GNN matcher for keypoints and line segments.

    The forward pass consumes keypoints, descriptors, scores and lines of
    two images (suffixes ``0`` and ``1``) and returns point matches
    (``matches*``, ``match_scores*``, ``log_assignment``) and line matches
    (``line_matches*``, ``line_match_scores*``, ``line_log_assignment``,
    ``raw_line_scores``). Unmatched entries are marked with -1.
    """
    default_conf = {
        'input_dim': 256,
        'descriptor_dim': 256,
        'bottleneck_dim': None,
        'weights': None,
        'keypoint_encoder': [32, 64, 128, 256],
        'GNN_layers': ['self', 'cross'] * 9,
        'num_line_iterations': 1,
        'line_attention': False,
        'filter_threshold': 0.2,
        'checkpointed': False,
        'skip_init': False,
        'inter_supervision': None,
        'loss': {
            'nll_weight': 1.,
            'nll_balancing': 0.5,
            'reward_weight': 0.,
            'bottleneck_l2_weight': 0.,
            'dense_nll_weight': 0.,
            'inter_supervision': [0.3, 0.6],
        },
    }
    required_data_keys = [
        'keypoints0', 'keypoints1',
        'descriptors0', 'descriptors1',
        'keypoint_scores0', 'keypoint_scores1']

    DEFAULT_LOSS_CONF = {'nll_weight': 1., 'nll_balancing': 0.5, 'reward_weight': 0., 'bottleneck_l2_weight': 0.}

    def _init(self, conf):
        """Create the sub-modules and optionally load pretrained weights."""
        if conf.bottleneck_dim is not None:
            self.bottleneck_down = nn.Conv1d(
                conf.input_dim, conf.bottleneck_dim, kernel_size=1)
            self.bottleneck_up = nn.Conv1d(
                conf.bottleneck_dim, conf.input_dim, kernel_size=1)
            nn.init.constant_(self.bottleneck_down.bias, 0.0)
            nn.init.constant_(self.bottleneck_up.bias, 0.0)

        if conf.input_dim != conf.descriptor_dim:
            self.input_proj = nn.Conv1d(
                conf.input_dim, conf.descriptor_dim, kernel_size=1)
            nn.init.constant_(self.input_proj.bias, 0.0)

        self.kenc = KeypointEncoder(conf.descriptor_dim,
                                    conf.keypoint_encoder)
        self.lenc = EndPtEncoder(conf.descriptor_dim, conf.keypoint_encoder)
        # NOTE(review): conf.skip_init is not forwarded to AttentionalGNN
        # (its `skip` argument keeps its default) -- confirm this is intended.
        self.gnn = AttentionalGNN(conf.descriptor_dim, conf.GNN_layers,
                                  checkpointed=conf.checkpointed,
                                  inter_supervision=conf.inter_supervision,
                                  num_line_iterations=conf.num_line_iterations,
                                  line_attention=conf.line_attention)
        self.final_proj = nn.Conv1d(conf.descriptor_dim, conf.descriptor_dim,
                                    kernel_size=1)
        nn.init.constant_(self.final_proj.bias, 0.0)
        nn.init.orthogonal_(self.final_proj.weight, gain=1)
        self.final_line_proj = nn.Conv1d(
            conf.descriptor_dim, conf.descriptor_dim, kernel_size=1)
        nn.init.constant_(self.final_line_proj.bias, 0.0)
        nn.init.orthogonal_(self.final_line_proj.weight, gain=1)
        if conf.inter_supervision is not None:
            # One extra line projection per supervised intermediate layer.
            self.inter_line_proj = nn.ModuleList(
                [nn.Conv1d(conf.descriptor_dim, conf.descriptor_dim, kernel_size=1)
                 for _ in conf.inter_supervision])
            self.layer2idx = {}
            for i, l in enumerate(conf.inter_supervision):
                nn.init.constant_(self.inter_line_proj[i].bias, 0.0)
                nn.init.orthogonal_(self.inter_line_proj[i].weight, gain=1)
                self.layer2idx[l] = i

        # Learnable "dustbin" scores used for unmatched points / lines.
        bin_score = torch.nn.Parameter(torch.tensor(1.))
        self.register_parameter('bin_score', bin_score)
        line_bin_score = torch.nn.Parameter(torch.tensor(1.))
        self.register_parameter('line_bin_score', line_bin_score)

        if conf.weights:
            assert isinstance(conf.weights, str)
            # NOTE: torch.load unpickles the file; only load trusted weights.
            state_dict = torch.load(conf.weights, map_location='cpu')
            if 'model' in state_dict:
                # Full training checkpoint: keep only the matcher weights.
                state_dict = {k.replace('matcher.', ''): v for k, v in state_dict['model'].items() if 'matcher.' in k}
            # Drop the prefix added by (Distributed)DataParallel wrappers;
            # a no-op for keys that do not contain it.
            state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
            self.load_state_dict(state_dict)

    @staticmethod
    def _empty_line_pred(b_size, n_lines0, n_lines1, device):
        """Return the line predictions used when no line can be matched."""
        return {
            'line_log_assignment': torch.zeros(
                b_size, n_lines0, n_lines1, dtype=torch.float, device=device),
            'line_matches0': torch.full(
                (b_size, n_lines0), -1, device=device, dtype=torch.int64),
            'line_matches1': torch.full(
                (b_size, n_lines1), -1, device=device, dtype=torch.int64),
            'line_match_scores0': torch.zeros(
                (b_size, n_lines0), device=device, dtype=torch.float32),
            'line_match_scores1': torch.zeros(
                (b_size, n_lines1), device=device, dtype=torch.float32),
            'raw_line_scores': torch.zeros(
                b_size, n_lines0, n_lines1, dtype=torch.float, device=device),
        }

    def _forward(self, data):
        """Match the keypoints and the lines of two images.

        Besides ``required_data_keys``, the method reads ``lines0/1``,
        ``lines_junc_idx0/1``, ``line_scores0/1`` and either
        ``image_size0/1`` or ``image0/1`` from ``data``.

        Returns:
            A dictionary of batched predictions (see the class docstring).
        """
        device = data['keypoints0'].device
        b_size = len(data['keypoints0'])
        image_size0 = (data['image_size0'] if 'image_size0' in data
                       else data['image0'].shape)
        image_size1 = (data['image_size1'] if 'image_size1' in data
                       else data['image1'].shape)

        pred = {}
        desc0, desc1 = data['descriptors0'], data['descriptors1']
        kpts0, kpts1 = data['keypoints0'], data['keypoints1']

        n_kpts0, n_kpts1 = kpts0.shape[1], kpts1.shape[1]
        n_lines0, n_lines1 = data['lines0'].shape[1], data['lines1'].shape[1]
        if n_kpts0 == 0 or n_kpts1 == 0:
            # No detected keypoints nor lines
            pred['log_assignment'] = torch.zeros(
                b_size, n_kpts0, n_kpts1, dtype=torch.float, device=device)
            pred['matches0'] = torch.full(
                (b_size, n_kpts0), -1, device=device, dtype=torch.int64)
            pred['matches1'] = torch.full(
                (b_size, n_kpts1), -1, device=device, dtype=torch.int64)
            pred['match_scores0'] = torch.zeros(
                (b_size, n_kpts0), device=device, dtype=torch.float32)
            pred['match_scores1'] = torch.zeros(
                (b_size, n_kpts1), device=device, dtype=torch.float32)
            # Line outputs are sized by the number of lines, not keypoints.
            pred.update(
                self._empty_line_pred(b_size, n_lines0, n_lines1, device))
            return pred

        lines0 = data['lines0'].flatten(1, 2)
        lines1 = data['lines1'].flatten(1, 2)
        lines_junc_idx0 = data['lines_junc_idx0'].flatten(1, 2)  # [b_size, num_lines * 2]
        lines_junc_idx1 = data['lines_junc_idx1'].flatten(1, 2)

        if self.conf.bottleneck_dim is not None:
            pred['down_descriptors0'] = desc0 = self.bottleneck_down(desc0)
            pred['down_descriptors1'] = desc1 = self.bottleneck_down(desc1)
            desc0 = self.bottleneck_up(desc0)
            desc1 = self.bottleneck_up(desc1)
            desc0 = nn.functional.normalize(desc0, p=2, dim=1)
            desc1 = nn.functional.normalize(desc1, p=2, dim=1)
            pred['bottleneck_descriptors0'] = desc0
            pred['bottleneck_descriptors1'] = desc1
            if self.conf.loss.nll_weight == 0:
                desc0 = desc0.detach()
                desc1 = desc1.detach()

        if self.conf.input_dim != self.conf.descriptor_dim:
            desc0 = self.input_proj(desc0)
            desc1 = self.input_proj(desc1)

        kpts0 = normalize_keypoints(kpts0, image_size0)
        kpts1 = normalize_keypoints(kpts1, image_size1)

        assert torch.all(kpts0 >= -1) and torch.all(kpts0 <= 1)
        assert torch.all(kpts1 >= -1) and torch.all(kpts1 <= 1)
        desc0 = desc0 + self.kenc(kpts0, data['keypoint_scores0'])
        desc1 = desc1 + self.kenc(kpts1, data['keypoint_scores1'])

        if n_lines0 != 0 and n_lines1 != 0:
            # Pre-compute the line encodings
            lines0 = normalize_keypoints(lines0, image_size0).reshape(
                b_size, n_lines0, 2, 2)
            lines1 = normalize_keypoints(lines1, image_size1).reshape(
                b_size, n_lines1, 2, 2)
            line_enc0 = self.lenc(lines0, data['line_scores0'])
            line_enc1 = self.lenc(lines1, data['line_scores1'])
        else:
            line_enc0 = torch.zeros(
                b_size, self.conf.descriptor_dim, n_lines0 * 2,
                dtype=torch.float, device=device)
            line_enc1 = torch.zeros(
                b_size, self.conf.descriptor_dim, n_lines1 * 2,
                dtype=torch.float, device=device)

        desc0, desc1 = self.gnn(desc0, desc1, line_enc0, line_enc1,
                                lines_junc_idx0, lines_junc_idx1)

        # Match all points (KP and line junctions)
        mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1)

        kp_scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1)
        kp_scores = kp_scores / self.conf.descriptor_dim ** .5
        kp_scores = log_double_softmax(kp_scores, self.bin_score)
        m0, m1, mscores0, mscores1 = self._get_matches(kp_scores)
        pred['log_assignment'] = kp_scores
        pred['matches0'] = m0
        pred['matches1'] = m1
        pred['match_scores0'] = mscores0
        pred['match_scores1'] = mscores1

        # Match the lines
        if n_lines0 > 0 and n_lines1 > 0:
            (line_scores, m0_lines, m1_lines, mscores0_lines,
             mscores1_lines, raw_line_scores) = self._get_line_matches(
                desc0[:, :, :2 * n_lines0], desc1[:, :, :2 * n_lines1],
                lines_junc_idx0, lines_junc_idx1, self.final_line_proj)
            if self.conf.inter_supervision:
                for l in self.conf.inter_supervision:
                    # _get_line_matches returns six values; the raw scores
                    # of the intermediate layers are not exported.
                    (line_scores_i, m0_lines_i, m1_lines_i, mscores0_lines_i,
                     mscores1_lines_i, _) = self._get_line_matches(
                        self.gnn.inter_layers[l][0][:, :, :2 * n_lines0],
                        self.gnn.inter_layers[l][1][:, :, :2 * n_lines1],
                        lines_junc_idx0, lines_junc_idx1,
                        self.inter_line_proj[self.layer2idx[l]])
                    pred[f'line_{l}_log_assignment'] = line_scores_i
                    pred[f'line_{l}_matches0'] = m0_lines_i
                    pred[f'line_{l}_matches1'] = m1_lines_i
                    pred[f'line_{l}_match_scores0'] = mscores0_lines_i
                    pred[f'line_{l}_match_scores1'] = mscores1_lines_i
            pred['line_log_assignment'] = line_scores
            pred['line_matches0'] = m0_lines
            pred['line_matches1'] = m1_lines
            pred['line_match_scores0'] = mscores0_lines
            pred['line_match_scores1'] = mscores1_lines
            pred['raw_line_scores'] = raw_line_scores
        else:
            pred.update(
                self._empty_line_pred(b_size, n_lines0, n_lines1, device))

        return pred

    def _get_matches(self, scores_mat):
        """Extract mutual nearest-neighbour matches from log-assignments.

        Args:
            scores_mat: log-assignment of size (B, M+1, N+1); the last row
                and column (dustbins) are ignored.
        Returns:
            ``m0`` (B, M) and ``m1`` (B, N), the index of the match in the
            other set or -1, and the scores ``mscores0`` and ``mscores1``.
            A match is kept when it is mutual and its score exceeds
            ``conf.filter_threshold``.
        """
        max0 = scores_mat[:, :-1, :-1].max(2)
        max1 = scores_mat[:, :-1, :-1].max(1)
        m0, m1 = max0.indices, max1.indices
        mutual0 = arange_like(m0, 1)[None] == m1.gather(1, m0)
        mutual1 = arange_like(m1, 1)[None] == m0.gather(1, m1)
        zero = scores_mat.new_tensor(0)
        mscores0 = torch.where(mutual0, max0.values.exp(), zero)
        mscores1 = torch.where(mutual1, mscores0.gather(1, m1), zero)
        valid0 = mutual0 & (mscores0 > self.conf.filter_threshold)
        valid1 = mutual1 & valid0.gather(1, m1)
        m0 = torch.where(valid0, m0, m0.new_tensor(-1))
        m1 = torch.where(valid1, m1, m1.new_tensor(-1))
        return m0, m1, mscores0, mscores1

    def _get_line_matches(self, ldesc0, ldesc1, lines_junc_idx0,
                          lines_junc_idx1, final_proj):
        """Match lines from the descriptors of their endpoints.

        Returns:
            A 6-tuple ``(line_scores, m0_lines, m1_lines, mscores0_lines,
            mscores1_lines, raw_line_scores)`` where ``raw_line_scores``
            are the scores before the double softmax.
        """
        mldesc0 = final_proj(ldesc0)
        mldesc1 = final_proj(ldesc1)

        line_scores = torch.einsum('bdn,bdm->bnm', mldesc0, mldesc1)
        line_scores = line_scores / self.conf.descriptor_dim ** .5

        # Get the line representation from the junction descriptors
        n2_lines0 = lines_junc_idx0.shape[1]
        n2_lines1 = lines_junc_idx1.shape[1]
        line_scores = torch.gather(
            line_scores, dim=2,
            index=lines_junc_idx1[:, None, :].repeat(1, line_scores.shape[1], 1))
        line_scores = torch.gather(
            line_scores, dim=1,
            index=lines_junc_idx0[:, :, None].repeat(1, 1, n2_lines1))
        line_scores = line_scores.reshape((-1, n2_lines0 // 2, 2,
                                           n2_lines1 // 2, 2))

        # Match either in one direction or the other
        raw_line_scores = 0.5 * torch.maximum(
            line_scores[:, :, 0, :, 0] + line_scores[:, :, 1, :, 1],
            line_scores[:, :, 0, :, 1] + line_scores[:, :, 1, :, 0])
        line_scores = log_double_softmax(raw_line_scores, self.line_bin_score)
        m0_lines, m1_lines, mscores0_lines, mscores1_lines = self._get_matches(
            line_scores)
        return (line_scores, m0_lines, m1_lines, mscores0_lines,
                mscores1_lines, raw_line_scores)

    def loss(self, pred, data):
        """Not available in this inference-only model."""
        raise NotImplementedError()

    def metrics(self, pred, data):
        """Not available in this inference-only model."""
        raise NotImplementedError()
def normalize_keypoints(kpts, shape_or_size):
    """Center keypoints on the image and scale them by 0.7 * max(W, H).

    Args:
        kpts: torch.Tensor of size (B, N, 2) holding (x, y) coordinates.
        shape_or_size: either an image shape (tuple or list whose last two
            entries are (H, W)), or a tensor of sizes (W, H) with one row
            per batch element (or a single row to broadcast).
    Returns:
        A torch.Tensor with the same size as ``kpts``.
    """
    if isinstance(shape_or_size, (tuple, list)):
        # it's a shape
        height, width = shape_or_size[-2:]
        size = kpts.new_tensor([[width, height]])
    else:
        # it's a size
        assert isinstance(shape_or_size, torch.Tensor)
        size = shape_or_size.to(kpts)
    center = size / 2
    scale = size.max(1, keepdim=True).values * 0.7  # somehow we used 0.7 for SG
    return (kpts - center[:, None, :]) / scale[:, None, :]


class KeypointEncoder(nn.Module):
    """Encode keypoint positions and scores with an MLP."""

    def __init__(self, feature_dim, layers):
        super().__init__()
        # 3 input channels: x, y and the detection score.
        self.encoder = MLP([3] + list(layers) + [feature_dim], do_bn=True)
        nn.init.constant_(self.encoder[-1].bias, 0.0)

    def forward(self, kpts, scores):
        # kpts is [B, N, 2] and scores [B, N]; the MLP input is [B, 3, N].
        positions = kpts.transpose(1, 2)
        confidences = scores.unsqueeze(1)
        return self.encoder(torch.cat([positions, confidences], dim=1))


class EndPtEncoder(nn.Module):
    """Encode line endpoints, their offset to the other end and scores."""

    def __init__(self, feature_dim, layers):
        super().__init__()
        # 5 input channels: x, y, offset x, offset y and the line score.
        self.encoder = MLP([5] + list(layers) + [feature_dim], do_bn=True)
        nn.init.constant_(self.encoder[-1].bias, 0.0)

    def forward(self, endpoints, scores):
        # endpoints should be [B, N, 2, 2]
        # output is [B, feature_dim, N * 2]
        b_size, n_pts, _, _ = endpoints.shape
        assert tuple(endpoints.shape[-2:]) == (2, 2)
        # Offset from each endpoint to the other endpoint of its line.
        to_second = (endpoints[:, :, 1] - endpoints[:, :, 0]).unsqueeze(2)
        offsets = torch.cat([to_second, -to_second], dim=2)
        offsets = offsets.reshape(b_size, 2 * n_pts, 2).transpose(1, 2)
        positions = endpoints.flatten(1, 2).transpose(1, 2)
        # NOTE(review): repeat(1, 2) tiles the scores (s0..sN-1, s0..sN-1)
        # whereas the endpoints above are interleaved per line -- kept as is,
        # presumably what released weights were trained with; confirm before
        # changing.
        tiled_scores = scores.repeat(1, 2).unsqueeze(1)
        return self.encoder(
            torch.cat([positions, offsets, tiled_scores], dim=1))
class MultiHeadedAttention(nn.Module):
    """Multi-head attention built from 1x1 convolutions.

    Args:
        h: number of heads.
        d_model: total feature dimension, must be divisible by ``h``.
    """

    def __init__(self, h, d_model):
        super().__init__()
        assert d_model % h == 0
        self.dim = d_model // h
        self.h = h
        self.merge = nn.Conv1d(d_model, d_model, kernel_size=1)
        # The three input projections start as copies of the merge layer.
        projections = [deepcopy(self.merge) for _ in range(3)]
        self.proj = nn.ModuleList(projections)
        # self.prob = []

    def forward(self, query, key, value):
        batch = query.size(0)
        # Project each input and split the channels into (dim, heads).
        heads = []
        for projection, tensor in zip(self.proj, (query, key, value)):
            heads.append(projection(tensor).view(batch, self.dim, self.h, -1))
        query, key, value = heads
        x, prob = attention(query, key, value)
        # self.prob.append(prob.mean(dim=1))
        merged_heads = x.contiguous().view(batch, self.dim * self.h, -1)
        return self.merge(merged_heads)
class LineLayer(nn.Module):
    """Message passing between the two endpoints (junctions) of each line.

    Every line endpoint produces a message from its own descriptor, the
    descriptor of the other endpoint of the same line and the line
    encoding. The messages are then aggregated per junction, either with a
    mean or, when ``line_attention`` is set, with attention weights.

    Args:
        feature_dim: descriptor dimension D.
        line_attention: whether to weight the messages with an attention
            computed per junction instead of averaging them.
    """

    def __init__(self, feature_dim, line_attention=False):
        super().__init__()
        self.dim = feature_dim
        # Input is the concatenation of three D-dimensional features.
        self.mlp = MLP([self.dim * 3, self.dim * 2, self.dim], do_bn=True)
        self.line_attention = line_attention
        if line_attention:
            # Projections only exist (and are only used) in attention mode.
            self.proj_node = nn.Conv1d(self.dim, self.dim, kernel_size=1)
            self.proj_neigh = nn.Conv1d(2 * self.dim, self.dim, kernel_size=1)

    def get_endpoint_update(self, ldesc, line_enc, lines_junc_idx):
        """Compute one message per line endpoint."""
        # ldesc is [bs, D, n_junc], line_enc [bs, D, n_lines * 2]
        # and lines_junc_idx [bs, n_lines * 2]
        # Create one message per line endpoint
        b_size = lines_junc_idx.shape[0]
        line_desc = torch.gather(
            ldesc, 2, lines_junc_idx[:, None].repeat(1, self.dim, 1))
        # The reshape/flip/flatten swaps the two endpoints of every line, so
        # each endpoint is paired with the descriptor of the opposite one.
        message = torch.cat([
            line_desc,
            line_desc.reshape(b_size, self.dim, -1, 2).flip([-1]).flatten(2, 3).clone(),
            line_enc], dim=1)
        return self.mlp(message)  # [b_size, D, n_lines * 2]

    def get_endpoint_attention(self, ldesc, line_enc, lines_junc_idx):
        """Compute attention weights normalized over each junction."""
        # ldesc is [bs, D, n_junc], line_enc [bs, D, n_lines * 2]
        # and lines_junc_idx [bs, n_lines * 2]
        b_size = lines_junc_idx.shape[0]
        expanded_lines_junc_idx = lines_junc_idx[:, None].repeat(1, self.dim, 1)

        # Query: desc of the current node
        query = self.proj_node(ldesc)  # [b_size, D, n_junc]
        query = torch.gather(query, 2, expanded_lines_junc_idx)
        # query is [b_size, D, n_lines * 2]

        # Key: combination of neighboring desc and line encodings
        line_desc = torch.gather(ldesc, 2, expanded_lines_junc_idx)
        key = self.proj_neigh(torch.cat([
            line_desc.reshape(b_size, self.dim, -1, 2).flip([-1]).flatten(2, 3).clone(),
            line_enc], dim=1))  # [b_size, D, n_lines * 2]

        # Compute the attention weights with a custom softmax per junction
        prob = (query * key).sum(dim=1) / self.dim ** .5  # [b_size, n_lines * 2]
        # Subtract the global maximum before exponentiating (it cancels out
        # in the per-junction normalization below).
        prob = torch.exp(prob - prob.max())
        # Sum the exponentials of all endpoints attached to the same junction.
        denom = torch.zeros_like(ldesc[:, 0]).scatter_reduce_(
            dim=1, index=lines_junc_idx,
            src=prob, reduce='sum', include_self=False)  # [b_size, n_junc]
        denom = torch.gather(denom, 1, lines_junc_idx)  # [b_size, n_lines * 2]
        # ETH_EPS guards against a division by zero.
        prob = prob / (denom + ETH_EPS)
        return prob  # [b_size, n_lines * 2]

    def forward(self, ldesc0, ldesc1, line_enc0, line_enc1, lines_junc_idx0,
                lines_junc_idx1):
        """Update the descriptors of both images with the line messages.

        Returns:
            The updated ``(ldesc0, ldesc1)``: the inputs plus the
            aggregated per-junction updates.
        """
        # Gather the endpoint updates
        lupdate0 = self.get_endpoint_update(ldesc0, line_enc0, lines_junc_idx0)
        lupdate1 = self.get_endpoint_update(ldesc1, line_enc1, lines_junc_idx1)

        update0, update1 = torch.zeros_like(ldesc0), torch.zeros_like(ldesc1)
        dim = ldesc0.shape[1]
        if self.line_attention:
            # Compute an attention for each neighbor and do a weighted average
            prob0 = self.get_endpoint_attention(ldesc0, line_enc0,
                                                lines_junc_idx0)
            lupdate0 = lupdate0 * prob0[:, None]
            update0 = update0.scatter_reduce_(
                dim=2, index=lines_junc_idx0[:, None].repeat(1, dim, 1),
                src=lupdate0, reduce='sum', include_self=False)
            prob1 = self.get_endpoint_attention(ldesc1, line_enc1,
                                                lines_junc_idx1)
            lupdate1 = lupdate1 * prob1[:, None]
            update1 = update1.scatter_reduce_(
                dim=2, index=lines_junc_idx1[:, None].repeat(1, dim, 1),
                src=lupdate1, reduce='sum', include_self=False)
        else:
            # Average the updates for each junction (requires torch > 1.12)
            update0 = update0.scatter_reduce_(
                dim=2, index=lines_junc_idx0[:, None].repeat(1, dim, 1),
                src=lupdate0, reduce='mean', include_self=False)
            update1 = update1.scatter_reduce_(
                dim=2, index=lines_junc_idx1[:, None].repeat(1, dim, 1),
                src=lupdate1, reduce='mean', include_self=False)

        # Update
        ldesc0 = ldesc0 + update0
        ldesc1 = ldesc1 + update1

        return ldesc0, ldesc1
def simple_nms(scores, radius):
    """Perform non maximum suppression on the heatmap using max-pooling.

    This method does not suppress contiguous points that have the same score.

    Args:
        scores: the score heatmap of size `(B, H, W)`.
        radius: a non-negative integer scalar, the radius of the NMS
            window (the pooling window is `2 * radius + 1` pixels wide).

    Returns:
        A tensor shaped like `scores` in which every entry that is not a
        local maximum has been set to zero.

    Raises:
        ValueError: if `radius` is negative.
    """
    if radius < 0:
        raise ValueError(f'radius must be non-negative, got {radius}')

    def max_pool(x):
        return torch.nn.functional.max_pool2d(
            x, kernel_size=radius * 2 + 1, stride=1, padding=radius)

    zeros = torch.zeros_like(scores)
    max_mask = scores == max_pool(scores)
    # Two refinement rounds: hide everything within `radius` of an accepted
    # maximum, then accept the maxima of what is left
    for _ in range(2):
        supp_mask = max_pool(max_mask.float()) > 0
        supp_scores = torch.where(supp_mask, zeros, scores)
        new_max_mask = supp_scores == max_pool(supp_scores)
        max_mask = max_mask | (new_max_mask & (~supp_mask))
    return torch.where(max_mask, scores, zeros)
c4, c5 = 64, 64, 128, 128, 256 + + self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) + self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) + self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) + self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) + self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) + self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) + self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) + self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) + + if conf.has_detector: + self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) + self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) + + if conf.has_descriptor: + self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) + self.convDb = nn.Conv2d( + c5, conf.descriptor_dim, kernel_size=1, stride=1, padding=0) + + path = GLUESTICK_ROOT / 'resources' / 'weights' / 'superpoint_v1.pth' + self.load_state_dict(torch.load(str(path)), strict=False) + + def _forward(self, data): + image = data['image'] + if image.shape[1] == 3: # RGB + scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1) + image = (image * scale).sum(1, keepdim=True) + + # Shared Encoder + x = self.relu(self.conv1a(image)) + x = self.relu(self.conv1b(x)) + x = self.pool(x) + x = self.relu(self.conv2a(x)) + x = self.relu(self.conv2b(x)) + x = self.pool(x) + x = self.relu(self.conv3a(x)) + x = self.relu(self.conv3b(x)) + x = self.pool(x) + x = self.relu(self.conv4a(x)) + x = self.relu(self.conv4b(x)) + + pred = {} + if self.conf.has_detector and self.conf.max_num_keypoints != 0: + # Compute the dense keypoint scores + cPa = self.relu(self.convPa(x)) + scores = self.convPb(cPa) + scores = torch.nn.functional.softmax(scores, 1)[:, :-1] + b, c, h, w = scores.shape + scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) + scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h 
* 8, w * 8) + pred['keypoint_scores'] = dense_scores = scores + if self.conf.has_descriptor: + # Compute the dense descriptors + cDa = self.relu(self.convDa(x)) + all_desc = self.convDb(cDa) + all_desc = torch.nn.functional.normalize(all_desc, p=2, dim=1) + pred['descriptors'] = all_desc + + if self.conf.max_num_keypoints == 0: # Predict dense descriptors only + b_size = len(image) + device = image.device + return { + 'keypoints': torch.empty(b_size, 0, 2, device=device), + 'keypoint_scores': torch.empty(b_size, 0, device=device), + 'descriptors': torch.empty(b_size, self.conf.descriptor_dim, 0, device=device), + 'all_descriptors': all_desc + } + + if self.conf.sparse_outputs: + assert self.conf.has_detector and self.conf.has_descriptor + + scores = simple_nms(scores, self.conf.nms_radius) + + # Extract keypoints + keypoints = [ + torch.nonzero(s > self.conf.detection_threshold) + for s in scores] + scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] + + # Discard keypoints near the image borders + keypoints, scores = list(zip(*[ + remove_borders(k, s, self.conf.remove_borders, h * 8, w * 8) + for k, s in zip(keypoints, scores)])) + + # Keep the k keypoints with highest score + if self.conf.max_num_keypoints > 0: + keypoints, scores = list(zip(*[ + top_k_keypoints(k, s, self.conf.max_num_keypoints) + for k, s in zip(keypoints, scores)])) + + # Convert (h, w) to (x, y) + keypoints = [torch.flip(k, [1]).float() for k in keypoints] + + if self.conf.force_num_keypoints: + _, _, h, w = data['image'].shape + assert self.conf.max_num_keypoints > 0 + scores = list(scores) + for i in range(len(keypoints)): + k, s = keypoints[i], scores[i] + missing = self.conf.max_num_keypoints - len(k) + if missing > 0: + new_k = torch.rand(missing, 2).to(k) + new_k = new_k * k.new_tensor([[w - 1, h - 1]]) + new_s = torch.zeros(missing).to(s) + keypoints[i] = torch.cat([k, new_k], 0) + scores[i] = torch.cat([s, new_s], 0) + + # Extract descriptors + desc = 
[sample_descriptors(k[None], d[None], 8)[0] + for k, d in zip(keypoints, all_desc)] + + if (len(keypoints) == 1) or self.conf.force_num_keypoints: + keypoints = torch.stack(keypoints, 0) + scores = torch.stack(scores, 0) + desc = torch.stack(desc, 0) + + pred = { + 'keypoints': keypoints, + 'keypoint_scores': scores, + 'descriptors': desc, + } + + if self.conf.return_all: + pred['all_descriptors'] = all_desc + pred['dense_score'] = dense_scores + else: + del all_desc + torch.cuda.empty_cache() + + return pred + + def loss(self, pred, data): + raise NotImplementedError + + def metrics(self, pred, data): + raise NotImplementedError diff --git a/third_party/GlueStick/gluestick/models/two_view_pipeline.py b/third_party/GlueStick/gluestick/models/two_view_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..e0e21c1f62e2bd4ad573ebb87ea5635742b5032e --- /dev/null +++ b/third_party/GlueStick/gluestick/models/two_view_pipeline.py @@ -0,0 +1,176 @@ +""" +A two-view sparse feature matching pipeline. + +This model contains sub-models for each step: + feature extraction, feature matching, outlier filtering, pose estimation. +Each step is optional, and the features or matches can be provided as input. +Default: SuperPoint with nearest neighbor matching. + +Convention for the matches: m0[i] is the index of the keypoint in image 1 +that corresponds to the keypoint i in image 0. m0[i] = -1 if i is unmatched. +""" + +import numpy as np +import torch + +from .. 
def keep_best_kp_subset(keypoints, scores, descs, num_selected):
    """Keep the top num_selected best keypoints.

    Args:
        keypoints: [b_size, n_kp, 2] keypoint coordinates.
        scores: [b_size, n_kp] keypoint scores.
        descs: [b_size, D, n_kp] keypoint descriptors.
        num_selected: number of keypoints to keep per image; values
            outside `[0, n_kp]` are clamped to that range.

    Returns:
        Tuple `(keypoints, scores, descs)` restricted to the selected
        keypoints, ordered by increasing score.
    """
    num_kp = scores.shape[1]
    num_selected = max(0, min(int(num_selected), num_kp))
    sorted_indices = torch.sort(scores, dim=1)[1]
    # Slice from an explicit start index: `[-num_selected:]` would return
    # every keypoint when num_selected == 0
    selected_kp = sorted_indices[:, num_kp - num_selected:]
    keypoints = torch.gather(keypoints, 1,
                             selected_kp[:, :, None].repeat(1, 1, 2))
    scores = torch.gather(scores, 1, selected_kp)
    descs = torch.gather(descs, 2,
                         selected_kp[:, None].repeat(1, descs.shape[1], 1))
    return keypoints, scores, descs
+ 'n_line_sampled_pts': 50, + 'line_perp_dist_th': 5, + 'overlap_th': 0.2, + 'min_visibility_th': 0.5 + }, + } + required_data_keys = ['image0', 'image1'] + strict_conf = False # need to pass new confs to children models + components = [ + 'extractor', 'detector', 'descriptor', 'matcher', 'filter', 'solver'] + + def _init(self, conf): + if conf.extractor.name: + self.extractor = get_model(conf.extractor.name)(conf.extractor) + else: + if self.conf.detector.name: + self.detector = get_model(conf.detector.name)(conf.detector) + else: + self.required_data_keys += ['keypoints0', 'keypoints1'] + if self.conf.descriptor.name: + self.descriptor = get_model(conf.descriptor.name)( + conf.descriptor) + else: + self.required_data_keys += ['descriptors0', 'descriptors1'] + + if conf.matcher.name: + self.matcher = get_model(conf.matcher.name)(conf.matcher) + else: + self.required_data_keys += ['matches0'] + + if conf.filter.name: + self.filter = get_model(conf.filter.name)(conf.filter) + + if conf.solver.name: + self.solver = get_model(conf.solver.name)(conf.solver) + + def _forward(self, data): + + def process_siamese(data, i): + data_i = {k[:-1]: v for k, v in data.items() if k[-1] == i} + if self.conf.extractor.name: + pred_i = self.extractor(data_i) + else: + pred_i = {} + if self.conf.detector.name: + pred_i = self.detector(data_i) + else: + for k in ['keypoints', 'keypoint_scores', 'descriptors', + 'lines', 'line_scores', 'line_descriptors', + 'valid_lines']: + if k in data_i: + pred_i[k] = data_i[k] + if self.conf.descriptor.name: + pred_i = { + **pred_i, **self.descriptor({**data_i, **pred_i})} + return pred_i + + pred0 = process_siamese(data, '0') + pred1 = process_siamese(data, '1') + + pred = {**{k + '0': v for k, v in pred0.items()}, + **{k + '1': v for k, v in pred1.items()}} + + if self.conf.matcher.name: + pred = {**pred, **self.matcher({**data, **pred})} + + if self.conf.filter.name: + pred = {**pred, **self.filter({**data, **pred})} + + if 
self.conf.solver.name: + pred = {**pred, **self.solver({**data, **pred})} + + return pred + + def loss(self, pred, data): + losses = {} + total = 0 + for k in self.components: + if self.conf[k].name: + try: + losses_ = getattr(self, k).loss(pred, {**pred, **data}) + except NotImplementedError: + continue + losses = {**losses, **losses_} + total = losses_['total'] + total + return {**losses, 'total': total} + + def metrics(self, pred, data): + metrics = {} + for k in self.components: + if self.conf[k].name: + try: + metrics_ = getattr(self, k).metrics(pred, {**pred, **data}) + except NotImplementedError: + continue + metrics = {**metrics, **metrics_} + return metrics diff --git a/third_party/GlueStick/gluestick/models/wireframe.py b/third_party/GlueStick/gluestick/models/wireframe.py new file mode 100644 index 0000000000000000000000000000000000000000..0e3dd9873c6fdb4edcb4c75a103673ee2cb3b3fa --- /dev/null +++ b/third_party/GlueStick/gluestick/models/wireframe.py @@ -0,0 +1,274 @@ +import numpy as np +import torch +from pytlsd import lsd +from sklearn.cluster import DBSCAN + +from .base_model import BaseModel +from .superpoint import SuperPoint, sample_descriptors +from ..geometry import warp_lines_torch + + +def lines_to_wireframe(lines, line_scores, all_descs, conf): + """ Given a set of lines, their score and dense descriptors, + merge close-by endpoints and compute a wireframe defined by + its junctions and connectivity. 
+ Returns: + junctions: list of [num_junc, 2] tensors listing all wireframe junctions + junc_scores: list of [num_junc] tensors with the junction score + junc_descs: list of [dim, num_junc] tensors with the junction descriptors + connectivity: list of [num_junc, num_junc] bool arrays with True when 2 junctions are connected + new_lines: the new set of [b_size, num_lines, 2, 2] lines + lines_junc_idx: a [b_size, num_lines, 2] tensor with the indices of the junctions of each endpoint + num_true_junctions: a list of the number of valid junctions for each image in the batch, + i.e. before filling with random ones + """ + b_size, _, _, _ = all_descs.shape + device = lines.device + endpoints = lines.reshape(b_size, -1, 2) + + (junctions, junc_scores, junc_descs, connectivity, new_lines, + lines_junc_idx, num_true_junctions) = [], [], [], [], [], [], [] + for bs in range(b_size): + # Cluster the junctions that are close-by + db = DBSCAN(eps=conf.nms_radius, min_samples=1).fit( + endpoints[bs].cpu().numpy()) + clusters = db.labels_ + n_clusters = len(set(clusters)) + num_true_junctions.append(n_clusters) + + # Compute the average junction and score for each cluster + clusters = torch.tensor(clusters, dtype=torch.long, + device=device) + new_junc = torch.zeros(n_clusters, 2, dtype=torch.float, + device=device) + new_junc.scatter_reduce_(0, clusters[:, None].repeat(1, 2), + endpoints[bs], reduce='mean', + include_self=False) + junctions.append(new_junc) + new_scores = torch.zeros(n_clusters, dtype=torch.float, device=device) + new_scores.scatter_reduce_( + 0, clusters, torch.repeat_interleave(line_scores[bs], 2), + reduce='mean', include_self=False) + junc_scores.append(new_scores) + + # Compute the new lines + new_lines.append(junctions[-1][clusters].reshape(-1, 2, 2)) + lines_junc_idx.append(clusters.reshape(-1, 2)) + + # Compute the junction connectivity + junc_connect = torch.eye(n_clusters, dtype=torch.bool, + device=device) + pairs = clusters.reshape(-1, 2) # these 
pairs are connected by a line + junc_connect[pairs[:, 0], pairs[:, 1]] = True + junc_connect[pairs[:, 1], pairs[:, 0]] = True + connectivity.append(junc_connect) + + # Interpolate the new junction descriptors + junc_descs.append(sample_descriptors( + junctions[-1][None], all_descs[bs:(bs + 1)], 8)[0]) + + new_lines = torch.stack(new_lines, dim=0) + lines_junc_idx = torch.stack(lines_junc_idx, dim=0) + return (junctions, junc_scores, junc_descs, connectivity, + new_lines, lines_junc_idx, num_true_junctions) + + +class SPWireframeDescriptor(BaseModel): + default_conf = { + 'sp_params': { + 'has_detector': True, + 'has_descriptor': True, + 'descriptor_dim': 256, + 'trainable': False, + + # Inference + 'return_all': True, + 'sparse_outputs': True, + 'nms_radius': 4, + 'detection_threshold': 0.005, + 'max_num_keypoints': 1000, + 'force_num_keypoints': True, + 'remove_borders': 4, + }, + 'wireframe_params': { + 'merge_points': True, + 'merge_line_endpoints': True, + 'nms_radius': 3, + 'max_n_junctions': 500, + }, + 'max_n_lines': 250, + 'min_length': 15, + } + required_data_keys = ['image'] + + def _init(self, conf): + self.conf = conf + self.sp = SuperPoint(conf.sp_params) + + def detect_lsd_lines(self, x, max_n_lines=None): + if max_n_lines is None: + max_n_lines = self.conf.max_n_lines + lines, scores, valid_lines = [], [], [] + for b in range(len(x)): + # For each image on batch + img = (x[b].squeeze().cpu().numpy() * 255).astype(np.uint8) + if max_n_lines is None: + b_segs = lsd(img) + else: + for s in [0.3, 0.4, 0.5, 0.7, 0.8, 1.0]: + b_segs = lsd(img, scale=s) + if len(b_segs) >= max_n_lines: + break + + segs_length = np.linalg.norm(b_segs[:, 2:4] - b_segs[:, 0:2], axis=1) + # Remove short lines + b_segs = b_segs[segs_length >= self.conf.min_length] + segs_length = segs_length[segs_length >= self.conf.min_length] + b_scores = b_segs[:, -1] * np.sqrt(segs_length) + # Take the most relevant segments with + indices = np.argsort(-b_scores) + if max_n_lines is not 
None: + indices = indices[:max_n_lines] + lines.append(torch.from_numpy(b_segs[indices, :4].reshape(-1, 2, 2))) + scores.append(torch.from_numpy(b_scores[indices])) + valid_lines.append(torch.ones_like(scores[-1], dtype=torch.bool)) + + lines = torch.stack(lines).to(x) + scores = torch.stack(scores).to(x) + valid_lines = torch.stack(valid_lines).to(x.device) + return lines, scores, valid_lines + + def _forward(self, data): + b_size, _, h, w = data['image'].shape + device = data['image'].device + + if not self.conf.sp_params.force_num_keypoints: + assert b_size == 1, "Only batch size of 1 accepted for non padded inputs" + + # Line detection + if 'lines' not in data or 'line_scores' not in data: + if 'original_img' in data: + # Detect more lines, because when projecting them to the image most of them will be discarded + lines, line_scores, valid_lines = self.detect_lsd_lines( + data['original_img'], self.conf.max_n_lines * 3) + # Apply the same transformation that is applied in homography_adaptation + lines, valid_lines2 = warp_lines_torch(lines, data['H'], False, data['image'].shape[-2:]) + valid_lines = valid_lines & valid_lines2 + lines[~valid_lines] = -1 + line_scores[~valid_lines] = 0 + # Re-sort the line segments to pick the ones that are inside the image and have bigger score + sorted_scores, sorting_indices = torch.sort(line_scores, dim=-1, descending=True) + line_scores = sorted_scores[:, :self.conf.max_n_lines] + sorting_indices = sorting_indices[:, :self.conf.max_n_lines] + lines = torch.take_along_dim(lines, sorting_indices[..., None, None], 1) + valid_lines = torch.take_along_dim(valid_lines, sorting_indices, 1) + else: + lines, line_scores, valid_lines = self.detect_lsd_lines(data['image']) + + else: + lines, line_scores, valid_lines = data['lines'], data['line_scores'], data['valid_lines'] + if line_scores.shape[-1] != 0: + line_scores /= (line_scores.new_tensor(1e-8) + line_scores.max(dim=1).values[:, None]) + + # SuperPoint prediction + pred = 
self.sp(data) + + # Remove keypoints that are too close to line endpoints + if self.conf.wireframe_params.merge_points: + kp = pred['keypoints'] + line_endpts = lines.reshape(b_size, -1, 2) + dist_pt_lines = torch.norm( + kp[:, :, None] - line_endpts[:, None], dim=-1) + # For each keypoint, mark it as valid or to remove + pts_to_remove = torch.any( + dist_pt_lines < self.conf.sp_params.nms_radius, dim=2) + # Simply remove them (we assume batch_size = 1 here) + assert len(kp) == 1 + pred['keypoints'] = pred['keypoints'][0][~pts_to_remove[0]][None] + pred['keypoint_scores'] = pred['keypoint_scores'][0][~pts_to_remove[0]][None] + pred['descriptors'] = pred['descriptors'][0].T[~pts_to_remove[0]].T[None] + + # Connect the lines together to form a wireframe + orig_lines = lines.clone() + if self.conf.wireframe_params.merge_line_endpoints and len(lines[0]) > 0: + # Merge first close-by endpoints to connect lines + (line_points, line_pts_scores, line_descs, line_association, + lines, lines_junc_idx, num_true_junctions) = lines_to_wireframe( + lines, line_scores, pred['all_descriptors'], + conf=self.conf.wireframe_params) + + # Add the keypoints to the junctions and fill the rest with random keypoints + (all_points, all_scores, all_descs, + pl_associativity) = [], [], [], [] + for bs in range(b_size): + all_points.append(torch.cat( + [line_points[bs], pred['keypoints'][bs]], dim=0)) + all_scores.append(torch.cat( + [line_pts_scores[bs], pred['keypoint_scores'][bs]], dim=0)) + all_descs.append(torch.cat( + [line_descs[bs], pred['descriptors'][bs]], dim=1)) + + associativity = torch.eye(len(all_points[-1]), dtype=torch.bool, device=device) + associativity[:num_true_junctions[bs], :num_true_junctions[bs]] = \ + line_association[bs][:num_true_junctions[bs], :num_true_junctions[bs]] + pl_associativity.append(associativity) + + all_points = torch.stack(all_points, dim=0) + all_scores = torch.stack(all_scores, dim=0) + all_descs = torch.stack(all_descs, dim=0) + pl_associativity = 
torch.stack(pl_associativity, dim=0) + else: + # Lines are independent + all_points = torch.cat([lines.reshape(b_size, -1, 2), + pred['keypoints']], dim=1) + n_pts = all_points.shape[1] + num_lines = lines.shape[1] + num_true_junctions = [num_lines * 2] * b_size + all_scores = torch.cat([ + torch.repeat_interleave(line_scores, 2, dim=1), + pred['keypoint_scores']], dim=1) + pred['line_descriptors'] = self.endpoints_pooling( + lines, pred['all_descriptors'], (h, w)) + all_descs = torch.cat([ + pred['line_descriptors'].reshape(b_size, self.conf.sp_params.descriptor_dim, -1), + pred['descriptors']], dim=2) + pl_associativity = torch.eye( + n_pts, dtype=torch.bool, + device=device)[None].repeat(b_size, 1, 1) + lines_junc_idx = torch.arange( + num_lines * 2, device=device).reshape(1, -1, 2).repeat(b_size, 1, 1) + + del pred['all_descriptors'] # Remove dense descriptors to save memory + torch.cuda.empty_cache() + + return {'keypoints': all_points, + 'keypoint_scores': all_scores, + 'descriptors': all_descs, + 'pl_associativity': pl_associativity, + 'num_junctions': torch.tensor(num_true_junctions), + 'lines': lines, + 'orig_lines': orig_lines, + 'lines_junc_idx': lines_junc_idx, + 'line_scores': line_scores, + 'valid_lines': valid_lines} + + @staticmethod + def endpoints_pooling(segs, all_descriptors, img_shape): + assert segs.ndim == 4 and segs.shape[-2:] == (2, 2) + filter_shape = all_descriptors.shape[-2:] + scale_x = filter_shape[1] / img_shape[1] + scale_y = filter_shape[0] / img_shape[0] + + scaled_segs = torch.round(segs * torch.tensor([scale_x, scale_y]).to(segs)).long() + scaled_segs[..., 0] = torch.clip(scaled_segs[..., 0], 0, filter_shape[1] - 1) + scaled_segs[..., 1] = torch.clip(scaled_segs[..., 1], 0, filter_shape[0] - 1) + line_descriptors = [all_descriptors[None, b, ..., torch.squeeze(b_segs[..., 1]), torch.squeeze(b_segs[..., 0])] + for b, b_segs in enumerate(scaled_segs)] + line_descriptors = torch.cat(line_descriptors) + return line_descriptors # Shape 
def main():
    """Run the GlueStick demo on an image pair and save the visualizations.

    Parses the command line, loads both images as grayscale, runs the
    two-view pipeline (wireframe extractor + GlueStick matcher) and writes
    four figures to the working directory: `detected_lines.png`,
    `detected_points.png`, `line_matches.png` and `point_matches.png`.
    The figures are also shown on screen unless `--skip-imshow` is given.

    Raises:
        FileNotFoundError: if one of the input images cannot be read.
    """
    # Parse input parameters
    parser = argparse.ArgumentParser(
        prog='GlueStick Demo',
        description='Demo app to show the point and line matches obtained by GlueStick')
    parser.add_argument('-img1', default=join('resources', 'img1.jpg'))
    parser.add_argument('-img2', default=join('resources', 'img2.jpg'))
    parser.add_argument('--max_pts', type=int, default=1000)
    parser.add_argument('--max_lines', type=int, default=300)
    parser.add_argument('--skip-imshow', default=False, action='store_true')
    args = parser.parse_args()

    # Load the images first so a bad path fails before the model is built.
    # cv2.imread reports failure by returning None instead of raising.
    gray0 = cv2.imread(args.img1, 0)
    gray1 = cv2.imread(args.img2, 0)
    for path, img in ((args.img1, gray0), (args.img2, gray1)):
        if img is None:
            raise FileNotFoundError(f'Could not read image: {path}')

    # Evaluation config
    conf = {
        'name': 'two_view_pipeline',
        'use_lines': True,
        'extractor': {
            'name': 'wireframe',
            'sp_params': {
                'force_num_keypoints': False,
                'max_num_keypoints': args.max_pts,
            },
            'wireframe_params': {
                'merge_points': True,
                'merge_line_endpoints': True,
            },
            'max_n_lines': args.max_lines,
        },
        'matcher': {
            'name': 'gluestick',
            'weights': str(GLUESTICK_ROOT / 'resources' / 'weights' / 'checkpoint_GlueStick_MD.tar'),
            'trainable': False,
        },
        'ground_truth': {
            'from_pose_depth': False,
        }
    }

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    pipeline_model = TwoViewPipeline(conf).to(device).eval()

    torch_gray0, torch_gray1 = numpy_image_to_torch(gray0), numpy_image_to_torch(gray1)
    torch_gray0, torch_gray1 = torch_gray0.to(device)[None], torch_gray1.to(device)[None]
    x = {'image0': torch_gray0, 'image1': torch_gray1}
    # Pure inference: no autograd graph is needed
    with torch.no_grad():
        pred = pipeline_model(x)

    pred = batch_to_np(pred)
    kp0, kp1 = pred["keypoints0"], pred["keypoints1"]
    m0 = pred["matches0"]

    line_seg0, line_seg1 = pred["lines0"], pred["lines1"]
    line_matches = pred["line_matches0"]

    # An entry of -1 marks an unmatched keypoint / line
    valid_matches = m0 != -1
    match_indices = m0[valid_matches]
    matched_kps0 = kp0[valid_matches]
    matched_kps1 = kp1[match_indices]

    valid_matches = line_matches != -1
    match_indices = line_matches[valid_matches]
    matched_lines0 = line_seg0[valid_matches]
    matched_lines1 = line_seg1[match_indices]

    # Plot the matches
    img0, img1 = cv2.cvtColor(gray0, cv2.COLOR_GRAY2BGR), cv2.cvtColor(gray1, cv2.COLOR_GRAY2BGR)
    plot_images([img0, img1], ['Image 1 - detected lines', 'Image 2 - detected lines'], dpi=200, pad=2.0)
    plot_lines([line_seg0, line_seg1], ps=4, lw=2)
    plt.gcf().canvas.manager.set_window_title('Detected Lines')
    plt.savefig('detected_lines.png')

    plot_images([img0, img1], ['Image 1 - detected points', 'Image 2 - detected points'], dpi=200, pad=2.0)
    plot_keypoints([kp0, kp1], colors='c')
    plt.gcf().canvas.manager.set_window_title('Detected Points')
    plt.savefig('detected_points.png')

    plot_images([img0, img1], ['Image 1 - line matches', 'Image 2 - line matches'], dpi=200, pad=2.0)
    plot_color_line_matches([matched_lines0, matched_lines1], lw=2)
    plt.gcf().canvas.manager.set_window_title('Line Matches')
    plt.savefig('line_matches.png')

    plot_images([img0, img1], ['Image 1 - point matches', 'Image 2 - point matches'], dpi=200, pad=2.0)
    plot_matches(matched_kps0, matched_kps1, 'green', lw=1, ps=0)
    plt.gcf().canvas.manager.set_window_title('Point Matches')
    # Own file name: this figure used to overwrite 'detected_points.png'
    plt.savefig('point_matches.png')
    if not args.skip_imshow:
        plt.show()
+ ] + }, + { + "cell_type": "code", + "source": [ + "# Download the repository\n", + "!git clone https://github.com/cvg/GlueStick.git\n", + "%cd GlueStick" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CVBUeKT4dqBu", + "outputId": "db7a0e29-d4b5-4609-d65b-4e0f50a3a1e9" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'GlueStick'...\n", + "remote: Enumerating objects: 33, done.\u001b[K\n", + "remote: Counting objects: 100% (33/33), done.\u001b[K\n", + "remote: Compressing objects: 100% (31/31), done.\u001b[K\n", + "remote: Total 33 (delta 3), reused 24 (delta 0), pack-reused 0\u001b[K\n", + "Unpacking objects: 100% (33/33), 30.89 MiB | 8.17 MiB/s, done.\n", + "/content/GlueStick\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Install requirements\n", + "!pip install -r requirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "v-5DsNXreiGn", + "outputId": "e0007926-eebc-4ab1-faf7-2fdce2bf08f0" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting git+https://github.com/iago-suarez/pytlsd.git@d518527 (from -r requirements.txt (line 12))\n", + " Cloning https://github.com/iago-suarez/pytlsd.git (to revision d518527) to /tmp/pip-req-build-u60qtkws\n", + " Running command git clone --filter=blob:none --quiet https://github.com/iago-suarez/pytlsd.git /tmp/pip-req-build-u60qtkws\n", + "\u001b[33m WARNING: Did not find branch or tag 'd518527', assuming revision or ref.\u001b[0m\u001b[33m\n", + "\u001b[0m Running command git checkout -q d518527\n", + " Resolved https://github.com/iago-suarez/pytlsd.git to commit d518527\n", + " Running command git submodule update --init --recursive -q\n", + " Installing 
build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 1)) (1.22.4)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 2)) (3.7.1)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 3)) (1.10.1)\n", + "Requirement already satisfied: scikit_learn in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 4)) (1.2.2)\n", + "Requirement already satisfied: seaborn in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 5)) (0.12.2)\n", + "Collecting omegaconf==2.2.*\n", + " Downloading omegaconf-2.2.3-py3-none-any.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.3/79.3 KB\u001b[0m \u001b[31m404.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: opencv-python==4.7.0.* in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 7)) (4.7.0.72)\n", + "Requirement already satisfied: torch>=1.12 in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 8)) (2.0.0+cu118)\n", + "Requirement already satisfied: torchvision>=0.13 in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 9)) (0.15.1+cu118)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 10)) (67.6.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 11)) (4.65.0)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /usr/local/lib/python3.9/dist-packages (from omegaconf==2.2.*->-r requirements.txt (line 6)) 
(6.0)\n", + "Collecting antlr4-python3-runtime==4.9.*\n", + " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 KB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (8.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (2.8.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (1.0.7)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (1.4.4)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (4.39.3)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (23.0)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (5.12.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (3.0.9)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.9/dist-packages (from matplotlib->-r requirements.txt (line 2)) (0.11.0)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from scikit_learn->-r requirements.txt (line 4)) (1.1.1)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in 
/usr/local/lib/python3.9/dist-packages (from scikit_learn->-r requirements.txt (line 4)) (3.1.0)\n", + "Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.9/dist-packages (from seaborn->-r requirements.txt (line 5)) (1.4.4)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from torch>=1.12->-r requirements.txt (line 8)) (4.5.0)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch>=1.12->-r requirements.txt (line 8)) (2.0.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch>=1.12->-r requirements.txt (line 8)) (1.11.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from torch>=1.12->-r requirements.txt (line 8)) (3.10.7)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch>=1.12->-r requirements.txt (line 8)) (3.1.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch>=1.12->-r requirements.txt (line 8)) (3.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.12->-r requirements.txt (line 8)) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.12->-r requirements.txt (line 8)) (16.0.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from torchvision>=0.13->-r requirements.txt (line 9)) (2.27.1)\n", + "Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.9/dist-packages (from importlib-resources>=3.2.0->matplotlib->-r requirements.txt (line 2)) (3.15.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas>=0.25->seaborn->-r requirements.txt (line 5)) (2022.7.1)\n", + "Requirement already satisfied: six>=1.5 in 
/usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.7->matplotlib->-r requirements.txt (line 2)) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch>=1.12->-r requirements.txt (line 8)) (2.1.2)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->torchvision>=0.13->-r requirements.txt (line 9)) (1.26.15)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->torchvision>=0.13->-r requirements.txt (line 9)) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->torchvision>=0.13->-r requirements.txt (line 9)) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->torchvision>=0.13->-r requirements.txt (line 9)) (2022.12.7)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch>=1.12->-r requirements.txt (line 8)) (1.3.0)\n", + "Building wheels for collected packages: antlr4-python3-runtime, pytlsd\n", + " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144573 sha256=ac7a12e0ddab8ea2fd70b57eab16afa268aba7e1115fa14f726de7a6ee963d7a\n", + " Stored in directory: /root/.cache/pip/wheels/23/cf/80/f3efa822e6ab23277902ee9165fe772eeb1dfb8014f359020a\n", + " Building wheel for pytlsd (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for pytlsd: filename=pytlsd-0.0.3-cp39-cp39-linux_x86_64.whl size=66125 sha256=7cb1787ea41321dcaae4cdf9dfc9ef78db8ff1d8aa10b5da1caef0494b383c36\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ycm_joyo/wheels/24/1d/6a/937976436d1167d79c0763e00e9cd181c385c79206149bfc3a\n", + "Successfully built antlr4-python3-runtime pytlsd\n", + "Installing collected packages: pytlsd, antlr4-python3-runtime, omegaconf\n", + "Successfully installed antlr4-python3-runtime-4.9.3 omegaconf-2.2.3 pytlsd-0.0.3\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "pydevd_plugins" + ] + } + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Download the pre-trained model" + ], + "metadata": { + "id": "7McenwHtfGLE" + } + }, + { + "cell_type": "code", + "source": [ + "!wget https://github.com/cvg/GlueStick/releases/download/v0.1_arxiv/checkpoint_GlueStick_MD.tar -P resources/weights" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jmdiMOTFfBNN", + "outputId": "5041123a-52a0-453a-bebc-54bda11d4e51" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-04-04 23:22:22-- https://github.com/cvg/GlueStick/releases/download/v0.1_arxiv/checkpoint_GlueStick_MD.tar\n", + "Resolving github.com (github.com)... 140.82.114.3\n", + "Connecting to github.com (github.com)|140.82.114.3|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/622867606/b6e2035f-ead7-4d20-93f4-855c5396a8b2?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230404%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230404T232223Z&X-Amz-Expires=300&X-Amz-Signature=d7d6b2730dd0af6674207751cbb9655a3590b05d35fccf115fb9ae48905ff13a&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=622867606&response-content-disposition=attachment%3B%20filename%3Dcheckpoint_GlueStick_MD.tar&response-content-type=application%2Foctet-stream [following]\n", + "--2023-04-04 23:22:23-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/622867606/b6e2035f-ead7-4d20-93f4-855c5396a8b2?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230404%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230404T232223Z&X-Amz-Expires=300&X-Amz-Signature=d7d6b2730dd0af6674207751cbb9655a3590b05d35fccf115fb9ae48905ff13a&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=622867606&response-content-disposition=attachment%3B%20filename%3Dcheckpoint_GlueStick_MD.tar&response-content-type=application%2Foctet-stream\n", + "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...\n", + "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.109.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 112588421 (107M) [application/octet-stream]\n", + "Saving to: ‘resources/weights/checkpoint_GlueStick_MD.tar’\n", + "\n", + "checkpoint_GlueStic 100%[===================>] 107.37M 57.6MB/s in 1.9s \n", + "\n", + "2023-04-04 23:22:25 (57.6 MB/s) - ‘resources/weights/checkpoint_GlueStick_MD.tar’ saved [112588421/112588421]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "udUG35j0dpC0" + }, + "outputs": [], + "source": [ + "from os.path import join\n", + "\n", + "import cv2\n", + "import torch\n", + "from matplotlib import pyplot as plt\n", + "\n", + "from gluestick import batch_to_np, numpy_image_to_torch, GLUESTICK_ROOT\n", + "from gluestick.drawing import plot_images, plot_lines, plot_color_line_matches, plot_keypoints, plot_matches\n", + "from gluestick.models.two_view_pipeline import TwoViewPipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0GkvjCpvdpC2" + }, + "source": [ + "Define the configuration and model that we are going to use in our demo:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lxWDkN5XdpC2", + "outputId": "3026899d-721c-4163-c1d0-81aea226b40a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TwoViewPipeline(\n", + " (extractor): SPWireframeDescriptor(\n", + " (sp): SuperPoint(\n", + " (relu): ReLU(inplace=True)\n", + " (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (conv1a): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv1b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv2a): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv3a): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 
1))\n", + " (conv3b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv4a): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv4b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (convPa): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (convPb): Conv2d(256, 65, kernel_size=(1, 1), stride=(1, 1))\n", + " (convDa): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (convDb): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n", + " )\n", + " )\n", + " (matcher): GlueStick(\n", + " (kenc): KeypointEncoder(\n", + " (encoder): Sequential(\n", + " (0): Conv1d(3, 32, kernel_size=(1,), stride=(1,))\n", + " (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU()\n", + " (3): Conv1d(32, 64, kernel_size=(1,), stride=(1,))\n", + " (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU()\n", + " (6): Conv1d(64, 128, kernel_size=(1,), stride=(1,))\n", + " (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (8): ReLU()\n", + " (9): Conv1d(128, 256, kernel_size=(1,), stride=(1,))\n", + " (10): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (11): ReLU()\n", + " (12): Conv1d(256, 256, kernel_size=(1,), stride=(1,))\n", + " )\n", + " )\n", + " (lenc): EndPtEncoder(\n", + " (encoder): Sequential(\n", + " (0): Conv1d(5, 32, kernel_size=(1,), stride=(1,))\n", + " (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU()\n", + " (3): Conv1d(32, 64, kernel_size=(1,), stride=(1,))\n", + " (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU()\n", + " (6): Conv1d(64, 128, kernel_size=(1,), stride=(1,))\n", + " (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n", + " (8): ReLU()\n", + " (9): Conv1d(128, 256, kernel_size=(1,), stride=(1,))\n", + " (10): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (11): ReLU()\n", + " (12): Conv1d(256, 256, kernel_size=(1,), stride=(1,))\n", + " )\n", + " )\n", + " (gnn): AttentionalGNN(\n", + " (layers): ModuleList(\n", + " (0-17): 18 x GNNLayer(\n", + " (update): AttentionalPropagation(\n", + " (attn): MultiHeadedAttention(\n", + " (merge): Conv1d(256, 256, kernel_size=(1,), stride=(1,))\n", + " (proj): ModuleList(\n", + " (0-2): 3 x Conv1d(256, 256, kernel_size=(1,), stride=(1,))\n", + " )\n", + " )\n", + " (mlp): Sequential(\n", + " (0): Conv1d(512, 512, kernel_size=(1,), stride=(1,))\n", + " (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU()\n", + " (3): Conv1d(512, 256, kernel_size=(1,), stride=(1,))\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (line_layers): ModuleList(\n", + " (0-8): 9 x LineLayer(\n", + " (mlp): Sequential(\n", + " (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,))\n", + " (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU()\n", + " (3): Conv1d(512, 256, kernel_size=(1,), stride=(1,))\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_proj): Conv1d(256, 256, kernel_size=(1,), stride=(1,))\n", + " (final_line_proj): Conv1d(256, 256, kernel_size=(1,), stride=(1,))\n", + " )\n", + ")" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "MAX_N_POINTS, MAX_N_LINES = 1000, 300\n", + "\n", + "# Evaluation config\n", + "conf = {\n", + " 'name': 'two_view_pipeline',\n", + " 'use_lines': True,\n", + " 'extractor': {\n", + " 'name': 'wireframe',\n", + " 'sp_params': {\n", + " 'force_num_keypoints': False,\n", + " 'max_num_keypoints': MAX_N_POINTS,\n", + " },\n", + " 'wireframe_params': {\n", + " 'merge_points': True,\n", + " 'merge_line_endpoints': True,\n", + " 
},\n", + " 'max_n_lines': MAX_N_LINES,\n", + " },\n", + " 'matcher': {\n", + " 'name': 'gluestick',\n", + " 'weights': str(GLUESTICK_ROOT / 'resources' / 'weights' / 'checkpoint_GlueStick_MD.tar'),\n", + " 'trainable': False,\n", + " },\n", + " 'ground_truth': {\n", + " 'from_pose_depth': False,\n", + " }\n", + "}\n", + "\n", + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "pipeline_model = TwoViewPipeline(conf).to(device).eval()\n", + "pipeline_model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 163 + }, + "id": "SYTcXss9dpC5", + "outputId": "78b7b6ec-d760-4025-a35c-cec0a4d7dd0c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Choose the FIRST image from your computer (Recommended resolution: 640x640)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving img1.jpg to img1 (1).jpg\n", + "Choose the SECOND image from your computer\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving img2.jpg to img2 (1).jpg\n" + ] + } + ], + "source": [ + "# Load input images \n", + "import sys\n", + "\n", + "IN_COLAB = 'google.colab' in sys.modules\n", + "if not IN_COLAB:\n", + " # We are running a notebook in Jupyter\n", + " img_path0 = join('resources', 'img1.jpg')\n", + " img_path1 = join('resources', 'img2.jpg')\n", + "else:\n", + " # We are running in Colab: Load from user's disk using Colab tools\n", + " from google.colab import files\n", + " print('Choose the FIRST image from your computer (Recommended resolution: 640x640)')\n", + " uploaded_files = files.upload()\n", + " img_path0 = list(uploaded_files.keys())[0]\n", + " print('Choose the SECOND image from your computer')\n", + " uploaded_files = files.upload()\n", + " img_path1 = list(uploaded_files.keys())[0]" + ] + }, + { + "cell_type": "code", + "source": [ + "img = cv2.imread(img_path0, cv2.IMREAD_GRAYSCALE)\n", + "\n", + "gray0 = cv2.imread(img_path0, 0)\n", + "gray1 = cv2.imread(img_path1, 0)\n", + "\n", + "# Plot them using matplotlib\n", + "f, axarr = plt.subplots(1, 2)\n", + "axarr[0].imshow(gray0, cmap='gray')\n", + "axarr[1].imshow(gray1, cmap='gray')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 386 + }, + "id": "h8cWFvtih1c-", + "outputId": "ea02228c-8227-4cdf-d1bd-b9ddbf3af11d" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 8 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "pKtIXPqxdpC6" + }, + "outputs": [], + "source": [ + "# Convert images into torch and execute GlueStick💥\n", + "\n", + "torch_gray0, torch_gray1 = numpy_image_to_torch(gray0), numpy_image_to_torch(gray1)\n", + "torch_gray0, torch_gray1 = torch_gray0.to(device)[None], torch_gray1.to(device)[None]\n", + "x = {'image0': torch_gray0, 'image1': torch_gray1}\n", + "pred = pipeline_model(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "upsEtgjudpC6", + "outputId": "fbac085e-0d07-4436-d845-0da145045984" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Detected Keypoints: 1560 img1, 1558 img2\n", + "Detected Lines: 300 img1, 300 img2\n", + "\n", + "Matched 443 points and 108 lines\n" + ] + } + ], + "source": [ + "print(f\"Detected Keypoints: {pred['keypoints0'].shape[1]} img1, {pred['keypoints1'].shape[1]} img2\")\n", + "print(f\"Detected Lines: {pred['lines0'].shape[1]} img1, {pred['lines1'].shape[1]} img2\\n\")\n", + "print(f\"Matched {(pred['matches0'] >= 0).sum()} points and {(pred['line_matches0'] >= 0).sum()} lines\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eV29wX9MdpC7" + }, + "source": [ + "Show some matches" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "Qy314eoPdpC7" + }, + "outputs": [], + "source": [ + "pred = batch_to_np(pred)\n", + "kp0, kp1 = pred[\"keypoints0\"], pred[\"keypoints1\"]\n", + "m0 = pred[\"matches0\"]\n", + "\n", + "line_seg0, line_seg1 = pred[\"lines0\"], pred[\"lines1\"]\n", + "line_matches = pred[\"line_matches0\"]\n", + "\n", + "valid_matches = m0 != -1\n", + "match_indices = m0[valid_matches]\n", + "matched_kps0 = kp0[valid_matches]\n", + "matched_kps1 = kp1[match_indices]\n", + "\n", + "valid_matches = 
line_matches != -1\n", + "match_indices = line_matches[valid_matches]\n", + "matched_lines0 = line_seg0[valid_matches]\n", + "matched_lines1 = line_seg1[match_indices]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ACHNz8PTdpC8" + }, + "source": [ + "## Detected Lines" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "GDsSua4RdpC8", + "outputId": "31ef0700-e884-439e-e026-fc9a16c8cbdc" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "img0, img1 = cv2.cvtColor(gray0, cv2.COLOR_GRAY2BGR), cv2.cvtColor(gray1, cv2.COLOR_GRAY2BGR)\n", + "plot_images([img0, img1], ['Image 1 - detected lines', 'Image 2 - detected lines'], pad=0.5)\n", + "plot_lines([line_seg0, line_seg1], ps=3, lw=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RCF0V9PrdpC9" + }, + "source": [ + "## Detected Points " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "aoqEF86ydpC9", + "outputId": "5b8b68f6-ca14-4f6f-939a-9e98a85c9768" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "plot_images([img0, img1], ['Image 1 - detected points', 'Image 2 - detected points'], pad=0.5)\n", + "plot_keypoints([kp0, kp1], colors='c')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CtkevloydpC-" + }, + "source": [ + "## Matched Lines\n", + "(Each match has a different color) " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "oTmOvqOldpC-", + "outputId": "7d091385-94df-498e-fea4-0b5032729cea" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "plot_images([img0, img1], ['Image 1 - line matches', 'Image 2 - line matches'], pad=0.5)\n", + "plot_color_line_matches([matched_lines0, matched_lines1], lw=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kfXg1clhdpC_" + }, + "source": [ + "## Matched Points" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "6Rfv5FvOdpC_", + "outputId": "1af0439b-77db-4f55-f7c8-c0736cf7c7aa" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "plot_images([img0, img1], ['Image 1 - point matches', 'Image 2 - point matches'], pad=0.5)\n", + "plot_matches(matched_kps0, matched_kps1, 'green', lw=1, ps=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "Kve9xdngdpC_" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/third_party/GlueStick/requirements.txt b/third_party/GlueStick/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6ccf01735a036ad91060ac884bbc94da275dd487 --- /dev/null +++ b/third_party/GlueStick/requirements.txt @@ -0,0 +1,12 @@ +numpy +matplotlib +scipy +scikit_learn +seaborn +omegaconf==2.2.* +opencv-python==4.7.0.* +torch>=1.12 +torchvision>=0.13 +setuptools +tqdm +git+https://github.com/iago-suarez/pytlsd.git@37ac583 diff --git a/third_party/GlueStick/resources/demo_seq1.gif b/third_party/GlueStick/resources/demo_seq1.gif new file mode 100644 index 0000000000000000000000000000000000000000..c758b0f8df3cae51a45d0c94ca6a9fad03f3011d --- /dev/null +++ b/third_party/GlueStick/resources/demo_seq1.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917f9243daaffb896909582dfb9889e7b8638e230cc7466e6e6829b5a112cecb +size 22805528 diff --git a/third_party/GlueStick/resources/img1.jpg b/third_party/GlueStick/resources/img1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cb81115885913737e5260e4a9d04ffaf15cb741b --- 
/dev/null +++ b/third_party/GlueStick/resources/img1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f829bcdb249e851488be4b3e9cd87c58713c5dc54a2d1333c82ad4f17b7048 +size 1209431 diff --git a/third_party/GlueStick/resources/img2.jpg b/third_party/GlueStick/resources/img2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1ac6ef6b3504288cc7d53808030e04443d92c395 --- /dev/null +++ b/third_party/GlueStick/resources/img2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b91f870167f67ad8e3a0e57bdcd9a9062d8cea41e9c60685e6135941823d327 +size 1184304 diff --git a/third_party/GlueStick/resources/weights/superpoint_v1.pth b/third_party/GlueStick/resources/weights/superpoint_v1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7648726e3a3dfa2581e86bfa9c5a2a05cfb9bf74 --- /dev/null +++ b/third_party/GlueStick/resources/weights/superpoint_v1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b6708629640ca883673b5d5c097c4ddad37d8048b33f09c8ca0d69db12c40e +size 5206086 diff --git a/third_party/GlueStick/setup.py b/third_party/GlueStick/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..f0caa063e99cf6d7784fe7d54af08dbb66811627 --- /dev/null +++ b/third_party/GlueStick/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup(name='gluestick', version="0.0", packages=['gluestick']) diff --git a/third_party/SGMNet/.gitignore b/third_party/SGMNet/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7e99e367f8443d86e5e8825b9fda39dfbb39630d --- /dev/null +++ b/third_party/SGMNet/.gitignore @@ -0,0 +1 @@ +*.pyc \ No newline at end of file diff --git a/third_party/SGMNet/LICENSE b/third_party/SGMNet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..944d16f2d01f3550dd7061bfbc1dc2f73b77cfbb --- /dev/null +++ b/third_party/SGMNet/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright 
(c) 2021 Hongkai Chen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/third_party/SGMNet/README.md b/third_party/SGMNet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c01115fb33623295fb74314ad33cb340af70509d --- /dev/null +++ b/third_party/SGMNet/README.md @@ -0,0 +1,295 @@ +# SGMNet Implementation + +![Framework](assets/teaser.png) + +PyTorch implementation of SGMNet for ICCV'21 paper ["Learning to Match Features with Seeded Graph Matching Network"](https://arxiv.org/abs/2108.08771), by Hongkai Chen, Zixin Luo, Jiahui Zhang, Lei Zhou, Xuyang Bai, Zeyu Hu, Chiew-Lan Tai, Long Quan. + +This work focuses on the keypoint-based image matching problem. We mitigate the quadratic complexity issue for typical GNN-based matching by leveraging a restricted set of pre-matched seeds. + +This repo contains training, evaluation and basic demo scripts used in our paper. 
As a baseline, it also includes **our implementation** for [SuperGlue](https://arxiv.org/abs/1911.11763). If you find this project useful, please cite: + +``` +@article{chen2021sgmnet, + title={Learning to Match Features with Seeded Graph Matching Network}, + author={Chen, Hongkai and Luo, Zixin and Zhang, Jiahui and Zhou, Lei and Bai, Xuyang and Hu, Zeyu and Tai, Chiew-Lan and Quan, Long}, + journal={International Conference on Computer Vision (ICCV)}, + year={2021} +} +``` + +Part of the code is borrowed or ported from + +[SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork), for SuperPoint implementation, + +[SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork), for SuperGlue implementation and exact auc computation, + +[OANet](https://github.com/zjhthu/OANet), for training scheme, + +[PointCN](https://github.com/vcg-uvic/learned-correspondence-release), for implementation of PointCN block and geometric transformations, + +[FM-Bench](https://github.com/JiawangBian/FM-Bench), for evaluation of fundamental matrix estimation. + + +Please also cite these works if you find the corresponding code useful. + + +## Requirements + +We use PyTorch 1.6, later versions should also be compatible. Please refer to [requirements.txt](requirements.txt) for other dependencies. + +If you are using conda, you may configure the environment as: + +```bash +conda create --name sgmnet python=3.7 -y && \ +pip install -r requirements.txt && \ +conda activate sgmnet +``` + +## Get started + +Clone the repo: +```bash +git clone https://github.com/vdvchen/SGMNet.git && \ +``` +download model weights from [here](https://drive.google.com/file/d/1Ca0WmKSSt2G6P7m8YAOlSAHEFar_TAWb/view?usp=sharing) + +extract weights by +```bash +tar -xvf weights.tar.gz +``` + +A quick demo for image matching can be called by: + +```bash +cd demo && python demo.py --config_path configs/sgm_config.yaml +``` +The results will be saved as **match.png** in the demo folder. 
You may configure the matcher in corresponding yaml file. + + +## Evaluation + + +We demonstrate evaluation process with RootSIFT and SGMNet. Evaluation with other features/matchers can be conducted by configuring the corresponding yaml files. + +### 1. YFCC Evaluation + +Refer to [OANet](https://github.com/zjhthu/OANet) repo to download raw YFCC100M dataset + + +**Data Generation** + +1. Configure **datadump/configs/yfcc_root.yaml** for the following entries + + **rawdata_dir**: path for yfcc rawdata + **feature_dump_dir**: dump path for extracted features + **dataset_dump_dir**: dump path for generated dataset + **extractor**: configuration for keypoint extractor (2k RootSIFT by default) + +2. Generate data by + ```bash + cd datadump + python dump.py --config_path configs/yfcc_root.yaml + ``` + An h5py data file will be generated under **dataset_dump_dir**, e.g. **yfcc_root_2000.hdf5** + +**Evaluation**: + +1. Configure **evaluation/configs/eval/yfcc_eval_sgm.yaml** for the following entries + + **reader.rawdata_dir**: path for yfcc_rawdata + **reader.dataset_dir**: path for generated h5py dataset file + **matcher**: configuration for sgmnet (we use the default setting) + +2. To run evaluation, + ```bash + cd evaluation + python evaluate.py --config_path configs/eval/yfcc_eval_sgm.yaml + ``` + +For 2k RootSIFT matching, similar results as below should be obtained, +```bash +auc th: [5 10 15 20 25 30] +approx auc: [0.634 0.729 0.783 0.818 0.843 0.861] +exact auc: [0.355 0.552 0.655 0.719 0.762 0.793] +mean match score: 17.06 +mean precision: 86.08 +``` + +### 2. ScanNet Evaluation + +Download processed [ScanNet evaluation data](https://drive.google.com/file/d/14s-Ce8Vq7XedzKon8MZSB_Mz_iC6oFPy/view?usp=sharing). + + +**Data Generation** + +1. 
Configure **datadump/configs/scannet_root.yaml** for the following entries + + **rawdata_dir**: path for ScanNet raw data + **feature_dump_dir**: dump path for extracted features + **dataset_dump_dir**: dump path for generated dataset + **extractor**: configuration for keypoint extractor (2k RootSIFT by default) + +2. Generate data by + ```bash + cd datadump + python dump.py --config_path configs/scannet_root.yaml + ``` + An h5py data file will be generated under **dataset_dump_dir**, e.g. **scannet_root_2000.hdf5** + +**Evaluation**: + +1. Configure **evaluation/configs/eval/scannet_eval_sgm.yaml** for the following entries + + **reader.rawdata_dir**: path for ScanNet evaluation data + **reader.dataset_dir**: path for generated h5py dataset file + **matcher**: configuration for sgmnet (we use the default setting) + +2. To run evaluation, + ```bash + cd evaluation + python evaluate.py --config_path configs/eval/scannet_eval_sgm.yaml + ``` + +For 2k RootSIFT matching, similar results as below should be obtained, +```bash +auc th: [5 10 15 20 25 30] +approx auc: [0.322 0.427 0.493 0.541 0.577 0.606] +exact auc: [0.125 0.283 0.383 0.452 0.503 0.541] +mean match score: 8.79 +mean precision: 45.54 +``` + +### 3. FM-Bench Evaluation + +Refer to [FM-Bench](https://github.com/JiawangBian/FM-Bench) repo to download raw FM-Bench dataset + +**Data Generation** + +1. Configure **datadump/configs/fmbench_root.yaml** for the following entries + + **rawdata_dir**: path for fmbench raw data + **feature_dump_dir**: dump path for extracted features + **dataset_dump_dir**: dump path for generated dataset + **extractor**: configuration for keypoint extractor (4k RootSIFT by default) + +2. Generate data by + ```bash + cd datadump + python dump.py --config_path configs/fmbench_root.yaml + ``` + An h5py data file will be generated under **dataset_dump_dir**, e.g. **fmbench_root_4000.hdf5** + +**Evaluation**: + +1. 
Configure **evaluation/configs/eval/fm_eval_sgm.yaml** for the following entries + + **reader.rawdata_dir**: path for fmbench raw data + **reader.dataset_dir**: path for generated h5py dataset file + **matcher**: configuration for sgmnet (we use the default setting) + +2. To run evaluation, + ```bash + cd evaluation + python evaluate.py --config_path configs/eval/fm_eval_sgm.yaml + ``` + +For 4k RootSIFT matching, similar results as below should be obtained, +```bash +CPC results: +F_recall: 0.617 +precision: 0.7489 +precision_post: 0.8399 +num_corr: 663.838 +num_corr_post: 284.455 + +KITTI results: +F_recall: 0.911 +precision: 0.9035133886251774 +precision_post: 0.9837278538989989 +num_corr: 1670.548 +num_corr_post: 1121.902 + +TUM results: +F_recall: 0.666 +precision: 0.6520260208250837 +precision_post: 0.731507123852191 +num_corr: 1650.579 +num_corr_post: 941.846 + +Tanks_and_Temples results: +F_recall: 0.855 +precision: 0.7452896681043316 +precision_post: 0.8020184635328004 +num_corr: 946.571 +num_corr_post: 466.865 +``` + +### 4. Run time and memory Evaluation + +We provide a script to test run time and memory consumption, for a quick start, run + +```bash +cd evaluation +python eval_cost.py --matcher_name SGM --config_path configs/cost/sgm_cost.yaml --num_kpt=4000 +``` +You may configure the matcher in corresponding yaml files. + + +## Visualization + +For visualization of matching results on different dataset, add **--vis_folder** argument on evaluation command, e.g. + +```bash +cd evaluation +python evaluate.py --config_path configs/eval/***.yaml --vis_folder visualization +``` + + +## Training + +We train both SGMNet and SuperGlue on [GL3D](https://github.com/lzx551402/GL3D) dataset. The training data is pre-generated in an offline manner, which yields about 400k pairs in total. + +To generate training/validation dataset + +1. Download [GL3D](https://github.com/lzx551402/GL3D) rawdata + +2. Configure **datadump/configs/gl3d.yaml**. 
Some important entries are + + **rawdata_dir**: path for GL3D raw data + **feature_dump_dir**: path for extracted features + **dataset_dump_dir**: path for generated dataset + **pairs_per_seq**: number of pairs sampled for each sequence + **angle_th**: angle threshold for sampled pairs + **overlap_th**: common track threshold for sampled pairs + **extractor**: configuration for keypoint extractor + +3. dump dataset by +```bash +cd datadump +python dump.py --config_path configs/gl3d.yaml +``` + +Two parts of data will be generated. (1) Extracted features and keypoints will be placed under **feature_dump_dir** (2) Pairwise dataset will be placed under **dataset_dump_dir**. + +4. After data generation, configure **train/train_sgm.sh** for necessary entries, including + **rawdata_path**: path for GL3D raw data + **desc_path**: path for extracted features + **dataset_path**: path for generated dataset + **desc_suffix**: suffix for keypoint files, _root_1000.hdf5 for 1k RootSIFT by default. + **log_base**: log directory for training + +5. 
run SGMNet training scripts by +```bash +bash train_sgm.sh +``` + +our training scripts support multi-gpu training, which can be enabled by configure **train/train_sgm.sh** for these entries + + **CUDA_VISIBLE_DEVICES**: id of gpus to be used + **nproc_per_node**: number of gpus to be used + +run SuperGlue training scripts by + +```bash +bash train_sg.sh +``` diff --git a/third_party/SGMNet/assets/scannet_eval_list.txt b/third_party/SGMNet/assets/scannet_eval_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c3338fac3c3ae0a2837c819dc0ee21ed8bc2012 --- /dev/null +++ b/third_party/SGMNet/assets/scannet_eval_list.txt @@ -0,0 +1,1500 @@ +scene0707_00/img/15.jpg scene0707_00/img/585.jpg +scene0707_00/img/45.jpg scene0707_00/img/105.jpg +scene0707_00/img/45.jpg scene0707_00/img/690.jpg +scene0707_00/img/60.jpg scene0707_00/img/585.jpg +scene0707_00/img/90.jpg scene0707_00/img/660.jpg +scene0707_00/img/105.jpg scene0707_00/img/600.jpg +scene0707_00/img/135.jpg scene0707_00/img/165.jpg +scene0707_00/img/150.jpg scene0707_00/img/660.jpg +scene0707_00/img/150.jpg scene0707_00/img/690.jpg +scene0707_00/img/165.jpg scene0707_00/img/660.jpg +scene0707_00/img/375.jpg scene0707_00/img/450.jpg +scene0707_00/img/510.jpg scene0707_00/img/540.jpg +scene0707_00/img/525.jpg scene0707_00/img/540.jpg +scene0707_00/img/585.jpg scene0707_00/img/630.jpg +scene0707_00/img/765.jpg scene0707_00/img/780.jpg +scene0708_00/img/15.jpg scene0708_00/img/960.jpg +scene0708_00/img/60.jpg scene0708_00/img/1125.jpg +scene0708_00/img/75.jpg scene0708_00/img/1140.jpg +scene0708_00/img/105.jpg scene0708_00/img/165.jpg +scene0708_00/img/165.jpg scene0708_00/img/225.jpg +scene0708_00/img/210.jpg scene0708_00/img/255.jpg +scene0708_00/img/225.jpg scene0708_00/img/240.jpg +scene0708_00/img/300.jpg scene0708_00/img/360.jpg +scene0708_00/img/420.jpg scene0708_00/img/480.jpg +scene0708_00/img/525.jpg scene0708_00/img/645.jpg +scene0708_00/img/540.jpg scene0708_00/img/645.jpg 
+scene0708_00/img/555.jpg scene0708_00/img/645.jpg +scene0708_00/img/645.jpg scene0708_00/img/675.jpg +scene0708_00/img/660.jpg scene0708_00/img/690.jpg +scene0708_00/img/990.jpg scene0708_00/img/1035.jpg +scene0709_00/img/15.jpg scene0709_00/img/930.jpg +scene0709_00/img/30.jpg scene0709_00/img/90.jpg +scene0709_00/img/45.jpg scene0709_00/img/930.jpg +scene0709_00/img/105.jpg scene0709_00/img/915.jpg +scene0709_00/img/120.jpg scene0709_00/img/930.jpg +scene0709_00/img/135.jpg scene0709_00/img/930.jpg +scene0709_00/img/375.jpg scene0709_00/img/405.jpg +scene0709_00/img/510.jpg scene0709_00/img/645.jpg +scene0709_00/img/510.jpg scene0709_00/img/675.jpg +scene0709_00/img/525.jpg scene0709_00/img/675.jpg +scene0709_00/img/540.jpg scene0709_00/img/645.jpg +scene0709_00/img/540.jpg scene0709_00/img/675.jpg +scene0709_00/img/570.jpg scene0709_00/img/585.jpg +scene0709_00/img/690.jpg scene0709_00/img/720.jpg +scene0709_00/img/915.jpg scene0709_00/img/930.jpg +scene0710_00/img/0.jpg scene0710_00/img/165.jpg +scene0710_00/img/0.jpg scene0710_00/img/600.jpg +scene0710_00/img/0.jpg scene0710_00/img/1755.jpg +scene0710_00/img/15.jpg scene0710_00/img/765.jpg +scene0710_00/img/135.jpg scene0710_00/img/1800.jpg +scene0710_00/img/150.jpg scene0710_00/img/1725.jpg +scene0710_00/img/165.jpg scene0710_00/img/735.jpg +scene0710_00/img/570.jpg scene0710_00/img/765.jpg +scene0710_00/img/600.jpg scene0710_00/img/735.jpg +scene0710_00/img/615.jpg scene0710_00/img/780.jpg +scene0710_00/img/810.jpg scene0710_00/img/870.jpg +scene0710_00/img/975.jpg scene0710_00/img/1005.jpg +scene0710_00/img/1020.jpg scene0710_00/img/1050.jpg +scene0710_00/img/1530.jpg scene0710_00/img/1590.jpg +scene0710_00/img/1605.jpg scene0710_00/img/1740.jpg +scene0711_00/img/45.jpg scene0711_00/img/900.jpg +scene0711_00/img/225.jpg scene0711_00/img/2370.jpg +scene0711_00/img/420.jpg scene0711_00/img/2790.jpg +scene0711_00/img/450.jpg scene0711_00/img/2940.jpg +scene0711_00/img/675.jpg scene0711_00/img/750.jpg 
+scene0711_00/img/1380.jpg scene0711_00/img/1440.jpg +scene0711_00/img/1455.jpg scene0711_00/img/1560.jpg +scene0711_00/img/1455.jpg scene0711_00/img/3165.jpg +scene0711_00/img/1680.jpg scene0711_00/img/1995.jpg +scene0711_00/img/1695.jpg scene0711_00/img/1995.jpg +scene0711_00/img/1905.jpg scene0711_00/img/2895.jpg +scene0711_00/img/1965.jpg scene0711_00/img/2085.jpg +scene0711_00/img/2085.jpg scene0711_00/img/2835.jpg +scene0711_00/img/2580.jpg scene0711_00/img/2685.jpg +scene0711_00/img/2910.jpg scene0711_00/img/3270.jpg +scene0712_00/img/270.jpg scene0712_00/img/4785.jpg +scene0712_00/img/645.jpg scene0712_00/img/1140.jpg +scene0712_00/img/855.jpg scene0712_00/img/4560.jpg +scene0712_00/img/870.jpg scene0712_00/img/4770.jpg +scene0712_00/img/1230.jpg scene0712_00/img/3675.jpg +scene0712_00/img/1950.jpg scene0712_00/img/4155.jpg +scene0712_00/img/2400.jpg scene0712_00/img/2895.jpg +scene0712_00/img/2460.jpg scene0712_00/img/2655.jpg +scene0712_00/img/2490.jpg scene0712_00/img/4005.jpg +scene0712_00/img/2775.jpg scene0712_00/img/2910.jpg +scene0712_00/img/3015.jpg scene0712_00/img/3075.jpg +scene0712_00/img/3660.jpg scene0712_00/img/4755.jpg +scene0712_00/img/4200.jpg scene0712_00/img/4260.jpg +scene0712_00/img/4410.jpg scene0712_00/img/4425.jpg +scene0712_00/img/4650.jpg scene0712_00/img/4680.jpg +scene0713_00/img/75.jpg scene0713_00/img/420.jpg +scene0713_00/img/90.jpg scene0713_00/img/150.jpg +scene0713_00/img/600.jpg scene0713_00/img/1275.jpg +scene0713_00/img/645.jpg scene0713_00/img/945.jpg +scene0713_00/img/690.jpg scene0713_00/img/750.jpg +scene0713_00/img/885.jpg scene0713_00/img/2055.jpg +scene0713_00/img/945.jpg scene0713_00/img/2085.jpg +scene0713_00/img/1200.jpg scene0713_00/img/1215.jpg +scene0713_00/img/1215.jpg scene0713_00/img/1230.jpg +scene0713_00/img/1215.jpg scene0713_00/img/2130.jpg +scene0713_00/img/1320.jpg scene0713_00/img/2025.jpg +scene0713_00/img/1350.jpg scene0713_00/img/1920.jpg +scene0713_00/img/1575.jpg scene0713_00/img/1680.jpg 
+scene0713_00/img/1665.jpg scene0713_00/img/1710.jpg +scene0713_00/img/2070.jpg scene0713_00/img/2085.jpg +scene0714_00/img/15.jpg scene0714_00/img/630.jpg +scene0714_00/img/45.jpg scene0714_00/img/705.jpg +scene0714_00/img/45.jpg scene0714_00/img/720.jpg +scene0714_00/img/105.jpg scene0714_00/img/525.jpg +scene0714_00/img/285.jpg scene0714_00/img/915.jpg +scene0714_00/img/300.jpg scene0714_00/img/915.jpg +scene0714_00/img/480.jpg scene0714_00/img/525.jpg +scene0714_00/img/510.jpg scene0714_00/img/705.jpg +scene0714_00/img/540.jpg scene0714_00/img/735.jpg +scene0714_00/img/555.jpg scene0714_00/img/660.jpg +scene0714_00/img/585.jpg scene0714_00/img/750.jpg +scene0714_00/img/615.jpg scene0714_00/img/750.jpg +scene0714_00/img/855.jpg scene0714_00/img/885.jpg +scene0714_00/img/855.jpg scene0714_00/img/1020.jpg +scene0714_00/img/900.jpg scene0714_00/img/1005.jpg +scene0715_00/img/15.jpg scene0715_00/img/45.jpg +scene0715_00/img/45.jpg scene0715_00/img/105.jpg +scene0715_00/img/45.jpg scene0715_00/img/495.jpg +scene0715_00/img/75.jpg scene0715_00/img/540.jpg +scene0715_00/img/120.jpg scene0715_00/img/525.jpg +scene0715_00/img/135.jpg scene0715_00/img/150.jpg +scene0715_00/img/165.jpg scene0715_00/img/585.jpg +scene0715_00/img/195.jpg scene0715_00/img/585.jpg +scene0715_00/img/240.jpg scene0715_00/img/285.jpg +scene0715_00/img/270.jpg scene0715_00/img/300.jpg +scene0715_00/img/315.jpg scene0715_00/img/345.jpg +scene0715_00/img/330.jpg scene0715_00/img/345.jpg +scene0715_00/img/345.jpg scene0715_00/img/360.jpg +scene0715_00/img/465.jpg scene0715_00/img/480.jpg +scene0715_00/img/480.jpg scene0715_00/img/510.jpg +scene0716_00/img/0.jpg scene0716_00/img/630.jpg +scene0716_00/img/30.jpg scene0716_00/img/615.jpg +scene0716_00/img/30.jpg scene0716_00/img/660.jpg +scene0716_00/img/75.jpg scene0716_00/img/645.jpg +scene0716_00/img/105.jpg scene0716_00/img/660.jpg +scene0716_00/img/120.jpg scene0716_00/img/150.jpg +scene0716_00/img/315.jpg scene0716_00/img/345.jpg 
+scene0716_00/img/315.jpg scene0716_00/img/390.jpg +scene0716_00/img/315.jpg scene0716_00/img/405.jpg +scene0716_00/img/360.jpg scene0716_00/img/405.jpg +scene0716_00/img/360.jpg scene0716_00/img/465.jpg +scene0716_00/img/375.jpg scene0716_00/img/390.jpg +scene0716_00/img/390.jpg scene0716_00/img/435.jpg +scene0716_00/img/480.jpg scene0716_00/img/525.jpg +scene0716_00/img/630.jpg scene0716_00/img/675.jpg +scene0717_00/img/30.jpg scene0717_00/img/75.jpg +scene0717_00/img/150.jpg scene0717_00/img/825.jpg +scene0717_00/img/180.jpg scene0717_00/img/975.jpg +scene0717_00/img/210.jpg scene0717_00/img/945.jpg +scene0717_00/img/255.jpg scene0717_00/img/885.jpg +scene0717_00/img/360.jpg scene0717_00/img/390.jpg +scene0717_00/img/405.jpg scene0717_00/img/450.jpg +scene0717_00/img/405.jpg scene0717_00/img/465.jpg +scene0717_00/img/405.jpg scene0717_00/img/480.jpg +scene0717_00/img/735.jpg scene0717_00/img/765.jpg +scene0717_00/img/780.jpg scene0717_00/img/915.jpg +scene0717_00/img/780.jpg scene0717_00/img/945.jpg +scene0717_00/img/810.jpg scene0717_00/img/825.jpg +scene0717_00/img/825.jpg scene0717_00/img/855.jpg +scene0717_00/img/855.jpg scene0717_00/img/885.jpg +scene0718_00/img/15.jpg scene0718_00/img/60.jpg +scene0718_00/img/30.jpg scene0718_00/img/75.jpg +scene0718_00/img/60.jpg scene0718_00/img/75.jpg +scene0718_00/img/90.jpg scene0718_00/img/105.jpg +scene0718_00/img/90.jpg scene0718_00/img/120.jpg +scene0718_00/img/120.jpg scene0718_00/img/135.jpg +scene0718_00/img/135.jpg scene0718_00/img/150.jpg +scene0718_00/img/150.jpg scene0718_00/img/165.jpg +scene0718_00/img/150.jpg scene0718_00/img/180.jpg +scene0718_00/img/180.jpg scene0718_00/img/195.jpg +scene0718_00/img/195.jpg scene0718_00/img/210.jpg +scene0718_00/img/210.jpg scene0718_00/img/240.jpg +scene0718_00/img/225.jpg scene0718_00/img/255.jpg +scene0718_00/img/255.jpg scene0718_00/img/270.jpg +scene0718_00/img/285.jpg scene0718_00/img/300.jpg +scene0719_00/img/15.jpg scene0719_00/img/705.jpg 
+scene0719_00/img/60.jpg scene0719_00/img/795.jpg +scene0719_00/img/75.jpg scene0719_00/img/780.jpg +scene0719_00/img/180.jpg scene0719_00/img/1020.jpg +scene0719_00/img/255.jpg scene0719_00/img/315.jpg +scene0719_00/img/300.jpg scene0719_00/img/1080.jpg +scene0719_00/img/360.jpg scene0719_00/img/1170.jpg +scene0719_00/img/570.jpg scene0719_00/img/660.jpg +scene0719_00/img/705.jpg scene0719_00/img/735.jpg +scene0719_00/img/735.jpg scene0719_00/img/780.jpg +scene0719_00/img/750.jpg scene0719_00/img/870.jpg +scene0719_00/img/780.jpg scene0719_00/img/810.jpg +scene0719_00/img/870.jpg scene0719_00/img/900.jpg +scene0719_00/img/1005.jpg scene0719_00/img/1035.jpg +scene0719_00/img/1080.jpg scene0719_00/img/1095.jpg +scene0720_00/img/0.jpg scene0720_00/img/2520.jpg +scene0720_00/img/180.jpg scene0720_00/img/2580.jpg +scene0720_00/img/210.jpg scene0720_00/img/300.jpg +scene0720_00/img/615.jpg scene0720_00/img/660.jpg +scene0720_00/img/615.jpg scene0720_00/img/2490.jpg +scene0720_00/img/690.jpg scene0720_00/img/1575.jpg +scene0720_00/img/720.jpg scene0720_00/img/2460.jpg +scene0720_00/img/1095.jpg scene0720_00/img/1125.jpg +scene0720_00/img/1140.jpg scene0720_00/img/1290.jpg +scene0720_00/img/1200.jpg scene0720_00/img/1875.jpg +scene0720_00/img/1350.jpg scene0720_00/img/1410.jpg +scene0720_00/img/1485.jpg scene0720_00/img/2415.jpg +scene0720_00/img/1695.jpg scene0720_00/img/2685.jpg +scene0720_00/img/1935.jpg scene0720_00/img/2445.jpg +scene0720_00/img/2280.jpg scene0720_00/img/2385.jpg +scene0721_00/img/105.jpg scene0721_00/img/3600.jpg +scene0721_00/img/375.jpg scene0721_00/img/480.jpg +scene0721_00/img/375.jpg scene0721_00/img/2745.jpg +scene0721_00/img/705.jpg scene0721_00/img/765.jpg +scene0721_00/img/1185.jpg scene0721_00/img/2055.jpg +scene0721_00/img/1215.jpg scene0721_00/img/1890.jpg +scene0721_00/img/1320.jpg scene0721_00/img/2250.jpg +scene0721_00/img/1365.jpg scene0721_00/img/1515.jpg +scene0721_00/img/1365.jpg scene0721_00/img/1695.jpg 
+scene0721_00/img/1515.jpg scene0721_00/img/1545.jpg +scene0721_00/img/1560.jpg scene0721_00/img/1695.jpg +scene0721_00/img/1620.jpg scene0721_00/img/1665.jpg +scene0721_00/img/3285.jpg scene0721_00/img/3330.jpg +scene0721_00/img/3390.jpg scene0721_00/img/3510.jpg +scene0721_00/img/3645.jpg scene0721_00/img/3765.jpg +scene0722_00/img/0.jpg scene0722_00/img/630.jpg +scene0722_00/img/45.jpg scene0722_00/img/615.jpg +scene0722_00/img/45.jpg scene0722_00/img/735.jpg +scene0722_00/img/75.jpg scene0722_00/img/120.jpg +scene0722_00/img/90.jpg scene0722_00/img/795.jpg +scene0722_00/img/135.jpg scene0722_00/img/780.jpg +scene0722_00/img/165.jpg scene0722_00/img/900.jpg +scene0722_00/img/195.jpg scene0722_00/img/945.jpg +scene0722_00/img/300.jpg scene0722_00/img/345.jpg +scene0722_00/img/450.jpg scene0722_00/img/465.jpg +scene0722_00/img/540.jpg scene0722_00/img/570.jpg +scene0722_00/img/675.jpg scene0722_00/img/690.jpg +scene0722_00/img/750.jpg scene0722_00/img/765.jpg +scene0722_00/img/795.jpg scene0722_00/img/855.jpg +scene0722_00/img/855.jpg scene0722_00/img/885.jpg +scene0723_00/img/0.jpg scene0723_00/img/255.jpg +scene0723_00/img/0.jpg scene0723_00/img/1635.jpg +scene0723_00/img/15.jpg scene0723_00/img/1590.jpg +scene0723_00/img/75.jpg scene0723_00/img/1665.jpg +scene0723_00/img/195.jpg scene0723_00/img/210.jpg +scene0723_00/img/210.jpg scene0723_00/img/1590.jpg +scene0723_00/img/270.jpg scene0723_00/img/1635.jpg +scene0723_00/img/435.jpg scene0723_00/img/780.jpg +scene0723_00/img/465.jpg scene0723_00/img/795.jpg +scene0723_00/img/510.jpg scene0723_00/img/555.jpg +scene0723_00/img/510.jpg scene0723_00/img/810.jpg +scene0723_00/img/1185.jpg scene0723_00/img/1605.jpg +scene0723_00/img/1260.jpg scene0723_00/img/1530.jpg +scene0723_00/img/1290.jpg scene0723_00/img/1380.jpg +scene0723_00/img/1620.jpg scene0723_00/img/1695.jpg +scene0724_00/img/0.jpg scene0724_00/img/705.jpg +scene0724_00/img/30.jpg scene0724_00/img/810.jpg +scene0724_00/img/90.jpg scene0724_00/img/780.jpg 
+scene0724_00/img/105.jpg scene0724_00/img/750.jpg +scene0724_00/img/120.jpg scene0724_00/img/780.jpg +scene0724_00/img/135.jpg scene0724_00/img/780.jpg +scene0724_00/img/225.jpg scene0724_00/img/360.jpg +scene0724_00/img/300.jpg scene0724_00/img/1365.jpg +scene0724_00/img/330.jpg scene0724_00/img/375.jpg +scene0724_00/img/330.jpg scene0724_00/img/1365.jpg +scene0724_00/img/375.jpg scene0724_00/img/390.jpg +scene0724_00/img/465.jpg scene0724_00/img/1275.jpg +scene0724_00/img/705.jpg scene0724_00/img/1395.jpg +scene0724_00/img/720.jpg scene0724_00/img/765.jpg +scene0724_00/img/900.jpg scene0724_00/img/930.jpg +scene0725_00/img/0.jpg scene0725_00/img/960.jpg +scene0725_00/img/105.jpg scene0725_00/img/165.jpg +scene0725_00/img/135.jpg scene0725_00/img/180.jpg +scene0725_00/img/255.jpg scene0725_00/img/285.jpg +scene0725_00/img/345.jpg scene0725_00/img/390.jpg +scene0725_00/img/435.jpg scene0725_00/img/450.jpg +scene0725_00/img/465.jpg scene0725_00/img/510.jpg +scene0725_00/img/540.jpg scene0725_00/img/555.jpg +scene0725_00/img/555.jpg scene0725_00/img/570.jpg +scene0725_00/img/570.jpg scene0725_00/img/975.jpg +scene0725_00/img/735.jpg scene0725_00/img/750.jpg +scene0725_00/img/840.jpg scene0725_00/img/870.jpg +scene0725_00/img/885.jpg scene0725_00/img/1005.jpg +scene0725_00/img/930.jpg scene0725_00/img/990.jpg +scene0725_00/img/945.jpg scene0725_00/img/1005.jpg +scene0726_00/img/0.jpg scene0726_00/img/690.jpg +scene0726_00/img/15.jpg scene0726_00/img/675.jpg +scene0726_00/img/45.jpg scene0726_00/img/1110.jpg +scene0726_00/img/105.jpg scene0726_00/img/240.jpg +scene0726_00/img/120.jpg scene0726_00/img/225.jpg +scene0726_00/img/135.jpg scene0726_00/img/210.jpg +scene0726_00/img/165.jpg scene0726_00/img/390.jpg +scene0726_00/img/465.jpg scene0726_00/img/570.jpg +scene0726_00/img/480.jpg scene0726_00/img/810.jpg +scene0726_00/img/570.jpg scene0726_00/img/750.jpg +scene0726_00/img/780.jpg scene0726_00/img/855.jpg +scene0726_00/img/840.jpg scene0726_00/img/855.jpg 
+scene0726_00/img/885.jpg scene0726_00/img/915.jpg +scene0726_00/img/990.jpg scene0726_00/img/1005.jpg +scene0726_00/img/1215.jpg scene0726_00/img/1245.jpg +scene0727_00/img/0.jpg scene0727_00/img/1905.jpg +scene0727_00/img/45.jpg scene0727_00/img/765.jpg +scene0727_00/img/60.jpg scene0727_00/img/390.jpg +scene0727_00/img/120.jpg scene0727_00/img/345.jpg +scene0727_00/img/150.jpg scene0727_00/img/195.jpg +scene0727_00/img/150.jpg scene0727_00/img/1905.jpg +scene0727_00/img/195.jpg scene0727_00/img/210.jpg +scene0727_00/img/240.jpg scene0727_00/img/1965.jpg +scene0727_00/img/270.jpg scene0727_00/img/1980.jpg +scene0727_00/img/450.jpg scene0727_00/img/540.jpg +scene0727_00/img/795.jpg scene0727_00/img/1335.jpg +scene0727_00/img/1125.jpg scene0727_00/img/1185.jpg +scene0727_00/img/1185.jpg scene0727_00/img/1695.jpg +scene0727_00/img/1245.jpg scene0727_00/img/1320.jpg +scene0727_00/img/1275.jpg scene0727_00/img/1695.jpg +scene0728_00/img/60.jpg scene0728_00/img/300.jpg +scene0728_00/img/105.jpg scene0728_00/img/915.jpg +scene0728_00/img/120.jpg scene0728_00/img/375.jpg +scene0728_00/img/150.jpg scene0728_00/img/885.jpg +scene0728_00/img/165.jpg scene0728_00/img/315.jpg +scene0728_00/img/180.jpg scene0728_00/img/1020.jpg +scene0728_00/img/240.jpg scene0728_00/img/345.jpg +scene0728_00/img/330.jpg scene0728_00/img/1035.jpg +scene0728_00/img/360.jpg scene0728_00/img/960.jpg +scene0728_00/img/375.jpg scene0728_00/img/945.jpg +scene0728_00/img/420.jpg scene0728_00/img/975.jpg +scene0728_00/img/510.jpg scene0728_00/img/525.jpg +scene0728_00/img/555.jpg scene0728_00/img/585.jpg +scene0728_00/img/660.jpg scene0728_00/img/825.jpg +scene0728_00/img/885.jpg scene0728_00/img/900.jpg +scene0729_00/img/90.jpg scene0729_00/img/1155.jpg +scene0729_00/img/120.jpg scene0729_00/img/1170.jpg +scene0729_00/img/225.jpg scene0729_00/img/255.jpg +scene0729_00/img/240.jpg scene0729_00/img/300.jpg +scene0729_00/img/240.jpg scene0729_00/img/330.jpg +scene0729_00/img/240.jpg 
scene0729_00/img/720.jpg +scene0729_00/img/285.jpg scene0729_00/img/390.jpg +scene0729_00/img/390.jpg scene0729_00/img/420.jpg +scene0729_00/img/450.jpg scene0729_00/img/495.jpg +scene0729_00/img/585.jpg scene0729_00/img/720.jpg +scene0729_00/img/690.jpg scene0729_00/img/735.jpg +scene0729_00/img/705.jpg scene0729_00/img/735.jpg +scene0729_00/img/870.jpg scene0729_00/img/885.jpg +scene0729_00/img/885.jpg scene0729_00/img/900.jpg +scene0729_00/img/1020.jpg scene0729_00/img/1110.jpg +scene0730_00/img/150.jpg scene0730_00/img/390.jpg +scene0730_00/img/165.jpg scene0730_00/img/390.jpg +scene0730_00/img/180.jpg scene0730_00/img/210.jpg +scene0730_00/img/315.jpg scene0730_00/img/1140.jpg +scene0730_00/img/330.jpg scene0730_00/img/345.jpg +scene0730_00/img/330.jpg scene0730_00/img/360.jpg +scene0730_00/img/360.jpg scene0730_00/img/375.jpg +scene0730_00/img/360.jpg scene0730_00/img/510.jpg +scene0730_00/img/510.jpg scene0730_00/img/1095.jpg +scene0730_00/img/660.jpg scene0730_00/img/960.jpg +scene0730_00/img/765.jpg scene0730_00/img/780.jpg +scene0730_00/img/795.jpg scene0730_00/img/885.jpg +scene0730_00/img/810.jpg scene0730_00/img/840.jpg +scene0730_00/img/1050.jpg scene0730_00/img/1125.jpg +scene0730_00/img/1140.jpg scene0730_00/img/1170.jpg +scene0731_00/img/0.jpg scene0731_00/img/255.jpg +scene0731_00/img/0.jpg scene0731_00/img/1050.jpg +scene0731_00/img/45.jpg scene0731_00/img/1080.jpg +scene0731_00/img/75.jpg scene0731_00/img/120.jpg +scene0731_00/img/180.jpg scene0731_00/img/225.jpg +scene0731_00/img/180.jpg scene0731_00/img/255.jpg +scene0731_00/img/240.jpg scene0731_00/img/255.jpg +scene0731_00/img/240.jpg scene0731_00/img/1080.jpg +scene0731_00/img/315.jpg scene0731_00/img/345.jpg +scene0731_00/img/420.jpg scene0731_00/img/990.jpg +scene0731_00/img/495.jpg scene0731_00/img/525.jpg +scene0731_00/img/540.jpg scene0731_00/img/870.jpg +scene0731_00/img/630.jpg scene0731_00/img/810.jpg +scene0731_00/img/900.jpg scene0731_00/img/915.jpg +scene0731_00/img/1065.jpg 
scene0731_00/img/1110.jpg +scene0732_00/img/60.jpg scene0732_00/img/105.jpg +scene0732_00/img/120.jpg scene0732_00/img/405.jpg +scene0732_00/img/240.jpg scene0732_00/img/300.jpg +scene0732_00/img/240.jpg scene0732_00/img/1410.jpg +scene0732_00/img/255.jpg scene0732_00/img/270.jpg +scene0732_00/img/450.jpg scene0732_00/img/465.jpg +scene0732_00/img/510.jpg scene0732_00/img/540.jpg +scene0732_00/img/630.jpg scene0732_00/img/1125.jpg +scene0732_00/img/795.jpg scene0732_00/img/1260.jpg +scene0732_00/img/810.jpg scene0732_00/img/840.jpg +scene0732_00/img/825.jpg scene0732_00/img/1170.jpg +scene0732_00/img/945.jpg scene0732_00/img/1140.jpg +scene0732_00/img/1050.jpg scene0732_00/img/1080.jpg +scene0732_00/img/1485.jpg scene0732_00/img/1515.jpg +scene0732_00/img/1500.jpg scene0732_00/img/1515.jpg +scene0733_00/img/0.jpg scene0733_00/img/210.jpg +scene0733_00/img/30.jpg scene0733_00/img/60.jpg +scene0733_00/img/45.jpg scene0733_00/img/90.jpg +scene0733_00/img/150.jpg scene0733_00/img/195.jpg +scene0733_00/img/210.jpg scene0733_00/img/255.jpg +scene0733_00/img/255.jpg scene0733_00/img/390.jpg +scene0733_00/img/270.jpg scene0733_00/img/345.jpg +scene0733_00/img/480.jpg scene0733_00/img/525.jpg +scene0733_00/img/615.jpg scene0733_00/img/720.jpg +scene0733_00/img/810.jpg scene0733_00/img/870.jpg +scene0733_00/img/870.jpg scene0733_00/img/900.jpg +scene0733_00/img/930.jpg scene0733_00/img/945.jpg +scene0733_00/img/945.jpg scene0733_00/img/990.jpg +scene0733_00/img/1065.jpg scene0733_00/img/1155.jpg +scene0733_00/img/1080.jpg scene0733_00/img/1155.jpg +scene0734_00/img/0.jpg scene0734_00/img/240.jpg +scene0734_00/img/15.jpg scene0734_00/img/1755.jpg +scene0734_00/img/195.jpg scene0734_00/img/810.jpg +scene0734_00/img/210.jpg scene0734_00/img/1755.jpg +scene0734_00/img/285.jpg scene0734_00/img/465.jpg +scene0734_00/img/300.jpg scene0734_00/img/330.jpg +scene0734_00/img/405.jpg scene0734_00/img/1725.jpg +scene0734_00/img/570.jpg scene0734_00/img/945.jpg +scene0734_00/img/630.jpg 
scene0734_00/img/1185.jpg +scene0734_00/img/690.jpg scene0734_00/img/1380.jpg +scene0734_00/img/720.jpg scene0734_00/img/885.jpg +scene0734_00/img/930.jpg scene0734_00/img/1185.jpg +scene0734_00/img/945.jpg scene0734_00/img/975.jpg +scene0734_00/img/1005.jpg scene0734_00/img/1095.jpg +scene0734_00/img/1485.jpg scene0734_00/img/1575.jpg +scene0735_00/img/180.jpg scene0735_00/img/660.jpg +scene0735_00/img/225.jpg scene0735_00/img/690.jpg +scene0735_00/img/255.jpg scene0735_00/img/435.jpg +scene0735_00/img/285.jpg scene0735_00/img/300.jpg +scene0735_00/img/300.jpg scene0735_00/img/315.jpg +scene0735_00/img/315.jpg scene0735_00/img/330.jpg +scene0735_00/img/420.jpg scene0735_00/img/450.jpg +scene0735_00/img/420.jpg scene0735_00/img/465.jpg +scene0735_00/img/420.jpg scene0735_00/img/495.jpg +scene0735_00/img/420.jpg scene0735_00/img/555.jpg +scene0735_00/img/450.jpg scene0735_00/img/645.jpg +scene0735_00/img/480.jpg scene0735_00/img/570.jpg +scene0735_00/img/510.jpg scene0735_00/img/645.jpg +scene0735_00/img/525.jpg scene0735_00/img/645.jpg +scene0735_00/img/540.jpg scene0735_00/img/645.jpg +scene0736_00/img/0.jpg scene0736_00/img/4710.jpg +scene0736_00/img/735.jpg scene0736_00/img/2130.jpg +scene0736_00/img/990.jpg scene0736_00/img/1200.jpg +scene0736_00/img/1005.jpg scene0736_00/img/1365.jpg +scene0736_00/img/1275.jpg scene0736_00/img/5970.jpg +scene0736_00/img/1425.jpg scene0736_00/img/4710.jpg +scene0736_00/img/1470.jpg scene0736_00/img/6075.jpg +scene0736_00/img/1800.jpg scene0736_00/img/1830.jpg +scene0736_00/img/2370.jpg scene0736_00/img/2850.jpg +scene0736_00/img/4245.jpg scene0736_00/img/6255.jpg +scene0736_00/img/4530.jpg scene0736_00/img/5580.jpg +scene0736_00/img/6045.jpg scene0736_00/img/6450.jpg +scene0736_00/img/6060.jpg scene0736_00/img/6450.jpg +scene0736_00/img/6480.jpg scene0736_00/img/7140.jpg +scene0736_00/img/6870.jpg scene0736_00/img/7020.jpg +scene0737_00/img/285.jpg scene0737_00/img/2985.jpg +scene0737_00/img/525.jpg scene0737_00/img/2520.jpg 
+scene0737_00/img/885.jpg scene0737_00/img/930.jpg +scene0737_00/img/930.jpg scene0737_00/img/1095.jpg +scene0737_00/img/990.jpg scene0737_00/img/1110.jpg +scene0737_00/img/990.jpg scene0737_00/img/3000.jpg +scene0737_00/img/1140.jpg scene0737_00/img/3030.jpg +scene0737_00/img/1170.jpg scene0737_00/img/1320.jpg +scene0737_00/img/1170.jpg scene0737_00/img/1335.jpg +scene0737_00/img/1185.jpg scene0737_00/img/1230.jpg +scene0737_00/img/1230.jpg scene0737_00/img/1335.jpg +scene0737_00/img/1245.jpg scene0737_00/img/1350.jpg +scene0737_00/img/1965.jpg scene0737_00/img/2730.jpg +scene0737_00/img/2205.jpg scene0737_00/img/2640.jpg +scene0737_00/img/2220.jpg scene0737_00/img/2295.jpg +scene0738_00/img/30.jpg scene0738_00/img/105.jpg +scene0738_00/img/60.jpg scene0738_00/img/1545.jpg +scene0738_00/img/225.jpg scene0738_00/img/300.jpg +scene0738_00/img/270.jpg scene0738_00/img/420.jpg +scene0738_00/img/495.jpg scene0738_00/img/525.jpg +scene0738_00/img/510.jpg scene0738_00/img/645.jpg +scene0738_00/img/630.jpg scene0738_00/img/1290.jpg +scene0738_00/img/720.jpg scene0738_00/img/780.jpg +scene0738_00/img/720.jpg scene0738_00/img/885.jpg +scene0738_00/img/795.jpg scene0738_00/img/900.jpg +scene0738_00/img/840.jpg scene0738_00/img/1050.jpg +scene0738_00/img/885.jpg scene0738_00/img/1065.jpg +scene0738_00/img/990.jpg scene0738_00/img/1035.jpg +scene0738_00/img/990.jpg scene0738_00/img/1185.jpg +scene0738_00/img/1455.jpg scene0738_00/img/1470.jpg +scene0739_00/img/150.jpg scene0739_00/img/2235.jpg +scene0739_00/img/495.jpg scene0739_00/img/1995.jpg +scene0739_00/img/630.jpg scene0739_00/img/870.jpg +scene0739_00/img/990.jpg scene0739_00/img/1785.jpg +scene0739_00/img/990.jpg scene0739_00/img/4065.jpg +scene0739_00/img/1335.jpg scene0739_00/img/2955.jpg +scene0739_00/img/1785.jpg scene0739_00/img/4110.jpg +scene0739_00/img/1845.jpg scene0739_00/img/2085.jpg +scene0739_00/img/2055.jpg scene0739_00/img/4440.jpg +scene0739_00/img/2655.jpg scene0739_00/img/2715.jpg 
+scene0739_00/img/2925.jpg scene0739_00/img/4065.jpg +scene0739_00/img/3045.jpg scene0739_00/img/3615.jpg +scene0739_00/img/4050.jpg scene0739_00/img/4440.jpg +scene0739_00/img/4110.jpg scene0739_00/img/4230.jpg +scene0739_00/img/4110.jpg scene0739_00/img/4380.jpg +scene0740_00/img/210.jpg scene0740_00/img/825.jpg +scene0740_00/img/585.jpg scene0740_00/img/2505.jpg +scene0740_00/img/660.jpg scene0740_00/img/2445.jpg +scene0740_00/img/720.jpg scene0740_00/img/1605.jpg +scene0740_00/img/1065.jpg scene0740_00/img/1155.jpg +scene0740_00/img/1200.jpg scene0740_00/img/2490.jpg +scene0740_00/img/1215.jpg scene0740_00/img/2370.jpg +scene0740_00/img/1230.jpg scene0740_00/img/1350.jpg +scene0740_00/img/1275.jpg scene0740_00/img/2175.jpg +scene0740_00/img/1290.jpg scene0740_00/img/1665.jpg +scene0740_00/img/1425.jpg scene0740_00/img/1770.jpg +scene0740_00/img/1500.jpg scene0740_00/img/1860.jpg +scene0740_00/img/1545.jpg scene0740_00/img/2070.jpg +scene0740_00/img/1545.jpg scene0740_00/img/2145.jpg +scene0740_00/img/2235.jpg scene0740_00/img/2445.jpg +scene0741_00/img/105.jpg scene0741_00/img/1740.jpg +scene0741_00/img/150.jpg scene0741_00/img/1740.jpg +scene0741_00/img/210.jpg scene0741_00/img/1740.jpg +scene0741_00/img/375.jpg scene0741_00/img/405.jpg +scene0741_00/img/435.jpg scene0741_00/img/810.jpg +scene0741_00/img/495.jpg scene0741_00/img/915.jpg +scene0741_00/img/555.jpg scene0741_00/img/1545.jpg +scene0741_00/img/555.jpg scene0741_00/img/1605.jpg +scene0741_00/img/660.jpg scene0741_00/img/855.jpg +scene0741_00/img/675.jpg scene0741_00/img/1635.jpg +scene0741_00/img/870.jpg scene0741_00/img/2085.jpg +scene0741_00/img/1080.jpg scene0741_00/img/1950.jpg +scene0741_00/img/1140.jpg scene0741_00/img/1470.jpg +scene0741_00/img/1170.jpg scene0741_00/img/1290.jpg +scene0741_00/img/2130.jpg scene0741_00/img/2175.jpg +scene0742_00/img/0.jpg scene0742_00/img/120.jpg +scene0742_00/img/45.jpg scene0742_00/img/660.jpg +scene0742_00/img/90.jpg scene0742_00/img/675.jpg 
+scene0742_00/img/120.jpg scene0742_00/img/705.jpg +scene0742_00/img/120.jpg scene0742_00/img/720.jpg +scene0742_00/img/135.jpg scene0742_00/img/720.jpg +scene0742_00/img/150.jpg scene0742_00/img/735.jpg +scene0742_00/img/165.jpg scene0742_00/img/750.jpg +scene0742_00/img/225.jpg scene0742_00/img/345.jpg +scene0742_00/img/285.jpg scene0742_00/img/330.jpg +scene0742_00/img/360.jpg scene0742_00/img/375.jpg +scene0742_00/img/405.jpg scene0742_00/img/540.jpg +scene0742_00/img/420.jpg scene0742_00/img/570.jpg +scene0742_00/img/435.jpg scene0742_00/img/585.jpg +scene0742_00/img/615.jpg scene0742_00/img/645.jpg +scene0743_00/img/0.jpg scene0743_00/img/1230.jpg +scene0743_00/img/15.jpg scene0743_00/img/240.jpg +scene0743_00/img/45.jpg scene0743_00/img/1530.jpg +scene0743_00/img/165.jpg scene0743_00/img/435.jpg +scene0743_00/img/420.jpg scene0743_00/img/1635.jpg +scene0743_00/img/495.jpg scene0743_00/img/1560.jpg +scene0743_00/img/585.jpg scene0743_00/img/630.jpg +scene0743_00/img/600.jpg scene0743_00/img/705.jpg +scene0743_00/img/615.jpg scene0743_00/img/1380.jpg +scene0743_00/img/645.jpg scene0743_00/img/1380.jpg +scene0743_00/img/660.jpg scene0743_00/img/750.jpg +scene0743_00/img/675.jpg scene0743_00/img/765.jpg +scene0743_00/img/915.jpg scene0743_00/img/1020.jpg +scene0743_00/img/1245.jpg scene0743_00/img/1290.jpg +scene0743_00/img/1425.jpg scene0743_00/img/1440.jpg +scene0744_00/img/105.jpg scene0744_00/img/2595.jpg +scene0744_00/img/120.jpg scene0744_00/img/2220.jpg +scene0744_00/img/180.jpg scene0744_00/img/1500.jpg +scene0744_00/img/180.jpg scene0744_00/img/2475.jpg +scene0744_00/img/195.jpg scene0744_00/img/1560.jpg +scene0744_00/img/210.jpg scene0744_00/img/615.jpg +scene0744_00/img/210.jpg scene0744_00/img/630.jpg +scene0744_00/img/330.jpg scene0744_00/img/2115.jpg +scene0744_00/img/390.jpg scene0744_00/img/585.jpg +scene0744_00/img/585.jpg scene0744_00/img/2310.jpg +scene0744_00/img/615.jpg scene0744_00/img/1620.jpg +scene0744_00/img/630.jpg 
scene0744_00/img/1500.jpg +scene0744_00/img/840.jpg scene0744_00/img/2265.jpg +scene0744_00/img/1110.jpg scene0744_00/img/1170.jpg +scene0744_00/img/1905.jpg scene0744_00/img/1935.jpg +scene0745_00/img/45.jpg scene0745_00/img/1620.jpg +scene0745_00/img/90.jpg scene0745_00/img/135.jpg +scene0745_00/img/90.jpg scene0745_00/img/1635.jpg +scene0745_00/img/240.jpg scene0745_00/img/270.jpg +scene0745_00/img/375.jpg scene0745_00/img/435.jpg +scene0745_00/img/405.jpg scene0745_00/img/1590.jpg +scene0745_00/img/675.jpg scene0745_00/img/720.jpg +scene0745_00/img/675.jpg scene0745_00/img/765.jpg +scene0745_00/img/1200.jpg scene0745_00/img/1410.jpg +scene0745_00/img/1215.jpg scene0745_00/img/1440.jpg +scene0745_00/img/1275.jpg scene0745_00/img/1350.jpg +scene0745_00/img/1290.jpg scene0745_00/img/1335.jpg +scene0745_00/img/1365.jpg scene0745_00/img/1380.jpg +scene0745_00/img/1365.jpg scene0745_00/img/1395.jpg +scene0745_00/img/1410.jpg scene0745_00/img/1470.jpg +scene0746_00/img/15.jpg scene0746_00/img/1800.jpg +scene0746_00/img/135.jpg scene0746_00/img/165.jpg +scene0746_00/img/180.jpg scene0746_00/img/2520.jpg +scene0746_00/img/240.jpg scene0746_00/img/825.jpg +scene0746_00/img/390.jpg scene0746_00/img/555.jpg +scene0746_00/img/690.jpg scene0746_00/img/975.jpg +scene0746_00/img/720.jpg scene0746_00/img/765.jpg +scene0746_00/img/1095.jpg scene0746_00/img/1260.jpg +scene0746_00/img/1170.jpg scene0746_00/img/1665.jpg +scene0746_00/img/1170.jpg scene0746_00/img/1875.jpg +scene0746_00/img/1215.jpg scene0746_00/img/2250.jpg +scene0746_00/img/1410.jpg scene0746_00/img/1440.jpg +scene0746_00/img/1845.jpg scene0746_00/img/1980.jpg +scene0746_00/img/1920.jpg scene0746_00/img/1935.jpg +scene0746_00/img/2475.jpg scene0746_00/img/2610.jpg +scene0747_00/img/0.jpg scene0747_00/img/1530.jpg +scene0747_00/img/30.jpg scene0747_00/img/810.jpg +scene0747_00/img/30.jpg scene0747_00/img/1485.jpg +scene0747_00/img/270.jpg scene0747_00/img/3030.jpg +scene0747_00/img/285.jpg scene0747_00/img/2865.jpg 
+scene0747_00/img/360.jpg scene0747_00/img/465.jpg +scene0747_00/img/405.jpg scene0747_00/img/585.jpg +scene0747_00/img/720.jpg scene0747_00/img/1350.jpg +scene0747_00/img/810.jpg scene0747_00/img/885.jpg +scene0747_00/img/855.jpg scene0747_00/img/4815.jpg +scene0747_00/img/915.jpg scene0747_00/img/4845.jpg +scene0747_00/img/1035.jpg scene0747_00/img/1560.jpg +scene0747_00/img/2070.jpg scene0747_00/img/2085.jpg +scene0747_00/img/3225.jpg scene0747_00/img/3300.jpg +scene0747_00/img/4215.jpg scene0747_00/img/4245.jpg +scene0748_00/img/45.jpg scene0748_00/img/1320.jpg +scene0748_00/img/210.jpg scene0748_00/img/630.jpg +scene0748_00/img/240.jpg scene0748_00/img/1890.jpg +scene0748_00/img/255.jpg scene0748_00/img/2010.jpg +scene0748_00/img/525.jpg scene0748_00/img/1155.jpg +scene0748_00/img/705.jpg scene0748_00/img/1395.jpg +scene0748_00/img/840.jpg scene0748_00/img/885.jpg +scene0748_00/img/900.jpg scene0748_00/img/1260.jpg +scene0748_00/img/1005.jpg scene0748_00/img/1050.jpg +scene0748_00/img/1095.jpg scene0748_00/img/2190.jpg +scene0748_00/img/1830.jpg scene0748_00/img/2415.jpg +scene0748_00/img/1890.jpg scene0748_00/img/2190.jpg +scene0748_00/img/1920.jpg scene0748_00/img/2040.jpg +scene0748_00/img/1950.jpg scene0748_00/img/2070.jpg +scene0748_00/img/2565.jpg scene0748_00/img/2580.jpg +scene0749_00/img/15.jpg scene0749_00/img/495.jpg +scene0749_00/img/30.jpg scene0749_00/img/75.jpg +scene0749_00/img/135.jpg scene0749_00/img/150.jpg +scene0749_00/img/270.jpg scene0749_00/img/750.jpg +scene0749_00/img/285.jpg scene0749_00/img/960.jpg +scene0749_00/img/360.jpg scene0749_00/img/1740.jpg +scene0749_00/img/390.jpg scene0749_00/img/1800.jpg +scene0749_00/img/405.jpg scene0749_00/img/420.jpg +scene0749_00/img/525.jpg scene0749_00/img/1335.jpg +scene0749_00/img/675.jpg scene0749_00/img/840.jpg +scene0749_00/img/840.jpg scene0749_00/img/870.jpg +scene0749_00/img/1050.jpg scene0749_00/img/1935.jpg +scene0749_00/img/1080.jpg scene0749_00/img/1815.jpg +scene0749_00/img/1200.jpg 
scene0749_00/img/1545.jpg +scene0749_00/img/1650.jpg scene0749_00/img/1695.jpg +scene0750_00/img/0.jpg scene0750_00/img/1020.jpg +scene0750_00/img/15.jpg scene0750_00/img/660.jpg +scene0750_00/img/15.jpg scene0750_00/img/780.jpg +scene0750_00/img/15.jpg scene0750_00/img/1410.jpg +scene0750_00/img/30.jpg scene0750_00/img/765.jpg +scene0750_00/img/180.jpg scene0750_00/img/270.jpg +scene0750_00/img/285.jpg scene0750_00/img/330.jpg +scene0750_00/img/300.jpg scene0750_00/img/360.jpg +scene0750_00/img/300.jpg scene0750_00/img/570.jpg +scene0750_00/img/660.jpg scene0750_00/img/1005.jpg +scene0750_00/img/750.jpg scene0750_00/img/1410.jpg +scene0750_00/img/765.jpg scene0750_00/img/915.jpg +scene0750_00/img/885.jpg scene0750_00/img/945.jpg +scene0750_00/img/1095.jpg scene0750_00/img/1155.jpg +scene0750_00/img/1530.jpg scene0750_00/img/1545.jpg +scene0751_00/img/0.jpg scene0751_00/img/1020.jpg +scene0751_00/img/15.jpg scene0751_00/img/225.jpg +scene0751_00/img/150.jpg scene0751_00/img/1065.jpg +scene0751_00/img/180.jpg scene0751_00/img/225.jpg +scene0751_00/img/225.jpg scene0751_00/img/1020.jpg +scene0751_00/img/285.jpg scene0751_00/img/555.jpg +scene0751_00/img/285.jpg scene0751_00/img/615.jpg +scene0751_00/img/300.jpg scene0751_00/img/630.jpg +scene0751_00/img/375.jpg scene0751_00/img/660.jpg +scene0751_00/img/405.jpg scene0751_00/img/585.jpg +scene0751_00/img/435.jpg scene0751_00/img/555.jpg +scene0751_00/img/600.jpg scene0751_00/img/750.jpg +scene0751_00/img/825.jpg scene0751_00/img/870.jpg +scene0751_00/img/1635.jpg scene0751_00/img/1755.jpg +scene0751_00/img/1680.jpg scene0751_00/img/1755.jpg +scene0752_00/img/75.jpg scene0752_00/img/1440.jpg +scene0752_00/img/75.jpg scene0752_00/img/1530.jpg +scene0752_00/img/165.jpg scene0752_00/img/2130.jpg +scene0752_00/img/480.jpg scene0752_00/img/2775.jpg +scene0752_00/img/705.jpg scene0752_00/img/2160.jpg +scene0752_00/img/705.jpg scene0752_00/img/2295.jpg +scene0752_00/img/750.jpg scene0752_00/img/780.jpg 
+scene0752_00/img/750.jpg scene0752_00/img/1695.jpg +scene0752_00/img/1005.jpg scene0752_00/img/1065.jpg +scene0752_00/img/1020.jpg scene0752_00/img/1200.jpg +scene0752_00/img/1080.jpg scene0752_00/img/1125.jpg +scene0752_00/img/1635.jpg scene0752_00/img/1650.jpg +scene0752_00/img/1650.jpg scene0752_00/img/2835.jpg +scene0752_00/img/2025.jpg scene0752_00/img/2970.jpg +scene0752_00/img/2505.jpg scene0752_00/img/2535.jpg +scene0753_00/img/30.jpg scene0753_00/img/1320.jpg +scene0753_00/img/75.jpg scene0753_00/img/1245.jpg +scene0753_00/img/90.jpg scene0753_00/img/1515.jpg +scene0753_00/img/195.jpg scene0753_00/img/285.jpg +scene0753_00/img/330.jpg scene0753_00/img/2445.jpg +scene0753_00/img/360.jpg scene0753_00/img/2385.jpg +scene0753_00/img/510.jpg scene0753_00/img/615.jpg +scene0753_00/img/585.jpg scene0753_00/img/660.jpg +scene0753_00/img/690.jpg scene0753_00/img/720.jpg +scene0753_00/img/1155.jpg scene0753_00/img/1845.jpg +scene0753_00/img/1320.jpg scene0753_00/img/1440.jpg +scene0753_00/img/1725.jpg scene0753_00/img/3075.jpg +scene0753_00/img/2205.jpg scene0753_00/img/2325.jpg +scene0753_00/img/2430.jpg scene0753_00/img/2475.jpg +scene0753_00/img/2580.jpg scene0753_00/img/2850.jpg +scene0754_00/img/0.jpg scene0754_00/img/3105.jpg +scene0754_00/img/75.jpg scene0754_00/img/3105.jpg +scene0754_00/img/90.jpg scene0754_00/img/720.jpg +scene0754_00/img/150.jpg scene0754_00/img/405.jpg +scene0754_00/img/180.jpg scene0754_00/img/300.jpg +scene0754_00/img/345.jpg scene0754_00/img/3150.jpg +scene0754_00/img/645.jpg scene0754_00/img/1005.jpg +scene0754_00/img/1020.jpg scene0754_00/img/1065.jpg +scene0754_00/img/1440.jpg scene0754_00/img/2760.jpg +scene0754_00/img/1455.jpg scene0754_00/img/2970.jpg +scene0754_00/img/1695.jpg scene0754_00/img/3075.jpg +scene0754_00/img/1725.jpg scene0754_00/img/3120.jpg +scene0754_00/img/1845.jpg scene0754_00/img/1935.jpg +scene0754_00/img/2130.jpg scene0754_00/img/2190.jpg +scene0754_00/img/2685.jpg scene0754_00/img/2790.jpg 
+scene0755_00/img/120.jpg scene0755_00/img/2055.jpg +scene0755_00/img/690.jpg scene0755_00/img/2865.jpg +scene0755_00/img/720.jpg scene0755_00/img/2910.jpg +scene0755_00/img/735.jpg scene0755_00/img/2790.jpg +scene0755_00/img/900.jpg scene0755_00/img/1110.jpg +scene0755_00/img/1320.jpg scene0755_00/img/3480.jpg +scene0755_00/img/1440.jpg scene0755_00/img/1470.jpg +scene0755_00/img/1440.jpg scene0755_00/img/1980.jpg +scene0755_00/img/1560.jpg scene0755_00/img/2310.jpg +scene0755_00/img/1605.jpg scene0755_00/img/1650.jpg +scene0755_00/img/1695.jpg scene0755_00/img/1740.jpg +scene0755_00/img/1830.jpg scene0755_00/img/3420.jpg +scene0755_00/img/2010.jpg scene0755_00/img/2370.jpg +scene0755_00/img/2415.jpg scene0755_00/img/2475.jpg +scene0755_00/img/2460.jpg scene0755_00/img/2535.jpg +scene0756_00/img/75.jpg scene0756_00/img/2400.jpg +scene0756_00/img/345.jpg scene0756_00/img/3465.jpg +scene0756_00/img/405.jpg scene0756_00/img/3495.jpg +scene0756_00/img/450.jpg scene0756_00/img/1770.jpg +scene0756_00/img/855.jpg scene0756_00/img/1260.jpg +scene0756_00/img/1050.jpg scene0756_00/img/1110.jpg +scene0756_00/img/1320.jpg scene0756_00/img/1455.jpg +scene0756_00/img/1425.jpg scene0756_00/img/1470.jpg +scene0756_00/img/1545.jpg scene0756_00/img/1575.jpg +scene0756_00/img/1680.jpg scene0756_00/img/1725.jpg +scene0756_00/img/2385.jpg scene0756_00/img/2850.jpg +scene0756_00/img/2535.jpg scene0756_00/img/3000.jpg +scene0756_00/img/2580.jpg scene0756_00/img/2700.jpg +scene0756_00/img/2610.jpg scene0756_00/img/2910.jpg +scene0756_00/img/3405.jpg scene0756_00/img/3465.jpg +scene0757_00/img/345.jpg scene0757_00/img/405.jpg +scene0757_00/img/1410.jpg scene0757_00/img/1455.jpg +scene0757_00/img/1575.jpg scene0757_00/img/1590.jpg +scene0757_00/img/2010.jpg scene0757_00/img/3345.jpg +scene0757_00/img/2145.jpg scene0757_00/img/7665.jpg +scene0757_00/img/2280.jpg scene0757_00/img/7815.jpg +scene0757_00/img/2505.jpg scene0757_00/img/2550.jpg +scene0757_00/img/2715.jpg 
scene0757_00/img/2940.jpg +scene0757_00/img/2835.jpg scene0757_00/img/8325.jpg +scene0757_00/img/3000.jpg scene0757_00/img/3045.jpg +scene0757_00/img/3630.jpg scene0757_00/img/3930.jpg +scene0757_00/img/4035.jpg scene0757_00/img/5475.jpg +scene0757_00/img/4665.jpg scene0757_00/img/4800.jpg +scene0757_00/img/4770.jpg scene0757_00/img/5175.jpg +scene0757_00/img/4815.jpg scene0757_00/img/4845.jpg +scene0758_00/img/45.jpg scene0758_00/img/1500.jpg +scene0758_00/img/120.jpg scene0758_00/img/180.jpg +scene0758_00/img/150.jpg scene0758_00/img/1110.jpg +scene0758_00/img/165.jpg scene0758_00/img/510.jpg +scene0758_00/img/345.jpg scene0758_00/img/1755.jpg +scene0758_00/img/360.jpg scene0758_00/img/930.jpg +scene0758_00/img/405.jpg scene0758_00/img/1215.jpg +scene0758_00/img/450.jpg scene0758_00/img/1110.jpg +scene0758_00/img/555.jpg scene0758_00/img/600.jpg +scene0758_00/img/840.jpg scene0758_00/img/870.jpg +scene0758_00/img/960.jpg scene0758_00/img/1005.jpg +scene0758_00/img/1080.jpg scene0758_00/img/1170.jpg +scene0758_00/img/1155.jpg scene0758_00/img/1185.jpg +scene0758_00/img/1185.jpg scene0758_00/img/1230.jpg +scene0758_00/img/1200.jpg scene0758_00/img/1710.jpg +scene0759_00/img/15.jpg scene0759_00/img/1500.jpg +scene0759_00/img/45.jpg scene0759_00/img/75.jpg +scene0759_00/img/120.jpg scene0759_00/img/1695.jpg +scene0759_00/img/210.jpg scene0759_00/img/270.jpg +scene0759_00/img/300.jpg scene0759_00/img/990.jpg +scene0759_00/img/435.jpg scene0759_00/img/1425.jpg +scene0759_00/img/450.jpg scene0759_00/img/1440.jpg +scene0759_00/img/465.jpg scene0759_00/img/1455.jpg +scene0759_00/img/570.jpg scene0759_00/img/765.jpg +scene0759_00/img/645.jpg scene0759_00/img/705.jpg +scene0759_00/img/870.jpg scene0759_00/img/885.jpg +scene0759_00/img/930.jpg scene0759_00/img/945.jpg +scene0759_00/img/990.jpg scene0759_00/img/1005.jpg +scene0759_00/img/1155.jpg scene0759_00/img/1770.jpg +scene0759_00/img/1515.jpg scene0759_00/img/1590.jpg +scene0760_00/img/0.jpg scene0760_00/img/975.jpg 
+scene0760_00/img/30.jpg scene0760_00/img/1470.jpg +scene0760_00/img/255.jpg scene0760_00/img/555.jpg +scene0760_00/img/270.jpg scene0760_00/img/1560.jpg +scene0760_00/img/390.jpg scene0760_00/img/1110.jpg +scene0760_00/img/405.jpg scene0760_00/img/1080.jpg +scene0760_00/img/435.jpg scene0760_00/img/1095.jpg +scene0760_00/img/435.jpg scene0760_00/img/1110.jpg +scene0760_00/img/540.jpg scene0760_00/img/1200.jpg +scene0760_00/img/570.jpg scene0760_00/img/585.jpg +scene0760_00/img/690.jpg scene0760_00/img/720.jpg +scene0760_00/img/690.jpg scene0760_00/img/735.jpg +scene0760_00/img/795.jpg scene0760_00/img/885.jpg +scene0760_00/img/840.jpg scene0760_00/img/885.jpg +scene0760_00/img/915.jpg scene0760_00/img/1500.jpg +scene0761_00/img/645.jpg scene0761_00/img/2370.jpg +scene0761_00/img/1860.jpg scene0761_00/img/2040.jpg +scene0761_00/img/2175.jpg scene0761_00/img/2820.jpg +scene0761_00/img/2280.jpg scene0761_00/img/2310.jpg +scene0761_00/img/2385.jpg scene0761_00/img/2880.jpg +scene0761_00/img/2385.jpg scene0761_00/img/2955.jpg +scene0761_00/img/2715.jpg scene0761_00/img/5100.jpg +scene0761_00/img/2970.jpg scene0761_00/img/3000.jpg +scene0761_00/img/3540.jpg scene0761_00/img/3960.jpg +scene0761_00/img/3795.jpg scene0761_00/img/3825.jpg +scene0761_00/img/3825.jpg scene0761_00/img/5145.jpg +scene0761_00/img/4125.jpg scene0761_00/img/4200.jpg +scene0761_00/img/4185.jpg scene0761_00/img/4350.jpg +scene0761_00/img/4230.jpg scene0761_00/img/4380.jpg +scene0761_00/img/4995.jpg scene0761_00/img/5100.jpg +scene0762_00/img/0.jpg scene0762_00/img/1590.jpg +scene0762_00/img/15.jpg scene0762_00/img/1500.jpg +scene0762_00/img/30.jpg scene0762_00/img/1470.jpg +scene0762_00/img/60.jpg scene0762_00/img/1590.jpg +scene0762_00/img/165.jpg scene0762_00/img/660.jpg +scene0762_00/img/180.jpg scene0762_00/img/225.jpg +scene0762_00/img/195.jpg scene0762_00/img/375.jpg +scene0762_00/img/375.jpg scene0762_00/img/585.jpg +scene0762_00/img/435.jpg scene0762_00/img/480.jpg +scene0762_00/img/450.jpg 
scene0762_00/img/645.jpg +scene0762_00/img/495.jpg scene0762_00/img/585.jpg +scene0762_00/img/1125.jpg scene0762_00/img/1215.jpg +scene0762_00/img/1215.jpg scene0762_00/img/1275.jpg +scene0762_00/img/1350.jpg scene0762_00/img/1395.jpg +scene0762_00/img/1515.jpg scene0762_00/img/1560.jpg +scene0763_00/img/75.jpg scene0763_00/img/450.jpg +scene0763_00/img/90.jpg scene0763_00/img/450.jpg +scene0763_00/img/105.jpg scene0763_00/img/255.jpg +scene0763_00/img/135.jpg scene0763_00/img/525.jpg +scene0763_00/img/225.jpg scene0763_00/img/300.jpg +scene0763_00/img/360.jpg scene0763_00/img/390.jpg +scene0763_00/img/405.jpg scene0763_00/img/450.jpg +scene0763_00/img/480.jpg scene0763_00/img/495.jpg +scene0763_00/img/525.jpg scene0763_00/img/555.jpg +scene0763_00/img/585.jpg scene0763_00/img/930.jpg +scene0763_00/img/585.jpg scene0763_00/img/945.jpg +scene0763_00/img/630.jpg scene0763_00/img/1035.jpg +scene0763_00/img/660.jpg scene0763_00/img/1080.jpg +scene0763_00/img/765.jpg scene0763_00/img/1035.jpg +scene0763_00/img/1035.jpg scene0763_00/img/1080.jpg +scene0764_00/img/105.jpg scene0764_00/img/390.jpg +scene0764_00/img/240.jpg scene0764_00/img/1080.jpg +scene0764_00/img/255.jpg scene0764_00/img/750.jpg +scene0764_00/img/270.jpg scene0764_00/img/705.jpg +scene0764_00/img/360.jpg scene0764_00/img/645.jpg +scene0764_00/img/465.jpg scene0764_00/img/555.jpg +scene0764_00/img/510.jpg scene0764_00/img/555.jpg +scene0764_00/img/555.jpg scene0764_00/img/2250.jpg +scene0764_00/img/675.jpg scene0764_00/img/1005.jpg +scene0764_00/img/885.jpg scene0764_00/img/2370.jpg +scene0764_00/img/900.jpg scene0764_00/img/2340.jpg +scene0764_00/img/1335.jpg scene0764_00/img/1485.jpg +scene0764_00/img/1635.jpg scene0764_00/img/1890.jpg +scene0764_00/img/1695.jpg scene0764_00/img/1830.jpg +scene0764_00/img/1905.jpg scene0764_00/img/1980.jpg +scene0765_00/img/45.jpg scene0765_00/img/135.jpg +scene0765_00/img/45.jpg scene0765_00/img/1905.jpg +scene0765_00/img/165.jpg scene0765_00/img/1185.jpg 
+scene0765_00/img/180.jpg scene0765_00/img/705.jpg +scene0765_00/img/360.jpg scene0765_00/img/780.jpg +scene0765_00/img/690.jpg scene0765_00/img/870.jpg +scene0765_00/img/870.jpg scene0765_00/img/885.jpg +scene0765_00/img/915.jpg scene0765_00/img/1860.jpg +scene0765_00/img/1035.jpg scene0765_00/img/1215.jpg +scene0765_00/img/1125.jpg scene0765_00/img/1890.jpg +scene0765_00/img/1155.jpg scene0765_00/img/1920.jpg +scene0765_00/img/1215.jpg scene0765_00/img/1935.jpg +scene0765_00/img/1500.jpg scene0765_00/img/1770.jpg +scene0765_00/img/1785.jpg scene0765_00/img/1800.jpg +scene0765_00/img/1875.jpg scene0765_00/img/1935.jpg +scene0766_00/img/150.jpg scene0766_00/img/1020.jpg +scene0766_00/img/210.jpg scene0766_00/img/960.jpg +scene0766_00/img/240.jpg scene0766_00/img/1680.jpg +scene0766_00/img/270.jpg scene0766_00/img/1395.jpg +scene0766_00/img/285.jpg scene0766_00/img/1380.jpg +scene0766_00/img/690.jpg scene0766_00/img/765.jpg +scene0766_00/img/690.jpg scene0766_00/img/1845.jpg +scene0766_00/img/1035.jpg scene0766_00/img/1515.jpg +scene0766_00/img/1050.jpg scene0766_00/img/1380.jpg +scene0766_00/img/1425.jpg scene0766_00/img/1485.jpg +scene0766_00/img/1605.jpg scene0766_00/img/1665.jpg +scene0766_00/img/1905.jpg scene0766_00/img/2640.jpg +scene0766_00/img/2040.jpg scene0766_00/img/2190.jpg +scene0766_00/img/2700.jpg scene0766_00/img/3420.jpg +scene0766_00/img/3345.jpg scene0766_00/img/3375.jpg +scene0767_00/img/30.jpg scene0767_00/img/270.jpg +scene0767_00/img/30.jpg scene0767_00/img/1350.jpg +scene0767_00/img/135.jpg scene0767_00/img/600.jpg +scene0767_00/img/150.jpg scene0767_00/img/570.jpg +scene0767_00/img/180.jpg scene0767_00/img/390.jpg +scene0767_00/img/195.jpg scene0767_00/img/1275.jpg +scene0767_00/img/255.jpg scene0767_00/img/1920.jpg +scene0767_00/img/570.jpg scene0767_00/img/615.jpg +scene0767_00/img/840.jpg scene0767_00/img/930.jpg +scene0767_00/img/990.jpg scene0767_00/img/1695.jpg +scene0767_00/img/1005.jpg scene0767_00/img/1110.jpg 
+scene0767_00/img/1170.jpg scene0767_00/img/1230.jpg +scene0767_00/img/1170.jpg scene0767_00/img/1590.jpg +scene0767_00/img/1350.jpg scene0767_00/img/1380.jpg +scene0767_00/img/1605.jpg scene0767_00/img/1755.jpg +scene0768_00/img/540.jpg scene0768_00/img/2745.jpg +scene0768_00/img/1095.jpg scene0768_00/img/3435.jpg +scene0768_00/img/1230.jpg scene0768_00/img/2070.jpg +scene0768_00/img/1320.jpg scene0768_00/img/1545.jpg +scene0768_00/img/1335.jpg scene0768_00/img/3390.jpg +scene0768_00/img/1575.jpg scene0768_00/img/3495.jpg +scene0768_00/img/1695.jpg scene0768_00/img/1740.jpg +scene0768_00/img/2190.jpg scene0768_00/img/2475.jpg +scene0768_00/img/2205.jpg scene0768_00/img/2865.jpg +scene0768_00/img/2415.jpg scene0768_00/img/2820.jpg +scene0768_00/img/2430.jpg scene0768_00/img/2775.jpg +scene0768_00/img/3315.jpg scene0768_00/img/4020.jpg +scene0768_00/img/3345.jpg scene0768_00/img/3375.jpg +scene0768_00/img/3345.jpg scene0768_00/img/3435.jpg +scene0768_00/img/3915.jpg scene0768_00/img/3990.jpg +scene0769_00/img/0.jpg scene0769_00/img/1185.jpg +scene0769_00/img/105.jpg scene0769_00/img/1185.jpg +scene0769_00/img/135.jpg scene0769_00/img/165.jpg +scene0769_00/img/150.jpg scene0769_00/img/195.jpg +scene0769_00/img/240.jpg scene0769_00/img/480.jpg +scene0769_00/img/255.jpg scene0769_00/img/315.jpg +scene0769_00/img/255.jpg scene0769_00/img/330.jpg +scene0769_00/img/300.jpg scene0769_00/img/705.jpg +scene0769_00/img/390.jpg scene0769_00/img/420.jpg +scene0769_00/img/540.jpg scene0769_00/img/705.jpg +scene0769_00/img/600.jpg scene0769_00/img/660.jpg +scene0769_00/img/645.jpg scene0769_00/img/660.jpg +scene0769_00/img/645.jpg scene0769_00/img/705.jpg +scene0769_00/img/750.jpg scene0769_00/img/795.jpg +scene0769_00/img/975.jpg scene0769_00/img/1005.jpg +scene0770_00/img/45.jpg scene0770_00/img/1425.jpg +scene0770_00/img/105.jpg scene0770_00/img/1365.jpg +scene0770_00/img/120.jpg scene0770_00/img/1380.jpg +scene0770_00/img/570.jpg scene0770_00/img/615.jpg 
+scene0770_00/img/720.jpg scene0770_00/img/1830.jpg +scene0770_00/img/975.jpg scene0770_00/img/1050.jpg +scene0770_00/img/1095.jpg scene0770_00/img/2100.jpg +scene0770_00/img/1170.jpg scene0770_00/img/1215.jpg +scene0770_00/img/1335.jpg scene0770_00/img/1365.jpg +scene0770_00/img/1530.jpg scene0770_00/img/1635.jpg +scene0770_00/img/1785.jpg scene0770_00/img/1845.jpg +scene0770_00/img/2235.jpg scene0770_00/img/2325.jpg +scene0770_00/img/2595.jpg scene0770_00/img/2700.jpg +scene0770_00/img/2895.jpg scene0770_00/img/2925.jpg +scene0770_00/img/3120.jpg scene0770_00/img/3180.jpg +scene0771_00/img/0.jpg scene0771_00/img/1050.jpg +scene0771_00/img/90.jpg scene0771_00/img/480.jpg +scene0771_00/img/105.jpg scene0771_00/img/465.jpg +scene0771_00/img/135.jpg scene0771_00/img/615.jpg +scene0771_00/img/375.jpg scene0771_00/img/450.jpg +scene0771_00/img/420.jpg scene0771_00/img/780.jpg +scene0771_00/img/435.jpg scene0771_00/img/930.jpg +scene0771_00/img/465.jpg scene0771_00/img/1020.jpg +scene0771_00/img/675.jpg scene0771_00/img/705.jpg +scene0771_00/img/690.jpg scene0771_00/img/855.jpg +scene0771_00/img/750.jpg scene0771_00/img/795.jpg +scene0771_00/img/750.jpg scene0771_00/img/810.jpg +scene0771_00/img/885.jpg scene0771_00/img/930.jpg +scene0771_00/img/900.jpg scene0771_00/img/960.jpg +scene0771_00/img/1005.jpg scene0771_00/img/1035.jpg +scene0772_00/img/30.jpg scene0772_00/img/1710.jpg +scene0772_00/img/75.jpg scene0772_00/img/165.jpg +scene0772_00/img/90.jpg scene0772_00/img/105.jpg +scene0772_00/img/345.jpg scene0772_00/img/510.jpg +scene0772_00/img/915.jpg scene0772_00/img/975.jpg +scene0772_00/img/1020.jpg scene0772_00/img/1050.jpg +scene0772_00/img/1080.jpg scene0772_00/img/1155.jpg +scene0772_00/img/1440.jpg scene0772_00/img/1635.jpg +scene0772_00/img/1470.jpg scene0772_00/img/1515.jpg +scene0772_00/img/1560.jpg scene0772_00/img/2190.jpg +scene0772_00/img/1605.jpg scene0772_00/img/1785.jpg +scene0772_00/img/1635.jpg scene0772_00/img/1755.jpg +scene0772_00/img/1680.jpg 
scene0772_00/img/1845.jpg +scene0772_00/img/1725.jpg scene0772_00/img/1830.jpg +scene0772_00/img/2205.jpg scene0772_00/img/2235.jpg +scene0773_00/img/15.jpg scene0773_00/img/105.jpg +scene0773_00/img/120.jpg scene0773_00/img/180.jpg +scene0773_00/img/300.jpg scene0773_00/img/375.jpg +scene0773_00/img/390.jpg scene0773_00/img/420.jpg +scene0773_00/img/765.jpg scene0773_00/img/885.jpg +scene0773_00/img/765.jpg scene0773_00/img/915.jpg +scene0773_00/img/960.jpg scene0773_00/img/1140.jpg +scene0773_00/img/1410.jpg scene0773_00/img/1800.jpg +scene0773_00/img/1425.jpg scene0773_00/img/1830.jpg +scene0773_00/img/1440.jpg scene0773_00/img/1800.jpg +scene0773_00/img/1470.jpg scene0773_00/img/1860.jpg +scene0773_00/img/1560.jpg scene0773_00/img/1605.jpg +scene0773_00/img/1740.jpg scene0773_00/img/1875.jpg +scene0773_00/img/1815.jpg scene0773_00/img/1920.jpg +scene0773_00/img/2040.jpg scene0773_00/img/2070.jpg +scene0774_00/img/30.jpg scene0774_00/img/1290.jpg +scene0774_00/img/210.jpg scene0774_00/img/1995.jpg +scene0774_00/img/225.jpg scene0774_00/img/345.jpg +scene0774_00/img/240.jpg scene0774_00/img/270.jpg +scene0774_00/img/465.jpg scene0774_00/img/495.jpg +scene0774_00/img/585.jpg scene0774_00/img/690.jpg +scene0774_00/img/720.jpg scene0774_00/img/765.jpg +scene0774_00/img/855.jpg scene0774_00/img/975.jpg +scene0774_00/img/1050.jpg scene0774_00/img/1080.jpg +scene0774_00/img/1080.jpg scene0774_00/img/1155.jpg +scene0774_00/img/1125.jpg scene0774_00/img/1440.jpg +scene0774_00/img/1560.jpg scene0774_00/img/1620.jpg +scene0774_00/img/1740.jpg scene0774_00/img/1860.jpg +scene0774_00/img/1905.jpg scene0774_00/img/1950.jpg +scene0774_00/img/2055.jpg scene0774_00/img/2100.jpg +scene0775_00/img/15.jpg scene0775_00/img/105.jpg +scene0775_00/img/30.jpg scene0775_00/img/1605.jpg +scene0775_00/img/240.jpg scene0775_00/img/345.jpg +scene0775_00/img/390.jpg scene0775_00/img/480.jpg +scene0775_00/img/495.jpg scene0775_00/img/525.jpg +scene0775_00/img/615.jpg scene0775_00/img/735.jpg 
+scene0775_00/img/765.jpg scene0775_00/img/840.jpg +scene0775_00/img/765.jpg scene0775_00/img/1005.jpg +scene0775_00/img/810.jpg scene0775_00/img/900.jpg +scene0775_00/img/825.jpg scene0775_00/img/1035.jpg +scene0775_00/img/1410.jpg scene0775_00/img/1440.jpg +scene0775_00/img/1455.jpg scene0775_00/img/1875.jpg +scene0775_00/img/1740.jpg scene0775_00/img/1935.jpg +scene0775_00/img/1800.jpg scene0775_00/img/1845.jpg +scene0775_00/img/2055.jpg scene0775_00/img/2085.jpg +scene0776_00/img/30.jpg scene0776_00/img/60.jpg +scene0776_00/img/90.jpg scene0776_00/img/210.jpg +scene0776_00/img/135.jpg scene0776_00/img/180.jpg +scene0776_00/img/375.jpg scene0776_00/img/3435.jpg +scene0776_00/img/420.jpg scene0776_00/img/555.jpg +scene0776_00/img/840.jpg scene0776_00/img/960.jpg +scene0776_00/img/1470.jpg scene0776_00/img/1575.jpg +scene0776_00/img/2370.jpg scene0776_00/img/2460.jpg +scene0776_00/img/2700.jpg scene0776_00/img/2775.jpg +scene0776_00/img/2910.jpg scene0776_00/img/2985.jpg +scene0776_00/img/2925.jpg scene0776_00/img/3120.jpg +scene0776_00/img/3075.jpg scene0776_00/img/3240.jpg +scene0776_00/img/3165.jpg scene0776_00/img/3225.jpg +scene0776_00/img/3195.jpg scene0776_00/img/3330.jpg +scene0776_00/img/3360.jpg scene0776_00/img/3405.jpg +scene0777_00/img/15.jpg scene0777_00/img/120.jpg +scene0777_00/img/75.jpg scene0777_00/img/1935.jpg +scene0777_00/img/105.jpg scene0777_00/img/1935.jpg +scene0777_00/img/105.jpg scene0777_00/img/2025.jpg +scene0777_00/img/285.jpg scene0777_00/img/1815.jpg +scene0777_00/img/465.jpg scene0777_00/img/555.jpg +scene0777_00/img/465.jpg scene0777_00/img/585.jpg +scene0777_00/img/570.jpg scene0777_00/img/705.jpg +scene0777_00/img/750.jpg scene0777_00/img/795.jpg +scene0777_00/img/855.jpg scene0777_00/img/1095.jpg +scene0777_00/img/930.jpg scene0777_00/img/1125.jpg +scene0777_00/img/1095.jpg scene0777_00/img/1170.jpg +scene0777_00/img/1125.jpg scene0777_00/img/1155.jpg +scene0777_00/img/1620.jpg scene0777_00/img/1635.jpg 
+scene0777_00/img/1815.jpg scene0777_00/img/1920.jpg +scene0778_00/img/0.jpg scene0778_00/img/195.jpg +scene0778_00/img/0.jpg scene0778_00/img/285.jpg +scene0778_00/img/45.jpg scene0778_00/img/1545.jpg +scene0778_00/img/60.jpg scene0778_00/img/165.jpg +scene0778_00/img/75.jpg scene0778_00/img/105.jpg +scene0778_00/img/120.jpg scene0778_00/img/165.jpg +scene0778_00/img/180.jpg scene0778_00/img/210.jpg +scene0778_00/img/345.jpg scene0778_00/img/1590.jpg +scene0778_00/img/345.jpg scene0778_00/img/1650.jpg +scene0778_00/img/435.jpg scene0778_00/img/1635.jpg +scene0778_00/img/465.jpg scene0778_00/img/555.jpg +scene0778_00/img/525.jpg scene0778_00/img/630.jpg +scene0778_00/img/645.jpg scene0778_00/img/795.jpg +scene0778_00/img/1170.jpg scene0778_00/img/1200.jpg +scene0778_00/img/1200.jpg scene0778_00/img/1320.jpg +scene0779_00/img/0.jpg scene0779_00/img/1335.jpg +scene0779_00/img/15.jpg scene0779_00/img/210.jpg +scene0779_00/img/15.jpg scene0779_00/img/270.jpg +scene0779_00/img/30.jpg scene0779_00/img/150.jpg +scene0779_00/img/60.jpg scene0779_00/img/105.jpg +scene0779_00/img/60.jpg scene0779_00/img/165.jpg +scene0779_00/img/225.jpg scene0779_00/img/285.jpg +scene0779_00/img/375.jpg scene0779_00/img/555.jpg +scene0779_00/img/420.jpg scene0779_00/img/555.jpg +scene0779_00/img/735.jpg scene0779_00/img/990.jpg +scene0779_00/img/780.jpg scene0779_00/img/810.jpg +scene0779_00/img/795.jpg scene0779_00/img/930.jpg +scene0779_00/img/795.jpg scene0779_00/img/945.jpg +scene0779_00/img/870.jpg scene0779_00/img/915.jpg +scene0779_00/img/1065.jpg scene0779_00/img/1110.jpg +scene0780_00/img/0.jpg scene0780_00/img/1635.jpg +scene0780_00/img/30.jpg scene0780_00/img/1695.jpg +scene0780_00/img/120.jpg scene0780_00/img/255.jpg +scene0780_00/img/165.jpg scene0780_00/img/300.jpg +scene0780_00/img/810.jpg scene0780_00/img/840.jpg +scene0780_00/img/810.jpg scene0780_00/img/870.jpg +scene0780_00/img/900.jpg scene0780_00/img/1140.jpg +scene0780_00/img/1365.jpg scene0780_00/img/1485.jpg 
+scene0780_00/img/1380.jpg scene0780_00/img/1725.jpg +scene0780_00/img/1425.jpg scene0780_00/img/1440.jpg +scene0780_00/img/1500.jpg scene0780_00/img/1650.jpg +scene0780_00/img/1530.jpg scene0780_00/img/1770.jpg +scene0780_00/img/1650.jpg scene0780_00/img/1695.jpg +scene0780_00/img/1695.jpg scene0780_00/img/1830.jpg +scene0780_00/img/1905.jpg scene0780_00/img/1935.jpg +scene0781_00/img/30.jpg scene0781_00/img/240.jpg +scene0781_00/img/75.jpg scene0781_00/img/2070.jpg +scene0781_00/img/120.jpg scene0781_00/img/2070.jpg +scene0781_00/img/210.jpg scene0781_00/img/2220.jpg +scene0781_00/img/225.jpg scene0781_00/img/1830.jpg +scene0781_00/img/240.jpg scene0781_00/img/2055.jpg +scene0781_00/img/285.jpg scene0781_00/img/2235.jpg +scene0781_00/img/360.jpg scene0781_00/img/2040.jpg +scene0781_00/img/1155.jpg scene0781_00/img/1215.jpg +scene0781_00/img/1230.jpg scene0781_00/img/1290.jpg +scene0781_00/img/1605.jpg scene0781_00/img/1650.jpg +scene0781_00/img/1710.jpg scene0781_00/img/1860.jpg +scene0781_00/img/1860.jpg scene0781_00/img/1920.jpg +scene0781_00/img/1875.jpg scene0781_00/img/2145.jpg +scene0781_00/img/2145.jpg scene0781_00/img/2220.jpg +scene0782_00/img/15.jpg scene0782_00/img/105.jpg +scene0782_00/img/75.jpg scene0782_00/img/1365.jpg +scene0782_00/img/90.jpg scene0782_00/img/420.jpg +scene0782_00/img/105.jpg scene0782_00/img/1350.jpg +scene0782_00/img/195.jpg scene0782_00/img/345.jpg +scene0782_00/img/240.jpg scene0782_00/img/1455.jpg +scene0782_00/img/255.jpg scene0782_00/img/1470.jpg +scene0782_00/img/375.jpg scene0782_00/img/1410.jpg +scene0782_00/img/435.jpg scene0782_00/img/510.jpg +scene0782_00/img/435.jpg scene0782_00/img/1485.jpg +scene0782_00/img/555.jpg scene0782_00/img/1365.jpg +scene0782_00/img/645.jpg scene0782_00/img/780.jpg +scene0782_00/img/990.jpg scene0782_00/img/1155.jpg +scene0782_00/img/1260.jpg scene0782_00/img/1290.jpg +scene0782_00/img/1335.jpg scene0782_00/img/1365.jpg +scene0783_00/img/0.jpg scene0783_00/img/1395.jpg 
+scene0783_00/img/120.jpg scene0783_00/img/1290.jpg +scene0783_00/img/120.jpg scene0783_00/img/1515.jpg +scene0783_00/img/150.jpg scene0783_00/img/1425.jpg +scene0783_00/img/210.jpg scene0783_00/img/1245.jpg +scene0783_00/img/345.jpg scene0783_00/img/1500.jpg +scene0783_00/img/420.jpg scene0783_00/img/540.jpg +scene0783_00/img/465.jpg scene0783_00/img/1305.jpg +scene0783_00/img/465.jpg scene0783_00/img/1530.jpg +scene0783_00/img/480.jpg scene0783_00/img/1290.jpg +scene0783_00/img/585.jpg scene0783_00/img/1395.jpg +scene0783_00/img/675.jpg scene0783_00/img/720.jpg +scene0783_00/img/780.jpg scene0783_00/img/870.jpg +scene0783_00/img/1245.jpg scene0783_00/img/1365.jpg +scene0783_00/img/1290.jpg scene0783_00/img/1320.jpg +scene0784_00/img/1125.jpg scene0784_00/img/1725.jpg +scene0784_00/img/1140.jpg scene0784_00/img/1785.jpg +scene0784_00/img/1875.jpg scene0784_00/img/4920.jpg +scene0784_00/img/1950.jpg scene0784_00/img/2820.jpg +scene0784_00/img/1965.jpg scene0784_00/img/2895.jpg +scene0784_00/img/1995.jpg scene0784_00/img/2745.jpg +scene0784_00/img/2115.jpg scene0784_00/img/2805.jpg +scene0784_00/img/2535.jpg scene0784_00/img/2580.jpg +scene0784_00/img/2655.jpg scene0784_00/img/2790.jpg +scene0784_00/img/2820.jpg scene0784_00/img/2865.jpg +scene0784_00/img/3825.jpg scene0784_00/img/4785.jpg +scene0784_00/img/3855.jpg scene0784_00/img/4080.jpg +scene0784_00/img/3885.jpg scene0784_00/img/4440.jpg +scene0784_00/img/3960.jpg scene0784_00/img/4020.jpg +scene0784_00/img/4215.jpg scene0784_00/img/4260.jpg +scene0785_00/img/90.jpg scene0785_00/img/120.jpg +scene0785_00/img/105.jpg scene0785_00/img/1995.jpg +scene0785_00/img/270.jpg scene0785_00/img/555.jpg +scene0785_00/img/450.jpg scene0785_00/img/555.jpg +scene0785_00/img/540.jpg scene0785_00/img/3900.jpg +scene0785_00/img/720.jpg scene0785_00/img/3330.jpg +scene0785_00/img/750.jpg scene0785_00/img/795.jpg +scene0785_00/img/765.jpg scene0785_00/img/3930.jpg +scene0785_00/img/885.jpg scene0785_00/img/3975.jpg 
+scene0785_00/img/1110.jpg scene0785_00/img/1305.jpg +scene0785_00/img/1185.jpg scene0785_00/img/1320.jpg +scene0785_00/img/1530.jpg scene0785_00/img/1710.jpg +scene0785_00/img/2835.jpg scene0785_00/img/2955.jpg +scene0785_00/img/2955.jpg scene0785_00/img/2970.jpg +scene0785_00/img/3210.jpg scene0785_00/img/3405.jpg +scene0786_00/img/15.jpg scene0786_00/img/1140.jpg +scene0786_00/img/30.jpg scene0786_00/img/1155.jpg +scene0786_00/img/225.jpg scene0786_00/img/300.jpg +scene0786_00/img/240.jpg scene0786_00/img/285.jpg +scene0786_00/img/240.jpg scene0786_00/img/1755.jpg +scene0786_00/img/345.jpg scene0786_00/img/375.jpg +scene0786_00/img/345.jpg scene0786_00/img/495.jpg +scene0786_00/img/540.jpg scene0786_00/img/630.jpg +scene0786_00/img/855.jpg scene0786_00/img/915.jpg +scene0786_00/img/1080.jpg scene0786_00/img/1275.jpg +scene0786_00/img/1290.jpg scene0786_00/img/1335.jpg +scene0786_00/img/1290.jpg scene0786_00/img/1635.jpg +scene0786_00/img/1365.jpg scene0786_00/img/1545.jpg +scene0786_00/img/1530.jpg scene0786_00/img/1620.jpg +scene0786_00/img/1695.jpg scene0786_00/img/1725.jpg +scene0787_00/img/30.jpg scene0787_00/img/210.jpg +scene0787_00/img/165.jpg scene0787_00/img/390.jpg +scene0787_00/img/540.jpg scene0787_00/img/2865.jpg +scene0787_00/img/615.jpg scene0787_00/img/855.jpg +scene0787_00/img/645.jpg scene0787_00/img/2880.jpg +scene0787_00/img/660.jpg scene0787_00/img/690.jpg +scene0787_00/img/930.jpg scene0787_00/img/990.jpg +scene0787_00/img/945.jpg scene0787_00/img/990.jpg +scene0787_00/img/1680.jpg scene0787_00/img/1725.jpg +scene0787_00/img/1755.jpg scene0787_00/img/2355.jpg +scene0787_00/img/1770.jpg scene0787_00/img/1875.jpg +scene0787_00/img/1815.jpg scene0787_00/img/1890.jpg +scene0787_00/img/2145.jpg scene0787_00/img/2175.jpg +scene0787_00/img/2415.jpg scene0787_00/img/2430.jpg +scene0787_00/img/2475.jpg scene0787_00/img/2745.jpg +scene0788_00/img/75.jpg scene0788_00/img/90.jpg +scene0788_00/img/150.jpg scene0788_00/img/195.jpg 
+scene0788_00/img/150.jpg scene0788_00/img/720.jpg +scene0788_00/img/165.jpg scene0788_00/img/705.jpg +scene0788_00/img/180.jpg scene0788_00/img/195.jpg +scene0788_00/img/285.jpg scene0788_00/img/375.jpg +scene0788_00/img/360.jpg scene0788_00/img/375.jpg +scene0788_00/img/375.jpg scene0788_00/img/600.jpg +scene0788_00/img/390.jpg scene0788_00/img/675.jpg +scene0788_00/img/495.jpg scene0788_00/img/570.jpg +scene0788_00/img/510.jpg scene0788_00/img/570.jpg +scene0788_00/img/540.jpg scene0788_00/img/645.jpg +scene0788_00/img/555.jpg scene0788_00/img/615.jpg +scene0788_00/img/660.jpg scene0788_00/img/690.jpg +scene0788_00/img/975.jpg scene0788_00/img/1005.jpg +scene0789_00/img/45.jpg scene0789_00/img/210.jpg +scene0789_00/img/60.jpg scene0789_00/img/210.jpg +scene0789_00/img/165.jpg scene0789_00/img/210.jpg +scene0789_00/img/165.jpg scene0789_00/img/300.jpg +scene0789_00/img/165.jpg scene0789_00/img/360.jpg +scene0789_00/img/195.jpg scene0789_00/img/465.jpg +scene0789_00/img/210.jpg scene0789_00/img/240.jpg +scene0789_00/img/345.jpg scene0789_00/img/435.jpg +scene0789_00/img/480.jpg scene0789_00/img/765.jpg +scene0789_00/img/540.jpg scene0789_00/img/750.jpg +scene0789_00/img/555.jpg scene0789_00/img/750.jpg +scene0789_00/img/570.jpg scene0789_00/img/630.jpg +scene0789_00/img/630.jpg scene0789_00/img/750.jpg +scene0789_00/img/645.jpg scene0789_00/img/780.jpg +scene0789_00/img/660.jpg scene0789_00/img/750.jpg +scene0790_00/img/30.jpg scene0790_00/img/60.jpg +scene0790_00/img/90.jpg scene0790_00/img/1005.jpg +scene0790_00/img/180.jpg scene0790_00/img/315.jpg +scene0790_00/img/225.jpg scene0790_00/img/300.jpg +scene0790_00/img/330.jpg scene0790_00/img/375.jpg +scene0790_00/img/360.jpg scene0790_00/img/420.jpg +scene0790_00/img/390.jpg scene0790_00/img/465.jpg +scene0790_00/img/465.jpg scene0790_00/img/525.jpg +scene0790_00/img/480.jpg scene0790_00/img/525.jpg +scene0790_00/img/555.jpg scene0790_00/img/585.jpg +scene0790_00/img/675.jpg scene0790_00/img/765.jpg 
+scene0790_00/img/690.jpg scene0790_00/img/780.jpg +scene0790_00/img/705.jpg scene0790_00/img/825.jpg +scene0790_00/img/885.jpg scene0790_00/img/975.jpg +scene0790_00/img/930.jpg scene0790_00/img/960.jpg +scene0791_00/img/0.jpg scene0791_00/img/2340.jpg +scene0791_00/img/15.jpg scene0791_00/img/2280.jpg +scene0791_00/img/60.jpg scene0791_00/img/1620.jpg +scene0791_00/img/60.jpg scene0791_00/img/1695.jpg +scene0791_00/img/105.jpg scene0791_00/img/135.jpg +scene0791_00/img/165.jpg scene0791_00/img/2370.jpg +scene0791_00/img/1515.jpg scene0791_00/img/2160.jpg +scene0791_00/img/1545.jpg scene0791_00/img/1650.jpg +scene0791_00/img/1545.jpg scene0791_00/img/1665.jpg +scene0791_00/img/1545.jpg scene0791_00/img/2190.jpg +scene0791_00/img/1590.jpg scene0791_00/img/2355.jpg +scene0791_00/img/1890.jpg scene0791_00/img/2010.jpg +scene0791_00/img/1905.jpg scene0791_00/img/2010.jpg +scene0791_00/img/2205.jpg scene0791_00/img/2235.jpg +scene0791_00/img/2250.jpg scene0791_00/img/2310.jpg +scene0792_00/img/30.jpg scene0792_00/img/225.jpg +scene0792_00/img/45.jpg scene0792_00/img/240.jpg +scene0792_00/img/60.jpg scene0792_00/img/180.jpg +scene0792_00/img/60.jpg scene0792_00/img/255.jpg +scene0792_00/img/90.jpg scene0792_00/img/180.jpg +scene0792_00/img/150.jpg scene0792_00/img/195.jpg +scene0792_00/img/150.jpg scene0792_00/img/225.jpg +scene0792_00/img/255.jpg scene0792_00/img/330.jpg +scene0792_00/img/390.jpg scene0792_00/img/450.jpg +scene0792_00/img/450.jpg scene0792_00/img/525.jpg +scene0792_00/img/450.jpg scene0792_00/img/540.jpg +scene0792_00/img/555.jpg scene0792_00/img/600.jpg +scene0792_00/img/585.jpg scene0792_00/img/615.jpg +scene0792_00/img/600.jpg scene0792_00/img/660.jpg +scene0792_00/img/615.jpg scene0792_00/img/630.jpg +scene0793_00/img/0.jpg scene0793_00/img/1725.jpg +scene0793_00/img/105.jpg scene0793_00/img/1560.jpg +scene0793_00/img/525.jpg scene0793_00/img/1770.jpg +scene0793_00/img/540.jpg scene0793_00/img/555.jpg +scene0793_00/img/570.jpg 
scene0793_00/img/2790.jpg +scene0793_00/img/645.jpg scene0793_00/img/2580.jpg +scene0793_00/img/660.jpg scene0793_00/img/720.jpg +scene0793_00/img/1185.jpg scene0793_00/img/1245.jpg +scene0793_00/img/1245.jpg scene0793_00/img/1905.jpg +scene0793_00/img/1650.jpg scene0793_00/img/1695.jpg +scene0793_00/img/1890.jpg scene0793_00/img/2145.jpg +scene0793_00/img/1920.jpg scene0793_00/img/1950.jpg +scene0793_00/img/2025.jpg scene0793_00/img/3375.jpg +scene0793_00/img/2100.jpg scene0793_00/img/2175.jpg +scene0793_00/img/2385.jpg scene0793_00/img/2430.jpg +scene0794_00/img/15.jpg scene0794_00/img/60.jpg +scene0794_00/img/15.jpg scene0794_00/img/825.jpg +scene0794_00/img/45.jpg scene0794_00/img/945.jpg +scene0794_00/img/60.jpg scene0794_00/img/570.jpg +scene0794_00/img/120.jpg scene0794_00/img/300.jpg +scene0794_00/img/120.jpg scene0794_00/img/390.jpg +scene0794_00/img/150.jpg scene0794_00/img/930.jpg +scene0794_00/img/165.jpg scene0794_00/img/840.jpg +scene0794_00/img/330.jpg scene0794_00/img/810.jpg +scene0794_00/img/345.jpg scene0794_00/img/540.jpg +scene0794_00/img/345.jpg scene0794_00/img/795.jpg +scene0794_00/img/420.jpg scene0794_00/img/660.jpg +scene0794_00/img/645.jpg scene0794_00/img/675.jpg +scene0794_00/img/765.jpg scene0794_00/img/1110.jpg +scene0794_00/img/930.jpg scene0794_00/img/960.jpg +scene0795_00/img/0.jpg scene0795_00/img/300.jpg +scene0795_00/img/30.jpg scene0795_00/img/90.jpg +scene0795_00/img/45.jpg scene0795_00/img/405.jpg +scene0795_00/img/60.jpg scene0795_00/img/525.jpg +scene0795_00/img/75.jpg scene0795_00/img/150.jpg +scene0795_00/img/75.jpg scene0795_00/img/195.jpg +scene0795_00/img/165.jpg scene0795_00/img/765.jpg +scene0795_00/img/420.jpg scene0795_00/img/510.jpg +scene0795_00/img/465.jpg scene0795_00/img/720.jpg +scene0795_00/img/480.jpg scene0795_00/img/750.jpg +scene0795_00/img/495.jpg scene0795_00/img/660.jpg +scene0795_00/img/525.jpg scene0795_00/img/675.jpg +scene0795_00/img/615.jpg scene0795_00/img/795.jpg +scene0795_00/img/660.jpg 
scene0795_00/img/810.jpg +scene0795_00/img/675.jpg scene0795_00/img/780.jpg +scene0796_00/img/30.jpg scene0796_00/img/210.jpg +scene0796_00/img/75.jpg scene0796_00/img/360.jpg +scene0796_00/img/225.jpg scene0796_00/img/285.jpg +scene0796_00/img/270.jpg scene0796_00/img/330.jpg +scene0796_00/img/360.jpg scene0796_00/img/450.jpg +scene0796_00/img/540.jpg scene0796_00/img/855.jpg +scene0796_00/img/540.jpg scene0796_00/img/1005.jpg +scene0796_00/img/555.jpg scene0796_00/img/885.jpg +scene0796_00/img/615.jpg scene0796_00/img/840.jpg +scene0796_00/img/645.jpg scene0796_00/img/795.jpg +scene0796_00/img/645.jpg scene0796_00/img/945.jpg +scene0796_00/img/660.jpg scene0796_00/img/840.jpg +scene0796_00/img/855.jpg scene0796_00/img/885.jpg +scene0796_00/img/885.jpg scene0796_00/img/990.jpg +scene0796_00/img/1065.jpg scene0796_00/img/1095.jpg +scene0797_00/img/15.jpg scene0797_00/img/30.jpg +scene0797_00/img/90.jpg scene0797_00/img/1260.jpg +scene0797_00/img/135.jpg scene0797_00/img/150.jpg +scene0797_00/img/195.jpg scene0797_00/img/300.jpg +scene0797_00/img/210.jpg scene0797_00/img/240.jpg +scene0797_00/img/285.jpg scene0797_00/img/315.jpg +scene0797_00/img/300.jpg scene0797_00/img/435.jpg +scene0797_00/img/345.jpg scene0797_00/img/1350.jpg +scene0797_00/img/420.jpg scene0797_00/img/510.jpg +scene0797_00/img/600.jpg scene0797_00/img/615.jpg +scene0797_00/img/705.jpg scene0797_00/img/765.jpg +scene0797_00/img/720.jpg scene0797_00/img/780.jpg +scene0797_00/img/990.jpg scene0797_00/img/1020.jpg +scene0797_00/img/1155.jpg scene0797_00/img/1170.jpg +scene0797_00/img/1215.jpg scene0797_00/img/1230.jpg +scene0798_00/img/15.jpg scene0798_00/img/135.jpg +scene0798_00/img/60.jpg scene0798_00/img/120.jpg +scene0798_00/img/195.jpg scene0798_00/img/705.jpg +scene0798_00/img/210.jpg scene0798_00/img/780.jpg +scene0798_00/img/300.jpg scene0798_00/img/360.jpg +scene0798_00/img/330.jpg scene0798_00/img/375.jpg +scene0798_00/img/435.jpg scene0798_00/img/615.jpg +scene0798_00/img/480.jpg 
scene0798_00/img/600.jpg +scene0798_00/img/495.jpg scene0798_00/img/705.jpg +scene0798_00/img/510.jpg scene0798_00/img/540.jpg +scene0798_00/img/555.jpg scene0798_00/img/810.jpg +scene0798_00/img/600.jpg scene0798_00/img/735.jpg +scene0798_00/img/630.jpg scene0798_00/img/645.jpg +scene0798_00/img/630.jpg scene0798_00/img/780.jpg +scene0798_00/img/795.jpg scene0798_00/img/840.jpg +scene0799_00/img/0.jpg scene0799_00/img/1155.jpg +scene0799_00/img/15.jpg scene0799_00/img/195.jpg +scene0799_00/img/75.jpg scene0799_00/img/1155.jpg +scene0799_00/img/90.jpg scene0799_00/img/1065.jpg +scene0799_00/img/90.jpg scene0799_00/img/1125.jpg +scene0799_00/img/180.jpg scene0799_00/img/1095.jpg +scene0799_00/img/180.jpg scene0799_00/img/1125.jpg +scene0799_00/img/240.jpg scene0799_00/img/285.jpg +scene0799_00/img/405.jpg scene0799_00/img/450.jpg +scene0799_00/img/510.jpg scene0799_00/img/555.jpg +scene0799_00/img/645.jpg scene0799_00/img/720.jpg +scene0799_00/img/780.jpg scene0799_00/img/810.jpg +scene0799_00/img/810.jpg scene0799_00/img/840.jpg +scene0799_00/img/855.jpg scene0799_00/img/975.jpg +scene0799_00/img/1080.jpg scene0799_00/img/1125.jpg +scene0800_00/img/120.jpg scene0800_00/img/735.jpg +scene0800_00/img/165.jpg scene0800_00/img/225.jpg +scene0800_00/img/180.jpg scene0800_00/img/210.jpg +scene0800_00/img/225.jpg scene0800_00/img/240.jpg +scene0800_00/img/240.jpg scene0800_00/img/270.jpg +scene0800_00/img/255.jpg scene0800_00/img/315.jpg +scene0800_00/img/255.jpg scene0800_00/img/330.jpg +scene0800_00/img/285.jpg scene0800_00/img/360.jpg +scene0800_00/img/375.jpg scene0800_00/img/405.jpg +scene0800_00/img/435.jpg scene0800_00/img/480.jpg +scene0800_00/img/450.jpg scene0800_00/img/465.jpg +scene0800_00/img/495.jpg scene0800_00/img/540.jpg +scene0800_00/img/555.jpg scene0800_00/img/585.jpg +scene0800_00/img/645.jpg scene0800_00/img/705.jpg +scene0800_00/img/705.jpg scene0800_00/img/735.jpg +scene0801_00/img/15.jpg scene0801_00/img/495.jpg +scene0801_00/img/30.jpg 
scene0801_00/img/60.jpg +scene0801_00/img/30.jpg scene0801_00/img/165.jpg +scene0801_00/img/90.jpg scene0801_00/img/255.jpg +scene0801_00/img/105.jpg scene0801_00/img/225.jpg +scene0801_00/img/165.jpg scene0801_00/img/255.jpg +scene0801_00/img/165.jpg scene0801_00/img/285.jpg +scene0801_00/img/195.jpg scene0801_00/img/270.jpg +scene0801_00/img/195.jpg scene0801_00/img/480.jpg +scene0801_00/img/195.jpg scene0801_00/img/570.jpg +scene0801_00/img/255.jpg scene0801_00/img/315.jpg +scene0801_00/img/315.jpg scene0801_00/img/465.jpg +scene0801_00/img/345.jpg scene0801_00/img/525.jpg +scene0801_00/img/360.jpg scene0801_00/img/465.jpg +scene0801_00/img/420.jpg scene0801_00/img/495.jpg +scene0802_00/img/15.jpg scene0802_00/img/120.jpg +scene0802_00/img/135.jpg scene0802_00/img/255.jpg +scene0802_00/img/495.jpg scene0802_00/img/570.jpg +scene0802_00/img/570.jpg scene0802_00/img/660.jpg +scene0802_00/img/885.jpg scene0802_00/img/990.jpg +scene0802_00/img/885.jpg scene0802_00/img/1125.jpg +scene0802_00/img/975.jpg scene0802_00/img/1260.jpg +scene0802_00/img/1005.jpg scene0802_00/img/1110.jpg +scene0802_00/img/1050.jpg scene0802_00/img/1230.jpg +scene0802_00/img/1080.jpg scene0802_00/img/1215.jpg +scene0802_00/img/1125.jpg scene0802_00/img/1200.jpg +scene0802_00/img/1125.jpg scene0802_00/img/1260.jpg +scene0802_00/img/1125.jpg scene0802_00/img/1290.jpg +scene0802_00/img/1170.jpg scene0802_00/img/1200.jpg +scene0802_00/img/1275.jpg scene0802_00/img/1365.jpg +scene0803_00/img/0.jpg scene0803_00/img/1770.jpg +scene0803_00/img/120.jpg scene0803_00/img/1770.jpg +scene0803_00/img/150.jpg scene0803_00/img/1650.jpg +scene0803_00/img/180.jpg scene0803_00/img/330.jpg +scene0803_00/img/240.jpg scene0803_00/img/1710.jpg +scene0803_00/img/630.jpg scene0803_00/img/720.jpg +scene0803_00/img/630.jpg scene0803_00/img/915.jpg +scene0803_00/img/780.jpg scene0803_00/img/960.jpg +scene0803_00/img/930.jpg scene0803_00/img/1380.jpg +scene0803_00/img/990.jpg scene0803_00/img/1020.jpg 
+scene0803_00/img/1095.jpg scene0803_00/img/1425.jpg +scene0803_00/img/1260.jpg scene0803_00/img/1530.jpg +scene0803_00/img/1425.jpg scene0803_00/img/1440.jpg +scene0803_00/img/1620.jpg scene0803_00/img/1650.jpg +scene0803_00/img/1620.jpg scene0803_00/img/1665.jpg +scene0804_00/img/15.jpg scene0804_00/img/960.jpg +scene0804_00/img/120.jpg scene0804_00/img/180.jpg +scene0804_00/img/165.jpg scene0804_00/img/195.jpg +scene0804_00/img/180.jpg scene0804_00/img/195.jpg +scene0804_00/img/180.jpg scene0804_00/img/210.jpg +scene0804_00/img/255.jpg scene0804_00/img/585.jpg +scene0804_00/img/270.jpg scene0804_00/img/570.jpg +scene0804_00/img/450.jpg scene0804_00/img/480.jpg +scene0804_00/img/510.jpg scene0804_00/img/585.jpg +scene0804_00/img/720.jpg scene0804_00/img/840.jpg +scene0804_00/img/735.jpg scene0804_00/img/765.jpg +scene0804_00/img/795.jpg scene0804_00/img/840.jpg +scene0804_00/img/840.jpg scene0804_00/img/870.jpg +scene0804_00/img/840.jpg scene0804_00/img/885.jpg +scene0804_00/img/870.jpg scene0804_00/img/1020.jpg +scene0805_00/img/30.jpg scene0805_00/img/840.jpg +scene0805_00/img/45.jpg scene0805_00/img/90.jpg +scene0805_00/img/60.jpg scene0805_00/img/105.jpg +scene0805_00/img/75.jpg scene0805_00/img/105.jpg +scene0805_00/img/90.jpg scene0805_00/img/930.jpg +scene0805_00/img/165.jpg scene0805_00/img/315.jpg +scene0805_00/img/165.jpg scene0805_00/img/330.jpg +scene0805_00/img/180.jpg scene0805_00/img/240.jpg +scene0805_00/img/210.jpg scene0805_00/img/270.jpg +scene0805_00/img/435.jpg scene0805_00/img/450.jpg +scene0805_00/img/465.jpg scene0805_00/img/495.jpg +scene0805_00/img/495.jpg scene0805_00/img/525.jpg +scene0805_00/img/585.jpg scene0805_00/img/615.jpg +scene0805_00/img/780.jpg scene0805_00/img/870.jpg +scene0805_00/img/795.jpg scene0805_00/img/900.jpg +scene0806_00/img/15.jpg scene0806_00/img/900.jpg +scene0806_00/img/60.jpg scene0806_00/img/300.jpg +scene0806_00/img/75.jpg scene0806_00/img/450.jpg +scene0806_00/img/75.jpg scene0806_00/img/1140.jpg 
+scene0806_00/img/150.jpg scene0806_00/img/960.jpg +scene0806_00/img/180.jpg scene0806_00/img/1020.jpg +scene0806_00/img/195.jpg scene0806_00/img/300.jpg +scene0806_00/img/225.jpg scene0806_00/img/915.jpg +scene0806_00/img/225.jpg scene0806_00/img/1095.jpg +scene0806_00/img/255.jpg scene0806_00/img/630.jpg +scene0806_00/img/285.jpg scene0806_00/img/450.jpg +scene0806_00/img/375.jpg scene0806_00/img/735.jpg +scene0806_00/img/420.jpg scene0806_00/img/765.jpg +scene0806_00/img/510.jpg scene0806_00/img/630.jpg +scene0806_00/img/705.jpg scene0806_00/img/795.jpg diff --git a/third_party/SGMNet/assets/teaser.png b/third_party/SGMNet/assets/teaser.png new file mode 100644 index 0000000000000000000000000000000000000000..6d14477dc594b50c2a85a8c9e8b2cebb1c3d3c46 --- /dev/null +++ b/third_party/SGMNet/assets/teaser.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cef9b48d3415258d39bc6966e01d5fce62e60b686a255e7f0592d48b306a791a +size 231254 diff --git a/third_party/SGMNet/components/__init__.py b/third_party/SGMNet/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c10d2027efcf985c68abf7185f28b947012cae45 --- /dev/null +++ b/third_party/SGMNet/components/__init__.py @@ -0,0 +1,3 @@ +from . import extractors +from . 
import matchers +from .load_component import load_component \ No newline at end of file diff --git a/third_party/SGMNet/components/evaluators.py b/third_party/SGMNet/components/evaluators.py new file mode 100644 index 0000000000000000000000000000000000000000..59bf0bd7ce3dd085dc86072fc41bad24b9805991 --- /dev/null +++ b/third_party/SGMNet/components/evaluators.py @@ -0,0 +1,127 @@ +import numpy as np +import sys +import os +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, ROOT_DIR) + +from utils import evaluation_utils,metrics,fm_utils +import cv2 + +class auc_eval: + def __init__(self,config): + self.config=config + self.err_r,self.err_t,self.err=[],[],[] + self.ms=[] + self.precision=[] + + def run(self,info): + E,r_gt,t_gt=info['e'],info['r_gt'],info['t_gt'] + K1,K2,img1,img2=info['K1'],info['K2'],info['img1'],info['img2'] + corr1,corr2=info['corr1'],info['corr2'] + corr1,corr2=evaluation_utils.normalize_intrinsic(corr1,K1),evaluation_utils.normalize_intrinsic(corr2,K2) + size1,size2=max(img1.shape),max(img2.shape) + scale1,scale2=self.config['rescale']/size1,self.config['rescale']/size2 + #ransac + ransac_th=4./((K1[0,0]+K1[1,1])*scale1+(K2[0,0]+K2[1,1])*scale2) + R_hat,t_hat,E_hat=self.estimate(corr1,corr2,ransac_th) + #get pose error + err_r, err_t=metrics.evaluate_R_t(r_gt,t_gt,R_hat,t_hat) + err=max(err_r,err_t) + + if len(corr1)>1: + inlier_mask=metrics.compute_epi_inlier(corr1,corr2,E,self.config['inlier_th']) + precision=inlier_mask.mean() + ms=inlier_mask.sum()/len(info['x1']) + else: + ms=precision=0 + + return {'err_r':err_r,'err_t':err_t,'err':err,'ms':ms,'precision':precision} + + def res_inqueue(self,res): + self.err_r.append(res['err_r']),self.err_t.append(res['err_t']),self.err.append(res['err']) + self.ms.append(res['ms']),self.precision.append(res['precision']) + + def estimate(self,corr1,corr2,th): + num_inlier = -1 + if corr1.shape[0] >= 5: + E, mask_new = cv2.findEssentialMat(corr1, 
corr2,method=cv2.RANSAC, threshold=th,prob=1-1e-5) + if E is None: + E=[np.eye(3)] + for _E in np.split(E, len(E) / 3): + _num_inlier, _R, _t, _ = cv2.recoverPose(_E, corr1, corr2,np.eye(3), 1e9,mask=mask_new) + if _num_inlier > num_inlier: + num_inlier = _num_inlier + R = _R + t = _t + E = _E + else: + E,R,t=np.eye(3),np.eye(3),np.zeros(3) + return R,t,E + + def parse(self): + ths = np.arange(7) * 5 + approx_auc=metrics.approx_pose_auc(self.err,ths) + exact_auc=metrics.pose_auc(self.err,ths) + mean_pre,mean_ms=np.mean(np.asarray(self.precision)),np.mean(np.asarray(self.ms)) + + print('auc th: ',ths[1:]) + print('approx auc: ',approx_auc) + print('exact auc: ', exact_auc) + print('mean match score: ',mean_ms*100) + print('mean precision: ',mean_pre*100) + + + +class FMbench_eval: + + def __init__(self,config): + self.config=config + self.pre,self.pre_post,self.sgd=[],[],[] + self.num_corr,self.num_corr_post=[],[] + + def run(self,info): + corr1,corr2=info['corr1'],info['corr2'] + F=info['f'] + img1,img2=info['img1'],info['img2'] + + if len(corr1)>1: + pre_bf=fm_utils.compute_inlier_rate(corr1,corr2,np.flip(img1.shape[:2]),np.flip(img2.shape[:2]),F,th=self.config['inlier_th']).mean() + F_hat,mask_F=cv2.findFundamentalMat(corr1,corr2,method=cv2.FM_RANSAC,ransacReprojThreshold=1,confidence=1-1e-5) + if F_hat is None: + F_hat=np.ones([3,3]) + mask_F=np.ones([len(corr1)]).astype(bool) + else: + mask_F=mask_F.squeeze().astype(bool) + F_hat=F_hat[:3] + pre_af=fm_utils.compute_inlier_rate(corr1[mask_F],corr2[mask_F],np.flip(img1.shape[:2]),np.flip(img2.shape[:2]),F,th=self.config['inlier_th']).mean() + num_corr_af=mask_F.sum() + num_corr=len(corr1) + sgd=fm_utils.compute_SGD(F,F_hat,np.flip(img1.shape[:2]),np.flip(img2.shape[:2])) + else: + pre_bf,pre_af,sgd=0,0,1e8 + num_corr,num_corr_af=0,0 + return {'pre':pre_bf,'pre_post':pre_af,'sgd':sgd,'num_corr':num_corr,'num_corr_post':num_corr_af} + + + def res_inqueue(self,res): + 
self.pre.append(res['pre']),self.pre_post.append(res['pre_post']),self.sgd.append(res['sgd']) + self.num_corr.append(res['num_corr']),self.num_corr_post.append(res['num_corr_post']) + + def parse(self): + for seq_index in range(len(self.config['seq'])): + seq=self.config['seq'][seq_index] + offset=seq_index*1000 + pre=np.asarray(self.pre)[offset:offset+1000].mean() + pre_post=np.asarray(self.pre_post)[offset:offset+1000].mean() + num_corr=np.asarray(self.num_corr)[offset:offset+1000].mean() + num_corr_post=np.asarray(self.num_corr_post)[offset:offset+1000].mean() + f_recall=(np.asarray(self.sgd)[offset:offset+1000]self.p_th,index[:,0],index2.squeeze(0) + mask_mc=index2[index] == torch.arange(len(p)).cuda() + mask=mask_th&mask_mc + index1,index2=torch.nonzero(mask).squeeze(1),index[mask] + return index1,index2 + + +class NN_Matcher(object): + + def __init__(self,config): + config=namedtuple('config',config.keys())(*config.values()) + self.mutual_check=config.mutual_check + self.ratio_th=config.ratio_th + + def run(self,test_data): + desc1,desc2,x1,x2=test_data['desc1'],test_data['desc2'],test_data['x1'],test_data['x2'] + desc_mat=np.sqrt(abs((desc1**2).sum(-1)[:,np.newaxis]+(desc2**2).sum(-1)[np.newaxis]-2*desc1@desc2.T)) + nn_index=np.argpartition(desc_mat,kth=(1,2),axis=-1) + dis_value12=np.take_along_axis(desc_mat,nn_index, axis=-1) + ratio_score=dis_value12[:,0]/dis_value12[:,1] + nn_index1=nn_index[:,0] + nn_index2=np.argmin(desc_mat,axis=0) + mask_ratio,mask_mutual=ratio_scoreself.config['angle_th'][0],angle_listself.config['overlap_th'][0],overlap_scoreself.config['min_corr'] and len(incorr_index1)>self.config['min_incorr'] and len(incorr_index2)>self.config['min_incorr']: + info['corr'].append(corr_index),info['incorr1'].append(incorr_index1),info['incorr2'].append(incorr_index2) + info['dR'].append(dR),info['dt'].append(dt),info['K1'].append(K1),info['K2'].append(K2),info['img_path1'].append(img_path1),info['img_path2'].append(img_path2) + 
info['fea_path1'].append(fea_path1),info['fea_path2'].append(fea_path2),info['size1'].append(size1),info['size2'].append(size2) + sample_number+=1 + if sample_number==sample_target: + break + info['pair_num']=sample_number + #dump info + self.dump_info(seq,info) + + + def collect_meta(self): + print('collecting meta info...') + dump_path,seq_list=[],[] + if self.config['dump_train']: + dump_path.append(os.path.join(self.config['dataset_dump_dir'],'train')) + seq_list.append(self.train_list) + if self.config['dump_valid']: + dump_path.append(os.path.join(self.config['dataset_dump_dir'],'valid')) + seq_list.append(self.valid_list) + for pth,seqs in zip(dump_path,seq_list): + if not os.path.exists(pth): + os.mkdir(pth) + pair_num_list,total_pair=[],0 + for seq_index in range(len(seqs)): + seq=seqs[seq_index] + pair_num=np.loadtxt(os.path.join(self.config['dataset_dump_dir'],seq,'pair_num.txt'),dtype=int) + pair_num_list.append(str(pair_num)) + total_pair+=pair_num + pair_num_list=np.stack([np.asarray(seqs,dtype=str),np.asarray(pair_num_list,dtype=str)],axis=1) + pair_num_list=np.concatenate([np.asarray([['total',str(total_pair)]]),pair_num_list],axis=0) + np.savetxt(os.path.join(pth,'pair_num.txt'),pair_num_list,fmt='%s') + + def format_dump_data(self): + print('Formatting data...') + iteration_num=len(self.seq_list)//self.config['num_process'] + if len(self.seq_list)%self.config['num_process']!=0: + iteration_num+=1 + pool=Pool(self.config['num_process']) + for index in trange(iteration_num): + indices=range(index*self.config['num_process'],min((index+1)*self.config['num_process'],len(self.seq_list))) + pool.map(self.format_seq,indices) + pool.close() + pool.join() + + self.collect_meta() \ No newline at end of file diff --git a/third_party/SGMNet/datadump/dumper/scannet.py b/third_party/SGMNet/datadump/dumper/scannet.py new file mode 100644 index 0000000000000000000000000000000000000000..2556f727fcc9b4c621e44d9ee5cb4e99cb19b7e8 --- /dev/null +++ 
b/third_party/SGMNet/datadump/dumper/scannet.py @@ -0,0 +1,72 @@ +import os +import glob +import pickle +from posixpath import basename +import numpy as np +import h5py +from .base_dumper import BaseDumper + +import sys +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")) +sys.path.insert(0, ROOT_DIR) +import utils + +class scannet(BaseDumper): + def get_seqs(self): + self.pair_list=np.loadtxt('../assets/scannet_eval_list.txt',dtype=str) + self.seq_list=np.unique(np.asarray([path.split('/')[0] for path in self.pair_list[:,0]],dtype=str)) + self.dump_seq,self.img_seq=[],[] + for seq in self.seq_list: + dump_dir=os.path.join(self.config['feature_dump_dir'],seq) + cur_img_seq=glob.glob(os.path.join(os.path.join(self.config['rawdata_dir'],seq,'img','*.jpg'))) + cur_dump_seq=[os.path.join(dump_dir,path.split('/')[-1])+'_'+self.config['extractor']['name']+'_'+str(self.config['extractor']['num_kpt'])\ + +'.hdf5' for path in cur_img_seq] + self.img_seq+=cur_img_seq + self.dump_seq+=cur_dump_seq + + def format_dump_folder(self): + if not os.path.exists(self.config['feature_dump_dir']): + os.mkdir(self.config['feature_dump_dir']) + for seq in self.seq_list: + seq_dir=os.path.join(self.config['feature_dump_dir'],seq) + if not os.path.exists(seq_dir): + os.mkdir(seq_dir) + + def format_dump_data(self): + print('Formatting data...') + self.data={'K1':[],'K2':[],'R':[],'T':[],'e':[],'f':[],'fea_path1':[],'fea_path2':[],'img_path1':[],'img_path2':[]} + + for pair in self.pair_list: + img_path1,img_path2=pair[0],pair[1] + seq=img_path1.split('/')[0] + index1,index2=int(img_path1.split('/')[-1][:-4]),int(img_path2.split('/')[-1][:-4]) + ex1,ex2=np.loadtxt(os.path.join(self.config['rawdata_dir'],seq,'extrinsic',str(index1)+'.txt'),dtype=float),\ + np.loadtxt(os.path.join(self.config['rawdata_dir'],seq,'extrinsic',str(index2)+'.txt'),dtype=float) + K1,K2=np.loadtxt(os.path.join(self.config['rawdata_dir'],seq,'intrinsic',str(index1)+'.txt'),dtype=float),\ + 
np.loadtxt(os.path.join(self.config['rawdata_dir'],seq,'intrinsic',str(index2)+'.txt'),dtype=float) + + + relative_extrinsic=np.matmul(np.linalg.inv(ex2),ex1) + dR,dt=relative_extrinsic[:3,:3],relative_extrinsic[:3,3] + dt /= np.sqrt(np.sum(dt**2)) + + e_gt_unnorm = np.reshape(np.matmul( + np.reshape(utils.evaluation_utils.np_skew_symmetric(dt.astype('float64').reshape(1, 3)), (3, 3)), + np.reshape(dR.astype('float64'), (3, 3))), (3, 3)) + e_gt = e_gt_unnorm / np.linalg.norm(e_gt_unnorm) + f_gt_unnorm=np.linalg.inv(K2.T)@e_gt@np.linalg.inv(K1) + f_gt = f_gt_unnorm / np.linalg.norm(f_gt_unnorm) + + self.data['K1'].append(K1),self.data['K2'].append(K2) + self.data['R'].append(dR),self.data['T'].append(dt) + self.data['e'].append(e_gt),self.data['f'].append(f_gt) + + dump_seq_dir=os.path.join(self.config['feature_dump_dir'],seq) + fea_path1,fea_path2=os.path.join(dump_seq_dir,img_path1.split('/')[-1]+'_'+self.config['extractor']['name'] + +'_'+str(self.config['extractor']['num_kpt'])+'.hdf5'),\ + os.path.join(dump_seq_dir,img_path2.split('/')[-1]+'_'+self.config['extractor']['name'] + +'_'+str(self.config['extractor']['num_kpt'])+'.hdf5') + self.data['img_path1'].append(img_path1),self.data['img_path2'].append(img_path2) + self.data['fea_path1'].append(fea_path1),self.data['fea_path2'].append(fea_path2) + + self.form_standard_dataset() diff --git a/third_party/SGMNet/datadump/dumper/yfcc.py b/third_party/SGMNet/datadump/dumper/yfcc.py new file mode 100644 index 0000000000000000000000000000000000000000..0c52e4324bba3e5ed424fe58af7a94fd3132b1e5 --- /dev/null +++ b/third_party/SGMNet/datadump/dumper/yfcc.py @@ -0,0 +1,87 @@ +import os +import glob +import pickle +import numpy as np +import h5py +from .base_dumper import BaseDumper + +import sys +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")) +sys.path.insert(0, ROOT_DIR) +import utils + +class yfcc(BaseDumper): + + def get_seqs(self): + 
data_dir=os.path.join(self.config['rawdata_dir'],'yfcc100m') + for seq in self.config['data_seq']: + for split in self.config['data_split']: + split_dir=os.path.join(data_dir,seq,split) + dump_dir=os.path.join(self.config['feature_dump_dir'],seq,split) + cur_img_seq=glob.glob(os.path.join(split_dir,'images','*.jpg')) + cur_dump_seq=[os.path.join(dump_dir,path.split('/')[-1])+'_'+self.config['extractor']['name']+'_'+str(self.config['extractor']['num_kpt'])\ + +'.hdf5' for path in cur_img_seq] + self.img_seq+=cur_img_seq + self.dump_seq+=cur_dump_seq + + def format_dump_folder(self): + if not os.path.exists(self.config['feature_dump_dir']): + os.mkdir(self.config['feature_dump_dir']) + for seq in self.config['data_seq']: + seq_dir=os.path.join(self.config['feature_dump_dir'],seq) + if not os.path.exists(seq_dir): + os.mkdir(seq_dir) + for split in self.config['data_split']: + split_dir=os.path.join(seq_dir,split) + if not os.path.exists(split_dir): + os.mkdir(split_dir) + + def format_dump_data(self): + print('Formatting data...') + pair_path=os.path.join(self.config['rawdata_dir'],'pairs') + self.data={'K1':[],'K2':[],'R':[],'T':[],'e':[],'f':[],'fea_path1':[],'fea_path2':[],'img_path1':[],'img_path2':[]} + + for seq in self.config['data_seq']: + pair_name=os.path.join(pair_path,seq+'-te-1000-pairs.pkl') + with open(pair_name, 'rb') as f: + pairs=pickle.load(f) + + #generate id list + seq_dir=os.path.join(self.config['rawdata_dir'],'yfcc100m',seq,'test') + name_list=np.loadtxt(os.path.join(seq_dir,'images.txt'),dtype=str) + cam_name_list=np.loadtxt(os.path.join(seq_dir,'calibration.txt'),dtype=str) + + for cur_pair in pairs: + index1,index2=cur_pair[0],cur_pair[1] + cam1,cam2=h5py.File(os.path.join(seq_dir,cam_name_list[index1]),'r'),h5py.File(os.path.join(seq_dir,cam_name_list[index2]),'r') + K1,K2=cam1['K'][()],cam2['K'][()] + [w1,h1],[w2,h2]=cam1['imsize'][()][0],cam2['imsize'][()][0] + cx1,cy1,cx2,cy2 = (w1 - 1.0) * 0.5,(h1 - 1.0) * 0.5, (w2 - 1.0) * 0.5,(h2 - 
1.0) * 0.5 + K1[0,2],K1[1,2],K2[0,2],K2[1,2]=cx1,cy1,cx2,cy2 + + R1,R2,t1,t2=cam1['R'][()],cam2['R'][()],cam1['T'][()].reshape([3,1]),cam2['T'][()].reshape([3,1]) + dR = np.dot(R2, R1.T) + dt = t2 - np.dot(dR, t1) + dt /= np.sqrt(np.sum(dt**2)) + + e_gt_unnorm = np.reshape(np.matmul( + np.reshape(utils.evaluation_utils.np_skew_symmetric(dt.astype('float64').reshape(1, 3)), (3, 3)), + np.reshape(dR.astype('float64'), (3, 3))), (3, 3)) + e_gt = e_gt_unnorm / np.linalg.norm(e_gt_unnorm) + f_gt_unnorm=np.linalg.inv(K2.T)@e_gt@np.linalg.inv(K1) + f_gt = f_gt_unnorm / np.linalg.norm(f_gt_unnorm) + + self.data['K1'].append(K1),self.data['K2'].append(K2) + self.data['R'].append(dR),self.data['T'].append(dt) + self.data['e'].append(e_gt),self.data['f'].append(f_gt) + + img_path1,img_path2=os.path.join('yfcc100m',seq,'test',name_list[index1]),os.path.join('yfcc100m',seq,'test',name_list[index2]) + dump_seq_dir=os.path.join(self.config['feature_dump_dir'],seq,'test') + fea_path1,fea_path2=os.path.join(dump_seq_dir,name_list[index1].split('/')[-1]+'_'+self.config['extractor']['name'] + +'_'+str(self.config['extractor']['num_kpt'])+'.hdf5'),\ + os.path.join(dump_seq_dir,name_list[index2].split('/')[-1]+'_'+self.config['extractor']['name'] + +'_'+str(self.config['extractor']['num_kpt'])+'.hdf5') + self.data['img_path1'].append(img_path1),self.data['img_path2'].append(img_path2) + self.data['fea_path1'].append(fea_path1),self.data['fea_path2'].append(fea_path2) + + self.form_standard_dataset() diff --git a/third_party/SGMNet/demo/configs/nn_config.yaml b/third_party/SGMNet/demo/configs/nn_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a87bfafce0cb7f8ab64e59311923d309aabcfab9 --- /dev/null +++ b/third_party/SGMNet/demo/configs/nn_config.yaml @@ -0,0 +1,10 @@ +extractor: + name: root + num_kpt: 4000 + resize: [-1] + det_th: 0.00001 + +matcher: + name: NN + ratio_th: 0.9 + mutual_check: True \ No newline at end of file diff --git 
a/third_party/SGMNet/demo/configs/sgm_config.yaml b/third_party/SGMNet/demo/configs/sgm_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91de752010daa54ef0b508ef79d2dc4ac23945ec --- /dev/null +++ b/third_party/SGMNet/demo/configs/sgm_config.yaml @@ -0,0 +1,21 @@ +extractor: + name: root + num_kpt: 4000 + resize: [-1] + det_th: 0.00001 + +matcher: + name: SGM + model_dir: ../weights/sgm/root + seed_top_k: [256,256] + seed_radius_coe: 0.01 + net_channels: 128 + layer_num: 9 + head: 4 + seedlayer: [0,6] + use_mc_seeding: True + use_score_encoding: False + conf_bar: [1.11,0.1] + sink_iter: [10,100] + detach_iter: 1000000 + p_th: 0.2 diff --git a/third_party/SGMNet/demo/demo.py b/third_party/SGMNet/demo/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..cbe277e26d09121f5517854a7ea014b0797a2bde --- /dev/null +++ b/third_party/SGMNet/demo/demo.py @@ -0,0 +1,45 @@ +import cv2 +import yaml +import numpy as np +import os +import sys + +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, ROOT_DIR) +from components import load_component +from utils import evaluation_utils + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('--config_path', type=str, default='configs/sgm_config.yaml', + help='number of processes.') +parser.add_argument('--img1_path', type=str, default='demo_1.jpg', + help='number of processes.') +parser.add_argument('--img2_path', type=str, default='demo_2.jpg', + help='number of processes.') + + +args = parser.parse_args() + +if __name__=='__main__': + with open(args.config_path, 'r') as f: + demo_config = yaml.load(f) + + extractor=load_component('extractor',demo_config['extractor']['name'],demo_config['extractor']) + + img1,img2=cv2.imread(args.img1_path),cv2.imread(args.img2_path) + size1,size2=np.flip(np.asarray(img1.shape[:2])),np.flip(np.asarray(img2.shape[:2])) + kpt1,desc1=extractor.run(args.img1_path) + 
kpt2,desc2=extractor.run(args.img2_path) + + matcher=load_component('matcher',demo_config['matcher']['name'],demo_config['matcher']) + test_data={'x1':kpt1,'x2':kpt2,'desc1':desc1,'desc2':desc2,'size1':size1,'size2':size2} + corr1,corr2= matcher.run(test_data) + + #draw points + dis_points_1 = evaluation_utils.draw_points(img1, kpt1) + dis_points_2 = evaluation_utils.draw_points(img2, kpt2) + + #visualize match + display=evaluation_utils.draw_match(dis_points_1,dis_points_2,corr1,corr2) + cv2.imwrite('match.png',display) diff --git a/third_party/SGMNet/demo/demo_1.jpg b/third_party/SGMNet/demo/demo_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..187c36e942d7d8fa4d1b09661fa3b9ddd01939ee --- /dev/null +++ b/third_party/SGMNet/demo/demo_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f52b8feb635d19473200d6bc89e37a07a0728bfd37a6a63dd0915f111b86b51 +size 296810 diff --git a/third_party/SGMNet/demo/demo_2.jpg b/third_party/SGMNet/demo/demo_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..513cbeb46369b086886e6271b928d6a17d5075cc --- /dev/null +++ b/third_party/SGMNet/demo/demo_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2cab0e68625150ca0aa1fa7d0c54675ed7e3e1f7125a820215aa2a5d7f3e6f +size 227732 diff --git a/third_party/SGMNet/evaluation/configs/cost/sg_cost.yaml b/third_party/SGMNet/evaluation/configs/cost/sg_cost.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05ea5ddc7bce8ad94d3ef3ec350363b5cc846ed8 --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/cost/sg_cost.yaml @@ -0,0 +1,4 @@ +net_channels: 128 +layer_num: 9 +head: 4 +use_score_encoding: True \ No newline at end of file diff --git a/third_party/SGMNet/evaluation/configs/cost/sgm_cost.yaml b/third_party/SGMNet/evaluation/configs/cost/sgm_cost.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f43193fb63fb26d50a8c3abd3cf53c43734dbca --- 
/dev/null +++ b/third_party/SGMNet/evaluation/configs/cost/sgm_cost.yaml @@ -0,0 +1,11 @@ +seed_top_k: [256,256] +seed_radius_coe: 0.01 +net_channels: 128 +layer_num: 9 +head: 4 +seedlayer: [0,6] +use_mc_seeding: True +use_score_encoding: False +conf_bar: [1,0] +sink_iter: [10,10] +detach_iter: 1000000 \ No newline at end of file diff --git a/third_party/SGMNet/evaluation/configs/eval/fm_eval_nn.yaml b/third_party/SGMNet/evaluation/configs/eval/fm_eval_nn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d467a814559a27938f010dbf79a8e208551b2b5 --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/fm_eval_nn.yaml @@ -0,0 +1,18 @@ +reader: + name: standard + rawdata_dir: FM-Bench/Dataset + dataset_dir: test_fmbench_root/fmbench_root_4000.hdf5 + num_kpt: 4000 + +matcher: + name: NN + mutual_check: False + ratio_th: 0.8 + +evaluator: + name: FM + seq: ['CPC','KITTI','TUM','Tanks_and_Temples'] + num_pair: 4000 + inlier_th: 0.003 + sgd_inlier_th: 0.05 + diff --git a/third_party/SGMNet/evaluation/configs/eval/fm_eval_sg.yaml b/third_party/SGMNet/evaluation/configs/eval/fm_eval_sg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec22b1340d62fad20f22584ddbded30fcc59d1c9 --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/fm_eval_sg.yaml @@ -0,0 +1,22 @@ +reader: + name: standard + rawdata_dir: FM-Bench/Dataset + dataset_dir: test_fmbench_root/fmbench_root_4000.hdf5 + num_kpt: 4000 + +matcher: + name: SG + model_dir: ../weights/sg/root + net_channels: 128 + layer_num: 9 + head: 4 + use_score_encoding: True + sink_iter: [100] + p_th: 0.2 + +evaluator: + name: FM + seq: ['CPC','KITTI','TUM','Tanks_and_Temples'] + num_pair: 4000 + inlier_th: 0.003 + sgd_inlier_th: 0.05 diff --git a/third_party/SGMNet/evaluation/configs/eval/fm_eval_sgm.yaml b/third_party/SGMNet/evaluation/configs/eval/fm_eval_sgm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd23165c95451cd44063a2b6cccea21c68fb6fa0 
--- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/fm_eval_sgm.yaml @@ -0,0 +1,28 @@ +reader: + name: standard + rawdata_dir: FM-Bench/Dataset + dataset_dir: test_fmbench_root/fmbench_root_4000.hdf5 + num_kpt: 4000 + +matcher: + name: SGM + model_dir: ../weights/sgm/root + seed_top_k: [256,256] + seed_radius_coe: 0.01 + net_channels: 128 + layer_num: 9 + head: 4 + seedlayer: [0,6] + use_mc_seeding: True + use_score_encoding: False + conf_bar: [1.11,0.1] #set to [1,0.1] for sp + sink_iter: [10,100] + detach_iter: 1000000 + p_th: 0.2 + +evaluator: + name: FM + seq: ['CPC','KITTI','TUM','Tanks_and_Temples'] + num_pair: 4000 + inlier_th: 0.003 + sgd_inlier_th: 0.05 diff --git a/third_party/SGMNet/evaluation/configs/eval/scannet_eval_nn.yaml b/third_party/SGMNet/evaluation/configs/eval/scannet_eval_nn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51ad5402b6266b60a365181371be8a5e64751d2f --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/scannet_eval_nn.yaml @@ -0,0 +1,17 @@ +reader: + name: standard + rawdata_dir: scannet_eval + dataset_dir: scannet_test_root/scannet_root_2000.hdf5 + num_kpt: 2000 + +matcher: + name: NN + mutual_check: False + ratio_th: 0.8 + +evaluator: + name: AUC + rescale: 640 + num_pair: 1500 + inlier_th: 0.005 + diff --git a/third_party/SGMNet/evaluation/configs/eval/scannet_eval_sg.yaml b/third_party/SGMNet/evaluation/configs/eval/scannet_eval_sg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d0ef70cfa07b1471816cc7905d6a632599d134c --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/scannet_eval_sg.yaml @@ -0,0 +1,22 @@ +reader: + name: standard + rawdata_dir: scannet_eval + dataset_dir: scannet_test_root/scannet_root_2000.hdf5 + num_kpt: 2000 + +matcher: + name: SG + model_dir: ../weights/sg/root + net_channels: 128 + layer_num: 9 + head: 4 + use_score_encoding: True + sink_iter: [100] + p_th: 0.2 + +evaluator: + name: AUC + rescale: 640 + num_pair: 1500 + 
inlier_th: 0.005 + diff --git a/third_party/SGMNet/evaluation/configs/eval/scannet_eval_sgm.yaml b/third_party/SGMNet/evaluation/configs/eval/scannet_eval_sgm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e524845a514e6d8d50f97bced5c9beeaed26ebe5 --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/scannet_eval_sgm.yaml @@ -0,0 +1,28 @@ +reader: + name: standard + rawdata_dir: scannet_eval + dataset_dir: scannet_test_root/scannet_root_2000.hdf5 + num_kpt: 2000 + +matcher: + name: SGM + model_dir: ../weights/sgm/root + seed_top_k: [128,128] + seed_radius_coe: 0.01 + net_channels: 128 + layer_num: 9 + head: 4 + seedlayer: [0,6] + use_mc_seeding: True + use_score_encoding: False + conf_bar: [1.11,0.1] + sink_iter: [10,100] + detach_iter: 1000000 + p_th: 0.2 + +evaluator: + name: AUC + rescale: 640 + num_pair: 1500 + inlier_th: 0.005 + diff --git a/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_nn.yaml b/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_nn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ecd1eef2cff9b93f3665a9cf4af6bc9f68339f0 --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_nn.yaml @@ -0,0 +1,17 @@ +reader: + name: standard + rawdata_dir: yfcc_rawdata + dataset_dir: yfcc_test_root/yfcc_root_2000.hdf5 + num_kpt: 2000 + +matcher: + name: NN + mutual_check: False + ratio_th: 0.8 + +evaluator: + name: AUC + rescale: 1600 + num_pair: 4000 + inlier_th: 0.005 + diff --git a/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_sg.yaml b/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_sg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..beb2b93639160448dd955cd576e5a19a936b08f1 --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_sg.yaml @@ -0,0 +1,22 @@ +reader: + name: standard + rawdata_dir: yfcc_rawdata + dataset_dir: yfcc_test_root/yfcc_root_2000.hdf5 + num_kpt: 2000 + +matcher: + name: SG + model_dir: 
../weights/sg/root + net_channels: 128 + layer_num: 9 + head: 4 + use_score_encoding: True + sink_iter: [100] + p_th: 0.2 + +evaluator: + name: AUC + rescale: 1600 + num_pair: 4000 + inlier_th: 0.005 + diff --git a/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_sgm.yaml b/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_sgm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c9aee8a8aa786ff209a5afadf0469f62ef2a50f --- /dev/null +++ b/third_party/SGMNet/evaluation/configs/eval/yfcc_eval_sgm.yaml @@ -0,0 +1,28 @@ +reader: + name: standard + rawdata_dir: yfcc_rawdata + dataset_dir: yfcc_test_root/yfcc_root_2000.hdf5 + num_kpt: 2000 + +matcher: + name: SGM + model_dir: ../weights/sgm/root + seed_top_k: [128,128] + seed_radius_coe: 0.01 + net_channels: 128 + layer_num: 9 + head: 4 + seedlayer: [0,6] + use_mc_seeding: True + use_score_encoding: False + conf_bar: [1.11,0.1] #set to [1,0.1] for sp + sink_iter: [10,100] + detach_iter: 1000000 + p_th: 0.2 + +evaluator: + name: AUC + rescale: 1600 + num_pair: 4000 + inlier_th: 0.005 + diff --git a/third_party/SGMNet/evaluation/eval_cost.py b/third_party/SGMNet/evaluation/eval_cost.py new file mode 100644 index 0000000000000000000000000000000000000000..dd3f88abc93290c96ed3d7fa8624c3534e006911 --- /dev/null +++ b/third_party/SGMNet/evaluation/eval_cost.py @@ -0,0 +1,60 @@ +import torch +import yaml +import time +from collections import OrderedDict,namedtuple +import os +import sys +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, ROOT_DIR) + +from sgmnet import matcher as SGM_Model +from superglue import matcher as SG_Model + + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('--matcher_name', type=str, default='SGM', + help='number of processes.') +parser.add_argument('--config_path', type=str, default='configs/cost/sgm_cost.yaml', + help='number of processes.') +parser.add_argument('--num_kpt', type=int, default=4000, + 
help='keypoint number, default:100') +parser.add_argument('--iter_num', type=int, default=100, + help='keypoint number, default:100') + + +def test_cost(test_data,model): + with torch.no_grad(): + #warm up call + _=model(test_data) + torch.cuda.synchronize() + a=time.time() + for _ in range(int(args.iter_num)): + _=model(test_data) + torch.cuda.synchronize() + b=time.time() + print('Average time per run(ms): ',(b-a)/args.iter_num*1e3) + print('Peak memory(MB): ',torch.cuda.max_memory_allocated()/1e6) + + +if __name__=='__main__': + torch.backends.cudnn.benchmark=False + args = parser.parse_args() + with open(args.config_path, 'r') as f: + model_config = yaml.load(f) + model_config=namedtuple('model_config',model_config.keys())(*model_config.values()) + + if args.matcher_name=='SGM': + model = SGM_Model(model_config) + elif args.matcher_name=='SG': + model = SG_Model(model_config) + model.cuda(),model.eval() + + test_data = { + 'x1':torch.rand(1,args.num_kpt,2).cuda()-0.5, + 'x2':torch.rand(1,args.num_kpt,2).cuda()-0.5, + 'desc1': torch.rand(1,args.num_kpt,128).cuda(), + 'desc2': torch.rand(1,args.num_kpt,128).cuda() + } + + test_cost(test_data,model) diff --git a/third_party/SGMNet/evaluation/evaluate.py b/third_party/SGMNet/evaluation/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..dd5229375caa03b2763bf37a266fb76e80f8e25e --- /dev/null +++ b/third_party/SGMNet/evaluation/evaluate.py @@ -0,0 +1,117 @@ +import os +from torch.multiprocessing import Process,Manager,set_start_method,Pool +import functools +import argparse +import yaml +import numpy as np +import sys +import cv2 +from tqdm import trange +set_start_method('spawn',force=True) + + +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, ROOT_DIR) + +from components import load_component +from utils import evaluation_utils,metrics + +parser = argparse.ArgumentParser(description='dump eval data.') +parser.add_argument('--config_path', 
type=str, default='configs/eval/scannet_eval_sgm.yaml') +parser.add_argument('--num_process_match', type=int, default=4) +parser.add_argument('--num_process_eval', type=int, default=4) +parser.add_argument('--vis_folder',type=str,default=None) +args=parser.parse_args() + +def feed_match(info,matcher): + x1,x2,desc1,desc2,size1,size2=info['x1'],info['x2'],info['desc1'],info['desc2'],info['img1'].shape[:2],info['img2'].shape[:2] + test_data = {'x1': x1,'x2': x2,'desc1': desc1,'desc2': desc2,'size1':np.flip(np.asarray(size1)),'size2':np.flip(np.asarray(size2)) } + corr1,corr2=matcher.run(test_data) + return [corr1,corr2] + + +def reader_handler(config,read_que): + reader=load_component('reader',config['name'],config) + for index in range(len(reader)): + index+=0 + info=reader.run(index) + read_que.put(info) + read_que.put('over') + + +def match_handler(config,read_que,match_que): + matcher=load_component('matcher',config['name'],config) + match_func=functools.partial(feed_match,matcher=matcher) + pool = Pool(args.num_process_match) + cache=[] + while True: + item=read_que.get() + #clear cache + if item=='over': + if len(cache)!=0: + results=pool.map(match_func,cache) + for cur_item,cur_result in zip(cache,results): + cur_item['corr1'],cur_item['corr2']=cur_result[0],cur_result[1] + match_que.put(cur_item) + match_que.put('over') + break + cache.append(item) + #print(len(cache)) + if len(cache)==args.num_process_match: + #matching in parallel + results=pool.map(match_func,cache) + for cur_item,cur_result in zip(cache,results): + cur_item['corr1'],cur_item['corr2']=cur_result[0],cur_result[1] + match_que.put(cur_item) + cache=[] + pool.close() + pool.join() + + +def evaluate_handler(config,match_que): + evaluator=load_component('evaluator',config['name'],config) + pool = Pool(args.num_process_eval) + cache=[] + for _ in trange(config['num_pair']): + item=match_que.get() + if item=='over': + if len(cache)!=0: + results=pool.map(evaluator.run,cache) + for cur_res in 
results: + evaluator.res_inqueue(cur_res) + break + cache.append(item) + if len(cache)==args.num_process_eval: + results=pool.map(evaluator.run,cache) + for cur_res in results: + evaluator.res_inqueue(cur_res) + cache=[] + if args.vis_folder is not None: + #dump visualization + corr1_norm,corr2_norm=evaluation_utils.normalize_intrinsic(item['corr1'],item['K1']),\ + evaluation_utils.normalize_intrinsic(item['corr2'],item['K2']) + inlier_mask=metrics.compute_epi_inlier(corr1_norm,corr2_norm,item['e'],config['inlier_th']) + display=evaluation_utils.draw_match(item['img1'],item['img2'],item['corr1'],item['corr2'],inlier_mask) + cv2.imwrite(os.path.join(args.vis_folder,str(item['index'])+'.png'),display) + evaluator.parse() + + +if __name__=='__main__': + with open(args.config_path, 'r') as f: + config = yaml.load(f) + if args.vis_folder is not None and not os.path.exists(args.vis_folder): + os.mkdir(args.vis_folder) + + read_que,match_que,estimate_que=Manager().Queue(maxsize=100),Manager().Queue(maxsize=100),Manager().Queue(maxsize=100) + + read_process=Process(target=reader_handler,args=(config['reader'],read_que)) + match_process=Process(target=match_handler,args=(config['matcher'],read_que,match_que)) + evaluate_process=Process(target=evaluate_handler,args=(config['evaluator'],match_que)) + + read_process.start() + match_process.start() + evaluate_process.start() + + read_process.join() + match_process.join() + evaluate_process.join() \ No newline at end of file diff --git a/third_party/SGMNet/requirements.txt b/third_party/SGMNet/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6a47c9a51a87a3eb4ab3ce80201c328bcd0cd75d --- /dev/null +++ b/third_party/SGMNet/requirements.txt @@ -0,0 +1,6 @@ +numpy +pyyaml==5.1 +h5py +tensorboardX +opencv-contrib-python==4.5.2.52 +tqdm \ No newline at end of file diff --git a/third_party/SGMNet/sgmnet/__init__.py b/third_party/SGMNet/sgmnet/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..828543beceebb10d05fd9d5fdfcc4b1c91e5af6b --- /dev/null +++ b/third_party/SGMNet/sgmnet/__init__.py @@ -0,0 +1 @@ +from .match_model import matcher \ No newline at end of file diff --git a/third_party/SGMNet/sgmnet/match_model.py b/third_party/SGMNet/sgmnet/match_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1e55fa5d042b010f8d9a99e006002563a3961ae7 --- /dev/null +++ b/third_party/SGMNet/sgmnet/match_model.py @@ -0,0 +1,222 @@ +import torch +import torch.nn as nn + +eps=1e-8 + +def sinkhorn(M,r,c,iteration): + p = torch.softmax(M, dim=-1) + u = torch.ones_like(r) + v = torch.ones_like(c) + for _ in range(iteration): + u = r / ((p * v.unsqueeze(-2)).sum(-1) + eps) + v = c / ((p * u.unsqueeze(-1)).sum(-2) + eps) + p = p * u.unsqueeze(-1) * v.unsqueeze(-2) + return p + +def sink_algorithm(M,dustbin,iteration): + M = torch.cat([M, dustbin.expand([M.shape[0], M.shape[1], 1])], dim=-1) + M = torch.cat([M, dustbin.expand([M.shape[0], 1, M.shape[2]])], dim=-2) + r = torch.ones([M.shape[0], M.shape[1] - 1],device='cuda') + r = torch.cat([r, torch.ones([M.shape[0], 1],device='cuda') * M.shape[1]], dim=-1) + c = torch.ones([M.shape[0], M.shape[2] - 1],device='cuda') + c = torch.cat([c, torch.ones([M.shape[0], 1],device='cuda') * M.shape[2]], dim=-1) + p=sinkhorn(M,r,c,iteration) + return p + + +def seeding(nn_index1,nn_index2,x1,x2,topk,match_score,confbar,nms_radius,use_mc=True,test=False): + + #apply mutual check before nms + if use_mc: + mask_not_mutual=nn_index2.gather(dim=-1,index=nn_index1)!=torch.arange(nn_index1.shape[1],device='cuda') + match_score[mask_not_mutual]=-1 + #NMS + pos_dismat1=((x1.norm(p=2,dim=-1)**2).unsqueeze_(-1)+(x1.norm(p=2,dim=-1)**2).unsqueeze_(-2)-2*(x1@x1.transpose(1,2))).abs_().sqrt_() + x2=x2.gather(index=nn_index1.unsqueeze(-1).expand(-1,-1,2),dim=1) + 
pos_dismat2=((x2.norm(p=2,dim=-1)**2).unsqueeze_(-1)+(x2.norm(p=2,dim=-1)**2).unsqueeze_(-2)-2*(x2@x2.transpose(1,2))).abs_().sqrt_() + radius1, radius2 = nms_radius * pos_dismat1.mean(dim=(1,2),keepdim=True), nms_radius * pos_dismat2.mean(dim=(1,2),keepdim=True) + nms_mask = (pos_dismat1 >= radius1) & (pos_dismat2 >= radius2) + mask_not_local_max=(match_score.unsqueeze(-1)>=match_score.unsqueeze(-2))|nms_mask + mask_not_local_max=~(mask_not_local_max.min(dim=-1).values) + match_score[mask_not_local_max] = -1 + + #confidence bar + match_score[match_score0 + if test: + topk=min(mask_survive.sum(dim=1)[0]+2,topk) + _,topindex = torch.topk(match_score,topk,dim=-1)#b*k + seed_index1,seed_index2=topindex,nn_index1.gather(index=topindex,dim=-1) + return seed_index1,seed_index2 + + + +class PointCN(nn.Module): + def __init__(self, channels,out_channels): + nn.Module.__init__(self) + self.shot_cut = nn.Conv1d(channels, out_channels, kernel_size=1) + self.conv = nn.Sequential( + nn.InstanceNorm1d(channels, eps=1e-3), + nn.SyncBatchNorm(channels), + nn.ReLU(), + nn.Conv1d(channels, channels, kernel_size=1), + nn.InstanceNorm1d(channels, eps=1e-3), + nn.SyncBatchNorm(channels), + nn.ReLU(), + nn.Conv1d(channels, out_channels, kernel_size=1) + ) + + def forward(self, x): + return self.conv(x) + self.shot_cut(x) + + +class attention_propagantion(nn.Module): + + def __init__(self,channel,head): + nn.Module.__init__(self) + self.head=head + self.head_dim=channel//head + self.query_filter,self.key_filter,self.value_filter=nn.Conv1d(channel,channel,kernel_size=1),nn.Conv1d(channel,channel,kernel_size=1),\ + nn.Conv1d(channel,channel,kernel_size=1) + self.mh_filter=nn.Conv1d(channel,channel,kernel_size=1) + self.cat_filter=nn.Sequential(nn.Conv1d(2*channel,2*channel, kernel_size=1), nn.SyncBatchNorm(2*channel), nn.ReLU(), + nn.Conv1d(2*channel, channel, kernel_size=1)) + + def forward(self,desc1,desc2,weight_v=None): + #desc1(q) attend to desc2(k,v) + batch_size=desc1.shape[0] + 
query,key,value=self.query_filter(desc1).view(batch_size,self.head,self.head_dim,-1),self.key_filter(desc2).view(batch_size,self.head,self.head_dim,-1),\ + self.value_filter(desc2).view(batch_size,self.head,self.head_dim,-1) + if weight_v is not None: + value=value*weight_v.view(batch_size,1,1,-1) + score=torch.softmax(torch.einsum('bhdn,bhdm->bhnm',query,key)/ self.head_dim ** 0.5,dim=-1) + add_value=torch.einsum('bhnm,bhdm->bhdn',score,value).reshape(batch_size,self.head_dim*self.head,-1) + add_value=self.mh_filter(add_value) + desc1_new=desc1+self.cat_filter(torch.cat([desc1,add_value],dim=1)) + return desc1_new + + +class hybrid_block(nn.Module): + def __init__(self,channel,head): + nn.Module.__init__(self) + self.head=head + self.channel=channel + self.attention_block_down = attention_propagantion(channel, head) + self.cluster_filter=nn.Sequential(nn.Conv1d(2*channel,2*channel, kernel_size=1), nn.SyncBatchNorm(2*channel), nn.ReLU(), + nn.Conv1d(2*channel, 2*channel, kernel_size=1)) + self.cross_filter=attention_propagantion(channel,head) + self.confidence_filter=PointCN(2*channel,1) + self.attention_block_self=attention_propagantion(channel,head) + self.attention_block_up=attention_propagantion(channel,head) + + def forward(self,desc1,desc2,seed_index1,seed_index2): + cluster1, cluster2 = desc1.gather(dim=-1, index=seed_index1.unsqueeze(1).expand(-1, self.channel, -1)), \ + desc2.gather(dim=-1, index=seed_index2.unsqueeze(1).expand(-1, self.channel, -1)) + + #pooling + cluster1, cluster2 = self.attention_block_down(cluster1, desc1), self.attention_block_down(cluster2, desc2) + concate_cluster=self.cluster_filter(torch.cat([cluster1,cluster2],dim=1)) + #filtering + cluster1,cluster2=self.cross_filter(concate_cluster[:,:self.channel],concate_cluster[:,self.channel:]),\ + self.cross_filter(concate_cluster[:,self.channel:],concate_cluster[:,:self.channel]) + cluster1,cluster2=self.attention_block_self(cluster1,cluster1),self.attention_block_self(cluster2,cluster2) 
+ #unpooling + seed_weight=self.confidence_filter(torch.cat([cluster1,cluster2],dim=1)) + seed_weight=torch.sigmoid(seed_weight).squeeze(1) + desc1_new,desc2_new=self.attention_block_up(desc1,cluster1,seed_weight),self.attention_block_up(desc2,cluster2,seed_weight) + return desc1_new,desc2_new,seed_weight + + + +class matcher(nn.Module): + def __init__(self,config): + nn.Module.__init__(self) + self.seed_top_k=config.seed_top_k + self.conf_bar=config.conf_bar + self.seed_radius_coe=config.seed_radius_coe + self.use_score_encoding=config.use_score_encoding + self.detach_iter=config.detach_iter + self.seedlayer=config.seedlayer + self.layer_num=config.layer_num + self.sink_iter=config.sink_iter + + self.position_encoder = nn.Sequential(nn.Conv1d(3, 32, kernel_size=1) if config.use_score_encoding else nn.Conv1d(2, 32, kernel_size=1), + nn.SyncBatchNorm(32),nn.ReLU(), + nn.Conv1d(32, 64, kernel_size=1), nn.SyncBatchNorm(64),nn.ReLU(), + nn.Conv1d(64, 128, kernel_size=1), nn.SyncBatchNorm(128),nn.ReLU(), + nn.Conv1d(128, 256, kernel_size=1), nn.SyncBatchNorm(256),nn.ReLU(), + nn.Conv1d(256, config.net_channels, kernel_size=1)) + + + self.hybrid_block=nn.Sequential(*[hybrid_block(config.net_channels, config.head) for _ in range(config.layer_num)]) + self.final_project = nn.Conv1d(config.net_channels, config.net_channels, kernel_size=1) + self.dustbin=nn.Parameter(torch.tensor(1.5,dtype=torch.float32)) + + #if reseeding + if len(config.seedlayer)!=1: + self.mid_dustbin=nn.ParameterDict({str(i):nn.Parameter(torch.tensor(2,dtype=torch.float32)) for i in config.seedlayer[1:]}) + self.mid_final_project = nn.Conv1d(config.net_channels, config.net_channels, kernel_size=1) + + def forward(self,data,test_mode=True): + x1, x2, desc1, desc2 = data['x1'][:,:,:2], data['x2'][:,:,:2], data['desc1'], data['desc2'] + desc1, desc2 = torch.nn.functional.normalize(desc1,dim=-1), torch.nn.functional.normalize(desc2,dim=-1) + if test_mode: + encode_x1,encode_x2=data['x1'],data['x2'] + else: 
+ encode_x1,encode_x2=data['aug_x1'], data['aug_x2'] + + #preparation + desc_dismat=(2-2*torch.matmul(desc1,desc2.transpose(1,2))).sqrt_() + values,nn_index=torch.topk(desc_dismat,k=2,largest=False,dim=-1,sorted=True) + nn_index2=torch.min(desc_dismat,dim=1).indices.squeeze(1) + inverse_ratio_score,nn_index1=values[:,:,1]/values[:,:,0],nn_index[:,:,0]#get inverse score + + #initial seeding + seed_index1,seed_index2=seeding(nn_index1,nn_index2,x1,x2,self.seed_top_k[0],inverse_ratio_score,self.conf_bar[0],\ + self.seed_radius_coe,test=test_mode) + + #position encoding + desc1,desc2=desc1.transpose(1,2),desc2.transpose(1,2) + if not self.use_score_encoding: + encode_x1,encode_x2=encode_x1[:,:,:2],encode_x2[:,:,:2] + encode_x1,encode_x2=encode_x1.transpose(1,2),encode_x2.transpose(1,2) + x1_pos_embedding, x2_pos_embedding = self.position_encoder(encode_x1), self.position_encoder(encode_x2) + aug_desc1, aug_desc2 = x1_pos_embedding + desc1, x2_pos_embedding + desc2 + + seed_weight_tower,mid_p_tower,seed_index_tower,nn_index_tower=[],[],[],[] + seed_index_tower.append(torch.stack([seed_index1, seed_index2],dim=-1)) + nn_index_tower.append(nn_index1) + + seed_para_index=0 + for i in range(self.layer_num): + #mid seeding + if i in self.seedlayer and i!= 0: + seed_para_index+=1 + aug_desc1,aug_desc2=self.mid_final_project(aug_desc1),self.mid_final_project(aug_desc2) + M=torch.matmul(aug_desc1.transpose(1,2),aug_desc2) + p=sink_algorithm(M,self.mid_dustbin[str(i)],self.sink_iter[seed_para_index-1]) + mid_p_tower.append(p) + #rematching with p + values,nn_index=torch.topk(p[:,:-1,:-1],k=1,dim=-1) + nn_index2=torch.max(p[:,:-1,:-1],dim=1).indices.squeeze(1) + p_match_score,nn_index1=values[:,:,0],nn_index[:,:,0] + #reseeding + seed_index1, seed_index2 = seeding(nn_index1,nn_index2,x1,x2,self.seed_top_k[seed_para_index],p_match_score,\ + self.conf_bar[seed_para_index],self.seed_radius_coe,test=test_mode) + seed_index_tower.append(torch.stack([seed_index1, seed_index2],dim=-1)), 
nn_index_tower.append(nn_index1) + if not test_mode and data['step']bhnm',query1,key1)/self.head_dim**0.5,dim=-1),\ + torch.softmax(torch.einsum('bdhn,bdhm->bhnm',query2,key2)/self.head_dim**0.5,dim=-1) + add_value1, add_value2 = torch.einsum('bhnm,bdhm->bdhn', score1, value1), torch.einsum('bhnm,bdhm->bdhn',score2, value2) + else: + score1,score2 = torch.softmax(torch.einsum('bdhn,bdhm->bhnm', query1, key2) / self.head_dim ** 0.5,dim=-1), \ + torch.softmax(torch.einsum('bdhn,bdhm->bhnm', query2, key1) / self.head_dim ** 0.5, dim=-1) + add_value1, add_value2 =torch.einsum('bhnm,bdhm->bdhn',score1,value2),torch.einsum('bhnm,bdhm->bdhn',score2,value1) + add_value1,add_value2=self.mh_filter(add_value1.contiguous().view(batch_size,self.head*self.head_dim,n)),self.mh_filter(add_value2.contiguous().view(batch_size,self.head*self.head_dim,m)) + fea11, fea22 = torch.cat([fea1, add_value1], dim=1), torch.cat([fea2, add_value2], dim=1) + fea1, fea2 = fea1+self.attention_filter(fea11), fea2+self.attention_filter(fea22) + + return fea1,fea2 + + +class matcher(nn.Module): + def __init__(self, config): + nn.Module.__init__(self) + self.use_score_encoding=config.use_score_encoding + self.layer_num=config.layer_num + self.sink_iter=config.sink_iter + self.position_encoder = nn.Sequential(nn.Conv1d(3, 32, kernel_size=1) if config.use_score_encoding else nn.Conv1d(2, 32, kernel_size=1), + nn.SyncBatchNorm(32), nn.ReLU(), + nn.Conv1d(32, 64, kernel_size=1), nn.SyncBatchNorm(64),nn.ReLU(), + nn.Conv1d(64, 128, kernel_size=1), nn.SyncBatchNorm(128), nn.ReLU(), + nn.Conv1d(128, 256, kernel_size=1), nn.SyncBatchNorm(256), nn.ReLU(), + nn.Conv1d(256, config.net_channels, kernel_size=1)) + + self.dustbin=nn.Parameter(torch.tensor(1,dtype=torch.float32,device='cuda')) + self.self_attention_block=nn.Sequential(*[attention_block(config.net_channels,config.head,'self') for _ in range(config.layer_num)]) + 
self.cross_attention_block=nn.Sequential(*[attention_block(config.net_channels,config.head,'cross') for _ in range(config.layer_num)]) + self.final_project=nn.Conv1d(config.net_channels, config.net_channels, kernel_size=1) + + def forward(self,data,test_mode=True): + desc1, desc2 = data['desc1'], data['desc2'] + desc1, desc2 = torch.nn.functional.normalize(desc1,dim=-1), torch.nn.functional.normalize(desc2,dim=-1) + desc1,desc2=desc1.transpose(1,2),desc2.transpose(1,2) + if test_mode: + encode_x1,encode_x2=data['x1'],data['x2'] + else: + encode_x1,encode_x2=data['aug_x1'], data['aug_x2'] + if not self.use_score_encoding: + encode_x1,encode_x2=encode_x1[:,:,:2],encode_x2[:,:,:2] + + encode_x1,encode_x2=encode_x1.transpose(1,2),encode_x2.transpose(1,2) + + x1_pos_embedding, x2_pos_embedding = self.position_encoder(encode_x1), self.position_encoder(encode_x2) + aug_desc1, aug_desc2 = x1_pos_embedding + desc1, x2_pos_embedding+desc2 + for i in range(self.layer_num): + aug_desc1,aug_desc2=self.self_attention_block[i](aug_desc1,aug_desc2) + aug_desc1,aug_desc2=self.cross_attention_block[i](aug_desc1,aug_desc2) + + aug_desc1,aug_desc2=self.final_project(aug_desc1),self.final_project(aug_desc2) + desc_mat = torch.matmul(aug_desc1.transpose(1, 2), aug_desc2) + p = sink_algorithm(desc_mat, self.dustbin,self.sink_iter[0]) + return {'p':p} + + diff --git a/third_party/SGMNet/superpoint/__init__.py b/third_party/SGMNet/superpoint/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..111c8882a7bc7512c6191ca86a0e71c3b1404233 --- /dev/null +++ b/third_party/SGMNet/superpoint/__init__.py @@ -0,0 +1 @@ +from .superpoint import SuperPoint \ No newline at end of file diff --git a/third_party/SGMNet/superpoint/superpoint.py b/third_party/SGMNet/superpoint/superpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..d4e3ce481409264a3188270ad01aa62b1614377f --- /dev/null +++ b/third_party/SGMNet/superpoint/superpoint.py @@ -0,0 +1,140 @@ 
+import torch +from torch import nn + + +def simple_nms(scores, nms_radius): + assert(nms_radius >= 0) + + def max_pool(x): + return torch.nn.functional.max_pool2d( + x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) + + zeros = torch.zeros_like(scores) + max_mask = scores == max_pool(scores) + for _ in range(2): + supp_mask = max_pool(max_mask.float()) > 0 + supp_scores = torch.where(supp_mask, zeros, scores) + new_max_mask = supp_scores == max_pool(supp_scores) + max_mask = max_mask | (new_max_mask & (~supp_mask)) + return torch.where(max_mask, scores, zeros) + + +def remove_borders(keypoints, scores, b, h, w): + mask_h = (keypoints[:, 0] >= b) & (keypoints[:, 0] < (h - b)) + mask_w = (keypoints[:, 1] >= b) & (keypoints[:, 1] < (w - b)) + mask = mask_h & mask_w + return keypoints[mask], scores[mask] + + +def top_k_keypoints(keypoints, scores, k): + if k >= len(keypoints): + return keypoints, scores + scores, indices = torch.topk(scores, k, dim=0) + return keypoints[indices], scores + + +def sample_descriptors(keypoints, descriptors, s): + b, c, h, w = descriptors.shape + keypoints = keypoints - s / 2 + 0.5 + keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)], + ).to(keypoints)[None] + keypoints = keypoints*2 - 1 # normalize to (-1, 1) + args = {'align_corners': True} if int(torch.__version__[2]) > 2 else {} + descriptors = torch.nn.functional.grid_sample( + descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args) + descriptors = torch.nn.functional.normalize( + descriptors.reshape(b, c, -1), p=2, dim=1) + return descriptors + + +class SuperPoint(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = {**config} + + self.relu = nn.ReLU(inplace=True) + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 + + self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) + self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) + self.conv2a = 
nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) + self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) + self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) + self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) + self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) + self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) + + self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) + self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) + + self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) + self.convDb = nn.Conv2d( + c5, self.config['descriptor_dim'], + kernel_size=1, stride=1, padding=0) + + self.load_state_dict(torch.load(config['model_path'])) + + mk = self.config['max_keypoints'] + if mk == 0 or mk < -1: + raise ValueError('\"max_keypoints\" must be positive or \"-1\"') + + print('Loaded SuperPoint model') + + def forward(self, data): + # Shared Encoder + x = self.relu(self.conv1a(data)) + x = self.relu(self.conv1b(x)) + x = self.pool(x) + x = self.relu(self.conv2a(x)) + x = self.relu(self.conv2b(x)) + x = self.pool(x) + x = self.relu(self.conv3a(x)) + x = self.relu(self.conv3b(x)) + x = self.pool(x) + x = self.relu(self.conv4a(x)) + x = self.relu(self.conv4b(x)) + # Compute the dense keypoint scores + cPa = self.relu(self.convPa(x)) + scores = self.convPb(cPa) + scores = torch.nn.functional.softmax(scores, 1)[:, :-1] + b, c, h, w = scores.shape + scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) + scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) + scores = simple_nms(scores, self.config['nms_radius']) + + # Extract keypoints + keypoints = [ + torch.nonzero(s > self.config['detection_threshold']) + for s in scores] + scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] + + # Discard keypoints near the image borders + keypoints, scores = list(zip(*[ + remove_borders(k, s, self.config['remove_borders'], h*8, w*8) 
+ for k, s in zip(keypoints, scores)])) + + # Keep the k keypoints with highest score + if self.config['max_keypoints'] >= 0: + keypoints, scores = list(zip(*[ + top_k_keypoints(k, s, self.config['max_keypoints']) + for k, s in zip(keypoints, scores)])) + + # Convert (h, w) to (x, y) + keypoints = [torch.flip(k, [1]).float() for k in keypoints] + + # Compute the dense descriptors + cDa = self.relu(self.convDa(x)) + descriptors = self.convDb(cDa) + descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1) + + # Extract descriptors + descriptors = [sample_descriptors(k[None], d[None], 8)[0] + for k, d in zip(keypoints, descriptors)] + + return { + 'keypoints': keypoints, + 'scores': scores, + 'descriptors': descriptors, + } diff --git a/third_party/SGMNet/train/config.py b/third_party/SGMNet/train/config.py new file mode 100644 index 0000000000000000000000000000000000000000..31c4c1c6deef3d6dd568897f4202d96456586376 --- /dev/null +++ b/third_party/SGMNet/train/config.py @@ -0,0 +1,126 @@ +import argparse + +def str2bool(v): + return v.lower() in ("true", "1") + + +arg_lists = [] +parser = argparse.ArgumentParser() + + +def add_argument_group(name): + arg = parser.add_argument_group(name) + arg_lists.append(arg) + return arg + + +# ----------------------------------------------------------------------------- +# Network +net_arg = add_argument_group("Network") +net_arg.add_argument( + "--model_name", type=str,default='SGM', help="" + "model for training") +net_arg.add_argument( + "--config_path", type=str,default='configs/sgm.yaml', help="" + "config path for model") + +# ----------------------------------------------------------------------------- +# Data +data_arg = add_argument_group("Data") +data_arg.add_argument( + "--rawdata_path", type=str, default='rawdata', help="" + "path for rawdata") +data_arg.add_argument( + "--dataset_path", type=str, default='dataset', help="" + "path for dataset") +data_arg.add_argument( + "--desc_path", type=str, 
default='desc', help="" + "path for descriptor(kpt) dir") +data_arg.add_argument( + "--num_kpt", type=int, default=1000, help="" + "number of kpt for training") +data_arg.add_argument( + "--input_normalize", type=str, default='img', help="" + "normalize type for input kpt, img or intrinsic") +data_arg.add_argument( + "--data_aug", type=str2bool, default=True, help="" + "apply kpt coordinate homography augmentation") +data_arg.add_argument( + "--desc_suffix", type=str, default='suffix', help="" + "desc file suffix") + + +# ----------------------------------------------------------------------------- +# Loss +loss_arg = add_argument_group("loss") +loss_arg.add_argument( + "--momentum", type=float, default=0.9, help="" + "momentum") +loss_arg.add_argument( + "--seed_loss_weight", type=float, default=250, help="" + "confidence loss weight for sgm") +loss_arg.add_argument( + "--mid_loss_weight", type=float, default=1, help="" + "midseeding loss weight for sgm") +loss_arg.add_argument( + "--inlier_th", type=float, default=5e-3, help="" + "inlier threshold for epipolar distance (for sgm and visualization)") + + +# ----------------------------------------------------------------------------- +# Training +train_arg = add_argument_group("Train") +train_arg.add_argument( + "--train_lr", type=float, default=1e-4, help="" + "learning rate") +train_arg.add_argument( + "--train_batch_size", type=int, default=16, help="" + "batch size") +train_arg.add_argument( + "--gpu_id", type=str,default='0', help='id(s) for CUDA_VISIBLE_DEVICES') +train_arg.add_argument( + "--train_iter", type=int, default=1000000, help="" + "training iterations to perform") +train_arg.add_argument( + "--log_base", type=str, default="./log/", help="" + "log path") +train_arg.add_argument( + "--val_intv", type=int, default=20000, help="" + "validation interval") +train_arg.add_argument( + "--save_intv", type=int, default=1000, help="" + "summary interval") +train_arg.add_argument( + "--log_intv", type=int, 
default=100, help="" + "log interval") +train_arg.add_argument( + "--decay_rate", type=float, default=0.999996, help="" + "lr decay rate") +train_arg.add_argument( + "--decay_iter", type=float, default=300000, help="" + "lr decay iter") +train_arg.add_argument( + "--local_rank", type=int, default=0, help="" + "local rank for ddp") +train_arg.add_argument( + "--train_vis_folder", type=str, default='.', help="" + "visualization folder during training") + +# ----------------------------------------------------------------------------- +# Visualization +vis_arg = add_argument_group('Visualization') +vis_arg.add_argument( + "--tqdm_width", type=int, default=79, help="" + "width of the tqdm bar" +) + +def get_config(): + config, unparsed = parser.parse_known_args() + return config, unparsed + + +def print_usage(): + parser.print_usage() + +# +# config.py ends here \ No newline at end of file diff --git a/third_party/SGMNet/train/configs/sg.yaml b/third_party/SGMNet/train/configs/sg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb03f39f9d8445b1e345d8f8f6ac17eb6d981bc1 --- /dev/null +++ b/third_party/SGMNet/train/configs/sg.yaml @@ -0,0 +1,5 @@ +net_channels: 128 +layer_num: 9 +head: 4 +use_score_encoding: True +p_th: 0.2 \ No newline at end of file diff --git a/third_party/SGMNet/train/configs/sgm.yaml b/third_party/SGMNet/train/configs/sgm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d674adf562a8932192a0a3bb1a993cf90d28e989 --- /dev/null +++ b/third_party/SGMNet/train/configs/sgm.yaml @@ -0,0 +1,12 @@ +seed_top_k: [128,128] +seed_radius_coe: 0.01 +net_channels: 128 +layer_num: 9 +head: 4 +seedlayer: [0,6] +use_mc_seeding: True +use_score_encoding: False +conf_bar: [1,0.1] +sink_iter: [10,100] +detach_iter: 140000 +p_th: 0.2 \ No newline at end of file diff --git a/third_party/SGMNet/train/dataset.py b/third_party/SGMNet/train/dataset.py new file mode 100644 index 
0000000000000000000000000000000000000000..d07a84e9588b755a86119363f08860187d1668c0 --- /dev/null +++ b/third_party/SGMNet/train/dataset.py @@ -0,0 +1,143 @@ +import numpy as np +import torch +import torch.utils.data as data +import cv2 +import os +import h5py +import random + +import sys +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) +sys.path.insert(0, ROOT_DIR) + +from utils import train_utils,evaluation_utils + +torch.multiprocessing.set_sharing_strategy('file_system') + + +class Offline_Dataset(data.Dataset): + def __init__(self,config,mode): + assert mode=='train' or mode=='valid' + + self.config = config + self.mode = mode + metadir=os.path.join(config.dataset_path,'valid') if mode=='valid' else os.path.join(config.dataset_path,'train') + + pair_num_list=np.loadtxt(os.path.join(metadir,'pair_num.txt'),dtype=str) + self.total_pairs=int(pair_num_list[0,1]) + self.pair_seq_list,self.accu_pair_num=train_utils.parse_pair_seq(pair_num_list) + + + def collate_fn(self, batch): + batch_size, num_pts = len(batch), batch[0]['x1'].shape[0] + + data = {} + dtype=['x1','x2','kpt1','kpt2','desc1','desc2','num_corr','num_incorr1','num_incorr2','e_gt','pscore1','pscore2','img_path1','img_path2'] + for key in dtype: + data[key]=[] + for sample in batch: + for key in dtype: + data[key].append(sample[key]) + + for key in ['x1', 'x2','kpt1','kpt2', 'desc1', 'desc2','e_gt','pscore1','pscore2']: + data[key] = torch.from_numpy(np.stack(data[key])).float() + for key in ['num_corr', 'num_incorr1', 'num_incorr2']: + data[key] = torch.from_numpy(np.stack(data[key])).int() + + # kpt augmentation with random homography + if (self.mode == 'train' and self.config.data_aug): + homo_mat = torch.from_numpy(train_utils.get_rnd_homography(batch_size)).unsqueeze(1) + aug_seed=random.random() + if aug_seed<0.5: + x1_homo = torch.cat([data['x1'], torch.ones([batch_size, num_pts, 1])], dim=-1).unsqueeze(-1) + x1_homo = torch.matmul(homo_mat.float(), 
x1_homo.float()).squeeze(-1) + data['aug_x1'] = x1_homo[:, :, :2] / x1_homo[:, :, 2].unsqueeze(-1) + data['aug_x2']=data['x2'] + else: + x2_homo = torch.cat([data['x2'], torch.ones([batch_size, num_pts, 1])], dim=-1).unsqueeze(-1) + x2_homo = torch.matmul(homo_mat.float(), x2_homo.float()).squeeze(-1) + data['aug_x2'] = x2_homo[:, :, :2] / x2_homo[:, :, 2].unsqueeze(-1) + data['aug_x1']=data['x1'] + else: + data['aug_x1'],data['aug_x2']=data['x1'],data['x2'] + return data + + + def __getitem__(self, index): + seq=self.pair_seq_list[index] + index_within_seq=index-self.accu_pair_num[seq] + + with h5py.File(os.path.join(self.config.dataset_path,seq,'info.h5py'),'r') as data: + R,t = data['dR'][str(index_within_seq)][()], data['dt'][str(index_within_seq)][()] + egt = np.reshape(np.matmul(np.reshape(evaluation_utils.np_skew_symmetric(t.astype('float64').reshape(1, 3)), (3, 3)),np.reshape(R.astype('float64'), (3, 3))), (3, 3)) + egt = egt / np.linalg.norm(egt) + K1, K2 = data['K1'][str(index_within_seq)][()],data['K2'][str(index_within_seq)][()] + size1,size2=data['size1'][str(index_within_seq)][()],data['size2'][str(index_within_seq)][()] + + img_path1,img_path2=data['img_path1'][str(index_within_seq)][()][0].decode(),data['img_path2'][str(index_within_seq)][()][0].decode() + img_name1,img_name2=img_path1.split('/')[-1],img_path2.split('/')[-1] + img_path1,img_path2=os.path.join(self.config.rawdata_path,img_path1),os.path.join(self.config.rawdata_path,img_path2) + fea_path1,fea_path2=os.path.join(self.config.desc_path,seq,img_name1+self.config.desc_suffix),\ + os.path.join(self.config.desc_path,seq,img_name2+self.config.desc_suffix) + with h5py.File(fea_path1,'r') as fea1, h5py.File(fea_path2,'r') as fea2: + desc1,kpt1,pscore1=fea1['descriptors'][()],fea1['keypoints'][()][:,:2],fea1['keypoints'][()][:,2] + desc2,kpt2,pscore2=fea2['descriptors'][()],fea2['keypoints'][()][:,:2],fea2['keypoints'][()][:,2] + 
kpt1,kpt2,desc1,desc2=kpt1[:self.config.num_kpt],kpt2[:self.config.num_kpt],desc1[:self.config.num_kpt],desc2[:self.config.num_kpt] + + # normalize kpt + if self.config.input_normalize=='intrinsic': + x1, x2 = np.concatenate([kpt1, np.ones([kpt1.shape[0], 1])], axis=-1), np.concatenate( + [kpt2, np.ones([kpt2.shape[0], 1])], axis=-1) + x1, x2 = np.matmul(np.linalg.inv(K1), x1.T).T[:, :2], np.matmul(np.linalg.inv(K2), x2.T).T[:, :2] + elif self.config.input_normalize=='img' : + x1,x2=(kpt1-size1/2)/size1,(kpt2-size2/2)/size2 + S1_inv,S2_inv=np.asarray([[size1[0],0,0.5*size1[0]],[0,size1[1],0.5*size1[1]],[0,0,1]]),\ + np.asarray([[size2[0],0,0.5*size2[0]],[0,size2[1],0.5*size2[1]],[0,0,1]]) + M1,M2=np.matmul(np.linalg.inv(K1),S1_inv),np.matmul(np.linalg.inv(K2),S2_inv) + egt=np.matmul(np.matmul(M2.transpose(),egt),M1) + egt = egt / np.linalg.norm(egt) + else: + raise NotImplementedError + + corr=data['corr'][str(index_within_seq)][()] + incorr1,incorr2=data['incorr1'][str(index_within_seq)][()],data['incorr2'][str(index_within_seq)][()] + + #permute kpt + valid_corr=corr[corr.max(axis=-1)= cur_kpt1): + sub_idx1 =np.random.choice(len(invalid_index1), cur_kpt1,replace=False) + if (invalid_index2.shape[0] < cur_kpt2): + sub_idx2 = np.concatenate([np.arange(len(invalid_index2)),np.random.randint(len(invalid_index2),size=cur_kpt2-len(invalid_index2))]) + if (invalid_index2.shape[0] >= cur_kpt2): + sub_idx2 = np.random.choice(len(invalid_index2), cur_kpt2,replace=False) + + per_idx1,per_idx2=np.concatenate([valid_corr[:,0],valid_incorr1,invalid_index1[sub_idx1]]),\ + np.concatenate([valid_corr[:,1],valid_incorr2,invalid_index2[sub_idx2]]) + + pscore1,pscore2=pscore1[per_idx1][:,np.newaxis],pscore2[per_idx2][:,np.newaxis] + x1,x2=x1[per_idx1][:,:2],x2[per_idx2][:,:2] + desc1,desc2=desc1[per_idx1],desc2[per_idx2] + kpt1,kpt2=kpt1[per_idx1],kpt2[per_idx2] + + return {'x1': x1, 'x2': x2, 'kpt1':kpt1,'kpt2':kpt2,'desc1': desc1, 'desc2': desc2, 'num_corr': num_corr, 
'num_incorr1': num_incorr1,'num_incorr2': num_incorr2,'e_gt':egt,\ + 'pscore1':pscore1,'pscore2':pscore2,'img_path1':img_path1,'img_path2':img_path2} + + def __len__(self): + return self.total_pairs + + diff --git a/third_party/SGMNet/train/loss.py b/third_party/SGMNet/train/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..fad4234fc5827321c31e72c08ad4a3466bad1c30 --- /dev/null +++ b/third_party/SGMNet/train/loss.py @@ -0,0 +1,125 @@ +import torch +import numpy as np + + +def batch_episym(x1, x2, F): + batch_size, num_pts = x1.shape[0], x1.shape[1] + x1 = torch.cat([x1, x1.new_ones(batch_size, num_pts,1)], dim=-1).reshape(batch_size, num_pts,3,1) + x2 = torch.cat([x2, x2.new_ones(batch_size, num_pts,1)], dim=-1).reshape(batch_size, num_pts,3,1) + F = F.reshape(-1,1,3,3).repeat(1,num_pts,1,1) + x2Fx1 = torch.matmul(x2.transpose(2,3), torch.matmul(F, x1)).reshape(batch_size,num_pts) + Fx1 = torch.matmul(F,x1).reshape(batch_size,num_pts,3) + Ftx2 = torch.matmul(F.transpose(2,3),x2).reshape(batch_size,num_pts,3) + ys = (x2Fx1**2 * ( + 1.0 / (Fx1[:, :, 0]**2 + Fx1[:, :, 1]**2 + 1e-15) + + 1.0 / (Ftx2[:, :, 0]**2 + Ftx2[:, :, 1]**2 + 1e-15))).sqrt() + return ys + + +def CELoss(seed_x1,seed_x2,e,confidence,inlier_th,batch_mask=1): + #seed_x: b*k*2 + ys=batch_episym(seed_x1,seed_x2,e) + mask_pos,mask_neg=(ys<=inlier_th).float(),(ys>inlier_th).float() + num_pos,num_neg=torch.relu(torch.sum(mask_pos, dim=1) - 1.0) + 1.0,torch.relu(torch.sum(mask_neg, dim=1) - 1.0) + 1.0 + loss_pos,loss_neg=-torch.log(abs(confidence) + 1e-8)*mask_pos,-torch.log(abs(1-confidence)+1e-8)*mask_neg + classif_loss = torch.mean(loss_pos * 0.5 / num_pos.unsqueeze(-1) + loss_neg * 0.5 / num_neg.unsqueeze(-1),dim=-1) + classif_loss =classif_loss*batch_mask + classif_loss=classif_loss.mean() + precision = torch.mean( + torch.sum((confidence > 0.5).type(confidence.type()) * mask_pos, dim=1) / + (torch.sum((confidence > 0.5).type(confidence.type()), dim=1)+1e-8) + ) + recall = 
torch.mean( + torch.sum((confidence > 0.5).type(confidence.type()) * mask_pos, dim=1) / + num_pos + ) + return classif_loss,precision,recall + + +def CorrLoss(desc_mat,batch_num_corr,batch_num_incorr1,batch_num_incorr2): + total_loss_corr,total_loss_incorr=0,0 + total_acc_corr,total_acc_incorr=0,0 + batch_size = desc_mat.shape[0] + log_p=torch.log(abs(desc_mat)+1e-8) + + for i in range(batch_size): + cur_log_p=log_p[i] + num_corr=batch_num_corr[i] + num_incorr1,num_incorr2=batch_num_incorr1[i],batch_num_incorr2[i] + + #loss and acc + loss_corr = -torch.diag(cur_log_p)[:num_corr].mean() + loss_incorr=(-cur_log_p[num_corr:num_corr+num_incorr1,-1].mean()-cur_log_p[-1,num_corr:num_corr+num_incorr2].mean())/2 + + value_row, row_index = torch.max(desc_mat[i,:-1,:-1], dim=-1) + value_col, col_index = torch.max(desc_mat[i,:-1,:-1], dim=-2) + acc_incorr=((value_row[num_corr:num_corr+num_incorr1]<0.2).float().mean()+ + (value_col[num_corr:num_corr+num_incorr2]<0.2).float().mean())/2 + + acc_row_mask = row_index[:num_corr] == torch.arange(num_corr).cuda() + acc_col_mask = col_index[:num_corr] == torch.arange(num_corr).cuda() + acc = (acc_col_mask & acc_row_mask).float().mean() + + total_loss_corr+=loss_corr + total_loss_incorr+=loss_incorr + total_acc_corr += acc + total_acc_incorr+=acc_incorr + + total_acc_corr/=batch_size + total_acc_incorr/=batch_size + total_loss_corr/=batch_size + total_loss_incorr/=batch_size + return total_loss_corr,total_loss_incorr,total_acc_corr,total_acc_incorr + + +class SGMLoss: + def __init__(self,config,model_config): + self.config=config + self.model_config=model_config + + def run(self,data,result): + loss_corr,loss_incorr,acc_corr,acc_incorr=CorrLoss(result['p'],data['num_corr'],data['num_incorr1'],data['num_incorr2']) + loss_mid_corr_tower,loss_mid_incorr_tower,acc_mid_tower=[],[],[] + + #mid loss + for i in range(len(result['mid_p'])): + mid_p=result['mid_p'][i] + 
loss_mid_corr,loss_mid_incorr,mid_acc_corr,mid_acc_incorr=CorrLoss(mid_p,data['num_corr'],data['num_incorr1'],data['num_incorr2']) + loss_mid_corr_tower.append(loss_mid_corr),loss_mid_incorr_tower.append(loss_mid_incorr),acc_mid_tower.append(mid_acc_corr) + if len(result['mid_p']) != 0: + loss_mid_corr_tower,loss_mid_incorr_tower, acc_mid_tower = torch.stack(loss_mid_corr_tower), torch.stack(loss_mid_incorr_tower), torch.stack(acc_mid_tower) + else: + loss_mid_corr_tower,loss_mid_incorr_tower, acc_mid_tower= torch.zeros(1).cuda(), torch.zeros(1).cuda(),torch.zeros(1).cuda() + + #seed confidence loss + classif_loss_tower,classif_precision_tower,classif_recall_tower=[],[],[] + for layer in range(len(result['seed_conf'])): + confidence=result['seed_conf'][layer] + seed_index=result['seed_index'][(np.asarray(self.model_config.seedlayer)<=layer).nonzero()[0][-1]] + seed_x1,seed_x2=data['x1'].gather(dim=1, index=seed_index[:,:,0,None].expand(-1, -1,2)),\ + data['x2'].gather(dim=1, index=seed_index[:,:,1,None].expand(-1, -1,2)) + classif_loss,classif_precision,classif_recall=CELoss(seed_x1,seed_x2,data['e_gt'],confidence,self.config.inlier_th) + classif_loss_tower.append(classif_loss), classif_precision_tower.append(classif_precision), classif_recall_tower.append(classif_recall) + classif_loss, classif_precision_tower, classif_recall_tower=torch.stack(classif_loss_tower).mean(),torch.stack(classif_precision_tower), \ + torch.stack(classif_recall_tower) + + + classif_loss*=self.config.seed_loss_weight + loss_mid_corr_tower*=self.config.mid_loss_weight + loss_mid_incorr_tower*=self.config.mid_loss_weight + total_loss=loss_corr+loss_incorr+classif_loss+loss_mid_corr_tower.sum()+loss_mid_incorr_tower.sum() + + return {'loss_corr':loss_corr,'loss_incorr':loss_incorr,'acc_corr':acc_corr,'acc_incorr':acc_incorr,'loss_seed_conf':classif_loss, + 'pre_seed_conf':classif_precision_tower,'recall_seed_conf':classif_recall_tower,'loss_corr_mid':loss_mid_corr_tower, + 
'loss_incorr_mid':loss_mid_incorr_tower,'mid_acc_corr':acc_mid_tower,'total_loss':total_loss} + +class SGLoss: + def __init__(self,config,model_config): + self.config=config + self.model_config=model_config + + def run(self,data,result): + loss_corr,loss_incorr,acc_corr,acc_incorr=CorrLoss(result['p'],data['num_corr'],data['num_incorr1'],data['num_incorr2']) + total_loss=loss_corr+loss_incorr + return {'loss_corr':loss_corr,'loss_incorr':loss_incorr,'acc_corr':acc_corr,'acc_incorr':acc_incorr,'total_loss':total_loss} + \ No newline at end of file diff --git a/third_party/SGMNet/train/main.py b/third_party/SGMNet/train/main.py new file mode 100644 index 0000000000000000000000000000000000000000..9d4c8fff432a3b2d58c82b9e5f2897a4e702b2dd --- /dev/null +++ b/third_party/SGMNet/train/main.py @@ -0,0 +1,61 @@ +import torch.utils.data +from dataset import Offline_Dataset +import yaml +from sgmnet.match_model import matcher as SGM_Model +from superglue.match_model import matcher as SG_Model +import torch.distributed as dist +import torch +import os +from collections import namedtuple +from train import train +from config import get_config, print_usage + + +def main(config,model_config): + """The main function.""" + # Initialize network + if config.model_name=='SGM': + model = SGM_Model(model_config) + elif config.model_name=='SG': + model= SG_Model(model_config) + else: + raise NotImplementedError + + #initialize ddp + torch.cuda.set_device(config.local_rank) + device = torch.device(f'cuda:{config.local_rank}') + model.to(device) + dist.init_process_group(backend='nccl',init_method='env://') + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.local_rank]) + + if config.local_rank==0: + os.system('nvidia-smi') + + #initialize dataset + train_dataset = Offline_Dataset(config,'train') + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset,shuffle=True) + train_loader=torch.utils.data.DataLoader(train_dataset, 
batch_size=config.train_batch_size//torch.distributed.get_world_size(), + num_workers=8//dist.get_world_size(), pin_memory=False,sampler=train_sampler,collate_fn=train_dataset.collate_fn) + + valid_dataset = Offline_Dataset(config,'valid') + valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset,shuffle=False) + valid_loader=torch.utils.data.DataLoader(valid_dataset, batch_size=config.train_batch_size, + num_workers=8//dist.get_world_size(), pin_memory=False,collate_fn=valid_dataset.collate_fn,sampler=valid_sampler) + + if config.local_rank==0: + print('start training .....') + train(model,train_loader, valid_loader, config,model_config) + +if __name__ == "__main__": + # ---------------------------------------- + # Parse configuration + config, unparsed = get_config() + with open(config.config_path, 'r') as f: + model_config = yaml.load(f) + model_config=namedtuple('model_config',model_config.keys())(*model_config.values()) + # If we have unparsed arguments, print usage and exit + if len(unparsed) > 0: + print_usage() + exit(1) + + main(config,model_config) diff --git a/third_party/SGMNet/train/train.py b/third_party/SGMNet/train/train.py new file mode 100644 index 0000000000000000000000000000000000000000..31e848e1d2e5f028d4ff3abaf0cc446be7d89c65 --- /dev/null +++ b/third_party/SGMNet/train/train.py @@ -0,0 +1,160 @@ +import torch +import torch.optim as optim +from tqdm import trange +import os +from tensorboardX import SummaryWriter +import numpy as np +import cv2 +from loss import SGMLoss,SGLoss +from valid import valid,dump_train_vis + +import sys +ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, ROOT_DIR) + + +from utils import train_utils + +def train_step(optimizer, model, match_loss, data,step,pre_avg_loss): + data['step']=step + result=model(data,test_mode=False) + loss_res=match_loss.run(data,result) + + optimizer.zero_grad() + loss_res['total_loss'].backward() + #apply reduce on all 
record tensor + for key in loss_res.keys(): + loss_res[key]=train_utils.reduce_tensor(loss_res[key],'mean') + + if loss_res['total_loss']<7*pre_avg_loss or step<200 or pre_avg_loss==0: + optimizer.step() + unusual_loss=False + else: + optimizer.zero_grad() + unusual_loss=True + return loss_res,unusual_loss + + +def train(model, train_loader, valid_loader, config,model_config): + model.train() + optimizer = optim.Adam(model.parameters(), lr=config.train_lr) + + if config.model_name=='SGM': + match_loss = SGMLoss(config,model_config) + elif config.model_name=='SG': + match_loss= SGLoss(config,model_config) + else: + raise NotImplementedError + + checkpoint_path = os.path.join(config.log_base, 'checkpoint.pth') + config.resume = os.path.isfile(checkpoint_path) + if config.resume: + if config.local_rank==0: + print('==> Resuming from checkpoint..') + checkpoint = torch.load(checkpoint_path,map_location='cuda:{}'.format(config.local_rank)) + model.load_state_dict(checkpoint['state_dict']) + best_acc = checkpoint['best_acc'] + start_step = checkpoint['step'] + optimizer.load_state_dict(checkpoint['optimizer']) + else: + best_acc = -1 + start_step = 0 + train_loader_iter = iter(train_loader) + + if config.local_rank==0: + writer=SummaryWriter(os.path.join(config.log_base,'log_file')) + + train_loader.sampler.set_epoch(start_step*config.train_batch_size//len(train_loader.dataset)) + pre_avg_loss=0 + + progress_bar=trange(start_step, config.train_iter,ncols=config.tqdm_width) if config.local_rank==0 else range(start_step, config.train_iter) + for step in progress_bar: + try: + train_data = next(train_loader_iter) + except StopIteration: + if config.local_rank==0: + print('epoch: ',step*config.train_batch_size//len(train_loader.dataset)) + train_loader.sampler.set_epoch(step*config.train_batch_size//len(train_loader.dataset)) + train_loader_iter = iter(train_loader) + train_data = next(train_loader_iter) + + train_data = train_utils.tocuda(train_data) + 
lr=min(config.train_lr*config.decay_rate**(step-config.decay_iter),config.train_lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + # run training + loss_res,unusual_loss = train_step(optimizer, model, match_loss, train_data,step-start_step,pre_avg_loss) + if (step-start_step)<=200: + pre_avg_loss=loss_res['total_loss'].data + if (step-start_step)>200 and not unusual_loss: + pre_avg_loss=pre_avg_loss.data*0.9+loss_res['total_loss'].data*0.1 + if unusual_loss and config.local_rank==0: + print('unusual loss! pre_avg_loss: ',pre_avg_loss,'cur_loss: ',loss_res['total_loss'].data) + #log + if config.local_rank==0 and step%config.log_intv==0 and not unusual_loss: + writer.add_scalar('TotalLoss',loss_res['total_loss'],step) + writer.add_scalar('CorrLoss',loss_res['loss_corr'],step) + writer.add_scalar('InCorrLoss', loss_res['loss_incorr'], step) + writer.add_scalar('dustbin', model.module.dustbin, step) + + if config.model_name=='SGM': + writer.add_scalar('SeedConfLoss', loss_res['loss_seed_conf'], step) + writer.add_scalar('MidCorrLoss', loss_res['loss_corr_mid'].sum(), step) + writer.add_scalar('MidInCorrLoss', loss_res['loss_incorr_mid'].sum(), step) + + + # valid ans save + b_save = ((step + 1) % config.save_intv) == 0 + b_validate = ((step + 1) % config.val_intv) == 0 + if b_validate: + total_loss,acc_corr,acc_incorr,seed_precision_tower,seed_recall_tower,acc_mid=valid(valid_loader, model, match_loss, config,model_config) + if config.local_rank==0: + writer.add_scalar('ValidAcc', acc_corr, step) + writer.add_scalar('ValidLoss', total_loss, step) + + if config.model_name=='SGM': + for i in range(len(seed_recall_tower)): + writer.add_scalar('seed_conf_pre_%d'%i,seed_precision_tower[i],step) + writer.add_scalar('seed_conf_recall_%d' % i, seed_precision_tower[i], step) + for i in range(len(acc_mid)): + writer.add_scalar('acc_mid%d'%i,acc_mid[i],step) + print('acc_corr: ',acc_corr.data,'acc_incorr: ',acc_incorr.data,'seed_conf_pre: 
',seed_precision_tower.mean().data, + 'seed_conf_recall: ',seed_recall_tower.mean().data,'acc_mid: ',acc_mid.mean().data) + else: + print('acc_corr: ',acc_corr.data,'acc_incorr: ',acc_incorr.data) + + #saving best + if acc_corr > best_acc: + print("Saving best model with va_res = {}".format(acc_corr)) + best_acc = acc_corr + save_dict={'step': step + 1, + 'state_dict': model.state_dict(), + 'best_acc': best_acc, + 'optimizer' : optimizer.state_dict()} + save_dict.update(save_dict) + torch.save(save_dict, os.path.join(config.log_base, 'model_best.pth')) + + if b_save: + if config.local_rank==0: + save_dict={'step': step + 1, + 'state_dict': model.state_dict(), + 'best_acc': best_acc, + 'optimizer' : optimizer.state_dict()} + torch.save(save_dict, checkpoint_path) + + #draw match results + model.eval() + with torch.no_grad(): + if config.local_rank==0: + if not os.path.exists(os.path.join(config.train_vis_folder,'train_vis')): + os.mkdir(os.path.join(config.train_vis_folder,'train_vis')) + if not os.path.exists(os.path.join(config.train_vis_folder,'train_vis',config.log_base)): + os.mkdir(os.path.join(config.train_vis_folder,'train_vis',config.log_base)) + os.mkdir(os.path.join(config.train_vis_folder,'train_vis',config.log_base,str(step))) + res=model(train_data) + dump_train_vis(res,train_data,step,config) + model.train() + + if config.local_rank==0: + writer.close() diff --git a/third_party/SGMNet/train/train_sg.sh b/third_party/SGMNet/train/train_sg.sh new file mode 100644 index 0000000000000000000000000000000000000000..a6ba093dfcaad6005520b65a068c60d7e93b03f8 --- /dev/null +++ b/third_party/SGMNet/train/train_sg.sh @@ -0,0 +1,10 @@ +OMP_NUM_THREADS=2 CUDA_VISIBLE_DEVICES='0' python -m torch.distributed.launch --nproc_per_node=1 --master_port 23003 main.py \ +--model_name=SG \ +--config_path=configs/sg.yaml \ +--rawdata_path=rawdata \ +--desc_path=desc_path \ +--desc_suffix=_root_1000.hdf5 \ +--dataset_path=dataset_path \ +--log_base=log_root_1k_sg \ 
def valid(valid_loader, model,match_loss, config,model_config):
    """Run one validation pass and return the aggregated metrics.

    The model is switched to eval mode, every batch of ``valid_loader`` is
    pushed through ``model`` and ``match_loss.run`` with gradients disabled,
    and the model is switched back to train mode before returning.

    Returns a 6-tuple ``(total_loss, total_acc_corr, total_acc_incorr,
    total_precision, total_recall, total_acc_mid)``.  All entries except
    ``total_loss`` are divided by the number of batches; ``total_loss`` stays
    a running sum.  Every entry is passed through
    ``train_utils.reduce_tensor`` before being returned.

    NOTE(review): an empty ``valid_loader`` leaves ``num_pair`` at 0, so the
    averaging step divides by zero.
    """
    model.eval()
    loader_iter = iter(valid_loader)
    num_pair = 0
    total_loss,total_acc_corr,total_acc_incorr=0,0,0
    # These accumulators are allocated on the CUDA device up front; they are
    # only updated for the 'SGM' model and stay all-zero otherwise.
    total_precision,total_recall=torch.zeros(model_config.layer_num ,device='cuda'),\
                                 torch.zeros(model_config.layer_num ,device='cuda')
    total_acc_mid=torch.zeros(len(model_config.seedlayer)-1,device='cuda')

    with torch.no_grad():
        # Only the rank-0 process shows a progress bar.
        if config.local_rank==0:
            loader_iter=tqdm(loader_iter)
            print('validating...')
        for test_data in loader_iter:
            num_pair+= 1
            test_data = train_utils.tocuda(test_data)
            res= model(test_data)
            loss_res=match_loss.run(test_data,res)

            total_acc_corr+=loss_res['acc_corr']
            total_acc_incorr+=loss_res['acc_incorr']
            total_loss+=loss_res['total_loss']

            # Seed-related statistics are read from the loss result only for SGM.
            if config.model_name=='SGM':
                total_acc_mid+=loss_res['mid_acc_corr']
                total_precision,total_recall=total_precision+loss_res['pre_seed_conf'],total_recall+loss_res['recall_seed_conf']

        # Turn the running sums into per-batch means (total_loss is left as a sum).
        total_acc_corr/=num_pair
        total_acc_incorr /= num_pair
        total_precision/=num_pair
        total_recall/=num_pair
        total_acc_mid/=num_pair

        #apply tensor reduction
        # presumably combines the values across distributed workers -- confirm
        # against train_utils.reduce_tensor; the loss uses 'sum', the rest 'mean'.
        total_loss,total_acc_corr,total_acc_incorr,total_precision,total_recall,total_acc_mid=train_utils.reduce_tensor(total_loss,'sum'),\
                        train_utils.reduce_tensor(total_acc_corr,'mean'),train_utils.reduce_tensor(total_acc_incorr,'mean'),\
                        train_utils.reduce_tensor(total_precision,'mean'),train_utils.reduce_tensor(total_recall,'mean'),train_utils.reduce_tensor(total_acc_mid,'mean')
    model.train()
    return total_loss,total_acc_corr,total_acc_incorr,total_precision,total_recall,total_acc_mid
def reprojection_2s(kp1, kp2,depth1, depth2, K1, K2, dR, dt, size1,size2):
    """Reproject keypoints in both directions between two views.

    ``size1`` / ``size2`` are the image sizes given as (H, W).  For each view
    a diagonal image-to-depth scaling matrix is built from the ratio between
    the depth map shape and the image size, then ``reprojection`` is called
    once with ``(dR, dt)`` to warp ``kp1`` into view 2 and once with the
    inverse motion ``(dR.T, -dR.T @ dt)`` to warp ``kp2`` into view 1.

    Returns ``[kp1_2_proj, kp2_1_proj], [valid1_2, valid2_1]``.
    """
    def _img2depth(depth, size):
        # diag(depth_rows / image_rows, depth_cols / image_cols, 1)
        rows, cols = depth.shape[0], depth.shape[1]
        ratios = [float(rows) / size[0], float(cols) / size[1], 1]
        return np.diag(np.asarray(ratios))

    K1_img2depth = _img2depth(depth1, size1)
    K2_img2depth = _img2depth(depth2, size2)

    # view 1 -> view 2 with the given relative pose
    kp1_2_proj, valid1_2 = reprojection(depth1, kp1, dR, dt, K1_img2depth, K1, K2)

    # view 2 -> view 1 with the inverted pose
    inv_rotation = dR.T
    inv_translation = -np.matmul(dR.T, dt)
    kp2_1_proj, valid2_1 = reprojection(depth2, kp2, inv_rotation, inv_translation,
                                        K2_img2depth, K2, K1)

    return [kp1_2_proj, kp2_1_proj], [valid1_2, valid2_1]
[kp1_2,kp2_1],[valid1_2,valid2_1]=reprojection_2s(kp1,kp2,depth1,depth2,K1,K2,dR,dt,size1,size2) + num_pts1, num_pts2 = kp1.shape[0], kp2.shape[0] + #reprojection error + dis_mat1=np.sqrt(abs((kp1 ** 2).sum(1,keepdims=True) + (kp2_1 ** 2).sum(1,keepdims=False)[np.newaxis] - 2 * np.matmul(kp1, kp2_1.T))) + dis_mat2 =np.sqrt(abs((kp2 ** 2).sum(1,keepdims=True) + (kp1_2 ** 2).sum(1,keepdims=False)[np.newaxis] - 2 * np.matmul(kp2,kp1_2.T))) + repro_error = np.maximum(dis_mat1,dis_mat2.T) #n1*n2 + + # find corr index + nn_sort1 = np.argmin(repro_error, axis=1) + nn_sort2 = np.argmin(repro_error, axis=0) + mask_mutual = nn_sort2[nn_sort1] == np.arange(kp1.shape[0]) + mask_inlier=np.take_along_axis(repro_error,indices=nn_sort1[:,np.newaxis],axis=-1).squeeze(1)1,mask_samepos2.sum(-1)>1) + duplicated_index=np.nonzero(duplicated_mask)[0] + + unique_corr_index=corr_index[~duplicated_mask] + clean_duplicated_corr=[] + for index in duplicated_index: + cur_desc1, cur_desc2 = desc1[mask_samepos1[index]], desc2[mask_samepos2[index]] + cur_desc_mat = np.matmul(cur_desc1, cur_desc2.T) + cur_max_index =[np.argmax(cur_desc_mat)//cur_desc_mat.shape[1],np.argmax(cur_desc_mat)%cur_desc_mat.shape[1]] + clean_duplicated_corr.append(np.stack([np.arange(num_pts1)[mask_samepos1[index]][cur_max_index[0]], + np.arange(num_pts2)[mask_samepos2[index]][cur_max_index[1]]])) + + clean_corr_index=unique_corr_index + if len(clean_duplicated_corr)!=0: + clean_duplicated_corr=np.stack(clean_duplicated_corr,axis=0) + clean_corr_index=np.concatenate([clean_corr_index,clean_duplicated_corr],axis=0) + else: + clean_corr_index=corr_index + # find incorr + mask_incorr1 = np.min(dis_mat2.T[valid1_2], axis=-1) > incorr_th + mask_incorr2 = np.min(dis_mat1.T[valid2_1], axis=-1) > incorr_th + incorr_index1, incorr_index2 = np.arange(num_pts1)[valid1_2][mask_incorr1.squeeze()], \ + np.arange(num_pts2)[valid2_1][mask_incorr2.squeeze()] + + return clean_corr_index,incorr_index1,incorr_index2 + diff --git 
def normalize_size(x,size,scale=1):
    """Centre coordinates on the image and scale them by its longer side.

    Args:
        x: array of coordinates that broadcasts against a ``(1, 2)`` row.
        size: two-element image size; any array-like (ndarray, list or
            tuple) is accepted and coerced with ``np.asarray``.
        scale: extra factor multiplied into the normalisation denominator.

    Returns:
        ``(x - size / 2 + 0.5) / (max(size) * scale)``.
    """
    # asarray lets plain sequences through; an ndarray input is used as before.
    size=np.asarray(size).reshape([1,2])
    norm_fac=size.max()
    return (x-size/2+0.5)/(norm_fac*scale)
def line_to_border(line,size):
    """Intersect the line ``a*x + b*y + c = 0`` with the image border.

    Args:
        line: ``(a, b, c)`` coefficients of the line.
        size: image size as ``(W, H)``.

    Returns:
        A ``(2, 2)`` array holding the two border intersections as ``[x, y]``
        rows, or ``None`` when the number of intersections found inside the
        border is not exactly two.
    """
    width, height = size[0], size[1]
    a, b, c = line[0], line[1], line[2]
    x_max, y_max = width - 1, height - 1

    # Nudge the denominators away from zero, keeping their sign.
    safe_a = a + (1e-8 if a >= 0 else -1e-8)
    safe_b = b + (1e-8 if b >= 0 else -1e-8)

    y_left = -c / safe_b
    y_right = (-c - a * x_max) / safe_b
    x_top = -c / safe_a
    x_down = (-c - b * y_max) / safe_a

    # (coordinate to range-check, upper limit, resulting border point),
    # listed in the order left, right, top, bottom.
    candidates = (
        (y_left, y_max, [0, y_left]),
        (y_right, y_max, [x_max, y_right]),
        (x_top, x_max, [x_top, 0]),
        (x_down, x_max, [x_down, y_max]),
    )
    hits = [point for coord, limit, point in candidates if 0 <= coord <= limit]

    if len(hits) != 2:
        return None
    return np.asarray(hits)
def compute_SGD(F1,F2,size1,size2):
    """Average the distance reported by ``SGD_oneiter`` over random samples.

    The global NumPy RNG is seeded with 1234, then up to 10000 attempts are
    made.  Each attempt evaluates ``SGD_oneiter`` in both argument orders
    (``F1, F2`` and ``F2, F1``); an attempt is discarded as soon as either
    call returns a negative value.  Sampling stops once 1000 attempts have
    been accepted.

    Returns:
        The mean of the accepted ``(d1 + d2) / 2`` values, or ``1`` when no
        attempt was accepted.
    """
    np.random.seed(1234)
    wanted = 1000
    attempts = wanted * 10
    accepted, total = 0, 0
    for _ in range(attempts):
        forward = SGD_oneiter(F1, F2, size1, size2)
        if forward < 0:
            continue
        backward = SGD_oneiter(F2, F1, size1, size2)
        if backward < 0:
            continue
        accepted += 1
        total += (forward + backward) / 2
        if accepted == wanted:
            break
    if accepted == 0:
        return 1
    return total / accepted
+ +Matrices (M) can be inverted using numpy.linalg.inv(M), be concatenated using +numpy.dot(M0, M1), or transform homogeneous coordinate arrays (v) using +numpy.dot(M, v) for shape (4, \*) column vectors, respectively +numpy.dot(v, M.T) for shape (\*, 4) row vectors ("array of points"). + +This module follows the "column vectors on the right" and "row major storage" +(C contiguous) conventions. The translation components are in the right column +of the transformation matrix, i.e. M[:3, 3]. +The transpose of the transformation matrices may have to be used to interface +with other graphics systems, e.g. with OpenGL's glMultMatrixd(). See also [16]. + +Calculations are carried out with numpy.float64 precision. + +Vector, point, quaternion, and matrix function arguments are expected to be +"array like", i.e. tuple, list, or numpy arrays. + +Return types are numpy arrays unless specified otherwise. + +Angles are in radians unless specified otherwise. + +Quaternions w+ix+jy+kz are represented as [w, x, y, z]. + +A triple of Euler angles can be applied/interpreted in 24 ways, which can +be specified using a 4 character string or encoded 4-tuple: + + *Axes 4-string*: e.g. 'sxyz' or 'ryxy' + + - first character : rotations are applied to 's'tatic or 'r'otating frame + - remaining characters : successive rotation axis 'x', 'y', or 'z' + + *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1) + + - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix. + - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed + by 'z', or 'z' is followed by 'x'. Otherwise odd (1). + - repetition : first and last axis are same (1) or different (0). + - frame : rotations are applied to static (0) or rotating (1) frame. + +Other Python packages and modules for 3D transformations and quaternions: + +* `Transforms3d `_ + includes most code of this module. +* `Blender.mathutils `_ +* `numpy-dtypes `_ + +References +---------- +(1) Matrices and transformations. 
Ronald Goldman. + In "Graphics Gems I", pp 472-475. Morgan Kaufmann, 1990. +(2) More matrices and transformations: shear and pseudo-perspective. + Ronald Goldman. In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991. +(3) Decomposing a matrix into simple transformations. Spencer Thomas. + In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991. +(4) Recovering the data from the transformation matrix. Ronald Goldman. + In "Graphics Gems II", pp 324-331. Morgan Kaufmann, 1991. +(5) Euler angle conversion. Ken Shoemake. + In "Graphics Gems IV", pp 222-229. Morgan Kaufmann, 1994. +(6) Arcball rotation control. Ken Shoemake. + In "Graphics Gems IV", pp 175-192. Morgan Kaufmann, 1994. +(7) Representing attitude: Euler angles, unit quaternions, and rotation + vectors. James Diebel. 2006. +(8) A discussion of the solution for the best rotation to relate two sets + of vectors. W Kabsch. Acta Cryst. 1978. A34, 827-828. +(9) Closed-form solution of absolute orientation using unit quaternions. + BKP Horn. J Opt Soc Am A. 1987. 4(4):629-642. +(10) Quaternions. Ken Shoemake. + http://www.sfu.ca/~jwa3/cmpt461/files/quatut.pdf +(11) From quaternion to matrix and back. JMP van Waveren. 2005. + http://www.intel.com/cd/ids/developer/asmo-na/eng/293748.htm +(12) Uniform random rotations. Ken Shoemake. + In "Graphics Gems III", pp 124-132. Morgan Kaufmann, 1992. +(13) Quaternion in molecular modeling. CFF Karney. + J Mol Graph Mod, 25(5):595-604 +(14) New method for extracting the quaternion from a rotation matrix. + Itzhack Y Bar-Itzhack, J Guid Contr Dynam. 2000. 23(6): 1085-1087. +(15) Multiple View Geometry in Computer Vision. Hartley and Zissermann. + Cambridge University Press; 2nd Ed. 2004. Chapter 4, Algorithm 4.7, p 130. +(16) Column Vectors vs. Row Vectors. 
+ http://steve.hollasch.net/cgindex/math/matrix/column-vec.html + +Examples +-------- +>>> alpha, beta, gamma = 0.123, -1.234, 2.345 +>>> origin, xaxis, yaxis, zaxis = [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1] +>>> I = identity_matrix() +>>> Rx = rotation_matrix(alpha, xaxis) +>>> Ry = rotation_matrix(beta, yaxis) +>>> Rz = rotation_matrix(gamma, zaxis) +>>> R = concatenate_matrices(Rx, Ry, Rz) +>>> euler = euler_from_matrix(R, 'rxyz') +>>> numpy.allclose([alpha, beta, gamma], euler) +True +>>> Re = euler_matrix(alpha, beta, gamma, 'rxyz') +>>> is_same_transform(R, Re) +True +>>> al, be, ga = euler_from_matrix(Re, 'rxyz') +>>> is_same_transform(Re, euler_matrix(al, be, ga, 'rxyz')) +True +>>> qx = quaternion_about_axis(alpha, xaxis) +>>> qy = quaternion_about_axis(beta, yaxis) +>>> qz = quaternion_about_axis(gamma, zaxis) +>>> q = quaternion_multiply(qx, qy) +>>> q = quaternion_multiply(q, qz) +>>> Rq = quaternion_matrix(q) +>>> is_same_transform(R, Rq) +True +>>> S = scale_matrix(1.23, origin) +>>> T = translation_matrix([1, 2, 3]) +>>> Z = shear_matrix(beta, xaxis, origin, zaxis) +>>> R = random_rotation_matrix(numpy.random.rand(3)) +>>> M = concatenate_matrices(T, R, Z, S) +>>> scale, shear, angles, trans, persp = decompose_matrix(M) +>>> numpy.allclose(scale, 1.23) +True +>>> numpy.allclose(trans, [1, 2, 3]) +True +>>> numpy.allclose(shear, [0, math.tan(beta), 0]) +True +>>> is_same_transform(R, euler_matrix(axes='sxyz', *angles)) +True +>>> M1 = compose_matrix(scale, shear, angles, trans, persp) +>>> is_same_transform(M, M1) +True +>>> v0, v1 = random_vector(3), random_vector(3) +>>> M = rotation_matrix(angle_between_vectors(v0, v1), vector_product(v0, v1)) +>>> v2 = numpy.dot(v0, M[:3,:3].T) +>>> numpy.allclose(unit_vector(v1), unit_vector(v2)) +True + +""" + +from __future__ import division, print_function + +import math + +import numpy + +__version__ = '2015.07.18' +__docformat__ = 'restructuredtext en' +__all__ = () + + +def identity_matrix(): + """Return 
def translation_from_matrix(matrix):
    """Return translation vector from translation matrix.

    ``matrix`` may be any array-like (nested list, tuple or numpy array).
    The result is always a fresh array, so modifying it never touches the
    input.

    >>> v0 = numpy.random.random(3) - 0.5
    >>> v1 = translation_from_matrix(translation_matrix(v0))
    >>> numpy.allclose(v0, v1)
    True

    """
    # numpy.asarray avoids a copy when possible; numpy.array(..., copy=False)
    # raises ValueError on NumPy >= 2.0 whenever a copy is unavoidable.
    return numpy.asarray(matrix)[:3, 3].copy()
def rotation_matrix(angle, direction, point=None):
    """Return matrix to rotate about axis defined by point and direction.

    angle : rotation angle in radians
    direction : axis direction; only the first three components are used
    point : optional array-like point on the rotation axis.  When omitted
        the axis passes through the origin.

    >>> R = rotation_matrix(math.pi/2, [0, 0, 1], [1, 0, 0])
    >>> numpy.allclose(numpy.dot(R, [0, 0, 0, 1]), [1, -1, 0, 1])
    True
    >>> angle = (random.random() - 0.5) * (2*math.pi)
    >>> direc = numpy.random.random(3) - 0.5
    >>> point = numpy.random.random(3) - 0.5
    >>> R0 = rotation_matrix(angle, direc, point)
    >>> R1 = rotation_matrix(angle-2*math.pi, direc, point)
    >>> is_same_transform(R0, R1)
    True
    >>> R0 = rotation_matrix(angle, direc, point)
    >>> R1 = rotation_matrix(-angle, -direc, point)
    >>> is_same_transform(R0, R1)
    True
    >>> I = numpy.identity(4, numpy.float64)
    >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc))
    True
    >>> numpy.allclose(2, numpy.trace(rotation_matrix(math.pi/2,
    ...                                               direc, point)))
    True

    """
    sina = math.sin(angle)
    cosa = math.cos(angle)
    direction = unit_vector(direction[:3])
    # rotation matrix around unit vector
    R = numpy.diag([cosa, cosa, cosa])
    R += numpy.outer(direction, direction) * (1.0 - cosa)
    direction *= sina
    R += numpy.array([[ 0.0,         -direction[2],  direction[1]],
                      [ direction[2], 0.0,          -direction[0]],
                      [-direction[1], direction[0],  0.0]])
    M = numpy.identity(4)
    M[:3, :3] = R
    if point is not None:
        # rotation not around origin
        # asarray instead of array(copy=False): the latter raises ValueError
        # on NumPy >= 2.0 for list or integer input, which needs a copy.
        point = numpy.asarray(point[:3], dtype=numpy.float64)
        M[:3, 3] = point - numpy.dot(R, point)
    return M
def scale_from_matrix(matrix):
    """Return scaling factor, origin and direction from scaling matrix.

    ``matrix`` may be any array-like 4x4 matrix.  ``direction`` is None for
    uniform scaling.  Raises ValueError if the matrix has no eigenvalue 1.

    >>> factor = random.random() * 10 - 5
    >>> origin = numpy.random.random(3) - 0.5
    >>> direct = numpy.random.random(3) - 0.5
    >>> S0 = scale_matrix(factor, origin)
    >>> factor, origin, direction = scale_from_matrix(S0)
    >>> S1 = scale_matrix(factor, origin, direction)
    >>> is_same_transform(S0, S1)
    True
    >>> S0 = scale_matrix(factor, origin, direct)
    >>> factor, origin, direction = scale_from_matrix(S0)
    >>> S1 = scale_matrix(factor, origin, direction)
    >>> is_same_transform(S0, S1)
    True

    """
    # asarray instead of array(copy=False): the latter raises ValueError on
    # NumPy >= 2.0 when the input needs converting (lists, integer arrays).
    M = numpy.asarray(matrix, dtype=numpy.float64)
    M33 = M[:3, :3]
    factor = numpy.trace(M33) - 2.0
    try:
        # direction: unit eigenvector corresponding to eigenvalue factor
        w, V = numpy.linalg.eig(M33)
        i = numpy.where(abs(numpy.real(w) - factor) < 1e-8)[0][0]
        direction = numpy.real(V[:, i]).squeeze()
        direction /= vector_norm(direction)
    except IndexError:
        # uniform scaling: no eigenvalue matched, so the trace is 3 * factor
        factor = (factor + 2.0) / 3.0
        direction = None
    # origin: any eigenvector corresponding to eigenvalue 1
    w, V = numpy.linalg.eig(M)
    i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
    if not len(i):
        raise ValueError("no eigenvector corresponding to eigenvalue 1")
    origin = numpy.real(V[:, i[-1]]).squeeze()
    origin /= origin[3]
    return factor, origin, direction
def projection_from_matrix(matrix, pseudo=False):
    """Return projection plane and perspective point from projection matrix.

    Return values are same as arguments for projection_matrix function:
    point, normal, direction, perspective, and pseudo.

    ``matrix`` may be any array-like 4x4 matrix.  Raises ValueError when the
    eigenvectors needed for the decomposition cannot be found.

    >>> point = numpy.random.random(3) - 0.5
    >>> normal = numpy.random.random(3) - 0.5
    >>> direct = numpy.random.random(3) - 0.5
    >>> persp = numpy.random.random(3) - 0.5
    >>> P0 = projection_matrix(point, normal)
    >>> result = projection_from_matrix(P0)
    >>> P1 = projection_matrix(*result)
    >>> is_same_transform(P0, P1)
    True
    >>> P0 = projection_matrix(point, normal, direct)
    >>> result = projection_from_matrix(P0)
    >>> P1 = projection_matrix(*result)
    >>> is_same_transform(P0, P1)
    True
    >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=False)
    >>> result = projection_from_matrix(P0, pseudo=False)
    >>> P1 = projection_matrix(*result)
    >>> is_same_transform(P0, P1)
    True
    >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=True)
    >>> result = projection_from_matrix(P0, pseudo=True)
    >>> P1 = projection_matrix(*result)
    >>> is_same_transform(P0, P1)
    True

    """
    # asarray instead of array(copy=False): the latter raises ValueError on
    # NumPy >= 2.0 when the input needs converting (lists, integer arrays).
    M = numpy.asarray(matrix, dtype=numpy.float64)
    M33 = M[:3, :3]
    w, V = numpy.linalg.eig(M)
    i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
    if not pseudo and len(i):
        # point: any eigenvector corresponding to eigenvalue 1
        point = numpy.real(V[:, i[-1]]).squeeze()
        point /= point[3]
        # direction: unit eigenvector corresponding to eigenvalue 0
        w, V = numpy.linalg.eig(M33)
        i = numpy.where(abs(numpy.real(w)) < 1e-8)[0]
        if not len(i):
            raise ValueError("no eigenvector corresponding to eigenvalue 0")
        direction = numpy.real(V[:, i[0]]).squeeze()
        direction /= vector_norm(direction)
        # normal: unit eigenvector of M33.T corresponding to eigenvalue 0
        w, V = numpy.linalg.eig(M33.T)
        i = numpy.where(abs(numpy.real(w)) < 1e-8)[0]
        if len(i):
            # parallel projection
            normal = numpy.real(V[:, i[0]]).squeeze()
            normal /= vector_norm(normal)
            return point, normal, direction, None, False
        else:
            # orthogonal projection, where normal equals direction vector
            return point, direction, None, None, False
    else:
        # perspective projection
        i = numpy.where(abs(numpy.real(w)) > 1e-8)[0]
        if not len(i):
            raise ValueError(
                "no eigenvector not corresponding to eigenvalue 0")
        point = numpy.real(V[:, i[-1]]).squeeze()
        point /= point[3]
        normal = - M[3, :3]
        # the division creates a new array, so the in-place update below
        # never writes into the caller's matrix
        perspective = M[:3, 3] / numpy.dot(point[:3], normal)
        if pseudo:
            perspective -= normal
        return point, normal, None, perspective, pseudo
+ + >>> frustum = numpy.random.rand(6) + >>> frustum[1] += frustum[0] + >>> frustum[3] += frustum[2] + >>> frustum[5] += frustum[4] + >>> M = clip_matrix(perspective=False, *frustum) + >>> numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1]) + array([-1., -1., -1., 1.]) + >>> numpy.dot(M, [frustum[1], frustum[3], frustum[5], 1]) + array([ 1., 1., 1., 1.]) + >>> M = clip_matrix(perspective=True, *frustum) + >>> v = numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1]) + >>> v / v[3] + array([-1., -1., -1., 1.]) + >>> v = numpy.dot(M, [frustum[1], frustum[3], frustum[4], 1]) + >>> v / v[3] + array([ 1., 1., -1., 1.]) + + """ + if left >= right or bottom >= top or near >= far: + raise ValueError("invalid frustum") + if perspective: + if near <= _EPS: + raise ValueError("invalid frustum: near <= 0") + t = 2.0 * near + M = [[t/(left-right), 0.0, (right+left)/(right-left), 0.0], + [0.0, t/(bottom-top), (top+bottom)/(top-bottom), 0.0], + [0.0, 0.0, (far+near)/(near-far), t*far/(far-near)], + [0.0, 0.0, -1.0, 0.0]] + else: + M = [[2.0/(right-left), 0.0, 0.0, (right+left)/(left-right)], + [0.0, 2.0/(top-bottom), 0.0, (top+bottom)/(bottom-top)], + [0.0, 0.0, 2.0/(far-near), (far+near)/(near-far)], + [0.0, 0.0, 0.0, 1.0]] + return numpy.array(M) + + +def shear_matrix(angle, direction, point, normal): + """Return matrix to shear by angle along direction vector on shear plane. + + The shear plane is defined by a point and normal vector. The direction + vector must be orthogonal to the plane's normal vector. + + A point P is transformed by the shear matrix into P" such that + the vector P-P" is parallel to the direction vector and its extent is + given by the angle of P-P'-P", where P' is the orthogonal projection + of P onto the shear plane. 
+ + >>> angle = (random.random() - 0.5) * 4*math.pi + >>> direct = numpy.random.random(3) - 0.5 + >>> point = numpy.random.random(3) - 0.5 + >>> normal = numpy.cross(direct, numpy.random.random(3)) + >>> S = shear_matrix(angle, direct, point, normal) + >>> numpy.allclose(1, numpy.linalg.det(S)) + True + + """ + normal = unit_vector(normal[:3]) + direction = unit_vector(direction[:3]) + if abs(numpy.dot(normal, direction)) > 1e-6: + raise ValueError("direction and normal vectors are not orthogonal") + angle = math.tan(angle) + M = numpy.identity(4) + M[:3, :3] += angle * numpy.outer(direction, normal) + M[:3, 3] = -angle * numpy.dot(point[:3], normal) * direction + return M + + +def shear_from_matrix(matrix): + """Return shear angle, direction and plane from shear matrix. + + >>> angle = (random.random() - 0.5) * 4*math.pi + >>> direct = numpy.random.random(3) - 0.5 + >>> point = numpy.random.random(3) - 0.5 + >>> normal = numpy.cross(direct, numpy.random.random(3)) + >>> S0 = shear_matrix(angle, direct, point, normal) + >>> angle, direct, point, normal = shear_from_matrix(S0) + >>> S1 = shear_matrix(angle, direct, point, normal) + >>> is_same_transform(S0, S1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False) + M33 = M[:3, :3] + # normal: cross independent eigenvectors corresponding to the eigenvalue 1 + w, V = numpy.linalg.eig(M33) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-4)[0] + if len(i) < 2: + raise ValueError("no two linear independent eigenvectors found %s" % w) + V = numpy.real(V[:, i]).squeeze().T + lenorm = -1.0 + for i0, i1 in ((0, 1), (0, 2), (1, 2)): + n = numpy.cross(V[i0], V[i1]) + w = vector_norm(n) + if w > lenorm: + lenorm = w + normal = n + normal /= lenorm + # direction and angle + direction = numpy.dot(M33 - numpy.identity(3), normal) + angle = vector_norm(direction) + direction /= angle + angle = math.atan(angle) + # point: eigenvector corresponding to eigenvalue 1 + w, V = numpy.linalg.eig(M) + i = 
numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no eigenvector corresponding to eigenvalue 1") + point = numpy.real(V[:, i[-1]]).squeeze() + point /= point[3] + return angle, direction, point, normal + + +def decompose_matrix(matrix): + """Return sequence of transformations from transformation matrix. + + matrix : array_like + Non-degenerative homogeneous transformation matrix + + Return tuple of: + scale : vector of 3 scaling factors + shear : list of shear factors for x-y, x-z, y-z axes + angles : list of Euler angles about static x, y, z axes + translate : translation vector along x, y, z axes + perspective : perspective partition of matrix + + Raise ValueError if matrix is of wrong type or degenerative. + + >>> T0 = translation_matrix([1, 2, 3]) + >>> scale, shear, angles, trans, persp = decompose_matrix(T0) + >>> T1 = translation_matrix(trans) + >>> numpy.allclose(T0, T1) + True + >>> S = scale_matrix(0.123) + >>> scale, shear, angles, trans, persp = decompose_matrix(S) + >>> scale[0] + 0.123 + >>> R0 = euler_matrix(1, 2, 3) + >>> scale, shear, angles, trans, persp = decompose_matrix(R0) + >>> R1 = euler_matrix(*angles) + >>> numpy.allclose(R0, R1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=True).T + if abs(M[3, 3]) < _EPS: + raise ValueError("M[3, 3] is zero") + M /= M[3, 3] + P = M.copy() + P[:, 3] = 0.0, 0.0, 0.0, 1.0 + if not numpy.linalg.det(P): + raise ValueError("matrix is singular") + + scale = numpy.zeros((3, )) + shear = [0.0, 0.0, 0.0] + angles = [0.0, 0.0, 0.0] + + if any(abs(M[:3, 3]) > _EPS): + perspective = numpy.dot(M[:, 3], numpy.linalg.inv(P.T)) + M[:, 3] = 0.0, 0.0, 0.0, 1.0 + else: + perspective = numpy.array([0.0, 0.0, 0.0, 1.0]) + + translate = M[3, :3].copy() + M[3, :3] = 0.0 + + row = M[:3, :3].copy() + scale[0] = vector_norm(row[0]) + row[0] /= scale[0] + shear[0] = numpy.dot(row[0], row[1]) + row[1] -= row[0] * shear[0] + scale[1] = vector_norm(row[1]) + row[1] /= scale[1] + 
shear[0] /= scale[1] + shear[1] = numpy.dot(row[0], row[2]) + row[2] -= row[0] * shear[1] + shear[2] = numpy.dot(row[1], row[2]) + row[2] -= row[1] * shear[2] + scale[2] = vector_norm(row[2]) + row[2] /= scale[2] + shear[1:] /= scale[2] + + if numpy.dot(row[0], numpy.cross(row[1], row[2])) < 0: + numpy.negative(scale, scale) + numpy.negative(row, row) + + angles[1] = math.asin(-row[0, 2]) + if math.cos(angles[1]): + angles[0] = math.atan2(row[1, 2], row[2, 2]) + angles[2] = math.atan2(row[0, 1], row[0, 0]) + else: + #angles[0] = math.atan2(row[1, 0], row[1, 1]) + angles[0] = math.atan2(-row[2, 1], row[1, 1]) + angles[2] = 0.0 + + return scale, shear, angles, translate, perspective + + +def compose_matrix(scale=None, shear=None, angles=None, translate=None, + perspective=None): + """Return transformation matrix from sequence of transformations. + + This is the inverse of the decompose_matrix function. + + Sequence of transformations: + scale : vector of 3 scaling factors + shear : list of shear factors for x-y, x-z, y-z axes + angles : list of Euler angles about static x, y, z axes + translate : translation vector along x, y, z axes + perspective : perspective partition of matrix + + >>> scale = numpy.random.random(3) - 0.5 + >>> shear = numpy.random.random(3) - 0.5 + >>> angles = (numpy.random.random(3) - 0.5) * (2*math.pi) + >>> trans = numpy.random.random(3) - 0.5 + >>> persp = numpy.random.random(4) - 0.5 + >>> M0 = compose_matrix(scale, shear, angles, trans, persp) + >>> result = decompose_matrix(M0) + >>> M1 = compose_matrix(*result) + >>> is_same_transform(M0, M1) + True + + """ + M = numpy.identity(4) + if perspective is not None: + P = numpy.identity(4) + P[3, :] = perspective[:4] + M = numpy.dot(M, P) + if translate is not None: + T = numpy.identity(4) + T[:3, 3] = translate[:3] + M = numpy.dot(M, T) + if angles is not None: + R = euler_matrix(angles[0], angles[1], angles[2], 'sxyz') + M = numpy.dot(M, R) + if shear is not None: + Z = numpy.identity(4) + 
Z[1, 2] = shear[2] + Z[0, 2] = shear[1] + Z[0, 1] = shear[0] + M = numpy.dot(M, Z) + if scale is not None: + S = numpy.identity(4) + S[0, 0] = scale[0] + S[1, 1] = scale[1] + S[2, 2] = scale[2] + M = numpy.dot(M, S) + M /= M[3, 3] + return M + + +def orthogonalization_matrix(lengths, angles): + """Return orthogonalization matrix for crystallographic cell coordinates. + + Angles are expected in degrees. + + The de-orthogonalization matrix is the inverse. + + >>> O = orthogonalization_matrix([10, 10, 10], [90, 90, 90]) + >>> numpy.allclose(O[:3, :3], numpy.identity(3, float) * 10) + True + >>> O = orthogonalization_matrix([9.8, 12.0, 15.5], [87.2, 80.7, 69.7]) + >>> numpy.allclose(numpy.sum(O), 43.063229) + True + + """ + a, b, c = lengths + angles = numpy.radians(angles) + sina, sinb, _ = numpy.sin(angles) + cosa, cosb, cosg = numpy.cos(angles) + co = (cosa * cosb - cosg) / (sina * sinb) + return numpy.array([ + [ a*sinb*math.sqrt(1.0-co*co), 0.0, 0.0, 0.0], + [-a*sinb*co, b*sina, 0.0, 0.0], + [ a*cosb, b*cosa, c, 0.0], + [ 0.0, 0.0, 0.0, 1.0]]) + + +def affine_matrix_from_points(v0, v1, shear=True, scale=True, usesvd=True): + """Return affine transform matrix to register two point sets. + + v0 and v1 are shape (ndims, \*) arrays of at least ndims non-homogeneous + coordinates, where ndims is the dimensionality of the coordinate space. + + If shear is False, a similarity transformation matrix is returned. + If also scale is False, a rigid/Euclidean transformation matrix + is returned. + + By default the algorithm by Hartley and Zissermann [15] is used. + If usesvd is True, similarity and Euclidean transformation matrices + are calculated by minimizing the weighted sum of squared deviations + (RMSD) according to the algorithm by Kabsch [8]. + Otherwise, and if ndims is 3, the quaternion based algorithm by Horn [9] + is used, which is slower when using this Python implementation. 
+ + The returned matrix performs rotation, translation and uniform scaling + (if specified). + + >>> v0 = [[0, 1031, 1031, 0], [0, 0, 1600, 1600]] + >>> v1 = [[675, 826, 826, 677], [55, 52, 281, 277]] + >>> affine_matrix_from_points(v0, v1) + array([[ 0.14549, 0.00062, 675.50008], + [ 0.00048, 0.14094, 53.24971], + [ 0. , 0. , 1. ]]) + >>> T = translation_matrix(numpy.random.random(3)-0.5) + >>> R = random_rotation_matrix(numpy.random.random(3)) + >>> S = scale_matrix(random.random()) + >>> M = concatenate_matrices(T, R, S) + >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20 + >>> v0[3] = 1 + >>> v1 = numpy.dot(M, v0) + >>> v0[:3] += numpy.random.normal(0, 1e-8, 300).reshape(3, -1) + >>> M = affine_matrix_from_points(v0[:3], v1[:3]) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + + More examples in superimposition_matrix() + + """ + v0 = numpy.array(v0, dtype=numpy.float64, copy=True) + v1 = numpy.array(v1, dtype=numpy.float64, copy=True) + + ndims = v0.shape[0] + if ndims < 2 or v0.shape[1] < ndims or v0.shape != v1.shape: + raise ValueError("input arrays are of wrong shape or type") + + # move centroids to origin + t0 = -numpy.mean(v0, axis=1) + M0 = numpy.identity(ndims+1) + M0[:ndims, ndims] = t0 + v0 += t0.reshape(ndims, 1) + t1 = -numpy.mean(v1, axis=1) + M1 = numpy.identity(ndims+1) + M1[:ndims, ndims] = t1 + v1 += t1.reshape(ndims, 1) + + if shear: + # Affine transformation + A = numpy.concatenate((v0, v1), axis=0) + u, s, vh = numpy.linalg.svd(A.T) + vh = vh[:ndims].T + B = vh[:ndims] + C = vh[ndims:2*ndims] + t = numpy.dot(C, numpy.linalg.pinv(B)) + t = numpy.concatenate((t, numpy.zeros((ndims, 1))), axis=1) + M = numpy.vstack((t, ((0.0,)*ndims) + (1.0,))) + elif usesvd or ndims != 3: + # Rigid transformation via SVD of covariance matrix + u, s, vh = numpy.linalg.svd(numpy.dot(v1, v0.T)) + # rotation matrix from SVD orthonormal bases + R = numpy.dot(u, vh) + if numpy.linalg.det(R) < 0.0: + # R does not constitute right handed system + R -= 
numpy.outer(u[:, ndims-1], vh[ndims-1, :]*2.0) + s[-1] *= -1.0 + # homogeneous transformation matrix + M = numpy.identity(ndims+1) + M[:ndims, :ndims] = R + else: + # Rigid transformation matrix via quaternion + # compute symmetric matrix N + xx, yy, zz = numpy.sum(v0 * v1, axis=1) + xy, yz, zx = numpy.sum(v0 * numpy.roll(v1, -1, axis=0), axis=1) + xz, yx, zy = numpy.sum(v0 * numpy.roll(v1, -2, axis=0), axis=1) + N = [[xx+yy+zz, 0.0, 0.0, 0.0], + [yz-zy, xx-yy-zz, 0.0, 0.0], + [zx-xz, xy+yx, yy-xx-zz, 0.0], + [xy-yx, zx+xz, yz+zy, zz-xx-yy]] + # quaternion: eigenvector corresponding to most positive eigenvalue + w, V = numpy.linalg.eigh(N) + q = V[:, numpy.argmax(w)] + q /= vector_norm(q) # unit quaternion + # homogeneous transformation matrix + M = quaternion_matrix(q) + + if scale and not shear: + # Affine transformation; scale is ratio of RMS deviations from centroid + v0 *= v0 + v1 *= v1 + M[:ndims, :ndims] *= math.sqrt(numpy.sum(v1) / numpy.sum(v0)) + + # move centroids back + M = numpy.dot(numpy.linalg.inv(M1), numpy.dot(M, M0)) + M /= M[ndims, ndims] + return M + + +def superimposition_matrix(v0, v1, scale=False, usesvd=True): + """Return matrix to transform given 3D point set into second point set. + + v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 points. + + The parameters scale and usesvd are explained in the more general + affine_matrix_from_points function. + + The returned matrix is a similarity or Euclidean transformation matrix. + This function has a fast C implementation in transformations.c. 
+ + >>> v0 = numpy.random.rand(3, 10) + >>> M = superimposition_matrix(v0, v0) + >>> numpy.allclose(M, numpy.identity(4)) + True + >>> R = random_rotation_matrix(numpy.random.random(3)) + >>> v0 = [[1,0,0], [0,1,0], [0,0,1], [1,1,1]] + >>> v1 = numpy.dot(R, v0) + >>> M = superimposition_matrix(v0, v1) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20 + >>> v0[3] = 1 + >>> v1 = numpy.dot(R, v0) + >>> M = superimposition_matrix(v0, v1) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> S = scale_matrix(random.random()) + >>> T = translation_matrix(numpy.random.random(3)-0.5) + >>> M = concatenate_matrices(T, R, S) + >>> v1 = numpy.dot(M, v0) + >>> v0[:3] += numpy.random.normal(0, 1e-9, 300).reshape(3, -1) + >>> M = superimposition_matrix(v0, v1, scale=True) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> v = numpy.empty((4, 100, 3)) + >>> v[:, :, 0] = v0 + >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False) + >>> numpy.allclose(v1, numpy.dot(M, v[:, :, 0])) + True + + """ + v0 = numpy.array(v0, dtype=numpy.float64, copy=False)[:3] + v1 = numpy.array(v1, dtype=numpy.float64, copy=False)[:3] + return affine_matrix_from_points(v0, v1, shear=False, + scale=scale, usesvd=usesvd) + + +def euler_matrix(ai, aj, ak, axes='sxyz'): + """Return homogeneous rotation matrix from Euler angles and axis sequence. + + ai, aj, ak : Euler's roll, pitch and yaw angles + axes : One of 24 axis sequences as string or encoded tuple + + >>> R = euler_matrix(1, 2, 3, 'syxz') + >>> numpy.allclose(numpy.sum(R[0]), -1.34786452) + True + >>> R = euler_matrix(1, 2, 3, (0, 1, 0, 1)) + >>> numpy.allclose(numpy.sum(R[0]), -0.383436184) + True + >>> ai, aj, ak = (4*math.pi) * (numpy.random.random(3) - 0.5) + >>> for axes in _AXES2TUPLE.keys(): + ... 
R = euler_matrix(ai, aj, ak, axes) + >>> for axes in _TUPLE2AXES.keys(): + ... R = euler_matrix(ai, aj, ak, axes) + + """ + try: + firstaxis, parity, repetition, frame = _AXES2TUPLE[axes] + except (AttributeError, KeyError): + _TUPLE2AXES[axes] # validation + firstaxis, parity, repetition, frame = axes + + i = firstaxis + j = _NEXT_AXIS[i+parity] + k = _NEXT_AXIS[i-parity+1] + + if frame: + ai, ak = ak, ai + if parity: + ai, aj, ak = -ai, -aj, -ak + + si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak) + ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak) + cc, cs = ci*ck, ci*sk + sc, ss = si*ck, si*sk + + M = numpy.identity(4) + if repetition: + M[i, i] = cj + M[i, j] = sj*si + M[i, k] = sj*ci + M[j, i] = sj*sk + M[j, j] = -cj*ss+cc + M[j, k] = -cj*cs-sc + M[k, i] = -sj*ck + M[k, j] = cj*sc+cs + M[k, k] = cj*cc-ss + else: + M[i, i] = cj*ck + M[i, j] = sj*sc-cs + M[i, k] = sj*cc+ss + M[j, i] = cj*sk + M[j, j] = sj*ss+cc + M[j, k] = sj*cs-sc + M[k, i] = -sj + M[k, j] = cj*si + M[k, k] = cj*ci + return M + + +def euler_from_matrix(matrix, axes='sxyz'): + """Return Euler angles from rotation matrix for specified axis sequence. + + axes : One of 24 axis sequences as string or encoded tuple + + Note that many Euler angle triplets can describe one matrix. + + >>> R0 = euler_matrix(1, 2, 3, 'syxz') + >>> al, be, ga = euler_from_matrix(R0, 'syxz') + >>> R1 = euler_matrix(al, be, ga, 'syxz') + >>> numpy.allclose(R0, R1) + True + >>> angles = (4*math.pi) * (numpy.random.random(3) - 0.5) + >>> for axes in _AXES2TUPLE.keys(): + ... R0 = euler_matrix(axes=axes, *angles) + ... R1 = euler_matrix(axes=axes, *euler_from_matrix(R0, axes)) + ... 
if not numpy.allclose(R0, R1): print(axes, "failed") + + """ + try: + firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()] + except (AttributeError, KeyError): + _TUPLE2AXES[axes] # validation + firstaxis, parity, repetition, frame = axes + + i = firstaxis + j = _NEXT_AXIS[i+parity] + k = _NEXT_AXIS[i-parity+1] + + M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:3, :3] + if repetition: + sy = math.sqrt(M[i, j]*M[i, j] + M[i, k]*M[i, k]) + if sy > _EPS: + ax = math.atan2( M[i, j], M[i, k]) + ay = math.atan2( sy, M[i, i]) + az = math.atan2( M[j, i], -M[k, i]) + else: + ax = math.atan2(-M[j, k], M[j, j]) + ay = math.atan2( sy, M[i, i]) + az = 0.0 + else: + cy = math.sqrt(M[i, i]*M[i, i] + M[j, i]*M[j, i]) + if cy > _EPS: + ax = math.atan2( M[k, j], M[k, k]) + ay = math.atan2(-M[k, i], cy) + az = math.atan2( M[j, i], M[i, i]) + else: + ax = math.atan2(-M[j, k], M[j, j]) + ay = math.atan2(-M[k, i], cy) + az = 0.0 + + if parity: + ax, ay, az = -ax, -ay, -az + if frame: + ax, az = az, ax + return ax, ay, az + + +def euler_from_quaternion(quaternion, axes='sxyz'): + """Return Euler angles from quaternion for specified axis sequence. + + >>> angles = euler_from_quaternion([0.99810947, 0.06146124, 0, 0]) + >>> numpy.allclose(angles, [0.123, 0, 0]) + True + + """ + return euler_from_matrix(quaternion_matrix(quaternion), axes) + + +def quaternion_from_euler(ai, aj, ak, axes='sxyz'): + """Return quaternion from Euler angles and axis sequence. 
+ + ai, aj, ak : Euler's roll, pitch and yaw angles + axes : One of 24 axis sequences as string or encoded tuple + + >>> q = quaternion_from_euler(1, 2, 3, 'ryxz') + >>> numpy.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435]) + True + + """ + try: + firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()] + except (AttributeError, KeyError): + _TUPLE2AXES[axes] # validation + firstaxis, parity, repetition, frame = axes + + i = firstaxis + 1 + j = _NEXT_AXIS[i+parity-1] + 1 + k = _NEXT_AXIS[i-parity] + 1 + + if frame: + ai, ak = ak, ai + if parity: + aj = -aj + + ai /= 2.0 + aj /= 2.0 + ak /= 2.0 + ci = math.cos(ai) + si = math.sin(ai) + cj = math.cos(aj) + sj = math.sin(aj) + ck = math.cos(ak) + sk = math.sin(ak) + cc = ci*ck + cs = ci*sk + sc = si*ck + ss = si*sk + + q = numpy.empty((4, )) + if repetition: + q[0] = cj*(cc - ss) + q[i] = cj*(cs + sc) + q[j] = sj*(cc + ss) + q[k] = sj*(cs - sc) + else: + q[0] = cj*cc + sj*ss + q[i] = cj*sc - sj*cs + q[j] = cj*ss + sj*cc + q[k] = cj*cs - sj*sc + if parity: + q[j] *= -1.0 + + return q + + +def quaternion_about_axis(angle, axis): + """Return quaternion for rotation about axis. + + >>> q = quaternion_about_axis(0.123, [1, 0, 0]) + >>> numpy.allclose(q, [0.99810947, 0.06146124, 0, 0]) + True + + """ + q = numpy.array([0.0, axis[0], axis[1], axis[2]]) + qlen = vector_norm(q) + if qlen > _EPS: + q *= math.sin(angle/2.0) / qlen + q[0] = math.cos(angle/2.0) + return q + + +def quaternion_matrix(quaternion): + """Return homogeneous rotation matrix from quaternion. 
+ + >>> M = quaternion_matrix([0.99810947, 0.06146124, 0, 0]) + >>> numpy.allclose(M, rotation_matrix(0.123, [1, 0, 0])) + True + >>> M = quaternion_matrix([1, 0, 0, 0]) + >>> numpy.allclose(M, numpy.identity(4)) + True + >>> M = quaternion_matrix([0, 1, 0, 0]) + >>> numpy.allclose(M, numpy.diag([1, -1, -1, 1])) + True + + """ + q = numpy.array(quaternion, dtype=numpy.float64, copy=True) + n = numpy.dot(q, q) + if n < _EPS: + return numpy.identity(4) + q *= math.sqrt(2.0 / n) + q = numpy.outer(q, q) + return numpy.array([ + [1.0-q[2, 2]-q[3, 3], q[1, 2]-q[3, 0], q[1, 3]+q[2, 0], 0.0], + [ q[1, 2]+q[3, 0], 1.0-q[1, 1]-q[3, 3], q[2, 3]-q[1, 0], 0.0], + [ q[1, 3]-q[2, 0], q[2, 3]+q[1, 0], 1.0-q[1, 1]-q[2, 2], 0.0], + [ 0.0, 0.0, 0.0, 1.0]]) + + +def quaternion_from_matrix(matrix, isprecise=False): + """Return quaternion from rotation matrix. + + If isprecise is True, the input matrix is assumed to be a precise rotation + matrix and a faster algorithm is used. + + >>> q = quaternion_from_matrix(numpy.identity(4), True) + >>> numpy.allclose(q, [1, 0, 0, 0]) + True + >>> q = quaternion_from_matrix(numpy.diag([1, -1, -1, 1])) + >>> numpy.allclose(q, [0, 1, 0, 0]) or numpy.allclose(q, [0, -1, 0, 0]) + True + >>> R = rotation_matrix(0.123, (1, 2, 3)) + >>> q = quaternion_from_matrix(R, True) + >>> numpy.allclose(q, [0.9981095, 0.0164262, 0.0328524, 0.0492786]) + True + >>> R = [[-0.545, 0.797, 0.260, 0], [0.733, 0.603, -0.313, 0], + ... [-0.407, 0.021, -0.913, 0], [0, 0, 0, 1]] + >>> q = quaternion_from_matrix(R) + >>> numpy.allclose(q, [0.19069, 0.43736, 0.87485, -0.083611]) + True + >>> R = [[0.395, 0.362, 0.843, 0], [-0.626, 0.796, -0.056, 0], + ... 
[-0.677, -0.498, 0.529, 0], [0, 0, 0, 1]] + >>> q = quaternion_from_matrix(R) + >>> numpy.allclose(q, [0.82336615, -0.13610694, 0.46344705, -0.29792603]) + True + >>> R = random_rotation_matrix() + >>> q = quaternion_from_matrix(R) + >>> is_same_transform(R, quaternion_matrix(q)) + True + >>> R = euler_matrix(0.0, 0.0, numpy.pi/2.0) + >>> numpy.allclose(quaternion_from_matrix(R, isprecise=False), + ... quaternion_from_matrix(R, isprecise=True)) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:4, :4] + if isprecise: + q = numpy.empty((4, )) + t = numpy.trace(M) + if t > M[3, 3]: + q[0] = t + q[3] = M[1, 0] - M[0, 1] + q[2] = M[0, 2] - M[2, 0] + q[1] = M[2, 1] - M[1, 2] + else: + i, j, k = 1, 2, 3 + if M[1, 1] > M[0, 0]: + i, j, k = 2, 3, 1 + if M[2, 2] > M[i, i]: + i, j, k = 3, 1, 2 + t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3] + q[i] = t + q[j] = M[i, j] + M[j, i] + q[k] = M[k, i] + M[i, k] + q[3] = M[k, j] - M[j, k] + q *= 0.5 / math.sqrt(t * M[3, 3]) + else: + m00 = M[0, 0] + m01 = M[0, 1] + m02 = M[0, 2] + m10 = M[1, 0] + m11 = M[1, 1] + m12 = M[1, 2] + m20 = M[2, 0] + m21 = M[2, 1] + m22 = M[2, 2] + # symmetric matrix K + K = numpy.array([[m00-m11-m22, 0.0, 0.0, 0.0], + [m01+m10, m11-m00-m22, 0.0, 0.0], + [m02+m20, m12+m21, m22-m00-m11, 0.0], + [m21-m12, m02-m20, m10-m01, m00+m11+m22]]) + K /= 3.0 + # quaternion is eigenvector of K that corresponds to largest eigenvalue + w, V = numpy.linalg.eigh(K) + q = V[[3, 0, 1, 2], numpy.argmax(w)] + if q[0] < 0.0: + numpy.negative(q, q) + return q + + +def quaternion_multiply(quaternion1, quaternion0): + """Return multiplication of two quaternions. 
+ + >>> q = quaternion_multiply([4, 1, -2, 3], [8, -5, 6, 7]) + >>> numpy.allclose(q, [28, -44, -14, 48]) + True + + """ + w0, x0, y0, z0 = quaternion0 + w1, x1, y1, z1 = quaternion1 + return numpy.array([-x1*x0 - y1*y0 - z1*z0 + w1*w0, + x1*w0 + y1*z0 - z1*y0 + w1*x0, + -x1*z0 + y1*w0 + z1*x0 + w1*y0, + x1*y0 - y1*x0 + z1*w0 + w1*z0], dtype=numpy.float64) + + +def quaternion_conjugate(quaternion): + """Return conjugate of quaternion. + + >>> q0 = random_quaternion() + >>> q1 = quaternion_conjugate(q0) + >>> q1[0] == q0[0] and all(q1[1:] == -q0[1:]) + True + + """ + q = numpy.array(quaternion, dtype=numpy.float64, copy=True) + numpy.negative(q[1:], q[1:]) + return q + + +def quaternion_inverse(quaternion): + """Return inverse of quaternion. + + >>> q0 = random_quaternion() + >>> q1 = quaternion_inverse(q0) + >>> numpy.allclose(quaternion_multiply(q0, q1), [1, 0, 0, 0]) + True + + """ + q = numpy.array(quaternion, dtype=numpy.float64, copy=True) + numpy.negative(q[1:], q[1:]) + return q / numpy.dot(q, q) + + +def quaternion_real(quaternion): + """Return real part of quaternion. + + >>> quaternion_real([3, 0, 1, 2]) + 3.0 + + """ + return float(quaternion[0]) + + +def quaternion_imag(quaternion): + """Return imaginary part of quaternion. + + >>> quaternion_imag([3, 0, 1, 2]) + array([ 0., 1., 2.]) + + """ + return numpy.array(quaternion[1:4], dtype=numpy.float64, copy=True) + + +def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True): + """Return spherical linear interpolation between two quaternions. 
+ + >>> q0 = random_quaternion() + >>> q1 = random_quaternion() + >>> q = quaternion_slerp(q0, q1, 0) + >>> numpy.allclose(q, q0) + True + >>> q = quaternion_slerp(q0, q1, 1, 1) + >>> numpy.allclose(q, q1) + True + >>> q = quaternion_slerp(q0, q1, 0.5) + >>> angle = math.acos(numpy.dot(q0, q)) + >>> numpy.allclose(2, math.acos(numpy.dot(q0, q1)) / angle) or \ + numpy.allclose(2, math.acos(-numpy.dot(q0, q1)) / angle) + True + + """ + q0 = unit_vector(quat0[:4]) + q1 = unit_vector(quat1[:4]) + if fraction == 0.0: + return q0 + elif fraction == 1.0: + return q1 + d = numpy.dot(q0, q1) + if abs(abs(d) - 1.0) < _EPS: + return q0 + if shortestpath and d < 0.0: + # invert rotation + d = -d + numpy.negative(q1, q1) + angle = math.acos(d) + spin * math.pi + if abs(angle) < _EPS: + return q0 + isin = 1.0 / math.sin(angle) + q0 *= math.sin((1.0 - fraction) * angle) * isin + q1 *= math.sin(fraction * angle) * isin + q0 += q1 + return q0 + + +def random_quaternion(rand=None): + """Return uniform random unit quaternion. + + rand: array like or None + Three independent random variables that are uniformly distributed + between 0 and 1. + + >>> q = random_quaternion() + >>> numpy.allclose(1, vector_norm(q)) + True + >>> q = random_quaternion(numpy.random.random(3)) + >>> len(q.shape), q.shape[0]==4 + (1, True) + + """ + if rand is None: + rand = numpy.random.rand(3) + else: + assert len(rand) == 3 + r1 = numpy.sqrt(1.0 - rand[0]) + r2 = numpy.sqrt(rand[0]) + pi2 = math.pi * 2.0 + t1 = pi2 * rand[1] + t2 = pi2 * rand[2] + return numpy.array([numpy.cos(t2)*r2, numpy.sin(t1)*r1, + numpy.cos(t1)*r1, numpy.sin(t2)*r2]) + + +def random_rotation_matrix(rand=None): + """Return uniform random rotation matrix. + + rand: array like + Three independent random variables that are uniformly distributed + between 0 and 1 for each returned quaternion. 
+ + >>> R = random_rotation_matrix() + >>> numpy.allclose(numpy.dot(R.T, R), numpy.identity(4)) + True + + """ + return quaternion_matrix(random_quaternion(rand)) + + +class Arcball(object): + """Virtual Trackball Control. + + >>> ball = Arcball() + >>> ball = Arcball(initial=numpy.identity(4)) + >>> ball.place([320, 320], 320) + >>> ball.down([500, 250]) + >>> ball.drag([475, 275]) + >>> R = ball.matrix() + >>> numpy.allclose(numpy.sum(R), 3.90583455) + True + >>> ball = Arcball(initial=[1, 0, 0, 0]) + >>> ball.place([320, 320], 320) + >>> ball.setaxes([1, 1, 0], [-1, 1, 0]) + >>> ball.constrain = True + >>> ball.down([400, 200]) + >>> ball.drag([200, 400]) + >>> R = ball.matrix() + >>> numpy.allclose(numpy.sum(R), 0.2055924) + True + >>> ball.next() + + """ + def __init__(self, initial=None): + """Initialize virtual trackball control. + + initial : quaternion or rotation matrix + + """ + self._axis = None + self._axes = None + self._radius = 1.0 + self._center = [0.0, 0.0] + self._vdown = numpy.array([0.0, 0.0, 1.0]) + self._constrain = False + if initial is None: + self._qdown = numpy.array([1.0, 0.0, 0.0, 0.0]) + else: + initial = numpy.array(initial, dtype=numpy.float64) + if initial.shape == (4, 4): + self._qdown = quaternion_from_matrix(initial) + elif initial.shape == (4, ): + initial /= vector_norm(initial) + self._qdown = initial + else: + raise ValueError("initial not a quaternion or matrix") + self._qnow = self._qpre = self._qdown + + def place(self, center, radius): + """Place Arcball, e.g. when window size changes. + + center : sequence[2] + Window coordinates of trackball center. + radius : float + Radius of trackball in window coordinates. 
+ + """ + self._radius = float(radius) + self._center[0] = center[0] + self._center[1] = center[1] + + def setaxes(self, *axes): + """Set axes to constrain rotations.""" + if axes is None: + self._axes = None + else: + self._axes = [unit_vector(axis) for axis in axes] + + @property + def constrain(self): + """Return state of constrain to axis mode.""" + return self._constrain + + @constrain.setter + def constrain(self, value): + """Set state of constrain to axis mode.""" + self._constrain = bool(value) + + def down(self, point): + """Set initial cursor window coordinates and pick constrain-axis.""" + self._vdown = arcball_map_to_sphere(point, self._center, self._radius) + self._qdown = self._qpre = self._qnow + if self._constrain and self._axes is not None: + self._axis = arcball_nearest_axis(self._vdown, self._axes) + self._vdown = arcball_constrain_to_axis(self._vdown, self._axis) + else: + self._axis = None + + def drag(self, point): + """Update current cursor window coordinates.""" + vnow = arcball_map_to_sphere(point, self._center, self._radius) + if self._axis is not None: + vnow = arcball_constrain_to_axis(vnow, self._axis) + self._qpre = self._qnow + t = numpy.cross(self._vdown, vnow) + if numpy.dot(t, t) < _EPS: + self._qnow = self._qdown + else: + q = [numpy.dot(self._vdown, vnow), t[0], t[1], t[2]] + self._qnow = quaternion_multiply(q, self._qdown) + + def next(self, acceleration=0.0): + """Continue rotation in direction of last drag.""" + q = quaternion_slerp(self._qpre, self._qnow, 2.0+acceleration, False) + self._qpre, self._qnow = self._qnow, q + + def matrix(self): + """Return homogeneous rotation matrix.""" + return quaternion_matrix(self._qnow) + + +def arcball_map_to_sphere(point, center, radius): + """Return unit sphere coordinates from window coordinates.""" + v0 = (point[0] - center[0]) / radius + v1 = (center[1] - point[1]) / radius + n = v0*v0 + v1*v1 + if n > 1.0: + # position outside of sphere + n = math.sqrt(n) + return numpy.array([v0/n, 
v1/n, 0.0]) + else: + return numpy.array([v0, v1, math.sqrt(1.0 - n)]) + + +def arcball_constrain_to_axis(point, axis): + """Return sphere point perpendicular to axis.""" + v = numpy.array(point, dtype=numpy.float64, copy=True) + a = numpy.array(axis, dtype=numpy.float64, copy=True) + v -= a * numpy.dot(a, v) # on plane + n = vector_norm(v) + if n > _EPS: + if v[2] < 0.0: + numpy.negative(v, v) + v /= n + return v + if a[2] == 1.0: + return numpy.array([1.0, 0.0, 0.0]) + return unit_vector([-a[1], a[0], 0.0]) + + +def arcball_nearest_axis(point, axes): + """Return axis, which arc is nearest to point.""" + point = numpy.array(point, dtype=numpy.float64, copy=False) + nearest = None + mx = -1.0 + for axis in axes: + t = numpy.dot(arcball_constrain_to_axis(point, axis), point) + if t > mx: + nearest = axis + mx = t + return nearest + + +# epsilon for testing whether a number is close to zero +_EPS = numpy.finfo(float).eps * 4.0 + +# axis sequences for Euler angles +_NEXT_AXIS = [1, 2, 0, 1] + +# map axes strings to/from tuples of inner axis, parity, repetition, frame +_AXES2TUPLE = { + 'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0), + 'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0), + 'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0), + 'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0), + 'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1), + 'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1), + 'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1), + 'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)} + +_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items()) + + +def vector_norm(data, axis=None, out=None): + """Return length, i.e. Euclidean norm, of ndarray along axis. 
def unit_vector(data, axis=None, out=None):
    """Return ndarray normalized by length, i.e. Euclidean norm, along axis.

    With ``out=None`` a normalized float64 copy of ``data`` is returned and
    ``data`` itself is left untouched. Otherwise ``data`` is copied into
    ``out`` (unless ``out is data``), ``out`` is normalized in place and
    None is returned.

    >>> v0 = numpy.random.random(3)
    >>> v1 = unit_vector(v0)
    >>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0))
    True
    >>> v0 = numpy.random.rand(5, 4, 3)
    >>> v1 = unit_vector(v0, axis=-1)
    >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2)
    >>> numpy.allclose(v1, v2)
    True
    >>> v1 = unit_vector(v0, axis=1)
    >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1)
    >>> numpy.allclose(v1, v2)
    True
    >>> v1 = numpy.empty((5, 4, 3))
    >>> unit_vector(v0, axis=1, out=v1)
    >>> numpy.allclose(v1, v2)
    True
    >>> list(unit_vector([]))
    []
    >>> list(unit_vector([1]))
    [1.0]

    """
    if out is None:
        data = numpy.array(data, dtype=numpy.float64, copy=True)
        if data.ndim == 1:
            # Fast path: a single vector needs no axis handling.
            data /= math.sqrt(numpy.dot(data, data))
            return data
    else:
        if out is not data:
            # asarray copies only when it has to; numpy.array(copy=False)
            # raises ValueError on NumPy >= 2 for inputs such as lists.
            out[:] = numpy.asarray(data)
        data = out
    length = numpy.atleast_1d(numpy.sum(data*data, axis))
    numpy.sqrt(length, length)
    if axis is not None:
        # Re-insert the reduced axis so the division broadcasts correctly.
        length = numpy.expand_dims(length, axis)
    data /= length
    if out is None:
        return data


def random_vector(size):
    """Return array of random doubles in the half-open interval [0.0, 1.0).

    >>> v = random_vector(10000)
    >>> numpy.all(v >= 0) and numpy.all(v < 1)
    True
    >>> v0 = random_vector(10)
    >>> v1 = random_vector(10)
    >>> numpy.any(v0 == v1)
    False

    """
    return numpy.random.random(size)


def vector_product(v0, v1, axis=0):
    """Return vector perpendicular to vectors.

    Thin wrapper around ``numpy.cross``; ``axis`` selects the axis that
    holds the vector components.

    >>> v = vector_product([2, 0, 0], [0, 3, 0])
    >>> numpy.allclose(v, [0, 0, 6])
    True
    >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]]
    >>> v1 = [[3], [0], [0]]
    >>> v = vector_product(v0, v1)
    >>> numpy.allclose(v, [[0, 0, 0, 0], [0, 0, 6, 6], [0, -6, 0, -6]])
    True
    >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]]
    >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]]
    >>> v = vector_product(v0, v1, axis=1)
    >>> numpy.allclose(v, [[0, 0, 6], [0, -6, 0], [6, 0, 0], [0, -6, 6]])
    True

    """
    return numpy.cross(v0, v1, axis=axis)


def angle_between_vectors(v0, v1, directed=True, axis=0):
    """Return angle between vectors.

    If directed is False, the input vectors are interpreted as undirected axes,
    i.e. the maximum angle is pi/2.

    >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3])
    >>> numpy.allclose(a, math.pi)
    True
    >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3], directed=False)
    >>> numpy.allclose(a, 0)
    True
    >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]]
    >>> v1 = [[3], [0], [0]]
    >>> a = angle_between_vectors(v0, v1)
    >>> numpy.allclose(a, [0, 1.5708, 1.5708, 0.95532])
    True
    >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]]
    >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]]
    >>> a = angle_between_vectors(v0, v1, axis=1)
    >>> numpy.allclose(a, [1.5708, 1.5708, 1.5708, 0.95532])
    True

    """
    # asarray avoids a copy when possible without raising on NumPy >= 2,
    # where numpy.array(copy=False) fails for inputs that must be converted.
    v0 = numpy.asarray(v0, dtype=numpy.float64)
    v1 = numpy.asarray(v1, dtype=numpy.float64)
    dot = numpy.sum(v0 * v1, axis=axis)
    dot = dot / (vector_norm(v0, axis=axis) * vector_norm(v1, axis=axis))
    # Rounding can push the cosine marginally outside [-1, 1], which would
    # make arccos return NaN for (anti)parallel vectors.
    dot = numpy.clip(dot, -1.0, 1.0)
    return numpy.arccos(dot if directed else numpy.fabs(dot))


def inverse_matrix(matrix):
    """Return inverse of square transformation matrix.

    >>> M0 = random_rotation_matrix()
    >>> M1 = inverse_matrix(M0.T)
    >>> numpy.allclose(M1, numpy.linalg.inv(M0.T))
    True
    >>> for size in range(1, 7):
    ...     M0 = numpy.random.rand(size, size)
    ...     M1 = inverse_matrix(M0)
    ...     if not numpy.allclose(M1, numpy.linalg.inv(M0)): print(size)

    """
    return numpy.linalg.inv(matrix)


def concatenate_matrices(*matrices):
    """Return concatenation of series of transformation matrices.

    The matrices are multiplied left to right, starting from a 4x4
    identity; with no arguments the identity is returned.

    >>> M = numpy.random.rand(16).reshape((4, 4)) - 0.5
    >>> numpy.allclose(M, concatenate_matrices(M))
    True
    >>> numpy.allclose(numpy.dot(M, M.T), concatenate_matrices(M, M.T))
    True

    """
    M = numpy.identity(4)
    for i in matrices:
        M = numpy.dot(M, i)
    return M


def is_same_transform(matrix0, matrix1):
    """Return True if two matrices perform same transformation.

    Both matrices are copied and divided by their [3, 3] element before
    being compared with ``numpy.allclose``, so a uniform scale of the whole
    matrix does not matter.

    >>> is_same_transform(numpy.identity(4), numpy.identity(4))
    True
    >>> is_same_transform(numpy.identity(4), random_rotation_matrix())
    False

    """
    matrix0 = numpy.array(matrix0, dtype=numpy.float64, copy=True)
    matrix0 /= matrix0[3, 3]
    matrix1 = numpy.array(matrix1, dtype=numpy.float64, copy=True)
    matrix1 /= matrix1[3, 3]
    return numpy.allclose(matrix0, matrix1)


def _import_module(name, package=None, warn=True, prefix='_py_', ignore='_'):
    """Try import all public attributes from module into global namespace.

    Existing attributes with name clashes are renamed with prefix.
    Attributes starting with underscore are ignored by default.

    If ``prefix`` is set and ``warn`` is true, a warning is issued for every
    imported attribute that has no existing global of the same name.

    Return True on successful import. A failed import is silent and
    returns None.

    """
    import warnings
    from importlib import import_module
    try:
        if not package:
            module = import_module(name)
        else:
            module = import_module('.' + name, package=package)
    except ImportError:
        # The optional module is missing: stay silent (even when ``warn`` is
        # set) and keep the pure Python implementations.
        return None
    for attr in dir(module):
        if ignore and attr.startswith(ignore):
            continue
        if prefix:
            if attr in globals():
                # Keep the Python implementation reachable under the prefix.
                globals()[prefix + attr] = globals()[attr]
            elif warn:
                warnings.warn("no Python implementation of " + attr)
        globals()[attr] = getattr(module, attr)
    return True


_import_module('_transformations')

if __name__ == "__main__":
    import doctest
    import random  # used in doctests
    numpy.set_printoptions(suppress=True, precision=5)
    doctest.testmod()
b/third_party/SOLD2/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..b6e47617de110dea7ca47e087ff1347cc2646eda --- /dev/null +++ b/third_party/SOLD2/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/third_party/SOLD2/LICENSE b/third_party/SOLD2/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a78ff590248398498242d1eba03791ad0288bdf2 --- /dev/null +++ b/third_party/SOLD2/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Rémi Pautrat + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/third_party/SOLD2/README.md b/third_party/SOLD2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..69713c07084d26ab689532c29293d056bc84f655 --- /dev/null +++ b/third_party/SOLD2/README.md @@ -0,0 +1,216 @@ +# SOLD² - Self-supervised Occlusion-aware Line Description and Detection + +This repository contains the implementation of the paper: [SOLD² : Self-supervised Occlusion-aware Line Description and Detection](https://arxiv.org/abs/2104.03362), J-T. Lin*, R. Pautrat*, V. Larsson, M. Oswald and M. Pollefeys (Oral at CVPR 2021). + +SOLD² is a deep line segment detector and descriptor that can be trained without hand-labelled line segments and that can robustly match lines even in the presence of occlusion. + +## Demos + +Matching in the presence of occlusion: +![demo_occlusion](assets/videos/demo_occlusion.gif) + +Matching with a moving camera: +![demo_moving_camera](assets/videos/demo_moving_camera.gif) + +## Usage + +### Using from kornia + +SOLD² is integrated into [kornia](https://github.com/kornia/kornia) library since version 0.6.7. + + ``` + pip install kornia==0.6.7 + ``` + + Then you can import it as + ```python3 + from kornia.feature import SOLD2 + ``` + + See tutorial on using SOLD² from kornia [here](https://kornia-tutorials.readthedocs.io/en/latest/line_detection_and_matching_sold2.html). + +### Installation + +We recommend using this code in a Python environment (e.g. venv or conda). The following script installs the necessary requirements with pip: +```bash +pip install -r requirements.txt +``` + +Set your dataset and experiment paths (where you will store your datasets and checkpoints of your experiments) by modifying the file `config/project_config.py`. Both variables `DATASET_ROOT` and `EXP_PATH` have to be set. + +Install the Python package: +```bash +pip install -e . 
+``` + +You can download the version of the [Wireframe dataset](https://github.com/huangkuns/wireframe) that we used during our training and testing [here](https://www.polybox.ethz.ch/index.php/s/IfdEf7RoHol7jeg). This repository also includes some files to train on the [Holicity dataset](https://holicity.io/) to add more outdoor images, but note that we did not extensively test this dataset and the original paper was based on the Wireframe dataset only. + +### Training your own model + +All training parameters are located in configuration files in the folder `config`. Training SOLD² from scratch requires several steps, some of which take several days, depending on the size of your dataset. + +
+Step 1: Train on a synthetic dataset + +The following command will create the synthetic dataset and start training the model on it: +```bash +python -m sold2.experiment --mode train --dataset_config sold2/config/synthetic_dataset.yaml --model_config sold2/config/train_detector.yaml --exp_name sold2_synth +``` +
+ +
+Step 2: Export the raw pseudo ground truth on the Wireframe dataset with homography adaptation + +Note that this step can take one to several days depending on your machine and on the size of the dataset. You can set the batch size to the maximum capacity that your GPU can handle. Prior to this step, make sure that the dataset config file `config/wireframe_dataset.yaml` has the lines `gt_source_train` and `gt_source_test` commented and you should also disable the photometric and homographic augmentations. +```bash +python -m sold2.experiment --exp_name wireframe_train --mode export --resume_path --model_config sold2/config/train_detector.yaml --dataset_config sold2/config/wireframe_dataset.yaml --checkpoint_name --export_dataset_mode train --export_batch_size 4 +``` + +You can similarly perform the same for the test set: +```bash +python -m sold2.experiment --exp_name wireframe_test --mode export --resume_path --model_config sold2/config/train_detector.yaml --dataset_config sold2/config/wireframe_dataset.yaml --checkpoint_name --export_dataset_mode test --export_batch_size 4 +``` +
+ +
+ Step 3: Compute the ground truth line segments from the raw data + +```bash +python -m sold2.postprocess.convert_homography_results sold2/config/export_line_features.yaml +``` + +We recommend testing the results on a few samples of your dataset to check the quality of the output, and modifying the hyperparameters if need be. For example, using `detect_thresh=0.5` and `inlier_thresh=0.99` proved successful for the Wireframe dataset in our case. +
+ +
+ Step 4: Train the detector on the Wireframe dataset + +We found it easier to pretrain the detector alone first, before fine-tuning it with the descriptor part. +Uncomment the lines 'gt_source_train' and 'gt_source_test' in `config/wireframe_dataset.yaml` and fill them with the path to the h5 file generated in the previous step. +```bash +python -m sold2.experiment --mode train --dataset_config sold2/config/wireframe_dataset.yaml --model_config sold2/config/train_detector.yaml --exp_name sold2_wireframe +``` + +Alternatively, you can also fine-tune the already trained synthetic model: +```bash +python -m sold2.experiment --mode train --dataset_config sold2/config/wireframe_dataset.yaml --model_config sold2/config/train_detector.yaml --exp_name sold2_wireframe --pretrained --pretrained_path --checkpoint_name +``` + +Lastly, you can resume a training that was stopped: +```bash +python -m sold2.experiment --mode train --dataset_config sold2/config/wireframe_dataset.yaml --model_config sold2/config/train_detector.yaml --exp_name sold2_wireframe --resume --resume_path --checkpoint_name +``` +
+ +
+ Step 5: Train the full pipeline on the Wireframe dataset + +You first need to modify the field 'return_type' in `config/wireframe_dataset.yaml` to 'paired_desc'. The following command will then train the full model (detector + descriptor) on the Wireframe dataset: +```bash +python -m sold2.experiment --mode train --dataset_config sold2/config/wireframe_dataset.yaml --model_config sold2/config/train_full_pipeline.yaml --exp_name sold2_full_wireframe --pretrained --pretrained_path --checkpoint_name +``` +
+ + +### Pretrained models + +We provide the checkpoints of two pretrained models: +- [sold2_synthetic.tar](https://www.polybox.ethz.ch/index.php/s/Lu8jWo7nMKal9yb): SOLD² detector trained on the synthetic dataset only. +- [sold2_wireframe.tar](https://www.polybox.ethz.ch/index.php/s/blOrW89gqSLoHOk): full version of SOLD² trained on the Wireframe dataset. + +Note that you do not need to untar the models, you can directly used them as they are. + + +### How to use it + +We provide a [notebook](notebooks/match_lines.ipynb) showing how to use the trained model of SOLD². Additionally, you can use the model to export line features (segments and descriptor maps) as follows: +```bash +python -m sold2.export_line_features --img_list --output_folder --checkpoint_path +``` + +You can tune some of the line detection parameters in `config/export_line_features.yaml`, in particular the 'detect_thresh' and 'inlier_thresh' to adapt them to your trained model and type of images. As the line detection can be sensitive to the image resolution, we recommend using it with images in the range 300~800 px per side. + + + +## Results + +Comparison of repeatability and localization error to the state of the art on the [Wireframe dataset](https://github.com/huangkuns/wireframe) for an error threshold of 5 pixels in structural and orthogonal distances: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Structural distanceOrthogonal distance
Rep-5Loc-5Rep-5Loc-5
LCNN0.4342.5890.5701.725
HAWP0.4512.6250.5371.725
DeepHough0.4192.5760.6181.720
TP-LSD TP5120.5632.4670.7461.450
LSD0.3582.0790.7070.825
Ours with NMS0.5571.9950.8011.119
Ours0.6162.0190.9140.816
+ +Matching precision-recall curves on the [Wireframe](https://github.com/huangkuns/wireframe) and [ETH3D](https://www.eth3d.net/) datasets: +![pred_lines_pr_curve](assets/results/pred_lines_pr_curve.png) + +## Bibtex + +If you use this code in your project, please consider citing the following paper: +```bibtex +@InProceedings{Pautrat_Lin_2021_CVPR, + author = {Pautrat*, Rémi and Lin*, Juan-Ting and Larsson, Viktor and Oswald, Martin R. and Pollefeys, Marc}, + title = {SOLD2: Self-supervised Occlusion-aware Line Description and Detection}, + booktitle = {Computer Vision and Pattern Recognition (CVPR)}, + year = {2021}, +} +``` diff --git a/third_party/SOLD2/assets/images/terrace0.JPG b/third_party/SOLD2/assets/images/terrace0.JPG new file mode 100644 index 0000000000000000000000000000000000000000..e3f688c4d14b490da30b57cd1312b144588efe32 --- /dev/null +++ b/third_party/SOLD2/assets/images/terrace0.JPG @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4198d3c47d8b397f3a40d58e32e516b8e4f9db4e989992dd069b374880412f5 +size 66986 diff --git a/third_party/SOLD2/assets/images/terrace1.JPG b/third_party/SOLD2/assets/images/terrace1.JPG new file mode 100644 index 0000000000000000000000000000000000000000..4605fcf9bec3ed31c92b0a0f067d5cc16411fc9d --- /dev/null +++ b/third_party/SOLD2/assets/images/terrace1.JPG @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94851889de709b8c8a11b2057e93627a21f623534e6ba2b3a1442b233fd7f20 +size 67363 diff --git a/third_party/SOLD2/assets/results/pred_lines_pr_curve.png b/third_party/SOLD2/assets/results/pred_lines_pr_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..b6d3d1fbbe5b257f0870c5e62c6b661098592ca0 --- /dev/null +++ b/third_party/SOLD2/assets/results/pred_lines_pr_curve.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04428370fa2a9893ce6ce1d1230af76e0ad61b5fa74a0f15d80fa8457f85d76f +size 60081 diff --git 
a/third_party/SOLD2/assets/videos/demo_moving_camera.gif b/third_party/SOLD2/assets/videos/demo_moving_camera.gif new file mode 100644 index 0000000000000000000000000000000000000000..0da37f5d53abe9b84a4c37215363f1fe46932955 --- /dev/null +++ b/third_party/SOLD2/assets/videos/demo_moving_camera.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34200eb93264718c2a0aa29ac8daf9a0892865bedfad3abde0621e3dd7d418e1 +size 18597020 diff --git a/third_party/SOLD2/assets/videos/demo_occlusion.gif b/third_party/SOLD2/assets/videos/demo_occlusion.gif new file mode 100644 index 0000000000000000000000000000000000000000..7174c078db04c54650067f665724ff3c5cc3d942 --- /dev/null +++ b/third_party/SOLD2/assets/videos/demo_occlusion.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e76c729bef31f10c048856319c60bed9c6af16331125b4324c7e866df8689b +size 16266338 diff --git a/third_party/SOLD2/notebooks/__init__.py b/third_party/SOLD2/notebooks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/SOLD2/notebooks/match_lines.ipynb b/third_party/SOLD2/notebooks/match_lines.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f10d98da893d69ea97ab41c53f36796c53ccda40 --- /dev/null +++ b/third_party/SOLD2/notebooks/match_lines.ipynb @@ -0,0 +1,237 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import cv2\n", + "import torch\n", + "\n", + "from sold2.model.line_matcher import LineMatcher\n", + "from sold2.misc.visualize_util import plot_images, plot_lines, plot_line_matches, plot_color_line_matches, plot_keypoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matching from scratch given pairs of images" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\t--------Initializing model----------\n", + "\t [Debug] Adding w_junc with value 0.000000 to model\n", + "\t [Debug] Adding w_heatmap with value 0.000000 to model\n", + "\t [Debug] Adding w_desc with value 0.000000 to model\n", + "\tModel architecture: simple\n", + "\tBackbone: lcnn\n", + "\tJunction decoder: superpoint_decoder\n", + "\tHeatmap decoder: pixel_shuffle\n", + "\t-------------------------------------\n", + "[Debug] detect_thresh: 0.25\n", + "[Debug] num_samples: 64\n", + "[Debug] sampling_method: local_max\n", + "[Debug] inlier_thresh: 0.9\n", + "[Debug] use_candidate_suppression: True\n", + "[Debug] nms_dist_tolerance: 3.0\n", + "[Debug] use_heatmap_refinement: True\n", + "[Debug] heatmap_refine_cfg: {'mode': 'local', 'ratio': 0.2, 'valid_thresh': 0.001, 'num_blocks': 20, 'overlap_ratio': 0.5}\n" + ] + } + ], + "source": [ + "ckpt_path = '../pretrained_models/sold2_wireframe.tar'\n", + "device = 'cuda'\n", + "mode = 'dynamic' # 'dynamic' or 'static'\n", + "\n", + "# Initialize the line matcher\n", + "config = {\n", + " 'model_cfg': {\n", + " 'model_name': \"lcnn_simple\",\n", + " 'model_architecture': \"simple\",\n", + " # Backbone related config\n", + " 'backbone': \"lcnn\",\n", + " 'backbone_cfg': {\n", + " 'input_channel': 1, # Use RGB images or grayscale images.\n", + " 'depth': 4,\n", + " 'num_stacks': 2,\n", + " 'num_blocks': 1,\n", + " 'num_classes': 5\n", + " },\n", + " # Junction decoder related config\n", + " 'junction_decoder': \"superpoint_decoder\",\n", + " 'junc_decoder_cfg': {},\n", + " # Heatmap decoder related config\n", + " 'heatmap_decoder': \"pixel_shuffle\",\n", + " 'heatmap_decoder_cfg': {},\n", + " # Descriptor decoder related config\n", + " 'descriptor_decoder': \"superpoint_descriptor\",\n", + " 'descriptor_decoder_cfg': {},\n", + " # Shared configurations\n", + " 'grid_size': 8,\n", + " 'keep_border_valid': 
True,\n", + " # Threshold of junction detection\n", + " 'detection_thresh': 0.0153846, # 1/65\n", + " 'max_num_junctions': 300,\n", + " # Threshold of heatmap detection\n", + " 'prob_thresh': 0.5,\n", + " # Weighting related parameters\n", + " 'weighting_policy': mode,\n", + " # [Heatmap loss]\n", + " 'w_heatmap': 0.,\n", + " 'w_heatmap_class': 1,\n", + " 'heatmap_loss_func': \"cross_entropy\",\n", + " 'heatmap_loss_cfg': {\n", + " 'policy': mode\n", + " },\n", + " # [Heatmap consistency loss]\n", + " # [Junction loss]\n", + " 'w_junc': 0.,\n", + " 'junction_loss_func': \"superpoint\",\n", + " 'junction_loss_cfg': {\n", + " 'policy': mode\n", + " },\n", + " # [Descriptor loss]\n", + " 'w_desc': 0.,\n", + " 'descriptor_loss_func': \"regular_sampling\",\n", + " 'descriptor_loss_cfg': {\n", + " 'dist_threshold': 8,\n", + " 'grid_size': 4,\n", + " 'margin': 1,\n", + " 'policy': mode\n", + " },\n", + " },\n", + " 'line_detector_cfg': {\n", + " 'detect_thresh': 0.25, # depending on your images, you might need to tune this parameter\n", + " 'num_samples': 64,\n", + " 'sampling_method': \"local_max\",\n", + " 'inlier_thresh': 0.9,\n", + " \"use_candidate_suppression\": True,\n", + " \"nms_dist_tolerance\": 3.,\n", + " \"use_heatmap_refinement\": True,\n", + " \"heatmap_refine_cfg\": {\n", + " \"mode\": \"local\",\n", + " \"ratio\": 0.2,\n", + " \"valid_thresh\": 1e-3,\n", + " \"num_blocks\": 20,\n", + " \"overlap_ratio\": 0.5\n", + " }\n", + " },\n", + " 'multiscale': False,\n", + " 'line_matcher_cfg': {\n", + " 'cross_check': True,\n", + " 'num_samples': 5,\n", + " 'min_dist_pts': 8,\n", + " 'top_k_candidates': 10,\n", + " 'grid_size': 4\n", + " }\n", + "}\n", + "\n", + "line_matcher = LineMatcher(\n", + " config[\"model_cfg\"], ckpt_path, device, config[\"line_detector_cfg\"],\n", + " config[\"line_matcher_cfg\"], config[\"multiscale\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Read and pre-process the images\n", + "scale_factor = 1 # we recommend resizing the images to a resolution in the range 400~800 pixels\n", + "img1 = '../assets/images/terrace0.JPG'\n", + "img1 = cv2.imread(img1, 0)\n", + "img1 = cv2.resize(img1, (img1.shape[1] // scale_factor, img1.shape[0] // scale_factor),\n", + " interpolation = cv2.INTER_AREA)\n", + "img1 = (img1 / 255.).astype(float)\n", + "torch_img1 = torch.tensor(img1, dtype=torch.float)[None, None]\n", + "img2 = '../assets/images/terrace1.JPG'\n", + "img2 = cv2.imread(img2, 0)\n", + "img2 = cv2.resize(img2, (img2.shape[1] // scale_factor, img2.shape[0] // scale_factor),\n", + " interpolation = cv2.INTER_AREA)\n", + "img2 = (img2 / 255.).astype(float)\n", + "torch_img2 = torch.tensor(img2, dtype=torch.float)[None, None]\n", + "\n", + "# Match the lines\n", + "outputs = line_matcher([torch_img1, torch_img2])\n", + "line_seg1 = outputs[\"line_segments\"][0]\n", + "line_seg2 = outputs[\"line_segments\"][1]\n", + "matches = outputs[\"matches\"]\n", + "\n", + "valid_matches = matches != -1\n", + "match_indices = matches[valid_matches]\n", + "matched_lines1 = line_seg1[valid_matches][:, :, ::-1]\n", + "matched_lines2 = line_seg2[match_indices][:, :, ::-1]\n", + "\n", + "# Plot the matches\n", + "plot_images([img1, img2], ['Image 1 - detected lines', 'Image 2 - detected lines'])\n", + "plot_lines([line_seg1[:, :, ::-1], line_seg2[:, :, ::-1]], ps=3, lw=2)\n", + "plot_images([img1, img2], ['Image 1 - matched lines', 'Image 2 - matched lines'])\n", + "plot_color_line_matches([matched_lines1, matched_lines2], lw=2)" + ] + } + ], + "metadata": { + "file_extension": ".py", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "mimetype": "text/x-python", + "name": "python", + "npconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/third_party/SOLD2/notebooks/visualize_exported_dataset.ipynb b/third_party/SOLD2/notebooks/visualize_exported_dataset.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..5ca610dc697b5be20d321e2b21215601452029c5 --- /dev/null +++ b/third_party/SOLD2/notebooks/visualize_exported_dataset.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import yaml\n", + "\n", + "from sold2.dataset.wireframe_dataset import WireframeDataset\n", + "from sold2.dataset.holicity_dataset import HolicityDataset\n", + "from sold2.dataset.merge_dataset import MergeDataset\n", + "from sold2.misc.visualize_util import plot_junctions, plot_line_segments\n", + "from sold2.misc.visualize_util import plot_images, plot_keypoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize the exported ground truth on the Wireframe dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Info] Initializing wireframe dataset...\n", + "\t Found filename cache wireframe_test_cache.pkl at /home/remi/Documents/datasets/wireframe\n", + "\t Load filename cache...\n", + "[Info] Successfully initialized dataset\n", + "\t Name: wireframe\n", + "\t Mode: test\n", + "\t Gt: /home/remi/Documents/datasets/export_datasets/wireframe_test_adaptation_iter0_epoch043_ce1_detect_0.25_inlier_0.75_local_max_v1.5_refine-v2.h5\n", + "\t Counts: 462\n", + "----------------------------------------\n" + ] + } + ], + "source": [ + "# Initialize the wireframe 
dataset\n", + "with open(\"../sold2/config/wireframe_dataset.yaml\", \"r\") as f:\n", + " config = yaml.safe_load(f)\n", + "config['return_type'] = 'paired_desc'\n", + "\n", + "wireframe_dataset = WireframeDataset(mode=\"test\", config=config)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Read in one datapoint\n", + "index = 4\n", + "data1 = wireframe_dataset[index]\n", + "\n", + "# Reference data\n", + "ref_img = data1['ref_image'].numpy().squeeze()\n", + "ref_junc = data1['ref_junctions'].numpy()\n", + "ref_line_map = data1['ref_line_map'].numpy()\n", + "ref_line_points = data1['ref_line_points'].numpy()\n", + "\n", + "# Target data\n", + "target_img = data1['target_image'].numpy().squeeze()\n", + "target_junc = data1['target_junctions'].numpy()\n", + "target_line_map = data1['target_line_map'].numpy()\n", + "target_line_points = data1['target_line_points'].numpy()\n", + "\n", + "# Draw the points and lines\n", + "ref_img_with_junc = plot_junctions(ref_img, ref_junc, junc_size=2)\n", + "ref_line_segments = plot_line_segments(ref_img, ref_junc, ref_line_map, junc_size=1)\n", + "target_img_with_junc = plot_junctions(target_img, target_junc, junc_size=2)\n", + "target_line_segments = plot_line_segments(target_img, target_junc, target_line_map, junc_size=1)\n", + "\n", + "# Plot the images\n", + "plot_images([ref_img_with_junc, ref_line_segments], ['Junctions', 'Line segments'])\n", + "plot_images([target_img_with_junc, target_line_segments], ['Warped junctions', 'Warped line segments'])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Draw the line points for training\n", + "ref_img_with_line_points = plot_junctions(ref_img, ref_line_points, junc_size=1)\n", + "target_img_with_line_points = plot_junctions(target_img, target_line_points, junc_size=1)\n", + "\n", + "# Plot the images\n", + "plot_images([ref_img_with_line_points, target_img_with_line_points], ['Ref', 'Target'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize the exported ground truth on the Holicity dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Info] Initializing Holicity dataset...\n", + "\t Found filename cache holicity_test_cache.pkl at /home/remi/Documents/test_SOLD2_data/datasets/Holicity\n", + "\t Load filename cache...\n", + "[Info] Successfully initialized dataset\n", + "\t Name: Holicity\n", + "\t Mode: test\n", + "\t Gt: holicity_test_homograpy-export_512x512_v1.5_detect_0.25_inlier_0.9_local_max_refine-v2.h5\n", + "\t Counts: 520\n", + "----------------------------------------\n" + ] + } + ], + "source": [ + "# Initialize the Holicity dataset\n", + "with open(\"../sold2/config/holicity_dataset.yaml\", \"r\") as f:\n", + " config = yaml.safe_load(f)\n", + "\n", + "holicity_dataset = HolicityDataset(mode=\"test\", config=config)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Read in one datapoint\n", + "index = 2\n", + "data1 = holicity_dataset[index]\n", + "\n", + "# Reference data\n", + "ref_img = data1['ref_image'].numpy().squeeze()\n", + "ref_junc = data1['ref_junctions'].numpy()\n", + "ref_line_map = data1['ref_line_map'].numpy()\n", + "ref_line_points = data1['ref_line_points'].numpy()\n", + "\n", + "# Target data\n", + "target_img = data1['target_image'].numpy().squeeze()\n", + "target_junc = data1['target_junctions'].numpy()\n", + "target_line_map = data1['target_line_map'].numpy()\n", + "target_line_points = data1['target_line_points'].numpy()\n", + "\n", + "# Draw the points and lines\n", + "ref_img_with_junc = plot_junctions(ref_img, ref_junc, junc_size=2)\n", + "ref_line_segments = plot_line_segments(ref_img, ref_junc, ref_line_map, junc_size=1)\n", + "target_img_with_junc = plot_junctions(target_img, target_junc, junc_size=2)\n", + "target_line_segments = plot_line_segments(target_img, target_junc, target_line_map, junc_size=1)\n", + "\n", + "# Plot the images\n", + "plot_images([ref_img_with_junc, ref_line_segments], ['Junctions', 'Line segments'])\n", + "plot_images([target_img_with_junc, target_line_segments], ['Warped junctions', 'Warped line segments'])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Draw the line points for training\n", + "ref_img_with_line_points = plot_junctions(ref_img, ref_line_points, junc_size=1)\n", + "target_img_with_line_points = plot_junctions(target_img, target_line_points, junc_size=1)\n", + "\n", + "# Plot the images\n", + "plot_images([ref_img_with_line_points, target_img_with_line_points], ['Ref', 'Target'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize the exported ground truth on the merged dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Info] Initializing wireframe dataset...\n", + "\t Found filename cache wireframe_test_cache.pkl at /home/remi/Documents/test_SOLD2_data/datasets/wireframe\n", + "\t Load filename cache...\n", + "[Info] Successfully initialized dataset\n", + "\t Name: wireframe\n", + "\t Mode: test\n", + "\t Gt: wireframe_test_adaptation_iter0_epoch043_ce1_detect_0.25_inlier_0.75_local_max_v1.5_refine-v2.h5\n", + "\t Counts: 462\n", + "----------------------------------------\n", + "[Info] Initializing Holicity dataset...\n", + "\t Found filename cache holicity_test_cache.pkl at /home/remi/Documents/test_SOLD2_data/datasets/Holicity\n", + "\t Load filename cache...\n", + "[Info] Successfully initialized dataset\n", + "\t Name: Holicity\n", + "\t Mode: test\n", + "\t Gt: holicity_test_homograpy-export_512x512_v1.5_detect_0.25_inlier_0.9_local_max_refine-v2.h5\n", + "\t Counts: 520\n", + "----------------------------------------\n" + ] + } + ], + "source": [ + "# Initialize the merge dataset\n", + "with open(\"../sold2/config/merge_dataset.yaml\", \"r\") as f:\n", + " config = yaml.safe_load(f)\n", + "\n", + "merge_dataset = MergeDataset(mode=\"test\", config=config)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + 
"data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Read in one datapoint\n", + "index = 0\n", + "data1 = merge_dataset[index]\n", + "\n", + "# Reference data\n", + "ref_img = data1['ref_image'].numpy().squeeze()\n", + "ref_junc = data1['ref_junctions'].numpy()\n", + "ref_line_map = data1['ref_line_map'].numpy()\n", + "ref_line_points = data1['ref_line_points'].numpy()\n", + "\n", + "# Target data\n", + "target_img = data1['target_image'].numpy().squeeze()\n", + "target_junc = data1['target_junctions'].numpy()\n", + "target_line_map = data1['target_line_map'].numpy()\n", + "target_line_points = data1['target_line_points'].numpy()\n", + "\n", + "# Draw the points and lines\n", + "ref_img_with_junc = plot_junctions(ref_img, ref_junc, junc_size=2)\n", + "ref_line_segments = plot_line_segments(ref_img, ref_junc, ref_line_map, junc_size=1)\n", + "target_img_with_junc = plot_junctions(target_img, target_junc, junc_size=2)\n", + "target_line_segments = plot_line_segments(target_img, target_junc, target_line_map, junc_size=1)\n", + "\n", + "# Plot the images\n", + "plot_images([ref_img_with_junc, ref_line_segments], ['Junctions', 'Line segments'])\n", + "plot_images([target_img_with_junc, target_line_segments], ['Warped junctions', 'Warped line segments'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Draw the line points for training\n", + "ref_img_with_line_points = plot_junctions(ref_img, ref_line_points, junc_size=1)\n", + "target_img_with_line_points = plot_junctions(target_img, target_line_points, junc_size=1)\n", + "\n", + "# Plot the images\n", + "plot_images([ref_img_with_line_points, target_img_with_line_points], ['Ref', 'Target'])" + ] + } + ], + "metadata": { + "file_extension": ".py", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "mimetype": "text/x-python", + "name": "python", + "npconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/third_party/SOLD2/requirements.txt b/third_party/SOLD2/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..421b52557bb98a7663f6bbf8ddca84b5000a0a0f --- /dev/null +++ b/third_party/SOLD2/requirements.txt @@ -0,0 +1,20 @@ +pyyaml +tqdm +attrdict +h5py +numpy +scipy +matplotlib +seaborn +brewer2mpl +torch +torchvision +tensorboard +tensorboardX +opencv-python==4.0.1.23 +opencv-contrib-python==4.0.1.23 +scikit-learn +scikit-image +kornia==0.3.0 +shapely +jupyter diff --git a/third_party/SOLD2/setup.py b/third_party/SOLD2/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..69f72fecdc54cf9b43a7fc55144470e83c5a862d --- /dev/null +++ b/third_party/SOLD2/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + + +setup(name='sold2', version="0.0", packages=['sold2']) diff --git a/third_party/SOLD2/sold2/config/__init__.py b/third_party/SOLD2/sold2/config/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/SOLD2/sold2/config/export_line_features.yaml b/third_party/SOLD2/sold2/config/export_line_features.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f19c7b6d684b7a826d6f2909b8c9f94528fdbf94 --- /dev/null +++ b/third_party/SOLD2/sold2/config/export_line_features.yaml @@ -0,0 +1,80 @@ +### [Model config] +model_cfg: + ### [Model parameters] + model_name: "lcnn_simple" + model_architecture: "simple" + # Backbone related config + backbone: "lcnn" + backbone_cfg: + input_channel: 1 # Use RGB images or grayscale images. + depth: 4 + num_stacks: 2 + num_blocks: 1 + num_classes: 5 + # Junction decoder related config + junction_decoder: "superpoint_decoder" + junc_decoder_cfg: + # Heatmap decoder related config + heatmap_decoder: "pixel_shuffle" + heatmap_decoder_cfg: + # Descriptor decoder related config + descriptor_decoder: "superpoint_descriptor" + descriptor_decoder_cfg: + # Shared configurations + grid_size: 8 + keep_border_valid: True + # Threshold of junction detection + detection_thresh: 0.0153846 # 1/65 + max_num_junctions: 300 + # Threshold of heatmap detection + prob_thresh: 0.5 + + ### [Loss parameters] + weighting_policy: "dynamic" + # [Heatmap loss] + w_heatmap: 0. + w_heatmap_class: 1 + heatmap_loss_func: "cross_entropy" + heatmap_loss_cfg: + policy: "dynamic" + # [Junction loss] + w_junc: 0. + junction_loss_func: "superpoint" + junction_loss_cfg: + policy: "dynamic" + # [Descriptor loss] + w_desc: 0. + descriptor_loss_func: "regular_sampling" + descriptor_loss_cfg: + dist_threshold: 8 + grid_size: 4 + margin: 1 + policy: "dynamic" + +### [Line detector config] +line_detector_cfg: + detect_thresh: 0.5 + num_samples: 64 + sampling_method: "local_max" + inlier_thresh: 0.99 + use_candidate_suppression: True + nms_dist_tolerance: 3. 
+ use_heatmap_refinement: True + heatmap_refine_cfg: + mode: "local" + ratio: 0.2 + valid_thresh: 0.001 + num_blocks: 20 + overlap_ratio: 0.5 + use_junction_refinement: True + junction_refine_cfg: + num_perturbs: 9 + perturb_interval: 0.25 + +### [Line matcher config] +line_matcher_cfg: + cross_check: True + num_samples: 5 + min_dist_pts: 8 + top_k_candidates: 10 + grid_size: 4 \ No newline at end of file diff --git a/third_party/SOLD2/sold2/config/holicity_dataset.yaml b/third_party/SOLD2/sold2/config/holicity_dataset.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72e9380dbf496dc4b4d6430d58534e0663c85f0e --- /dev/null +++ b/third_party/SOLD2/sold2/config/holicity_dataset.yaml @@ -0,0 +1,76 @@ +### General dataset parameters +dataset_name: "holicity" +train_splits: ["2018-01"] # 5720 images +add_augmentation_to_all_splits: False +gray_scale: True +# Ground truth source ('official' or path to the exported h5 dataset.) +#gt_source_train: "" # Fill with your own export file +#gt_source_test: "" # Fill with your own export file +# Return type: (1) single (to train the detector only) +# or (2) paired_desc (to train the detector + descriptor) +return_type: "single" +random_seed: 0 + +### Descriptor training parameters +# Number of points extracted per line +max_num_samples: 10 +# Max number of training line points extracted in the whole image +max_pts: 1000 +# Min distance between two points on a line (in pixels) +min_dist_pts: 10 +# Small jittering of the sampled points during training +jittering: 0 + +### Data preprocessing configuration +preprocessing: + resize: [512, 512] + blur_size: 11 +augmentation: + random_scaling: + enable: True + range: [0.7, 1.5] + photometric: + enable: True + primitives: ['random_brightness', 'random_contrast', + 'additive_speckle_noise', 'additive_gaussian_noise', + 'additive_shade', 'motion_blur' ] + params: + random_brightness: {brightness: 0.2} + random_contrast: {contrast: [0.3, 1.5]} + 
additive_gaussian_noise: {stddev_range: [0, 10]} + additive_speckle_noise: {prob_range: [0, 0.0035]} + additive_shade: + transparency_range: [-0.5, 0.5] + kernel_size_range: [100, 150] + motion_blur: {max_kernel_size: 3} + random_order: True + homographic: + enable: True + params: + translation: true + rotation: true + scaling: true + perspective: true + scaling_amplitude: 0.2 + perspective_amplitude_x: 0.2 + perspective_amplitude_y: 0.2 + patch_ratio: 0.85 + max_angle: 1.57 + allow_artifacts: true + valid_border_margin: 3 + +### Homography adaptation configuration +homography_adaptation: + num_iter: 100 + valid_border_margin: 3 + min_counts: 30 + homographies: + translation: true + rotation: true + scaling: true + perspective: true + scaling_amplitude: 0.2 + perspective_amplitude_x: 0.2 + perspective_amplitude_y: 0.2 + allow_artifacts: true + patch_ratio: 0.85 \ No newline at end of file diff --git a/third_party/SOLD2/sold2/config/merge_dataset.yaml b/third_party/SOLD2/sold2/config/merge_dataset.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f70465b71e507cbc9f258a8bbf45f41e435ee9b0 --- /dev/null +++ b/third_party/SOLD2/sold2/config/merge_dataset.yaml @@ -0,0 +1,54 @@ +dataset_name: "merge" +datasets: ["wireframe", "holicity"] +weights: [0.5, 0.5] +gt_source_train: ["", ""] # Fill with your own [wireframe, holicity] exported ground-truth +gt_source_test: ["", ""] # Fill with your own [wireframe, holicity] exported ground-truth +train_splits: ["", "2018-01"] +add_augmentation_to_all_splits: False +gray_scale: True +# Return type: (1) single (original version) (2) paired +return_type: "paired_desc" +# Number of points extracted per line +max_num_samples: 10 +# Max number of training line points extracted in the whole image +max_pts: 1000 +# Min distance between two points on a line (in pixels) +min_dist_pts: 10 +# Small jittering of the sampled points during training +jittering: 0 +# Random seed +random_seed: 0 +# Date preprocessing 
configuration. +preprocessing: + resize: [512, 512] + blur_size: 11 +augmentation: + photometric: + enable: True + primitives: [ + 'random_brightness', 'random_contrast', 'additive_speckle_noise', + 'additive_gaussian_noise', 'additive_shade', 'motion_blur' ] + params: + random_brightness: {brightness: 0.2} + random_contrast: {contrast: [0.3, 1.5]} + additive_gaussian_noise: {stddev_range: [0, 10]} + additive_speckle_noise: {prob_range: [0, 0.0035]} + additive_shade: + transparency_range: [-0.5, 0.5] + kernel_size_range: [100, 150] + motion_blur: {max_kernel_size: 3} + random_order: True + homographic: + enable: True + params: + translation: true + rotation: true + scaling: true + perspective: true + scaling_amplitude: 0.2 + perspective_amplitude_x: 0.2 + perspective_amplitude_y: 0.2 + patch_ratio: 0.85 + max_angle: 1.57 + allow_artifacts: true + valid_border_margin: 3 diff --git a/third_party/SOLD2/sold2/config/project_config.py b/third_party/SOLD2/sold2/config/project_config.py new file mode 100644 index 0000000000000000000000000000000000000000..42ed00d1c1900e71568d1b06ff4f9d19a295232d --- /dev/null +++ b/third_party/SOLD2/sold2/config/project_config.py @@ -0,0 +1,41 @@ +""" +Project configurations. +""" +import os + + +class Config(object): + """ Datasets and experiments folders for the whole project. 
""" + ##################### + ## Dataset setting ## + ##################### + DATASET_ROOT = os.getenv("DATASET_ROOT", "./datasets/") # TODO: path to your datasets folder + if not os.path.exists(DATASET_ROOT): + os.makedirs(DATASET_ROOT) + + # Synthetic shape dataset + synthetic_dataroot = os.path.join(DATASET_ROOT, "synthetic_shapes") + synthetic_cache_path = os.path.join(DATASET_ROOT, "synthetic_shapes") + if not os.path.exists(synthetic_dataroot): + os.makedirs(synthetic_dataroot) + + # Exported predictions dataset + export_dataroot = os.path.join(DATASET_ROOT, "export_datasets") + export_cache_path = os.path.join(DATASET_ROOT, "export_datasets") + if not os.path.exists(export_dataroot): + os.makedirs(export_dataroot) + + # Wireframe dataset + wireframe_dataroot = os.path.join(DATASET_ROOT, "wireframe") + wireframe_cache_path = os.path.join(DATASET_ROOT, "wireframe") + + # Holicity dataset + holicity_dataroot = os.path.join(DATASET_ROOT, "Holicity") + holicity_cache_path = os.path.join(DATASET_ROOT, "Holicity") + + ######################## + ## Experiment Setting ## + ######################## + EXP_PATH = os.getenv("EXP_PATH", "./experiments/") # TODO: path to your experiments folder + if not os.path.exists(EXP_PATH): + os.makedirs(EXP_PATH) diff --git a/third_party/SOLD2/sold2/config/synthetic_dataset.yaml b/third_party/SOLD2/sold2/config/synthetic_dataset.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9fa44522b6c09500100dbc56a11bc8a24d56832 --- /dev/null +++ b/third_party/SOLD2/sold2/config/synthetic_dataset.yaml @@ -0,0 +1,48 @@ +### General dataset parameters +dataset_name: "synthetic_shape" +primitives: "all" +add_augmentation_to_all_splits: True +test_augmentation_seed: 200 +# Shape generation configuration +generation: + split_sizes: {'train': 20000, 'val': 2000, 'test': 400} + random_seed: 10 + image_size: [960, 1280] + min_len: 0.0985 + min_label_len: 0.099 + params: + generate_background: + min_kernel_size: 150 + 
max_kernel_size: 500 + min_rad_ratio: 0.02 + max_rad_ratio: 0.031 + draw_stripes: + transform_params: [0.1, 0.1] + draw_multiple_polygons: + kernel_boundaries: [50, 100] + +### Data preprocessing configuration. +preprocessing: + resize: [400, 400] + blur_size: 11 +augmentation: + photometric: + enable: True + primitives: 'all' + params: {} + random_order: True + homographic: + enable: True + params: + translation: true + rotation: true + scaling: true + perspective: true + scaling_amplitude: 0.2 + perspective_amplitude_x: 0.2 + perspective_amplitude_y: 0.2 + patch_ratio: 0.8 + max_angle: 1.57 + allow_artifacts: true + translation_overflow: 0.05 + valid_border_margin: 0 diff --git a/third_party/SOLD2/sold2/config/train_detector.yaml b/third_party/SOLD2/sold2/config/train_detector.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c53c35a6464eb1c37a9ea71c939225f793543aec --- /dev/null +++ b/third_party/SOLD2/sold2/config/train_detector.yaml @@ -0,0 +1,51 @@ +### [Model parameters] +model_name: "lcnn_simple" +model_architecture: "simple" +# Backbone related config +backbone: "lcnn" +backbone_cfg: + input_channel: 1 # Use RGB images or grayscale images. + depth: 4 + num_stacks: 2 + num_blocks: 1 + num_classes: 5 +# Junction decoder related config +junction_decoder: "superpoint_decoder" +junc_decoder_cfg: +# Heatmap decoder related config +heatmap_decoder: "pixel_shuffle" +heatmap_decoder_cfg: +# Shared configurations +grid_size: 8 +keep_border_valid: True +# Threshold of junction detection +detection_thresh: 0.0153846 # 1/65 +# Threshold of heatmap detection +prob_thresh: 0.5 + +### [Loss parameters] +weighting_policy: "dynamic" +# [Heatmap loss] +w_heatmap: 0. +w_heatmap_class: 1 +heatmap_loss_func: "cross_entropy" +heatmap_loss_cfg: + policy: "dynamic" +# [Junction loss] +w_junc: 0. 
+junction_loss_func: "superpoint" +junction_loss_cfg: + policy: "dynamic" + +### [Training parameters] +learning_rate: 0.0005 +epochs: 200 +train: + batch_size: 6 + num_workers: 8 +test: + batch_size: 6 + num_workers: 8 +disp_freq: 100 +summary_freq: 200 +max_ckpt: 150 \ No newline at end of file diff --git a/third_party/SOLD2/sold2/config/train_full_pipeline.yaml b/third_party/SOLD2/sold2/config/train_full_pipeline.yaml new file mode 100644 index 0000000000000000000000000000000000000000..233d898f47110c14beabbe63ee82044d506cc15a --- /dev/null +++ b/third_party/SOLD2/sold2/config/train_full_pipeline.yaml @@ -0,0 +1,62 @@ +### [Model parameters] +model_name: "lcnn_simple" +model_architecture: "simple" +# Backbone related config +backbone: "lcnn" +backbone_cfg: + input_channel: 1 # Use RGB images or grayscale images. + depth: 4 + num_stacks: 2 + num_blocks: 1 + num_classes: 5 +# Junction decoder related config +junction_decoder: "superpoint_decoder" +junc_decoder_cfg: +# Heatmap decoder related config +heatmap_decoder: "pixel_shuffle" +heatmap_decoder_cfg: +# Descriptor decoder related config +descriptor_decoder: "superpoint_descriptor" +descriptor_decoder_cfg: +# Shared configurations +grid_size: 8 +keep_border_valid: True +# Threshold of junction detection +detection_thresh: 0.0153846 # 1/65 +# Threshold of heatmap detection +prob_thresh: 0.5 + +### [Loss parameters] +weighting_policy: "dynamic" +# [Heatmap loss] +w_heatmap: 0. +w_heatmap_class: 1 +heatmap_loss_func: "cross_entropy" +heatmap_loss_cfg: + policy: "dynamic" +# [Junction loss] +w_junc: 0. +junction_loss_func: "superpoint" +junction_loss_cfg: + policy: "dynamic" +# [Descriptor loss] +w_desc: 0. 
+descriptor_loss_func: "regular_sampling" +descriptor_loss_cfg: + dist_threshold: 8 + grid_size: 4 + margin: 1 + policy: "dynamic" + +### [Training parameters] +learning_rate: 0.0005 +epochs: 130 +train: + batch_size: 4 + num_workers: 8 +test: + batch_size: 4 + num_workers: 8 +disp_freq: 100 +summary_freq: 200 +max_ckpt: 130 \ No newline at end of file diff --git a/third_party/SOLD2/sold2/config/wireframe_dataset.yaml b/third_party/SOLD2/sold2/config/wireframe_dataset.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15abd3dbd6462dca21ac331a802b86a8ef050bff --- /dev/null +++ b/third_party/SOLD2/sold2/config/wireframe_dataset.yaml @@ -0,0 +1,75 @@ +### General dataset parameters +dataset_name: "wireframe" +add_augmentation_to_all_splits: False +gray_scale: True +# Ground truth source ('official' or path to the exported h5 dataset.) +# gt_source_train: "" # Fill with your own export file +# gt_source_test: "" # Fill with your own export file +# Return type: (1) single (to train the detector only) +# or (2) paired_desc (to train the detector + descriptor) +return_type: "single" +random_seed: 0 + +### Descriptor training parameters +# Number of points extracted per line +max_num_samples: 10 +# Max number of training line points extracted in the whole image +max_pts: 1000 +# Min distance between two points on a line (in pixels) +min_dist_pts: 10 +# Small jittering of the sampled points during training +jittering: 0 + +### Data preprocessing configuration +preprocessing: + resize: [512, 512] + blur_size: 11 +augmentation: + random_scaling: + enable: True + range: [0.7, 1.5] + photometric: + enable: True + primitives: ['random_brightness', 'random_contrast', + 'additive_speckle_noise', 'additive_gaussian_noise', + 'additive_shade', 'motion_blur' ] + params: + random_brightness: {brightness: 0.2} + random_contrast: {contrast: [0.3, 1.5]} + additive_gaussian_noise: {stddev_range: [0, 10]} + additive_speckle_noise: {prob_range: [0, 0.0035]} + 
def get_dataset(mode="train", dataset_cfg=None):
    """Instantiate the dataset selected by ``dataset_cfg["dataset_name"]``.

    Args:
        mode: Forwarded unchanged to the dataset constructor.
        dataset_cfg: Dataset configuration; its "dataset_name" entry picks
            the dataset class ("synthetic_shape", "wireframe", "holicity"
            or "merge").

    Returns:
        A ``(dataset, collate_fn)`` tuple.

    Raises:
        ValueError: If ``dataset_cfg`` is None or names an unknown dataset.
    """
    if dataset_cfg is None:
        raise ValueError("[Error] The dataset config is required!")

    name = dataset_cfg["dataset_name"]

    # The collate functions are imported lazily, only for the selected branch.
    if name == "synthetic_shape":
        dataset = SyntheticShapes(mode, dataset_cfg)
        from .synthetic_dataset import synthetic_collate_fn as collate_fn
    elif name == "wireframe":
        dataset = WireframeDataset(mode, dataset_cfg)
        from .wireframe_dataset import wireframe_collate_fn as collate_fn
    elif name == "holicity":
        dataset = HolicityDataset(mode, dataset_cfg)
        from .holicity_dataset import holicity_collate_fn as collate_fn
    elif name == "merge":
        dataset = MergeDataset(mode, dataset_cfg)
        # The merged dataset reuses the Holicity collate function.
        from .holicity_dataset import holicity_collate_fn as collate_fn
    else:
        raise ValueError(
            "[Error] The dataset '%s' is not supported" % name)

    return dataset, collate_fn
def holicity_collate_fn(batch):
    """Collate a list of sample dicts into a single batch dict.

    The keys of the first sample are classified by substring matching:

    * keys containing a "batch" name are stacked with
      ``torch_loader.default_collate``;
    * keys containing a "list" name are gathered into a plain Python list,
      one entry per sample;
    * keys matching neither group are dropped from the output.

    Args:
        batch: Non-empty sequence of dicts, one per sample.

    Returns:
        Dict mapping each retained key to its collated value.

    Raises:
        ValueError: If a key matches both groups at once.
    """
    batch_keys = ("image", "junction_map", "valid_mask", "heatmap",
                  "heatmap_pos", "heatmap_neg", "homography",
                  "line_points", "line_indices")
    list_keys = ("junctions", "line_map", "line_map_pos",
                 "line_map_neg", "file_key")

    outputs = {}
    for data_key in batch[0]:
        is_batch_key = any(key in data_key for key in batch_keys)
        is_list_key = any(key in data_key for key in list_keys)

        if is_batch_key and is_list_key:
            raise ValueError(
                "[Error] A key matches batch keys and list keys simultaneously.")
        if not (is_batch_key or is_list_key):
            # Unknown keys are silently skipped.
            continue

        samples = [b[data_key] for b in batch]
        if is_batch_key:
            outputs[data_key] = torch_loader.default_collate(samples)
        else:
            outputs[data_key] = samples

    return outputs
Only 'train' and 'test'.") + self.mode = mode + + if config is None: + self.config = self.get_default_config() + else: + self.config = config + # Also get the default config + self.default_config = self.get_default_config() + + # Get cache setting + self.dataset_name = self.get_dataset_name() + self.cache_name = self.get_cache_name() + self.cache_path = cfg.holicity_cache_path + + # Get the ground truth source if it exists + self.gt_source = None + if "gt_source_%s"%(self.mode) in self.config: + self.gt_source = self.config.get("gt_source_%s"%(self.mode)) + self.gt_source = os.path.join(cfg.export_dataroot, self.gt_source) + # Check the full path exists + if not os.path.exists(self.gt_source): + raise ValueError( + "[Error] The specified ground truth source does not exist.") + + # Get the filename dataset + print("[Info] Initializing Holicity dataset...") + self.filename_dataset, self.datapoints = self.construct_dataset() + + # Get dataset length + self.dataset_length = len(self.datapoints) + + # Print some info + print("[Info] Successfully initialized dataset") + print("\t Name: Holicity") + print("\t Mode: %s" %(self.mode)) + print("\t Gt: %s" %(self.config.get("gt_source_%s"%(self.mode), + "None"))) + print("\t Counts: %d" %(self.dataset_length)) + print("----------------------------------------") + + ####################################### + ## Dataset construction related APIs ## + ####################################### + def construct_dataset(self): + """ Construct the dataset (from scratch or from cache). 
""" + # Check if the filename cache exists + # If cache exists, load from cache + if self.check_dataset_cache(): + print("\t Found filename cache %s at %s"%(self.cache_name, + self.cache_path)) + print("\t Load filename cache...") + filename_dataset, datapoints = self.get_filename_dataset_from_cache() + # If not, initialize dataset from scratch + else: + print("\t Can't find filename cache ...") + print("\t Create filename dataset from scratch...") + filename_dataset, datapoints = self.get_filename_dataset() + print("\t Create filename dataset cache...") + self.create_filename_dataset_cache(filename_dataset, datapoints) + + return filename_dataset, datapoints + + def create_filename_dataset_cache(self, filename_dataset, datapoints): + """ Create filename dataset cache for faster initialization. """ + # Check cache path exists + if not os.path.exists(self.cache_path): + os.makedirs(self.cache_path) + + cache_file_path = os.path.join(self.cache_path, self.cache_name) + data = { + "filename_dataset": filename_dataset, + "datapoints": datapoints + } + with open(cache_file_path, "wb") as f: + pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) + + def get_filename_dataset_from_cache(self): + """ Get filename dataset from cache. """ + # Load from pkl cache + cache_file_path = os.path.join(self.cache_path, self.cache_name) + with open(cache_file_path, "rb") as f: + data = pickle.load(f) + + return data["filename_dataset"], data["datapoints"] + + def get_filename_dataset(self): + """ Get the path to the dataset. 
""" + if self.mode == "train": + # Contains 5720 or 11872 images + dataset_path = [os.path.join(cfg.holicity_dataroot, p) + for p in self.config["train_splits"]] + else: + # Test mode - Contains 520 images + dataset_path = [os.path.join(cfg.holicity_dataroot, "2018-03")] + + # Get paths to all image files + image_paths = [] + for folder in dataset_path: + image_paths += [os.path.join(folder, img) + for img in os.listdir(folder) + if os.path.splitext(img)[-1] == ".jpg"] + image_paths = sorted(image_paths) + + # Verify all the images exist + for idx in range(len(image_paths)): + image_path = image_paths[idx] + if not (os.path.exists(image_path)): + raise ValueError( + "[Error] The image does not exist. %s"%(image_path)) + + # Construct the filename dataset + num_pad = int(math.ceil(math.log10(len(image_paths))) + 1) + filename_dataset = {} + for idx in range(len(image_paths)): + # Get the file key + key = self.get_padded_filename(num_pad, idx) + + filename_dataset[key] = {"image": image_paths[idx]} + + # Get the datapoints + datapoints = list(sorted(filename_dataset.keys())) + + return filename_dataset, datapoints + + def get_dataset_name(self): + """ Get dataset name from dataset config / default config. """ + dataset_name = self.config.get("dataset_name", + self.default_config["dataset_name"]) + dataset_name = dataset_name + "_%s" % self.mode + return dataset_name + + def get_cache_name(self): + """ Get cache name from dataset config / default config. """ + dataset_name = self.config.get("dataset_name", + self.default_config["dataset_name"]) + dataset_name = dataset_name + "_%s" % self.mode + # Compose cache name + cache_name = dataset_name + "_cache.pkl" + return cache_name + + def check_dataset_cache(self): + """ Check if dataset cache exists. 
""" + cache_file_path = os.path.join(self.cache_path, self.cache_name) + if os.path.exists(cache_file_path): + return True + else: + return False + + @staticmethod + def get_padded_filename(num_pad, idx): + """ Get the padded filename using adaptive padding. """ + file_len = len("%d" % (idx)) + filename = "0" * (num_pad - file_len) + "%d" % (idx) + return filename + + def get_default_config(self): + """ Get the default configuration. """ + return { + "dataset_name": "holicity", + "train_split": "2018-01", + "add_augmentation_to_all_splits": False, + "preprocessing": { + "resize": [512, 512], + "blur_size": 11 + }, + "augmentation":{ + "photometric":{ + "enable": False + }, + "homographic":{ + "enable": False + }, + }, + } + + ############################################ + ## Pytorch and preprocessing related APIs ## + ############################################ + @staticmethod + def get_data_from_path(data_path): + """ Get data from the information from filename dataset. """ + output = {} + + # Get image data + image_path = data_path["image"] + image = imread(image_path) + output["image"] = image + + return output + + @staticmethod + def convert_line_map(lcnn_line_map, num_junctions): + """ Convert the line_pos or line_neg + (represented by two junction indexes) to our line map. """ + # Initialize empty line map + line_map = np.zeros([num_junctions, num_junctions]) + + # Iterate through all the lines + for idx in range(lcnn_line_map.shape[0]): + index1 = lcnn_line_map[idx, 0] + index2 = lcnn_line_map[idx, 1] + + line_map[index1, index2] = 1 + line_map[index2, index1] = 1 + + return line_map + + @staticmethod + def junc_to_junc_map(junctions, image_size): + """ Convert junction points to junction maps. 
def get_homo_transform(self):
    """ Get homographic transforms (according to the config).

    Reads self.config["augmentation"]["homographic"] (keys "enable" and
    "params"), self.config["preprocessing"]["resize"] and, when present,
    self.config["generation"]["min_label_len"].

    Returns:
        The object built by homoaug.homography_transform(
        image_shape, homo_config, 0, min_label_len).

    Raises:
        ValueError: if homographic augmentation is not enabled.
    """
    homo_section = self.config["augmentation"]["homographic"]
    # Check the switch before reading "params": a disabled section may not
    # define "params" at all, and the caller should get the explicit error.
    if not homo_section["enable"]:
        raise ValueError(
            "[Error] Homographic augmentation is not enabled")
    homo_config = homo_section["params"]

    # Parse the homographic transforms
    image_shape = self.config["preprocessing"]["resize"]

    # min_label_len is optional: a config without a "generation" section
    # (or without the key) means "no restriction".
    try:
        min_label_tmp = self.config["generation"]["min_label_len"]
    except (KeyError, TypeError):
        min_label_tmp = None

    # float label len => fraction of the shorter image side
    if isinstance(min_label_tmp, float):
        min_label_len = min_label_tmp * min(image_shape)
    # int label len => length in pixel of the generation image
    elif isinstance(min_label_tmp, int):
        # "resize" is a list, so it cannot be divided directly; use its
        # first entry to match the first entry of "image_size".
        # NOTE(review): assumes both lists share the same axis order.
        scale_ratio = (image_shape[0]
                       / self.config["generation"]["image_size"][0])
        min_label_len = min_label_tmp * scale_ratio
    # if none => no restriction
    else:
        min_label_len = 0

    # Initialize the transform
    homographic_trans = homoaug.homography_transform(
        image_shape, homo_config, 0, min_label_len)

    return homographic_trans
""" + if np.sum(line_map) == 0: + # No segment detected in the image + line_indices = np.zeros(self.config["max_pts"], dtype=int) + line_points = np.zeros((self.config["max_pts"], 2), dtype=float) + return line_points, line_indices + + # Extract all pairs of connected junctions + junc_indices = np.array( + [[i, j] for (i, j) in zip(*np.where(line_map)) if j > i]) + line_segments = np.stack([junctions[junc_indices[:, 0]], + junctions[junc_indices[:, 1]]], axis=1) + # line_segments is (num_lines, 2, 2) + line_lengths = np.linalg.norm( + line_segments[:, 0] - line_segments[:, 1], axis=1) + + # Sample the points separated by at least min_dist_pts along each line + # The number of samples depends on the length of the line + num_samples = np.minimum(line_lengths // self.config["min_dist_pts"], + self.config["max_num_samples"]) + line_points = [] + line_indices = [] + cur_line_idx = 1 + for n in np.arange(2, self.config["max_num_samples"] + 1): + # Consider all lines where we can fit up to n points + cur_line_seg = line_segments[num_samples == n] + line_points_x = np.linspace(cur_line_seg[:, 0, 0], + cur_line_seg[:, 1, 0], + n, axis=-1).flatten() + line_points_y = np.linspace(cur_line_seg[:, 0, 1], + cur_line_seg[:, 1, 1], + n, axis=-1).flatten() + jitter = self.config.get("jittering", 0) + if jitter: + # Add a small random jittering of all points along the line + angles = np.arctan2( + cur_line_seg[:, 1, 0] - cur_line_seg[:, 0, 0], + cur_line_seg[:, 1, 1] - cur_line_seg[:, 0, 1]).repeat(n) + jitter_hyp = (np.random.rand(len(angles)) * 2 - 1) * jitter + line_points_x += jitter_hyp * np.sin(angles) + line_points_y += jitter_hyp * np.cos(angles) + line_points.append(np.stack([line_points_x, line_points_y], axis=-1)) + # Keep track of the line indices for each sampled point + num_cur_lines = len(cur_line_seg) + line_idx = np.arange(cur_line_idx, cur_line_idx + num_cur_lines) + line_indices.append(line_idx.repeat(n)) + cur_line_idx += num_cur_lines + line_points = 
def export_preprocessing(self, data, numpy=False):
    """ Preprocess the exported data.

    Takes data["image"], resizes it to self.config["preprocessing"]["resize"]
    when its first two dimensions differ from that setting, optionally
    converts it to grayscale (self.config["gray_scale"]) and applies
    photoaug.normalize_image().

    Returns a dict with the single key "image": the processed array when
    numpy is true, otherwise the result of transforms.ToTensor() on it.
    """
    target_hw = self.config["preprocessing"]["resize"]
    image = data["image"]

    # Resize the image before photometric and homographical augmentations
    needs_resize = not (list(image.shape[:2]) == target_hw)
    if needs_resize:
        # cv2.resize takes the size reversed with respect to the config
        image = cv2.resize(image, tuple(target_hw[::-1]),
                           interpolation=cv2.INTER_LINEAR)
        image = np.array(image, dtype=np.uint8)

    # Optionally convert the image to grayscale
    if self.config["gray_scale"]:
        image = (color.rgb2gray(image) * 255.).astype(np.uint8)

    normalize = photoaug.normalize_image()
    image = normalize(image)

    # Convert to tensor (unless numpy output is requested) and return
    to_tensor = transforms.ToTensor()
    return {"image": image if numpy else to_tensor(image)}
+ size_old = list(image.shape)[:2] # Only H and W dimensions + + image = cv2.resize( + image, tuple(self.config['preprocessing']['resize'][::-1]), + interpolation=cv2.INTER_LINEAR) + image = np.array(image, dtype=np.uint8) + + # # In HW format + # junctions = (junctions * np.array( + # self.config['preprocessing']['resize'], np.float) + # / np.array(size_old, np.float)) + + # Generate the line heatmap after post-processing + junctions_xy = np.flip(np.round(junctions).astype(np.int32), axis=1) + image_size = image.shape[:2] + heatmap = get_line_heatmap(junctions_xy, line_map, image_size) + + # Optionally convert the image to grayscale + if self.config["gray_scale"]: + image = (color.rgb2gray(image) * 255.).astype(np.uint8) + + # Check if we need to apply augmentations + # In training mode => yes. + # In homography adaptation mode (export mode) => No + if self.config["augmentation"]["photometric"]["enable"]: + photo_trans_lst = self.get_photo_transform() + ### Image transform ### + np.random.shuffle(photo_trans_lst) + image_transform = transforms.Compose( + photo_trans_lst + [photoaug.normalize_image()]) + else: + image_transform = photoaug.normalize_image() + image = image_transform(image) + + # Perform the random scaling + if scale != 1.: + image, junctions, line_map, valid_mask = random_scaling( + image, junctions, line_map, scale, + h_crop=h_crop, w_crop=w_crop) + else: + # Declare default valid mask (all ones) + valid_mask = np.ones(image_size) + + # Initialize the empty output dict + outputs = {} + # Convert to tensor and return the results + to_tensor = transforms.ToTensor() + + # Check homographic augmentation + warp = (self.config["augmentation"]["homographic"]["enable"] + and disable_homoaug == False) + if warp: + homo_trans = self.get_homo_transform() + # Perform homographic transform + if H1 is None: + homo_outputs = homo_trans(image, junctions, line_map, + valid_mask=valid_mask) + else: + homo_outputs = homo_trans( + image, junctions, line_map, homo=H1, 
def preprocessing_exported_paired_desc(self, data, numpy=False, scale=1.):
    """ Train preprocessing for paired data for the exported labels
        for descriptor training.

    Runs self.train_preprocessing_exported twice on the same sample:
    first for the target view (H1=None, H2=the sampled reference
    homography), then for the reference view (H1=the sampled reference
    homography, H2=the target view's "homography_mat"). Both calls share
    the same scale and random crop offsets.

    Args:
        data: sample dict forwarded to train_preprocessing_exported.
        numpy: forwarded as-is; selects numpy or tensor outputs.
        scale: scaling factor; a random crop offset is drawn when > 1.

    Returns:
        A dict holding every reference entry under "ref_<key>" and every
        target entry under "target_<key>".
    """
    outputs = {}

    # Define the random crop for scaling if necessary
    h_crop, w_crop = 0, 0
    if scale > 1:
        H, W = self.config["preprocessing"]["resize"]
        H_scale, W_scale = round(H * scale), round(W * scale)
        if H_scale > H:
            h_crop = np.random.randint(H_scale - H)
        if W_scale > W:
            w_crop = np.random.randint(W_scale - W)

    # Sample ref homography first
    homo_config = self.config["augmentation"]["homographic"]["params"]
    image_shape = self.config["preprocessing"]["resize"]
    ref_H, ref_scale = homoaug.sample_homography(image_shape,
                                                 **homo_config)

    # Data for target view (All augmentation)
    target_data = self.train_preprocessing_exported(
        data, numpy=numpy, desc_training=True, H1=None, H2=ref_H,
        scale=scale, h_crop=h_crop, w_crop=w_crop)

    # With numpy=True the homography is already an ndarray and has no
    # .numpy() method; only the tensor output needs the conversion.
    target_H = target_data['homography_mat']
    if not numpy:
        target_H = target_H.numpy()

    # Data for reference view (warped with the sampled ref homography)
    ref_data = self.train_preprocessing_exported(
        data, numpy=numpy, desc_training=True, H1=ref_H,
        H1_scale=ref_scale, H2=target_H,
        scale=scale, h_crop=h_crop, w_crop=w_crop)

    # Spread ref data
    for key, val in ref_data.items():
        outputs["ref_" + key] = val

    # Spread target data
    for key, val in target_data.items():
        outputs["target_" + key] = val

    return outputs
def get_data_from_key(self, file_key):
    """ Get data from file_key.

    Looks file_key up in self.filename_dataset, loads the entry with
    self.get_data_from_path and runs it through self.train_preprocessing
    (train mode, or when "add_augmentation_to_all_splits" is set) or
    self.test_preprocessing, always with numpy=True. The key is stored in
    the result under "file_key".

    Raises ValueError when file_key is not in the filename dataset.
    """
    # Check key exists
    if file_key not in self.filename_dataset.keys():
        raise ValueError(
            "[Error] the specified key is not in the dataset.")

    # Read in the image and npz labels
    raw = self.get_data_from_path(self.filename_dataset[file_key])

    # Perform transform and augmentation
    augment = (self.mode == "train"
               or self.config["add_augmentation_to_all_splits"])
    preprocess = (self.train_preprocessing if augment
                  else self.test_preprocessing)
    result = preprocess(raw, numpy=True)

    # Add file key to the output
    result["file_key"] = file_key
    return result
class MergeDataset(Dataset):
    """ Compose multiple datasets behind a single Dataset interface.

    config['datasets'] lists the datasets to build ("wireframe" or
    "holicity"); config['gt_source_train'], config['gt_source_test'] and
    (for holicity) config['train_splits'] are indexed in the same order.
    config['weights'] gives the sampling probability of each dataset.
    """

    def __init__(self, mode, config=None):
        super(MergeDataset, self).__init__()
        # Initialize the datasets
        self._datasets = []
        for i, d in enumerate(config['datasets']):
            # Every dataset gets its own copy: with one shared dict, the
            # values written for a later dataset would overwrite those a
            # previously built dataset still references.
            spec_config = deepcopy(config)
            spec_config['dataset_name'] = d
            spec_config['gt_source_train'] = config['gt_source_train'][i]
            spec_config['gt_source_test'] = config['gt_source_test'][i]
            if d == "wireframe":
                self._datasets.append(WireframeDataset(mode, spec_config))
            elif d == "holicity":
                splits = spec_config['train_splits'][i]
                # 'train_split' is kept for backward compatibility.
                spec_config['train_split'] = splits
                # NOTE(review): the Holicity loader iterates over
                # config["train_splits"], so the per-dataset value is
                # exposed under that key as well - confirm with its owner.
                spec_config['train_splits'] = splits
                self._datasets.append(HolicityDataset(mode, spec_config))
            else:
                raise ValueError("Unknown dataset: " + d)

        self._weights = config['weights']

    def __getitem__(self, item):
        # `item` is ignored: a dataset is drawn according to the weights,
        # then a uniformly random sample is drawn from that dataset.
        dataset = self._datasets[np.random.choice(
            range(len(self._datasets)), p=self._weights)]
        return dataset[np.random.randint(len(dataset))]

    def __len__(self):
        # Built-in sum keeps the result a plain int, even with no datasets
        # (np.sum of an empty list is a float, which len() rejects).
        return sum(len(d) for d in self._datasets)
def synthetic_collate_fn(batch):
    """ Customized collate_fn.

    The keys of the first sample decide the layout. A key containing one of
    the batch tags is collated with torch's default_collate; a key
    containing one of the list tags is returned as a plain list with one
    entry per sample; a key matching neither is dropped.

    Raises ValueError when a key matches both groups of tags.
    """
    stacked_tags = ("image", "junction_map", "heatmap",
                    "valid_mask", "homography")
    listed_tags = ("junctions", "line_map", "file_key")

    collated = {}
    for name in batch[0].keys():
        wants_stack = any(tag in name for tag in stacked_tags)
        wants_list = any(tag in name for tag in listed_tags)
        if wants_stack and wants_list:
            raise ValueError(
                "[Error] A key matches batch keys and list keys simultaneously.")
        if not (wants_stack or wants_list):
            continue
        column = [sample[name] for sample in batch]
        collated[name] = (torch_loader.default_collate(column)
                          if wants_stack else column)

    return collated
def __init__(self, mode="train", config=None):
    """ Initialize the dataset.

    Args:
        mode: one of "train", "val" or "test".
        config: dataset configuration; the default configuration is used
            when it is None.

    Raises:
        ValueError: if mode is not a supported dataset mode.
    """
    super(SyntheticShapes, self).__init__()
    if mode not in ["train", "val", "test"]:
        raise ValueError(
            "[Error] Supported dataset modes are 'train', 'val', and 'test'.")
    self.mode = mode

    # Always build the defaults: get_default_config() is what populates
    # self.default_config, which the name helpers fall back to when the
    # provided config has no dataset name.
    default_config = self.get_default_config()
    # Get configuration
    self.config = default_config if config is None else config

    # Set all available primitives
    self.available_primitives = [
        'draw_lines',
        'draw_polygon',
        'draw_multiple_polygons',
        'draw_star',
        'draw_checkerboard_multiseg',
        'draw_stripes_multiseg',
        'draw_cube',
        'gaussian_noise'
    ]

    # Some cache setting
    self.dataset_name = self.get_dataset_name()
    self.cache_name = self.get_cache_name()
    self.cache_path = cfg.synthetic_cache_path

    # Check if export dataset exists
    print("===============================================")
    self.filename_dataset, self.datapoints = self.construct_dataset()
    self.print_dataset_info()

    # Path of the h5 file holding the exported dataset
    self.dataset_path = os.path.join(cfg.synthetic_dataroot,
                                     self.dataset_name + ".h5")

    # Fix the random seed for torch and numpy in testing mode
    if ((self.mode == "val" or self.mode == "test")
            and self.config["add_augmentation_to_all_splits"]):
        seed = self.config.get("test_augmentation_seed", 200)
        np.random.seed(seed)
        torch.manual_seed(seed)
        # For CuDNN
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
def get_cache_name(self):
    """ Get cache name from dataset config / default config.

    Returns "<dataset_name>_<mode>_cache.pkl". The dataset name comes from
    self.config["dataset_name"]; when that key is missing or None, the
    name from the default configuration is used instead.
    """
    dataset_name = self.config.get("dataset_name")
    if dataset_name is None:
        # self.default_config only exists once get_default_config() has
        # run, which is not guaranteed when a config was passed in.
        defaults = getattr(self, "default_config", None)
        if defaults is None:
            defaults = self.get_default_config()
        dataset_name = defaults["dataset_name"]

    # Compose cache name
    return dataset_name + "_%s" % self.mode + "_cache.pkl"
def export_synthetic_shapes(self):
    """ Export synthetic shapes to disk.

    Seeds the generator from self.config["generation"]["random_seed"],
    then writes "<dataset_name>.h5" under cfg.synthetic_dataroot with one
    h5 group per configured primitive, each filled by
    self.export_single_primitive.
    """
    # Set the global random state for data generation
    seed = self.config["generation"]["random_seed"]
    synthetic_util.set_random_state(np.random.RandomState(seed))

    # Define the export path
    export_path = os.path.join(cfg.synthetic_dataroot,
                               self.dataset_name + ".h5")

    # Open h5py file
    with h5py.File(export_path, "w", libver="latest") as h5_file:
        # Iterate through all types of shape
        primitive_names = self.parse_drawing_primitives(
            self.config["primitives"])
        num_samples = self.config["generation"]["split_sizes"][self.mode]
        for primitive_name in primitive_names:
            # One h5 group per primitive
            primitive_group = h5_file.create_group(primitive_name)
            self.export_single_primitive(primitive_name, num_samples,
                                         primitive_group)

        h5_file.swmr_mode = True
% primitive) + for idx in tqdm(range(split_size), ascii=True): + # Generate background image + image = synthetic_util.generate_background( + self.config['generation']['image_size'], + **self.config['generation']['params']['generate_background']) + + # Generate points + drawing_func = getattr(synthetic_util, primitive) + kwarg = self.config["generation"]["params"].get(primitive, {}) + + # Get min_len and min_label_len + min_len = self.config["generation"]["min_len"] + min_label_len = self.config["generation"]["min_label_len"] + + # Some only take min_label_len, and gaussian noises take nothing + if primitive in ["draw_lines", "draw_polygon", + "draw_multiple_polygons", "draw_star"]: + data = drawing_func(image, min_len=min_len, + min_label_len=min_label_len, **kwarg) + elif primitive in ["draw_checkerboard_multiseg", + "draw_stripes_multiseg", "draw_cube"]: + data = drawing_func(image, min_label_len=min_label_len, + **kwarg) + else: + data = drawing_func(image, **kwarg) + + # Convert the data + if data["points"] is not None: + points = np.flip(data["points"], axis=1).astype(np.float) + line_map = data["line_map"].astype(np.int32) + else: + points = np.zeros([0, 2]).astype(np.float) + line_map = np.zeros([0, 0]).astype(np.int32) + + # Post-processing + blur_size = self.config["preprocessing"]["blur_size"] + image = cv2.GaussianBlur(image, (blur_size, blur_size), 0) + + # Resize the image and the point location. 
def get_default_config(self):
    """ Get default configuration of the dataset.

    Builds the default dict, stores it on self.default_config and returns
    that same object.
    """
    # Shape generation configuration
    generation = {
        "split_sizes": {'train': 10000, 'val': 400, 'test': 500},
        "random_seed": 10,
        "image_size": [960, 1280],
        "min_len": 0.09,
        "min_label_len": 0.1,
        'params': {
            'generate_background': {
                'min_kernel_size': 150, 'max_kernel_size': 500,
                'min_rad_ratio': 0.02, 'max_rad_ratio': 0.031},
            'draw_stripes': {'transform_params': (0.1, 0.1)},
            'draw_multiple_polygons': {'kernel_boundaries': (50, 100)}
        },
    }

    # Data preprocessing configuration.
    preprocessing = {
        "resize": [240, 320],
        "blur_size": 11
    }

    # Both augmentation families are disabled by default
    augmentation = {
        'photometric': {
            'enable': False,
            'primitives': 'all',
            'params': {},
            'random_order': True,
        },
        'homographic': {
            'enable': False,
            'params': {},
            'valid_border_margin': 0,
        },
    }

    # Assemble and remember the default configuration
    self.default_config = {
        "dataset_name": "synthetic_shape",
        "primitives": "all",
        "add_augmentation_to_all_splits": False,
        "generation": generation,
        "preprocessing": preprocessing,
        'augmentation': augmentation,
    }

    return self.default_config
""" + # Check if the datapoint is valid + if not datapoint in self.datapoints: + raise ValueError( + "[Error] The specified datapoint is not in available datapoints.") + + # Get data from h5 dataset + if reader is None: + raise ValueError( + "[Error] The reader must be provided in __getitem__.") + else: + data = reader[datapoint] + + return parse_h5_data(data) + + def get_data_from_signature(self, primitive_name, index): + """ Get data given the primitive name and index ("draw_lines", 10) """ + # Check the primitive name and index + self._check_primitive_and_index(primitive_name, index) + + # Get the datapoint from filename dataset + datapoint = self.filename_dataset[primitive_name][index] + + return self.get_data_from_datapoint(datapoint) + + def parse_transforms(self, names, all_transforms): + trans = all_transforms if (names == 'all') \ + else (names if isinstance(names, list) else [names]) + assert set(trans) <= set(all_transforms) + return trans + + def get_photo_transform(self): + """ Get list of photometric transforms (according to the config). """ + # Get the photometric transform config + photo_config = self.config["augmentation"]["photometric"] + if not photo_config["enable"]: + raise ValueError( + "[Error] Photometric augmentation is not enabled.") + + # Parse photometric transforms + trans_lst = self.parse_transforms(photo_config["primitives"], + photoaug.available_augmentations) + trans_config_lst = [photo_config["params"].get(p, {}) + for p in trans_lst] + + # List of photometric augmentation + photometric_trans_lst = [ + getattr(photoaug, trans)(**conf) \ + for (trans, conf) in zip(trans_lst, trans_config_lst) + ] + + return photometric_trans_lst + + def get_homo_transform(self): + """ Get homographic transforms (according to the config). 
""" + # Get homographic transforms for image + homo_config = self.config["augmentation"]["homographic"]["params"] + if not self.config["augmentation"]["homographic"]["enable"]: + raise ValueError( + "[Error] Homographic augmentation is not enabled") + + # Parse the homographic transforms + # ToDo: use the shape from the config + image_shape = self.config["preprocessing"]["resize"] + + # Compute the min_label_len from config + try: + min_label_tmp = self.config["generation"]["min_label_len"] + except: + min_label_tmp = None + + # float label len => fraction + if isinstance(min_label_tmp, float): # Skip if not provided + min_label_len = min_label_tmp * min(image_shape) + # int label len => length in pixel + elif isinstance(min_label_tmp, int): + scale_ratio = (self.config["preprocessing"]["resize"] + / self.config["generation"]["image_size"][0]) + min_label_len = (self.config["generation"]["min_label_len"] + * scale_ratio) + # if none => no restriction + else: + min_label_len = 0 + + # Initialize the transform + homographic_trans = homoaug.homography_transform( + image_shape, homo_config, 0, min_label_len) + + return homographic_trans + + @staticmethod + def junc_to_junc_map(junctions, image_size): + """ Convert junction points to junction maps. """ + junctions = np.round(junctions).astype(np.int) + # Clip the boundary by image size + junctions[:, 0] = np.clip(junctions[:, 0], 0., image_size[0]-1) + junctions[:, 1] = np.clip(junctions[:, 1], 0., image_size[1]-1) + + # Create junction map + junc_map = np.zeros([image_size[0], image_size[1]]) + junc_map[junctions[:, 0], junctions[:, 1]] = 1 + + return junc_map[..., None].astype(np.int) + + def train_preprocessing(self, data, disable_homoaug=False): + """ Training preprocessing. 
""" + # Fetch corresponding entries + image = data["image"] + junctions = data["points"] + line_map = data["line_map"] + heatmap = data["heatmap"] + image_size = image.shape[:2] + + # Resize the image before the photometric and homographic transforms + # Check if we need to do the resizing + if not(list(image.shape) == self.config["preprocessing"]["resize"]): + # Resize the image and the point location. + size_old = list(image.shape) + image = cv2.resize( + image, tuple(self.config['preprocessing']['resize'][::-1]), + interpolation=cv2.INTER_LINEAR) + image = np.array(image, dtype=np.uint8) + + junctions = ( + junctions + * np.array(self.config['preprocessing']['resize'], np.float) + / np.array(size_old, np.float)) + + # Generate the line heatmap after post-processing + junctions_xy = np.flip(np.round(junctions).astype(np.int32), + axis=1) + heatmap = synthetic_util.get_line_heatmap(junctions_xy, line_map, + size=image.shape) + heatmap = (heatmap * 255.).astype(np.uint8) + + # Update image size + image_size = image.shape[:2] + + # Declare default valid mask (all ones) + valid_mask = np.ones(image_size) + + # Check if we need to apply augmentations + # In training mode => yes. 
def test_preprocessing(self, data):
    """ Test preprocessing.

    Args:
        data: mapping with the entries "image", "points", "line_map" and
            "heatmap".

    Returns:
        Dict with the tensors "image", "junctions", "junction_map",
        "line_map", "heatmap" and "valid_mask" (an all-ones mask).
    """
    # Fetch corresponding entries
    image = data["image"]
    points = data["points"]
    line_map = data["line_map"]
    heatmap = data["heatmap"]
    image_size = image.shape[:2]

    # Resize the image before the photometric and homographic transforms
    resize = self.config["preprocessing"]["resize"]
    if list(image.shape) != resize:
        # Resize the image and the point location.
        size_old = list(image.shape)
        image = cv2.resize(image, tuple(resize[::-1]),
                           interpolation=cv2.INTER_LINEAR)
        image = np.array(image, dtype=np.uint8)

        # `np.float` (alias of builtin float) no longer exists in current
        # NumPy releases, so this branch used to raise AttributeError.
        points = (points
                  * np.array(resize, float)
                  / np.array(size_old, float))

        # Generate the line heatmap after post-processing
        junctions = np.flip(np.round(points).astype(np.int32), axis=1)
        heatmap = synthetic_util.get_line_heatmap(junctions, line_map,
                                                  size=image.shape)
        heatmap = (heatmap * 255.).astype(np.uint8)

        # Update image size
        image_size = image.shape[:2]

    ### image transform ###
    image_transform = photoaug.normalize_image()
    image = image_transform(image)

    ### joint transform ###
    junction_map = self.junc_to_junc_map(points, image_size)
    to_tensor = transforms.ToTensor()
    image = to_tensor(image)
    junctions = to_tensor(points)
    junction_map = to_tensor(junction_map).to(torch.int)
    line_map = to_tensor(line_map)
    heatmap = to_tensor(heatmap)
    valid_mask = to_tensor(np.ones(image_size)).to(torch.int32)

    return {
        "image": image,
        "junctions": junctions,
        "junction_map": junction_map,
        "line_map": line_map,
        "heatmap": heatmap,
        "valid_mask": valid_mask
    }
+ if (self.mode == "train" + or self.config["add_augmentation_to_all_splits"]): + return_type = self.config.get("return_type", "single") + data = self.train_preprocessing(data) + else: + data = self.test_preprocessing(data) + + return data + + def __len__(self): + return len(self.datapoints) + + ######################## + ## Some other methods ## + ######################## + def _check_dataset_cache(self): + """ Check if dataset cache exists. """ + cache_file_path = os.path.join(self.cache_path, self.cache_name) + if os.path.exists(cache_file_path): + return True + else: + return False + + def _check_export_dataset(self): + """ Check if exported dataset exists. """ + dataset_path = os.path.join(cfg.synthetic_dataroot, self.dataset_name) + if os.path.exists(dataset_path) and len(os.listdir(dataset_path)) > 0: + return True + else: + return False + + def _check_file_existence(self, filename_dataset): + """ Check if all exported file exists. """ + # Path to the exported dataset + dataset_path = os.path.join(cfg.synthetic_dataroot, + self.dataset_name + ".h5") + + flag = True + # Open the h5 dataset + with h5py.File(dataset_path, "r") as f: + # Iterate through all the primitives + for prim_name in f.keys(): + if (len(filename_dataset[prim_name]) + != len(f[prim_name].keys())): + flag = False + + return flag + + def _check_primitive_and_index(self, primitive, index): + """ Check if the primitve and index are valid. 
""" + # Check primitives + if not primitive in self.available_primitives: + raise ValueError( + "[Error] The primitive is not in available primitives.") + + prim_len = len(self.filename_dataset[primitive]) + # Check the index + if not index < prim_len: + raise ValueError( + "[Error] The index exceeds the total file counts %d for %s" + % (prim_len, primitive)) diff --git a/third_party/SOLD2/sold2/dataset/synthetic_util.py b/third_party/SOLD2/sold2/dataset/synthetic_util.py new file mode 100644 index 0000000000000000000000000000000000000000..af009e0ce7e91391e31d7069064ae6121aa84cc0 --- /dev/null +++ b/third_party/SOLD2/sold2/dataset/synthetic_util.py @@ -0,0 +1,1232 @@ +""" +Code adapted from https://github.com/rpautrat/SuperPoint +Module used to generate geometrical synthetic shapes +""" +import math +import cv2 as cv +import numpy as np +import shapely.geometry +from itertools import combinations + +random_state = np.random.RandomState(None) + + +def set_random_state(state): + global random_state + random_state = state + + +def get_random_color(background_color): + """ Output a random scalar in grayscale with a least a small contrast + with the background color. """ + color = random_state.randint(256) + if abs(color - background_color) < 30: # not enough contrast + color = (color + 128) % 256 + return color + + +def get_different_color(previous_colors, min_dist=50, max_count=20): + """ Output a color that contrasts with the previous colors. + Parameters: + previous_colors: np.array of the previous colors + min_dist: the difference between the new color and + the previous colors must be at least min_dist + max_count: maximal number of iterations + """ + color = random_state.randint(256) + count = 0 + while np.any(np.abs(previous_colors - color) < min_dist) and count < max_count: + count += 1 + color = random_state.randint(256) + return color + + +def add_salt_and_pepper(img): + """ Add salt and pepper noise to an image. 
""" + noise = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8) + cv.randu(noise, 0, 255) + black = noise < 30 + white = noise > 225 + img[white > 0] = 255 + img[black > 0] = 0 + cv.blur(img, (5, 5), img) + return np.empty((0, 2), dtype=np.int) + + +def generate_background(size=(960, 1280), nb_blobs=100, min_rad_ratio=0.01, + max_rad_ratio=0.05, min_kernel_size=50, + max_kernel_size=300): + """ Generate a customized background image. + Parameters: + size: size of the image + nb_blobs: number of circles to draw + min_rad_ratio: the radius of blobs is at least min_rad_size * max(size) + max_rad_ratio: the radius of blobs is at most max_rad_size * max(size) + min_kernel_size: minimal size of the kernel + max_kernel_size: maximal size of the kernel + """ + img = np.zeros(size, dtype=np.uint8) + dim = max(size) + cv.randu(img, 0, 255) + cv.threshold(img, random_state.randint(256), 255, cv.THRESH_BINARY, img) + background_color = int(np.mean(img)) + blobs = np.concatenate( + [random_state.randint(0, size[1], size=(nb_blobs, 1)), + random_state.randint(0, size[0], size=(nb_blobs, 1))], axis=1) + for i in range(nb_blobs): + col = get_random_color(background_color) + cv.circle(img, (blobs[i][0], blobs[i][1]), + np.random.randint(int(dim * min_rad_ratio), + int(dim * max_rad_ratio)), + col, -1) + kernel_size = random_state.randint(min_kernel_size, max_kernel_size) + cv.blur(img, (kernel_size, kernel_size), img) + return img + + +def generate_custom_background(size, background_color, nb_blobs=3000, + kernel_boundaries=(50, 100)): + """ Generate a customized background to fill the shapes. 
def final_blur(img, kernel_size=(5, 5)):
    """ Blur `img` in place with a Gaussian kernel.
    Parameters:
      kernel_size: size of the kernel
    """
    cv.GaussianBlur(img, kernel_size, 0, img)


def ccw(A, B, C, dim):
    """ Elementwise test whether A, B, C are in counter-clockwise order.

    With dim == 2 the coordinates are read from axis 1 of 2-D arrays;
    otherwise the arrays are indexed as [:, coord, :].
    """
    if dim == 2:  # only 2 dimensions
        ax, ay = A[:, 0], A[:, 1]
        bx, by = B[:, 0], B[:, 1]
        cx, cy = C[:, 0], C[:, 1]
    else:  # dim should be equal to 3
        ax, ay = A[:, 0, :], A[:, 1, :]
        bx, by = B[:, 0, :], B[:, 1, :]
        cx, cy = C[:, 0, :], C[:, 1, :]
    return (cy - ay) * (bx - ax) > (by - ay) * (cx - ax)


def intersect(A, B, C, D, dim):
    """ Return true if any segment AB intersects any segment CD. """
    cd_splits_ab = ccw(A, C, D, dim) != ccw(B, C, D, dim)
    ab_splits_cd = ccw(A, B, C, dim) != ccw(A, B, D, dim)
    return np.any(cd_splits_ab & ab_splits_cd)


def keep_points_inside(points, size):
    """ Keep the rows of `points` with 0 <= points[:, 0] < size[1] and
    0 <= points[:, 1] < size[0]. """
    col0, col1 = points[:, 0], points[:, 1]
    inside = (col0 >= 0) & (col0 < size[1]) & (col1 >= 0) & (col1 < size[0])
    return points[inside, :]
""" + # Get all junctions from segments + junctions_all = np.concatenate((segments[:, :2], segments[:, 2:]), axis=0) + if junctions_all.shape[0] == 0: + junc_points = None + line_map = None + + # Get all unique junction points + else: + junc_points = np.unique(junctions_all, axis=0) + # Generate line map from points and segments + line_map = get_line_map(junc_points, segments) + + return junc_points, line_map + + +def get_line_map(points: np.ndarray, segments: np.ndarray) -> np.ndarray: + """ Get line map given the points and segment sets. """ + # create empty line map + num_point = points.shape[0] + line_map = np.zeros([num_point, num_point]) + + # Iterate through every segment + for idx in range(segments.shape[0]): + # Get the junctions from a single segement + seg = segments[idx, :] + junction1 = seg[:2] + junction2 = seg[2:] + + # Get index + idx_junction1 = np.where((points == junction1).sum(axis=1) == 2)[0] + idx_junction2 = np.where((points == junction2).sum(axis=1) == 2)[0] + + # label the corresponding entries + line_map[idx_junction1, idx_junction2] = 1 + line_map[idx_junction2, idx_junction1] = 1 + + return line_map + + +def get_line_heatmap(junctions, line_map, size=[480, 640], thickness=1): + """ Get line heat map from junctions and line map. 
""" + # Make sure that the thickness is 1 + if not isinstance(thickness, int): + thickness = int(thickness) + + # If the junction points are not int => round them and convert to int + if not junctions.dtype == np.int: + junctions = (np.round(junctions)).astype(np.int) + + # Initialize empty map + heat_map = np.zeros(size) + + if junctions.shape[0] > 0: # If empty, just return zero map + # Iterate through all the junctions + for idx in range(junctions.shape[0]): + # if no connectivity, just skip it + if line_map[idx, :].sum() == 0: + continue + # Plot the line segment + else: + # Iterate through all the connected junctions + for idx2 in np.where(line_map[idx, :] == 1)[0]: + point1 = junctions[idx, :] + point2 = junctions[idx2, :] + + # Draw line + cv.line(heat_map, tuple(point1), tuple(point2), 1., thickness) + + return heat_map + + +def draw_lines(img, nb_lines=10, min_len=32, min_label_len=32): + """ Draw random lines and output the positions of the pair of junctions + and line associativities. 
def check_segment_len(segments, min_len=32):
    """ Return True when at least one segment is shorter than min_len.

    Each entry of `segments` stores two end points as [p1_0, p1_1, p2_0,
    p2_1] along axis 1.
    """
    delta = segments[:, :2] - segments[:, 2:]
    lengths = np.sqrt((delta ** 2).sum(axis=1))
    return bool(np.any(lengths < min_len))
def overlap(center, rad, centers, rads):
    """ Return True if the circle (center, rad) overlaps any of the
    circles (centers[i], rads[i]). """
    return any(np.linalg.norm(center - centers[i]) < rad + other_rad
               for i, other_rad in enumerate(rads))


def angle_between_vectors(v1, v2):
    """ Compute the angle (in rad) between the two vectors v1 and v2. """
    unit1 = v1 / np.linalg.norm(v1)
    unit2 = v2 / np.linalg.norm(v2)
    # Clip guards arccos against rounding slightly outside [-1, 1].
    cosine = np.clip(np.dot(unit1, unit2), -1.0, 1.0)
    return np.arccos(cosine)
+ Parameters: + max_sides: maximal number of sides + 1 + nb_polygons: maximal number of polygons + """ + segments = np.empty((0, 4), dtype=np.int) + label_segments = np.empty((0, 4), dtype=np.int) + centers = [] + rads = [] + points = np.empty((0, 2), dtype=np.int) + background_color = int(np.mean(img)) + + min_dim = min(img.shape[0], img.shape[1]) + # Convert length constrain to pixel if given float number + if isinstance(min_len, float) and min_len <= 1.: + min_len = int(min_dim * min_len) + if isinstance(min_label_len, float) and min_label_len <= 1.: + min_label_len = int(min_dim * min_label_len) + if isinstance(safe_margin, float) and safe_margin <= 1.: + safe_margin = int(min_dim * safe_margin) + + # Sequentially generate polygons + for i in range(nb_polygons): + num_corners = random_state.randint(3, max_sides) + min_dim = min(img.shape[0], img.shape[1]) + + # Also add the real radius + rad = max(random_state.rand() * min_dim / 2, min_dim / 9) + rad_real = rad - safe_margin + + # Center of a circle + x = random_state.randint(rad, img.shape[1] - rad) + y = random_state.randint(rad, img.shape[0] - rad) + + # Sample num_corners points inside the circle + slices = np.linspace(0, 2 * math.pi, num_corners + 1) + angles = [slices[i] + random_state.rand() * (slices[i+1] - slices[i]) + for i in range(num_corners)] + + # Sample outer points and inner points + new_points = [] + new_points_real = [] + for a in angles: + x_offset = max(random_state.rand(), 0.4) + y_offset = max(random_state.rand(), 0.4) + new_points.append([int(x + x_offset * rad * math.cos(a)), + int(y + y_offset * rad * math.sin(a))]) + new_points_real.append( + [int(x + x_offset * rad_real * math.cos(a)), + int(y + y_offset * rad_real * math.sin(a))]) + new_points = np.array(new_points) + new_points_real = np.array(new_points_real) + + # Filter the points that are too close or that have an angle too flat + norms = [np.linalg.norm(new_points[(i-1) % num_corners, :] + - new_points[i, :]) + for i in 
range(num_corners)] + mask = np.array(norms) > 0.01 + new_points = new_points[mask, :] + new_points_real = new_points_real[mask, :] + + num_corners = new_points.shape[0] + corner_angles = [ + angle_between_vectors(new_points[(i-1) % num_corners, :] - + new_points[i, :], + new_points[(i+1) % num_corners, :] - + new_points[i, :]) + for i in range(num_corners)] + mask = np.array(corner_angles) < (2 * math.pi / 3) + new_points = new_points[mask, :] + new_points_real = new_points_real[mask, :] + num_corners = new_points.shape[0] + + # Not enough corners + if num_corners < 3: + continue + + # Segments for checking overlap (outer circle) + new_segments = np.zeros((1, 4, num_corners)) + new_segments[:, 0, :] = [new_points[i][0] for i in range(num_corners)] + new_segments[:, 1, :] = [new_points[i][1] for i in range(num_corners)] + new_segments[:, 2, :] = [new_points[(i+1) % num_corners][0] + for i in range(num_corners)] + new_segments[:, 3, :] = [new_points[(i+1) % num_corners][1] + for i in range(num_corners)] + + # Segments to record (inner circle) + new_segments_real = np.zeros((1, 4, num_corners)) + new_segments_real[:, 0, :] = [new_points_real[i][0] + for i in range(num_corners)] + new_segments_real[:, 1, :] = [new_points_real[i][1] + for i in range(num_corners)] + new_segments_real[:, 2, :] = [ + new_points_real[(i + 1) % num_corners][0] + for i in range(num_corners)] + new_segments_real[:, 3, :] = [ + new_points_real[(i + 1) % num_corners][1] + for i in range(num_corners)] + + # Check that the polygon will not overlap with pre-existing shapes + if intersect(segments[:, 0:2, None], segments[:, 2:4, None], + new_segments[:, 0:2, :], new_segments[:, 2:4, :], + 3) or overlap(np.array([x, y]), rad, centers, rads): + continue + + # Check that the the edges of the polygon is not too short + if check_segment_len(new_segments_real, min_len): + continue + + # If the polygon is valid, append it to the polygon set + centers.append(np.array([x, y])) + rads.append(rad) + 
def draw_ellipses(img, nb_ellipses=20):
    """ Draw several ellipses.
    Parameters:
      nb_ellipses: maximal number of ellipses

    The ellipses are drawn into `img` in place. The return value is always
    an empty (0, 2) integer array.
    """
    # `np.int` is gone from current NumPy releases; builtin `int` is the
    # same dtype.
    centers = np.empty((0, 2), dtype=int)
    rads = np.empty((0, 1), dtype=int)
    min_dim = min(img.shape[0], img.shape[1]) / 4
    background_color = int(np.mean(img))
    for _ in range(nb_ellipses):
        ax = int(max(random_state.rand() * min_dim, min_dim / 5))
        ay = int(max(random_state.rand() * min_dim, min_dim / 5))
        max_rad = max(ax, ay)
        x = random_state.randint(max_rad, img.shape[1] - max_rad)  # center
        y = random_state.randint(max_rad, img.shape[0] - max_rad)
        new_center = np.array([[x, y]])

        # Check that the ellipsis will not overlap with pre-existing shapes
        # NOTE(review): distances have shape (k,) and `rads` shape (k, 1),
        # so this compares every distance with every stored radius. That
        # is stricter than a per-ellipse test; kept unchanged.
        diff = centers - new_center
        if np.any(max_rad > (np.sqrt(np.sum(diff * diff, axis=1)) - rads)):
            continue
        centers = np.concatenate([centers, new_center], axis=0)
        rads = np.concatenate([rads, np.array([[max_rad]])], axis=0)

        col = get_random_color(background_color)
        angle = random_state.rand() * 90
        cv.ellipse(img, (x, y), (ax, ay), angle, 0, 360, col, -1)
    return np.empty((0, 2), dtype=int)
+ Parameters: + nb_branches: number of branches of the star + """ + num_branches = random_state.randint(3, nb_branches) + min_dim = min(img.shape[0], img.shape[1]) + # Convert length constrain to pixel if given float number + if isinstance(min_len, float) and min_len <= 1.: + min_len = int(min_dim * min_len) + if isinstance(min_label_len, float) and min_label_len <= 1.: + min_label_len = int(min_dim * min_label_len) + + thickness = random_state.randint(min_dim * 0.01, min_dim * 0.025) + rad = max(random_state.rand() * min_dim / 2, min_dim / 5) + x = random_state.randint(rad, img.shape[1] - rad) + y = random_state.randint(rad, img.shape[0] - rad) + # Sample num_branches points inside the circle + slices = np.linspace(0, 2 * math.pi, num_branches + 1) + angles = [slices[i] + random_state.rand() * (slices[i+1] - slices[i]) + for i in range(num_branches)] + points = np.array( + [[int(x + max(random_state.rand(), 0.3) * rad * math.cos(a)), + int(y + max(random_state.rand(), 0.3) * rad * math.sin(a))] + for a in angles]) + points = np.concatenate(([[x, y]], points), axis=0) + + # Generate segments and check the length + segments = np.array([[x, y, _[0], _[1]] for _ in points[1:, :]]) + if check_segment_len(segments, min_len): + return draw_star(img, nb_branches, min_len, min_label_len) + + # Only record the segments longer than min_label_len + points1 = segments[:, :2] + points2 = segments[:, 2:] + seg_len = np.sqrt(np.sum((points1 - points2) ** 2, axis=1)) + label_segments = segments[seg_len >= min_label_len, :] + + # Get all junctions from label segments + junctions_all = np.concatenate( + (label_segments[:, :2], label_segments[:, 2:]), axis=0) + if junctions_all.shape[0] == 0: + junc_points = None + line_map = None + + # Get all unique junction points + else: + junc_points = np.unique(junctions_all, axis=0) + # Generate line map from points and segments + line_map = get_line_map(junc_points, label_segments) + + background_color = int(np.mean(img)) + for i in range(1, 
num_branches + 1): + col = get_random_color(background_color) + cv.line(img, (points[0][0], points[0][1]), + (points[i][0], points[i][1]), + col, thickness) + return { + "points": junc_points, + "line_map": line_map + } + + +def draw_checkerboard_multiseg(img, max_rows=7, max_cols=7, + transform_params=(0.05, 0.15), + min_label_len=64, seed=None): + """ Draw a checkerboard and output the junctions + line segments + Parameters: + max_rows: maximal number of rows + 1 + max_cols: maximal number of cols + 1 + transform_params: set the range of the parameters of the transformations + """ + if seed is None: + global random_state + else: + random_state = np.random.RandomState(seed) + + background_color = int(np.mean(img)) + + min_dim = min(img.shape) + if isinstance(min_label_len, float) and min_label_len <= 1.: + min_label_len = int(min_dim * min_label_len) + # Create the grid + rows = random_state.randint(3, max_rows) # number of rows + cols = random_state.randint(3, max_cols) # number of cols + s = min((img.shape[1] - 1) // cols, (img.shape[0] - 1) // rows) + x_coord = np.tile(range(cols + 1), + rows + 1).reshape(((rows + 1) * (cols + 1), 1)) + y_coord = np.repeat(range(rows + 1), + cols + 1).reshape(((rows + 1) * (cols + 1), 1)) + # points are the grid coordinates + points = s * np.concatenate([x_coord, y_coord], axis=1) + + # Warp the grid using an affine transformation and an homography + alpha_affine = np.max(img.shape) * ( + transform_params[0] + random_state.rand() * transform_params[1]) + center_square = np.float32(img.shape) // 2 + min_dim = min(img.shape) + square_size = min_dim // 3 + pts1 = np.float32([center_square + square_size, + [center_square[0] + square_size, + center_square[1] - square_size], + center_square - square_size, + [center_square[0] - square_size, + center_square[1] + square_size]]) + pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, + size=pts1.shape).astype(np.float32) + affine_transform = cv.getAffineTransform(pts1[:3], 
pts2[:3]) + pts2 = pts1 + random_state.uniform(-alpha_affine / 2, alpha_affine / 2, + size=pts1.shape).astype(np.float32) + perspective_transform = cv.getPerspectiveTransform(pts1, pts2) + + # Apply the affine transformation + points = np.transpose(np.concatenate( + (points, np.ones(((rows + 1) * (cols + 1), 1))), axis=1)) + warped_points = np.transpose(np.dot(affine_transform, points)) + + # Apply the homography + warped_col0 = np.add(np.sum(np.multiply( + warped_points, perspective_transform[0, :2]), axis=1), + perspective_transform[0, 2]) + warped_col1 = np.add(np.sum(np.multiply( + warped_points, perspective_transform[1, :2]), axis=1), + perspective_transform[1, 2]) + warped_col2 = np.add(np.sum(np.multiply( + warped_points, perspective_transform[2, :2]), axis=1), + perspective_transform[2, 2]) + warped_col0 = np.divide(warped_col0, warped_col2) + warped_col1 = np.divide(warped_col1, warped_col2) + warped_points = np.concatenate( + [warped_col0[:, None], warped_col1[:, None]], axis=1) + warped_points_float = warped_points.copy() + warped_points = warped_points.astype(int) + + # Fill the rectangles + colors = np.zeros((rows * cols,), np.int32) + for i in range(rows): + for j in range(cols): + # Get a color that contrast with the neighboring cells + if i == 0 and j == 0: + col = get_random_color(background_color) + else: + neighboring_colors = [] + if i != 0: + neighboring_colors.append(colors[(i - 1) * cols + j]) + if j != 0: + neighboring_colors.append(colors[i * cols + j - 1]) + col = get_different_color(np.array(neighboring_colors)) + colors[i * cols + j] = col + + # Fill the cell + cv.fillConvexPoly(img, np.array( + [(warped_points[i * (cols + 1) + j, 0], + warped_points[i * (cols + 1) + j, 1]), + (warped_points[i * (cols + 1) + j + 1, 0], + warped_points[i * (cols + 1) + j + 1, 1]), + (warped_points[(i + 1) * (cols + 1) + j + 1, 0], + warped_points[(i + 1) * (cols + 1) + j + 1, 1]), + (warped_points[(i + 1) * (cols + 1) + j, 0], + warped_points[(i + 1) * 
(cols + 1) + j, 1])]), col) + + label_segments = np.empty([0, 4], dtype=np.int) + # Iterate through rows + for row_idx in range(rows + 1): + # Include all the combination of the junctions + # Iterate through all the combination of junction index in that row + multi_seg_lst = [ + np.array([warped_points_float[id1, 0], + warped_points_float[id1, 1], + warped_points_float[id2, 0], + warped_points_float[id2, 1]])[None, ...] + for (id1, id2) in combinations(range( + row_idx * (cols + 1), (row_idx + 1) * (cols + 1), 1), 2)] + multi_seg = np.concatenate(multi_seg_lst, axis=0) + label_segments = np.concatenate((label_segments, multi_seg), axis=0) + + # Iterate through columns + for col_idx in range(cols + 1): # for 5 columns, we will have 5 + 1 edges + # Include all the combination of the junctions + # Iterate throuhg all the combination of junction index in that column + multi_seg_lst = [ + np.array([warped_points_float[id1, 0], + warped_points_float[id1, 1], + warped_points_float[id2, 0], + warped_points_float[id2, 1]])[None, ...] + for (id1, id2) in combinations(range( + col_idx, col_idx + ((rows + 1) * (cols + 1)), cols + 1), 2)] + multi_seg = np.concatenate(multi_seg_lst, axis=0) + label_segments = np.concatenate((label_segments, multi_seg), axis=0) + + label_segments_filtered = np.zeros([0, 4]) + # Define image boundary polygon (in x y manner) + image_poly = shapely.geometry.Polygon( + [[0, 0], [img.shape[1] - 1, 0], [img.shape[1] - 1, img.shape[0] - 1], + [0, img.shape[0] - 1]]) + for idx in range(label_segments.shape[0]): + # Get the line segment + seg_raw = label_segments[idx, :] + seg = shapely.geometry.LineString([seg_raw[:2], seg_raw[2:]]) + + # The line segment is just inside the image. + if seg.intersection(image_poly) == seg: + label_segments_filtered = np.concatenate( + (label_segments_filtered, seg_raw[None, ...]), axis=0) + + # Intersect with the image. 
+ elif seg.intersects(image_poly): + # Check intersection + try: + p = np.array(seg.intersection( + image_poly).coords).reshape([-1, 4]) + # If intersect with eact one point + except: + continue + segment = p + label_segments_filtered = np.concatenate( + (label_segments_filtered, segment), axis=0) + + else: + continue + + label_segments = np.round(label_segments_filtered).astype(np.int) + + # Only record the segments longer than min_label_len + points1 = label_segments[:, :2] + points2 = label_segments[:, 2:] + seg_len = np.sqrt(np.sum((points1 - points2) ** 2, axis=1)) + label_segments = label_segments[seg_len >= min_label_len, :] + + # Get all junctions from label segments + junc_points, line_map = get_unique_junctions(label_segments, + min_label_len) + + # Draw lines on the boundaries of the board at random + nb_rows = random_state.randint(2, rows + 2) + nb_cols = random_state.randint(2, cols + 2) + thickness = random_state.randint(min_dim * 0.01, min_dim * 0.015) + for _ in range(nb_rows): + row_idx = random_state.randint(rows + 1) + col_idx1 = random_state.randint(cols + 1) + col_idx2 = random_state.randint(cols + 1) + col = get_random_color(background_color) + cv.line(img, (warped_points[row_idx * (cols + 1) + col_idx1, 0], + warped_points[row_idx * (cols + 1) + col_idx1, 1]), + (warped_points[row_idx * (cols + 1) + col_idx2, 0], + warped_points[row_idx * (cols + 1) + col_idx2, 1]), + col, thickness) + for _ in range(nb_cols): + col_idx = random_state.randint(cols + 1) + row_idx1 = random_state.randint(rows + 1) + row_idx2 = random_state.randint(rows + 1) + col = get_random_color(background_color) + cv.line(img, (warped_points[row_idx1 * (cols + 1) + col_idx, 0], + warped_points[row_idx1 * (cols + 1) + col_idx, 1]), + (warped_points[row_idx2 * (cols + 1) + col_idx, 0], + warped_points[row_idx2 * (cols + 1) + col_idx, 1]), + col, thickness) + + # Keep only the points inside the image + points = keep_points_inside(warped_points, img.shape[:2]) + return { + 
"points": junc_points, + "line_map": line_map + } + + +def draw_stripes_multiseg(img, max_nb_cols=13, min_len=0.04, min_label_len=64, + transform_params=(0.05, 0.15), seed=None): + """ Draw stripes in a distorted rectangle + and output the junctions points + line map. + Parameters: + max_nb_cols: maximal number of stripes to be drawn + min_width_ratio: the minimal width of a stripe is + min_width_ratio * smallest dimension of the image + transform_params: set the range of the parameters of the transformations + """ + # Set the optional random seed (most for debugging) + if seed is None: + global random_state + else: + random_state = np.random.RandomState(seed) + + background_color = int(np.mean(img)) + # Create the grid + board_size = (int(img.shape[0] * (1 + random_state.rand())), + int(img.shape[1] * (1 + random_state.rand()))) + + # Number of cols + col = random_state.randint(5, max_nb_cols) + cols = np.concatenate([board_size[1] * random_state.rand(col - 1), + np.array([0, board_size[1] - 1])], axis=0) + cols = np.unique(cols.astype(int)) + + # Remove the indices that are too close + min_dim = min(img.shape) + + # Convert length constrain to pixel if given float number + if isinstance(min_len, float) and min_len <= 1.: + min_len = int(min_dim * min_len) + if isinstance(min_label_len, float) and min_label_len <= 1.: + min_label_len = int(min_dim * min_label_len) + + cols = cols[(np.concatenate([cols[1:], + np.array([board_size[1] + min_len])], + axis=0) - cols) >= min_len] + # Update the number of cols + col = cols.shape[0] - 1 + cols = np.reshape(cols, (col + 1, 1)) + cols1 = np.concatenate([cols, np.zeros((col + 1, 1), np.int32)], axis=1) + cols2 = np.concatenate( + [cols, (board_size[0] - 1) * np.ones((col + 1, 1), np.int32)], axis=1) + points = np.concatenate([cols1, cols2], axis=0) + + # Warp the grid using an affine transformation and a homography + alpha_affine = np.max(img.shape) * ( + transform_params[0] + random_state.rand() * transform_params[1]) + 
center_square = np.float32(img.shape) // 2 + square_size = min(img.shape) // 3 + pts1 = np.float32([center_square + square_size, + [center_square[0]+square_size, + center_square[1]-square_size], + center_square - square_size, + [center_square[0]-square_size, + center_square[1]+square_size]]) + pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, + size=pts1.shape).astype(np.float32) + affine_transform = cv.getAffineTransform(pts1[:3], pts2[:3]) + pts2 = pts1 + random_state.uniform(-alpha_affine / 2, alpha_affine / 2, + size=pts1.shape).astype(np.float32) + perspective_transform = cv.getPerspectiveTransform(pts1, pts2) + + # Apply the affine transformation + points = np.transpose(np.concatenate((points, + np.ones((2 * (col + 1), 1))), + axis=1)) + warped_points = np.transpose(np.dot(affine_transform, points)) + + # Apply the homography + warped_col0 = np.add(np.sum(np.multiply( + warped_points, perspective_transform[0, :2]), axis=1), + perspective_transform[0, 2]) + warped_col1 = np.add(np.sum(np.multiply( + warped_points, perspective_transform[1, :2]), axis=1), + perspective_transform[1, 2]) + warped_col2 = np.add(np.sum(np.multiply( + warped_points, perspective_transform[2, :2]), axis=1), + perspective_transform[2, 2]) + warped_col0 = np.divide(warped_col0, warped_col2) + warped_col1 = np.divide(warped_col1, warped_col2) + warped_points = np.concatenate( + [warped_col0[:, None], warped_col1[:, None]], axis=1) + warped_points_float = warped_points.copy() + warped_points = warped_points.astype(int) + + # Fill the rectangles and get the segments + color = get_random_color(background_color) + # segments_debug = np.zeros([0, 4]) + for i in range(col): + # Fill the color + color = (color + 128 + random_state.randint(-30, 30)) % 256 + cv.fillConvexPoly(img, np.array([(warped_points[i, 0], + warped_points[i, 1]), + (warped_points[i+1, 0], + warped_points[i+1, 1]), + (warped_points[i+col+2, 0], + warped_points[i+col+2, 1]), + (warped_points[i+col+1, 0], + 
warped_points[i+col+1, 1])]), + color) + + segments = np.zeros([0, 4]) + row = 1 # in stripes case + # Iterate through rows + for row_idx in range(row + 1): + # Include all the combination of the junctions + # Iterate through all the combination of junction index in that row + multi_seg_lst = [np.array( + [warped_points_float[id1, 0], + warped_points_float[id1, 1], + warped_points_float[id2, 0], + warped_points_float[id2, 1]])[None, ...] + for (id1, id2) in combinations(range( + row_idx * (col + 1), (row_idx + 1) * (col + 1), 1), 2)] + multi_seg = np.concatenate(multi_seg_lst, axis=0) + segments = np.concatenate((segments, multi_seg), axis=0) + + # Iterate through columns + for col_idx in range(col + 1): # for 5 columns, we will have 5 + 1 edges. + # Include all the combination of the junctions + # Iterate throuhg all the combination of junction index in that column + multi_seg_lst = [np.array( + [warped_points_float[id1, 0], + warped_points_float[id1, 1], + warped_points_float[id2, 0], + warped_points_float[id2, 1]])[None, ...] + for (id1, id2) in combinations(range( + col_idx, col_idx + (row * col) + 2, col + 1), 2)] + multi_seg = np.concatenate(multi_seg_lst, axis=0) + segments = np.concatenate((segments, multi_seg), axis=0) + + # Select and refine the segments + segments_new = np.zeros([0, 4]) + # Define image boundary polygon (in x y manner) + image_poly = shapely.geometry.Polygon( + [[0, 0], [img.shape[1]-1, 0], [img.shape[1]-1, img.shape[0]-1], + [0, img.shape[0]-1]]) + for idx in range(segments.shape[0]): + # Get the line segment + seg_raw = segments[idx, :] + seg = shapely.geometry.LineString([seg_raw[:2], seg_raw[2:]]) + + # The line segment is just inside the image. + if seg.intersection(image_poly) == seg: + segments_new = np.concatenate( + (segments_new, seg_raw[None, ...]), axis=0) + + # Intersect with the image. 
+ elif seg.intersects(image_poly): + # Check intersection + try: + p = np.array( + seg.intersection(image_poly).coords).reshape([-1, 4]) + # If intersect at exact one point, just continue. + except: + continue + segment = p + segments_new = np.concatenate((segments_new, segment), axis=0) + + else: + continue + + segments = (np.round(segments_new)).astype(np.int) + + # Only record the segments longer than min_label_len + points1 = segments[:, :2] + points2 = segments[:, 2:] + seg_len = np.sqrt(np.sum((points1 - points2) ** 2, axis=1)) + label_segments = segments[seg_len >= min_label_len, :] + + # Get all junctions from label segments + junctions_all = np.concatenate( + (label_segments[:, :2], label_segments[:, 2:]), axis=0) + if junctions_all.shape[0] == 0: + junc_points = None + line_map = None + + # Get all unique junction points + else: + junc_points = np.unique(junctions_all, axis=0) + # Generate line map from points and segments + line_map = get_line_map(junc_points, label_segments) + + # Draw lines on the boundaries of the stripes at random + nb_rows = random_state.randint(2, 5) + nb_cols = random_state.randint(2, col + 2) + thickness = random_state.randint(min_dim * 0.01, min_dim * 0.011) + for _ in range(nb_rows): + row_idx = random_state.choice([0, col + 1]) + col_idx1 = random_state.randint(col + 1) + col_idx2 = random_state.randint(col + 1) + color = get_random_color(background_color) + cv.line(img, (warped_points[row_idx + col_idx1, 0], + warped_points[row_idx + col_idx1, 1]), + (warped_points[row_idx + col_idx2, 0], + warped_points[row_idx + col_idx2, 1]), + color, thickness) + + for _ in range(nb_cols): + col_idx = random_state.randint(col + 1) + color = get_random_color(background_color) + cv.line(img, (warped_points[col_idx, 0], + warped_points[col_idx, 1]), + (warped_points[col_idx + col + 1, 0], + warped_points[col_idx + col + 1, 1]), + color, thickness) + + # Keep only the points inside the image + # points = keep_points_inside(warped_points, 
img.shape[:2]) + return { + "points": junc_points, + "line_map": line_map + } + + +def draw_cube(img, min_size_ratio=0.2, min_label_len=64, + scale_interval=(0.4, 0.6), trans_interval=(0.5, 0.2)): + """ Draw a 2D projection of a cube and output the visible juntions. + Parameters: + min_size_ratio: min(img.shape) * min_size_ratio is the smallest + achievable cube side size + scale_interval: the scale is between scale_interval[0] and + scale_interval[0]+scale_interval[1] + trans_interval: the translation is between img.shape*trans_interval[0] + and img.shape*(trans_interval[0] + trans_interval[1]) + """ + # Generate a cube and apply to it an affine transformation + # The order matters! + # The indices of two adjacent vertices differ only of one bit (Gray code) + background_color = int(np.mean(img)) + min_dim = min(img.shape[:2]) + min_side = min_dim * min_size_ratio + lx = min_side + random_state.rand() * 2 * min_dim / 3 # dims of the cube + ly = min_side + random_state.rand() * 2 * min_dim / 3 + lz = min_side + random_state.rand() * 2 * min_dim / 3 + cube = np.array([[0, 0, 0], + [lx, 0, 0], + [0, ly, 0], + [lx, ly, 0], + [0, 0, lz], + [lx, 0, lz], + [0, ly, lz], + [lx, ly, lz]]) + rot_angles = random_state.rand(3) * 3 * math.pi / 10. + math.pi / 10. 
+ rotation_1 = np.array([[math.cos(rot_angles[0]), + -math.sin(rot_angles[0]), 0], + [math.sin(rot_angles[0]), + math.cos(rot_angles[0]), 0], + [0, 0, 1]]) + rotation_2 = np.array([[1, 0, 0], + [0, math.cos(rot_angles[1]), + -math.sin(rot_angles[1])], + [0, math.sin(rot_angles[1]), + math.cos(rot_angles[1])]]) + rotation_3 = np.array([[math.cos(rot_angles[2]), 0, + -math.sin(rot_angles[2])], + [0, 1, 0], + [math.sin(rot_angles[2]), 0, + math.cos(rot_angles[2])]]) + scaling = np.array([[scale_interval[0] + + random_state.rand() * scale_interval[1], 0, 0], + [0, scale_interval[0] + + random_state.rand() * scale_interval[1], 0], + [0, 0, scale_interval[0] + + random_state.rand() * scale_interval[1]]]) + trans = np.array([img.shape[1] * trans_interval[0] + + random_state.randint(-img.shape[1] * trans_interval[1], + img.shape[1] * trans_interval[1]), + img.shape[0] * trans_interval[0] + + random_state.randint(-img.shape[0] * trans_interval[1], + img.shape[0] * trans_interval[1]), + 0]) + cube = trans + np.transpose( + np.dot(scaling, np.dot(rotation_1, + np.dot(rotation_2, np.dot(rotation_3, np.transpose(cube)))))) + + # The hidden corner is 0 by construction + # The front one is 7 + cube = cube[:, :2] # project on the plane z=0 + cube = cube.astype(int) + points = cube[1:, :] # get rid of the hidden corner + + # Get the three visible faces + faces = np.array([[7, 3, 1, 5], [7, 5, 4, 6], [7, 6, 2, 3]]) + + # Get all visible line segments + segments = np.zeros([0, 4]) + # Iterate through all the faces + for face_idx in range(faces.shape[0]): + face = faces[face_idx, :] + # Brute-forcely expand all the segments + segment = np.array( + [np.concatenate((cube[face[0]], cube[face[1]]), axis=0), + np.concatenate((cube[face[1]], cube[face[2]]), axis=0), + np.concatenate((cube[face[2]], cube[face[3]]), axis=0), + np.concatenate((cube[face[3]], cube[face[0]]), axis=0)]) + segments = np.concatenate((segments, segment), axis=0) + + # Select and refine the segments + segments_new = 
np.zeros([0, 4]) + # Define image boundary polygon (in x y manner) + image_poly = shapely.geometry.Polygon( + [[0, 0], [img.shape[1] - 1, 0], [img.shape[1] - 1, img.shape[0] - 1], + [0, img.shape[0] - 1]]) + for idx in range(segments.shape[0]): + # Get the line segment + seg_raw = segments[idx, :] + seg = shapely.geometry.LineString([seg_raw[:2], seg_raw[2:]]) + + # The line segment is just inside the image. + if seg.intersection(image_poly) == seg: + segments_new = np.concatenate( + (segments_new, seg_raw[None, ...]), axis=0) + + # Intersect with the image. + elif seg.intersects(image_poly): + try: + p = np.array( + seg.intersection(image_poly).coords).reshape([-1, 4]) + except: + continue + segment = p + segments_new = np.concatenate((segments_new, segment), axis=0) + + else: + continue + + segments = (np.round(segments_new)).astype(np.int) + + # Only record the segments longer than min_label_len + points1 = segments[:, :2] + points2 = segments[:, 2:] + seg_len = np.sqrt(np.sum((points1 - points2) ** 2, axis=1)) + label_segments = segments[seg_len >= min_label_len, :] + + # Get all junctions from label segments + junctions_all = np.concatenate( + (label_segments[:, :2], label_segments[:, 2:]), axis=0) + if junctions_all.shape[0] == 0: + junc_points = None + line_map = None + + # Get all unique junction points + else: + junc_points = np.unique(junctions_all, axis=0) + # Generate line map from points and segments + line_map = get_line_map(junc_points, label_segments) + + # Fill the faces and draw the contours + col_face = get_random_color(background_color) + for i in [0, 1, 2]: + cv.fillPoly(img, [cube[faces[i]].reshape((-1, 1, 2))], + col_face) + thickness = random_state.randint(min_dim * 0.003, min_dim * 0.015) + for i in [0, 1, 2]: + for j in [0, 1, 2, 3]: + col_edge = (col_face + 128 + + random_state.randint(-64, 64))\ + % 256 # color that constrats with the face color + cv.line(img, (cube[faces[i][j], 0], cube[faces[i][j], 1]), + (cube[faces[i][(j + 1) % 4], 
0], + cube[faces[i][(j + 1) % 4], 1]), + col_edge, thickness) + + return { + "points": junc_points, + "line_map": line_map + } + + +def gaussian_noise(img): + """ Apply random noise to the image. """ + cv.randu(img, 0, 255) + return { + "points": None, + "line_map": None + } diff --git a/third_party/SOLD2/sold2/dataset/transforms/__init__.py b/third_party/SOLD2/sold2/dataset/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/SOLD2/sold2/dataset/transforms/homographic_transforms.py b/third_party/SOLD2/sold2/dataset/transforms/homographic_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..d9338abb169f7a86f3c6e702a031e1c0de86c339 --- /dev/null +++ b/third_party/SOLD2/sold2/dataset/transforms/homographic_transforms.py @@ -0,0 +1,350 @@ +""" +This file implements the homographic transforms for data augmentation. +Code adapted from https://github.com/rpautrat/SuperPoint +""" +import numpy as np +from math import pi + +from ..synthetic_util import get_line_map, get_line_heatmap +import cv2 +import copy +import shapely.geometry + + +def sample_homography( + shape, perspective=True, scaling=True, rotation=True, + translation=True, n_scales=5, n_angles=25, scaling_amplitude=0.1, + perspective_amplitude_x=0.1, perspective_amplitude_y=0.1, + patch_ratio=0.5, max_angle=pi/2, allow_artifacts=False, + translation_overflow=0.): + """ + Computes the homography transformation between a random patch in the + original image and a warped projection with the same image size. + As in `tf.contrib.image.transform`, it maps the output point + (warped patch) to a transformed input point (original patch). + The original patch, initialized with a simple half-size centered crop, + is iteratively projected, scaled, rotated and translated. + + Arguments: + shape: A rank-2 `Tensor` specifying the height and width of the original image. 
+ perspective: A boolean that enables the perspective and affine transformations. + scaling: A boolean that enables the random scaling of the patch. + rotation: A boolean that enables the random rotation of the patch. + translation: A boolean that enables the random translation of the patch. + n_scales: The number of tentative scales that are sampled when scaling. + n_angles: The number of tentatives angles that are sampled when rotating. + scaling_amplitude: Controls the amount of scale. + perspective_amplitude_x: Controls the perspective effect in x direction. + perspective_amplitude_y: Controls the perspective effect in y direction. + patch_ratio: Controls the size of the patches used to create the homography. + max_angle: Maximum angle used in rotations. + allow_artifacts: A boolean that enables artifacts when applying the homography. + translation_overflow: Amount of border artifacts caused by translation. + + Returns: + homo_mat: A numpy array of shape `[1, 3, 3]` corresponding to the + homography transform. + selected_scale: The selected scaling factor. 
+ """ + # Convert shape to ndarry + if not isinstance(shape, np.ndarray): + shape = np.array(shape) + + # Corners of the output image + pts1 = np.array([[0., 0.], [0., 1.], [1., 1.], [1., 0.]]) + # Corners of the input patch + margin = (1 - patch_ratio) / 2 + pts2 = margin + np.array([[0, 0], [0, patch_ratio], + [patch_ratio, patch_ratio], [patch_ratio, 0]]) + + # Random perspective and affine perturbations + if perspective: + if not allow_artifacts: + perspective_amplitude_x = min(perspective_amplitude_x, margin) + perspective_amplitude_y = min(perspective_amplitude_y, margin) + + # normal distribution with mean=0, std=perspective_amplitude_y/2 + perspective_displacement = np.random.normal( + 0., perspective_amplitude_y/2, [1]) + h_displacement_left = np.random.normal( + 0., perspective_amplitude_x/2, [1]) + h_displacement_right = np.random.normal( + 0., perspective_amplitude_x/2, [1]) + pts2 += np.stack([np.concatenate([h_displacement_left, + perspective_displacement], 0), + np.concatenate([h_displacement_left, + -perspective_displacement], 0), + np.concatenate([h_displacement_right, + perspective_displacement], 0), + np.concatenate([h_displacement_right, + -perspective_displacement], 0)]) + + # Random scaling: sample several scales, check collision with borders, + # randomly pick a valid one + if scaling: + scales = np.concatenate( + [[1.], np.random.normal(1, scaling_amplitude/2, [n_scales])], 0) + center = np.mean(pts2, axis=0, keepdims=True) + scaled = (pts2 - center)[None, ...] * scales[..., None, None] + center + # all scales are valid except scale=1 + if allow_artifacts: + valid = np.array(range(n_scales)) + # Chech the valid scale + else: + valid = np.where(np.all((scaled >= 0.) 
+ & (scaled < 1.), (1, 2)))[0] + # No valid scale found => recursively call + if valid.shape[0] == 0: + return sample_homography( + shape, perspective, scaling, rotation, translation, + n_scales, n_angles, scaling_amplitude, + perspective_amplitude_x, perspective_amplitude_y, + patch_ratio, max_angle, allow_artifacts, translation_overflow) + + idx = valid[np.random.uniform(0., valid.shape[0], ()).astype(np.int32)] + pts2 = scaled[idx] + + # Additionally save and return the selected scale. + selected_scale = scales[idx] + + # Random translation + if translation: + t_min, t_max = np.min(pts2, axis=0), np.min(1 - pts2, axis=0) + if allow_artifacts: + t_min += translation_overflow + t_max += translation_overflow + pts2 += (np.stack([np.random.uniform(-t_min[0], t_max[0], ()), + np.random.uniform(-t_min[1], + t_max[1], ())]))[None, ...] + + # Random rotation: sample several rotations, check collision with borders, + # randomly pick a valid one + if rotation: + angles = np.linspace(-max_angle, max_angle, n_angles) + # in case no rotation is valid + angles = np.concatenate([[0.], angles], axis=0) + center = np.mean(pts2, axis=0, keepdims=True) + rot_mat = np.reshape(np.stack( + [np.cos(angles), -np.sin(angles), + np.sin(angles), np.cos(angles)], axis=1), [-1, 2, 2]) + rotated = np.matmul( + np.tile((pts2 - center)[None, ...], [n_angles+1, 1, 1]), + rot_mat) + center + if allow_artifacts: + # All angles are valid, except angle=0 + valid = np.array(range(n_angles)) + else: + valid = np.where(np.all((rotated >= 0.) 
+ & (rotated < 1.), axis=(1, 2)))[0] + + if valid.shape[0] == 0: + return sample_homography( + shape, perspective, scaling, rotation, translation, + n_scales, n_angles, scaling_amplitude, + perspective_amplitude_x, perspective_amplitude_y, + patch_ratio, max_angle, allow_artifacts, translation_overflow) + + idx = valid[np.random.uniform(0., valid.shape[0], + ()).astype(np.int32)] + pts2 = rotated[idx] + + # Rescale to actual size + shape = shape[::-1].astype(np.float32) # different convention [y, x] + pts1 *= shape[None, ...] + pts2 *= shape[None, ...] + + def ax(p, q): return [p[0], p[1], 1, 0, 0, 0, -p[0] * q[0], -p[1] * q[0]] + + def ay(p, q): return [0, 0, 0, p[0], p[1], 1, -p[0] * q[1], -p[1] * q[1]] + + a_mat = np.stack([f(pts1[i], pts2[i]) for i in range(4) + for f in (ax, ay)], axis=0) + p_mat = np.transpose(np.stack([[pts2[i][j] for i in range(4) + for j in range(2)]], axis=0)) + homo_vec, _, _, _ = np.linalg.lstsq(a_mat, p_mat, rcond=None) + + # Compose the homography vector back to matrix + homo_mat = np.concatenate([ + homo_vec[0:3, 0][None, ...], homo_vec[3:6, 0][None, ...], + np.concatenate((homo_vec[6], homo_vec[7], [1]), + axis=0)[None, ...]], axis=0) + + return homo_mat, selected_scale + + +def convert_to_line_segments(junctions, line_map): + """ Convert junctions and line map to line segments. 
""" + # Copy the line map + line_map_tmp = copy.copy(line_map) + + line_segments = np.zeros([0, 4]) + for idx in range(junctions.shape[0]): + # If no connectivity, just skip it + if line_map_tmp[idx, :].sum() == 0: + continue + # Record the line segment + else: + for idx2 in np.where(line_map_tmp[idx, :] == 1)[0]: + p1 = junctions[idx, :] + p2 = junctions[idx2, :] + line_segments = np.concatenate( + (line_segments, + np.array([p1[0], p1[1], p2[0], p2[1]])[None, ...]), + axis=0) + # Update line_map + line_map_tmp[idx, idx2] = 0 + line_map_tmp[idx2, idx] = 0 + + return line_segments + + +def compute_valid_mask(image_size, homography, + border_margin, valid_mask=None): + # Warp the mask + if valid_mask is None: + initial_mask = np.ones(image_size) + else: + initial_mask = valid_mask + mask = cv2.warpPerspective( + initial_mask, homography, (image_size[1], image_size[0]), + flags=cv2.INTER_NEAREST) + + # Optionally perform erosion + if border_margin > 0: + kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, + (border_margin*2, )*2) + mask = cv2.erode(mask, kernel) + + # Perform dilation if border_margin is negative + if border_margin < 0: + kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, + (abs(int(border_margin))*2, )*2) + mask = cv2.dilate(mask, kernel) + + return mask + + +def warp_line_segment(line_segments, homography, image_size): + """ Warp the line segments using a homography. """ + # Separate the line segements into 2N points to apply matrix operation + num_segments = line_segments.shape[0] + + junctions = np.concatenate( + (line_segments[:, :2], # The first junction of each segment. + line_segments[:, 2:]), # The second junction of each segment. 
+ axis=0) + # Convert to homogeneous coordinates + # Flip the junctions before converting to homogeneous (xy format) + junctions = np.flip(junctions, axis=1) + junctions = np.concatenate((junctions, np.ones([2*num_segments, 1])), + axis=1) + warped_junctions = np.matmul(homography, junctions.T).T + + # Convert back to segments + warped_junctions = warped_junctions[:, :2] / warped_junctions[:, 2:] + # (Convert back to hw format) + warped_junctions = np.flip(warped_junctions, axis=1) + warped_segments = np.concatenate( + (warped_junctions[:num_segments, :], + warped_junctions[num_segments:, :]), + axis=1 + ) + + # Check the intersections with the boundary + warped_segments_new = np.zeros([0, 4]) + image_poly = shapely.geometry.Polygon( + [[0, 0], [image_size[1]-1, 0], [image_size[1]-1, image_size[0]-1], + [0, image_size[0]-1]]) + for idx in range(warped_segments.shape[0]): + # Get the line segment + seg_raw = warped_segments[idx, :] # in HW format. + # Convert to shapely line (flip to xy format) + seg = shapely.geometry.LineString([np.flip(seg_raw[:2]), + np.flip(seg_raw[2:])]) + + # The line segment is just inside the image. + if seg.intersection(image_poly) == seg: + warped_segments_new = np.concatenate((warped_segments_new, + seg_raw[None, ...]), axis=0) + + # Intersect with the image. + elif seg.intersects(image_poly): + # Check intersection + try: + p = np.array( + seg.intersection(image_poly).coords).reshape([-1, 4]) + # If intersect at exact one point, just continue. + except: + continue + segment = np.concatenate([np.flip(p[0, :2]), np.flip(p[0, 2:], + axis=0)])[None, ...] + warped_segments_new = np.concatenate( + (warped_segments_new, segment), axis=0) + + else: + continue + + warped_segments = (np.round(warped_segments_new)).astype(np.int) + return warped_segments + + +class homography_transform(object): + """ # Homography transformations. 
""" + def __init__(self, image_size, homograpy_config, + border_margin=0, min_label_len=20): + self.homo_config = homograpy_config + self.image_size = image_size + self.target_size = (self.image_size[1], self.image_size[0]) + self.border_margin = border_margin + if (min_label_len < 1) and isinstance(min_label_len, float): + raise ValueError("[Error] min_label_len should be in pixels.") + self.min_label_len = min_label_len + + def __call__(self, input_image, junctions, line_map, + valid_mask=None, homo=None, scale=None): + # Sample one random homography or use the given one + if homo is None or scale is None: + homo, scale = sample_homography(self.image_size, + **self.homo_config) + + # Warp the image + warped_image = cv2.warpPerspective( + input_image, homo, self.target_size, flags=cv2.INTER_LINEAR) + + valid_mask = compute_valid_mask(self.image_size, homo, + self.border_margin, valid_mask) + + # Convert junctions and line_map back to line segments + line_segments = convert_to_line_segments(junctions, line_map) + + # Warp the segments and check the length. 
class additive_gaussian_noise(object):
    """ Add zero-mean gaussian noise whose stddev is drawn per call.

    Args:
        stddev_range: [low, high] bounds of the uniform distribution the
            noise standard deviation is sampled from (default [5, 95]).
    """
    def __init__(self, stddev_range=None):
        # Fall back to the default setting when no range is given
        self.stddev_range = [5, 95] if stddev_range is None else stddev_range

    def __call__(self, input_image):
        # Draw the stddev first, then the per-pixel noise
        low, high = self.stddev_range[0], self.stddev_range[1]
        sigma = np.random.uniform(low, high)
        perturbed = input_image + np.random.normal(
            0., sigma, size=input_image.shape)

        # Keep the result inside the 0~255 intensity range
        return perturbed.clip(0., 255.)
""" + def __init__(self, contrast=None): + # If the brightness is not given, use the default setting + if contrast is None: + self.contrast = 0.5 + else: + self.contrast = contrast + + # Initialize the transformer + self.transform = transforms.ColorJitter(contrast=self.contrast) + + def __call__(self, input_image): + # Convert to PIL image + if isinstance(input_image, np.ndarray): + input_image = Image.fromarray(input_image.astype(np.uint8)) + + return np.array(self.transform(input_image)) + + +class additive_shade(object): + """ Additive shade. """ + def __init__(self, nb_ellipses=20, transparency_range=None, + kernel_size_range=None): + self.nb_ellipses = nb_ellipses + if transparency_range is None: + self.transparency_range = [-0.5, 0.8] + else: + self.transparency_range = transparency_range + + if kernel_size_range is None: + self.kernel_size_range = [250, 350] + else: + self.kernel_size_range = kernel_size_range + + def __call__(self, input_image): + # ToDo: if we should convert to numpy array first. 
+ min_dim = min(input_image.shape[:2]) / 4 + mask = np.zeros(input_image.shape[:2], np.uint8) + for i in range(self.nb_ellipses): + ax = int(max(np.random.rand() * min_dim, min_dim / 5)) + ay = int(max(np.random.rand() * min_dim, min_dim / 5)) + max_rad = max(ax, ay) + x = np.random.randint(max_rad, input_image.shape[1] - max_rad) + y = np.random.randint(max_rad, input_image.shape[0] - max_rad) + angle = np.random.rand() * 90 + cv2.ellipse(mask, (x, y), (ax, ay), angle, 0, 360, 255, -1) + + transparency = np.random.uniform(*self.transparency_range) + kernel_size = np.random.randint(*self.kernel_size_range) + + # kernel_size has to be odd + if (kernel_size % 2) == 0: + kernel_size += 1 + mask = cv2.GaussianBlur(mask.astype(np.float32), + (kernel_size, kernel_size), 0) + shaded = (input_image[..., None] + * (1 - transparency * mask[..., np.newaxis]/255.)) + shaded = np.clip(shaded, 0, 255) + + return np.reshape(shaded, input_image.shape) + + +class motion_blur(object): + """ Motion blur. """ + def __init__(self, max_kernel_size=10): + self.max_kernel_size = max_kernel_size + + def __call__(self, input_image): + # Either vertical, horizontal or diagonal blur + mode = np.random.choice(['h', 'v', 'diag_down', 'diag_up']) + ksize = np.random.randint( + 0, int(round((self.max_kernel_size + 1) / 2))) * 2 + 1 + center = int((ksize - 1) / 2) + kernel = np.zeros((ksize, ksize)) + if mode == 'h': + kernel[center, :] = 1. + elif mode == 'v': + kernel[:, center] = 1. + elif mode == 'diag_down': + kernel = np.eye(ksize) + elif mode == 'diag_up': + kernel = np.flip(np.eye(ksize), 0) + var = ksize * ksize / 16. + grid = np.repeat(np.arange(ksize)[:, np.newaxis], ksize, axis=-1) + gaussian = np.exp(-(np.square(grid - center) + + np.square(grid.T - center)) / (2. 
def random_scaling(image, junctions, line_map, scale=1., h_crop=0, w_crop=0):
    """ Rescale an image and its line labels while keeping the image size.

    Args:
        image: [H, W, ...] array.
        junctions: [N, 2] junction coordinates in HW format.
        line_map: junction connectivity matrix matching `junctions`.
        scale: scaling factor. scale >= 1 zooms in (resize, then crop an
            H x W window); scale < 1 zooms out (resize, then paste the
            result in the middle of a zero image).
        h_crop, w_crop: top-left corner of the crop window; only used when
            zooming in.

    Returns:
        Tuple (image, junctions, line_map, valid_mask). valid_mask is an
        [H, W] integer array set to 1 where the output holds image content.
        When the rounded target size equals the input size, the inputs are
        returned untouched together with an all-ones mask.
    """
    H, W = image.shape[:2]
    H_scale, W_scale = round(H * scale), round(W * scale)

    # Nothing to do if the scale is too close to 1
    # (`np.int` / `np.float` were removed in NumPy 1.24; the builtins are
    # the same dtypes.)
    if H_scale == H and W_scale == W:
        return (image, junctions, line_map, np.ones([H, W], dtype=int))

    # Zoom-in => resize and random crop
    if scale >= 1.:
        image_big = cv2.resize(image, (W_scale, H_scale),
                               interpolation=cv2.INTER_LINEAR)
        # Crop the image
        image = image_big[h_crop:h_crop+H, w_crop:w_crop+W, ...]
        valid_mask = np.ones([H, W], dtype=int)

        # Process junctions
        junctions, line_map = process_junctions_and_line_map(
            h_crop, w_crop, H, W, H_scale, W_scale,
            junctions, line_map, "zoom-in")
    # Zoom-out => resize and pad
    else:
        image_shape_raw = image.shape
        image_small = cv2.resize(image, (W_scale, H_scale),
                                 interpolation=cv2.INTER_AREA)
        # Decide the pasting location
        h_start = round((H - H_scale) / 2)
        w_start = round((W - W_scale) / 2)
        # Paste the image to the middle
        image = np.zeros(image_shape_raw, dtype=float)
        image[h_start:h_start+H_scale,
              w_start:w_start+W_scale, ...] = image_small
        valid_mask = np.zeros([H, W], dtype=int)
        valid_mask[h_start:h_start+H_scale, w_start:w_start+W_scale] = 1

        # Process the junctions
        junctions, line_map = process_junctions_and_line_map(
            h_start, w_start, H, W, H_scale, W_scale,
            junctions, line_map, "zoom-out")

    return image, junctions, line_map, valid_mask
+ elif seg.intersects(image_poly): + # Check intersection + try: + p = np.array( + seg.intersection(image_poly).coords).reshape([-1, 4]) + # If intersect at exact one point, just continue. + except: + continue + segment = np.concatenate([np.flip(p[0, :2]), np.flip(p[0, 2:], + axis=0)])[None, ...] + line_segments_new = np.concatenate( + (line_segments_new, segment), axis=0) + else: + continue + line_segments_new = (np.round(line_segments_new)).astype(np.int) + # Filter segments with 0 length + segment_lens = np.linalg.norm( + line_segments_new[:, :2] - line_segments_new[:, 2:], axis=-1) + seg_mask = segment_lens != 0 + line_segments_new = line_segments_new[seg_mask, :] + # Convert back to junctions and line_map + junctions_new = np.concatenate( + (line_segments_new[:, :2], line_segments_new[:, 2:]), axis=0) + if junctions_new.shape[0] == 0: + junctions_new = np.zeros([0, 2]) + line_map = np.zeros([0, 0]) + else: + junctions_new = np.unique(junctions_new, axis=0) + # Generate line map from points and segments + line_map = get_line_map(junctions_new, + line_segments_new).astype(np.int) + junctions_new[:, 0] -= h_start + junctions_new[:, 1] -= w_start + junctions = junctions_new + elif mode == "zoom-out": + # Process the junctions + junctions[:, 0] = (junctions[:, 0] * H_scale / H) + h_start + junctions[:, 1] = (junctions[:, 1] * W_scale / W) + w_start + else: + raise ValueError("[Error] unknown mode...") + + return junctions, line_map diff --git a/third_party/SOLD2/sold2/dataset/wireframe_dataset.py b/third_party/SOLD2/sold2/dataset/wireframe_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ed5bb910bed1b89934ddaaec3bcddf111ea0faef --- /dev/null +++ b/third_party/SOLD2/sold2/dataset/wireframe_dataset.py @@ -0,0 +1,1000 @@ +""" +This file implements the wireframe dataset object for pytorch. 
def wireframe_collate_fn(batch):
    """ Collate wireframe samples into one batch dict.

    A key of the first sample is matched by substring against two pattern
    lists: keys matching a "batch" pattern are merged with the default
    pytorch collate, keys matching a "list" pattern are gathered in a plain
    python list, and keys matching neither are dropped. A key matching both
    lists raises a ValueError.
    """
    stacked_patterns = ("image", "junction_map", "valid_mask", "heatmap",
                        "heatmap_pos", "heatmap_neg", "homography",
                        "line_points", "line_indices")
    listed_patterns = ("junctions", "line_map", "line_map_pos",
                       "line_map_neg", "file_key")

    collated = {}
    for name in batch[0].keys():
        is_stacked = any(pattern in name for pattern in stacked_patterns)
        is_listed = any(pattern in name for pattern in listed_patterns)

        if is_stacked and is_listed:
            raise ValueError(
        "[Error] A key matches batch keys and list keys simultaneously.")
        if not (is_stacked or is_listed):
            # Unknown keys are silently ignored
            continue

        samples = [sample[name] for sample in batch]
        if is_stacked:
            collated[name] = torch_loader.default_collate(samples)
        else:
            collated[name] = samples

    return collated
Only 'train' and 'test'.") + self.mode = mode + + if config is None: + self.config = self.get_default_config() + else: + self.config = config + # Also get the default config + self.default_config = self.get_default_config() + + # Get cache setting + self.dataset_name = self.get_dataset_name() + self.cache_name = self.get_cache_name() + self.cache_path = cfg.wireframe_cache_path + + # Get the ground truth source + self.gt_source = self.config.get("gt_source_%s"%(self.mode), + "official") + if not self.gt_source == "official": + # Convert gt_source to full path + self.gt_source = os.path.join(cfg.export_dataroot, self.gt_source) + # Check the full path exists + if not os.path.exists(self.gt_source): + raise ValueError( + "[Error] The specified ground truth source does not exist.") + + + # Get the filename dataset + print("[Info] Initializing wireframe dataset...") + self.filename_dataset, self.datapoints = self.construct_dataset() + + # Get dataset length + self.dataset_length = len(self.datapoints) + + # Print some info + print("[Info] Successfully initialized dataset") + print("\t Name: wireframe") + print("\t Mode: %s" %(self.mode)) + print("\t Gt: %s" %(self.config.get("gt_source_%s"%(self.mode), + "official"))) + print("\t Counts: %d" %(self.dataset_length)) + print("----------------------------------------") + + ####################################### + ## Dataset construction related APIs ## + ####################################### + def construct_dataset(self): + """ Construct the dataset (from scratch or from cache). 
""" + # Check if the filename cache exists + # If cache exists, load from cache + if self._check_dataset_cache(): + print("\t Found filename cache %s at %s"%(self.cache_name, + self.cache_path)) + print("\t Load filename cache...") + filename_dataset, datapoints = self.get_filename_dataset_from_cache() + # If not, initialize dataset from scratch + else: + print("\t Can't find filename cache ...") + print("\t Create filename dataset from scratch...") + filename_dataset, datapoints = self.get_filename_dataset() + print("\t Create filename dataset cache...") + self.create_filename_dataset_cache(filename_dataset, datapoints) + + return filename_dataset, datapoints + + def create_filename_dataset_cache(self, filename_dataset, datapoints): + """ Create filename dataset cache for faster initialization. """ + # Check cache path exists + if not os.path.exists(self.cache_path): + os.makedirs(self.cache_path) + + cache_file_path = os.path.join(self.cache_path, self.cache_name) + data = { + "filename_dataset": filename_dataset, + "datapoints": datapoints + } + with open(cache_file_path, "wb") as f: + pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) + + def get_filename_dataset_from_cache(self): + """ Get filename dataset from cache. 
""" + # Load from pkl cache + cache_file_path = os.path.join(self.cache_path, self.cache_name) + with open(cache_file_path, "rb") as f: + data = pickle.load(f) + + return data["filename_dataset"], data["datapoints"] + + def get_filename_dataset(self): + # Get the path to the dataset + if self.mode == "train": + dataset_path = os.path.join(cfg.wireframe_dataroot, "train") + elif self.mode == "test": + dataset_path = os.path.join(cfg.wireframe_dataroot, "valid") + + # Get paths to all image files + image_paths = sorted([os.path.join(dataset_path, _) + for _ in os.listdir(dataset_path)\ + if os.path.splitext(_)[-1] == ".png"]) + # Get the shared prefix + prefix_paths = [_.split(".png")[0] for _ in image_paths] + + # Get the label paths (different procedure for different split) + if self.mode == "train": + label_paths = [_ + "_label.npz" for _ in prefix_paths] + else: + label_paths = [_ + "_label.npz" for _ in prefix_paths] + mat_paths = [p[:-2] + "_line.mat" for p in prefix_paths] + + # Verify all the images and labels exist + for idx in range(len(image_paths)): + image_path = image_paths[idx] + label_path = label_paths[idx] + if (not (os.path.exists(image_path) + and os.path.exists(label_path))): + raise ValueError( + "[Error] The image and label do not exist. %s"%(image_path)) + # Further verify mat paths for test split + if self.mode == "test": + mat_path = mat_paths[idx] + if not os.path.exists(mat_path): + raise ValueError( + "[Error] The mat file does not exist. 
%s"%(mat_path)) + + # Construct the filename dataset + num_pad = int(math.ceil(math.log10(len(image_paths))) + 1) + filename_dataset = {} + for idx in range(len(image_paths)): + # Get the file key + key = self.get_padded_filename(num_pad, idx) + + filename_dataset[key] = { + "image": image_paths[idx], + "label": label_paths[idx] + } + + # Get the datapoints + datapoints = list(sorted(filename_dataset.keys())) + + return filename_dataset, datapoints + + def get_dataset_name(self): + """ Get dataset name from dataset config / default config. """ + if self.config["dataset_name"] is None: + dataset_name = self.default_config["dataset_name"] + "_%s" % self.mode + else: + dataset_name = self.config["dataset_name"] + "_%s" % self.mode + + return dataset_name + + def get_cache_name(self): + """ Get cache name from dataset config / default config. """ + if self.config["dataset_name"] is None: + dataset_name = self.default_config["dataset_name"] + "_%s" % self.mode + else: + dataset_name = self.config["dataset_name"] + "_%s" % self.mode + # Compose cache name + cache_name = dataset_name + "_cache.pkl" + + return cache_name + + @staticmethod + def get_padded_filename(num_pad, idx): + """ Get the padded filename using adaptive padding. """ + file_len = len("%d" % (idx)) + filename = "0" * (num_pad - file_len) + "%d" % (idx) + + return filename + + def get_default_config(self): + """ Get the default configuration. 
""" + return { + "dataset_name": "wireframe", + "add_augmentation_to_all_splits": False, + "preprocessing": { + "resize": [240, 320], + "blur_size": 11 + }, + "augmentation":{ + "photometric":{ + "enable": False + }, + "homographic":{ + "enable": False + }, + }, + } + + + ############################################ + ## Pytorch and preprocessing related APIs ## + ############################################ + # Get data from the information from filename dataset + @staticmethod + def get_data_from_path(data_path): + output = {} + + # Get image data + image_path = data_path["image"] + image = imread(image_path) + output["image"] = image + + # Get the npz label + """ Data entries in the npz file + jmap: [J, H, W] Junction heat map (H and W are 4x smaller) + joff: [J, 2, H, W] Junction offset within each pixel (Not sure about offsets) + lmap: [H, W] Line heat map with anti-aliasing (H and W are 4x smaller) + junc: [Na, 3] Junction coordinates (coordinates from 0~128 => 4x smaller.) + Lpos: [M, 2] Positive lines represented with junction indices + Lneg: [M, 2] Negative lines represented with junction indices + lpos: [Np, 2, 3] Positive lines represented with junction coordinates + lneg: [Nn, 2, 3] Negative lines represented with junction coordinates + """ + label_path = data_path["label"] + label = np.load(label_path) + for key in list(label.keys()): + output[key] = label[key] + + # If there's "line_mat" entry. + # TODO: How to process mat data + if data_path.get("line_mat") is not None: + raise NotImplementedError + + return output + + @staticmethod + def convert_line_map(lcnn_line_map, num_junctions): + """ Convert the line_pos or line_neg + (represented by two junction indexes) to our line map. 
""" + # Initialize empty line map + line_map = np.zeros([num_junctions, num_junctions]) + + # Iterate through all the lines + for idx in range(lcnn_line_map.shape[0]): + index1 = lcnn_line_map[idx, 0] + index2 = lcnn_line_map[idx, 1] + + line_map[index1, index2] = 1 + line_map[index2, index1] = 1 + + return line_map + + @staticmethod + def junc_to_junc_map(junctions, image_size): + """ Convert junction points to junction maps. """ + junctions = np.round(junctions).astype(np.int) + # Clip the boundary by image size + junctions[:, 0] = np.clip(junctions[:, 0], 0., image_size[0]-1) + junctions[:, 1] = np.clip(junctions[:, 1], 0., image_size[1]-1) + + # Create junction map + junc_map = np.zeros([image_size[0], image_size[1]]) + junc_map[junctions[:, 0], junctions[:, 1]] = 1 + + return junc_map[..., None].astype(np.int) + + def parse_transforms(self, names, all_transforms): + """ Parse the transform. """ + trans = all_transforms if (names == 'all') \ + else (names if isinstance(names, list) else [names]) + assert set(trans) <= set(all_transforms) + return trans + + def get_photo_transform(self): + """ Get list of photometric transforms (according to the config). """ + # Get the photometric transform config + photo_config = self.config["augmentation"]["photometric"] + if not photo_config["enable"]: + raise ValueError( + "[Error] Photometric augmentation is not enabled.") + + # Parse photometric transforms + trans_lst = self.parse_transforms(photo_config["primitives"], + photoaug.available_augmentations) + trans_config_lst = [photo_config["params"].get(p, {}) + for p in trans_lst] + + # List of photometric augmentation + photometric_trans_lst = [ + getattr(photoaug, trans)(**conf) \ + for (trans, conf) in zip(trans_lst, trans_config_lst) + ] + + return photometric_trans_lst + + def get_homo_transform(self): + """ Get homographic transforms (according to the config). 
""" + # Get homographic transforms for image + homo_config = self.config["augmentation"]["homographic"]["params"] + if not self.config["augmentation"]["homographic"]["enable"]: + raise ValueError( + "[Error] Homographic augmentation is not enabled.") + + # Parse the homographic transforms + image_shape = self.config["preprocessing"]["resize"] + + # Compute the min_label_len from config + try: + min_label_tmp = self.config["generation"]["min_label_len"] + except: + min_label_tmp = None + + # float label len => fraction + if isinstance(min_label_tmp, float): # Skip if not provided + min_label_len = min_label_tmp * min(image_shape) + # int label len => length in pixel + elif isinstance(min_label_tmp, int): + scale_ratio = (self.config["preprocessing"]["resize"] + / self.config["generation"]["image_size"][0]) + min_label_len = (self.config["generation"]["min_label_len"] + * scale_ratio) + # if none => no restriction + else: + min_label_len = 0 + + # Initialize the transform + homographic_trans = homoaug.homography_transform( + image_shape, homo_config, 0, min_label_len) + + return homographic_trans + + def get_line_points(self, junctions, line_map, H1=None, H2=None, + img_size=None, warp=False): + """ Sample evenly points along each line segments + and keep track of line idx. 
""" + if np.sum(line_map) == 0: + # No segment detected in the image + line_indices = np.zeros(self.config["max_pts"], dtype=int) + line_points = np.zeros((self.config["max_pts"], 2), dtype=float) + return line_points, line_indices + + # Extract all pairs of connected junctions + junc_indices = np.array( + [[i, j] for (i, j) in zip(*np.where(line_map)) if j > i]) + line_segments = np.stack([junctions[junc_indices[:, 0]], + junctions[junc_indices[:, 1]]], axis=1) + # line_segments is (num_lines, 2, 2) + line_lengths = np.linalg.norm( + line_segments[:, 0] - line_segments[:, 1], axis=1) + + # Sample the points separated by at least min_dist_pts along each line + # The number of samples depends on the length of the line + num_samples = np.minimum(line_lengths // self.config["min_dist_pts"], + self.config["max_num_samples"]) + line_points = [] + line_indices = [] + cur_line_idx = 1 + for n in np.arange(2, self.config["max_num_samples"] + 1): + # Consider all lines where we can fit up to n points + cur_line_seg = line_segments[num_samples == n] + line_points_x = np.linspace(cur_line_seg[:, 0, 0], + cur_line_seg[:, 1, 0], + n, axis=-1).flatten() + line_points_y = np.linspace(cur_line_seg[:, 0, 1], + cur_line_seg[:, 1, 1], + n, axis=-1).flatten() + jitter = self.config.get("jittering", 0) + if jitter: + # Add a small random jittering of all points along the line + angles = np.arctan2( + cur_line_seg[:, 1, 0] - cur_line_seg[:, 0, 0], + cur_line_seg[:, 1, 1] - cur_line_seg[:, 0, 1]).repeat(n) + jitter_hyp = (np.random.rand(len(angles)) * 2 - 1) * jitter + line_points_x += jitter_hyp * np.sin(angles) + line_points_y += jitter_hyp * np.cos(angles) + line_points.append(np.stack([line_points_x, line_points_y], axis=-1)) + # Keep track of the line indices for each sampled point + num_cur_lines = len(cur_line_seg) + line_idx = np.arange(cur_line_idx, cur_line_idx + num_cur_lines) + line_indices.append(line_idx.repeat(n)) + cur_line_idx += num_cur_lines + line_points = 
np.concatenate(line_points, + axis=0)[:self.config["max_pts"]] + line_indices = np.concatenate(line_indices, + axis=0)[:self.config["max_pts"]] + + # Warp the points if need be, and filter unvalid ones + # If the other view is also warped + if warp and H2 is not None: + warp_points2 = warp_points(line_points, H2) + line_points = warp_points(line_points, H1) + mask = mask_points(line_points, img_size) + mask2 = mask_points(warp_points2, img_size) + mask = mask * mask2 + # If the other view is not warped + elif warp and H2 is None: + line_points = warp_points(line_points, H1) + mask = mask_points(line_points, img_size) + else: + if H1 is not None: + raise ValueError("[Error] Wrong combination of homographies.") + # Remove points that would be outside of img_size if warped by H + warped_points = warp_points(line_points, H1) + mask = mask_points(warped_points, img_size) + line_points = line_points[mask] + line_indices = line_indices[mask] + + # Pad the line points to a fixed length + # Index of 0 means padded line + line_indices = np.concatenate([line_indices, np.zeros( + self.config["max_pts"] - len(line_indices))], axis=0) + line_points = np.concatenate( + [line_points, + np.zeros((self.config["max_pts"] - len(line_points), 2), + dtype=float)], axis=0) + + return line_points, line_indices + + def train_preprocessing(self, data, numpy=False): + """ Train preprocessing for GT data. """ + # Fetch the corresponding entries + image = data["image"] + junctions = data["junc"][:, :2] + line_pos = data["Lpos"] + line_neg = data["Lneg"] + image_size = image.shape[:2] + # Convert junctions to pixel coordinates (from 128x128) + junctions[:, 0] *= image_size[0] / 128 + junctions[:, 1] *= image_size[1] / 128 + + # Resize the image before photometric and homographical augmentations + if not(list(image_size) == self.config["preprocessing"]["resize"]): + # Resize the image and the point location. 
+ size_old = list(image.shape)[:2] # Only H and W dimensions + + image = cv2.resize( + image, tuple(self.config['preprocessing']['resize'][::-1]), + interpolation=cv2.INTER_LINEAR) + image = np.array(image, dtype=np.uint8) + + # In HW format + junctions = (junctions * np.array( + self.config['preprocessing']['resize'], np.float) + / np.array(size_old, np.float)) + + # Convert to positive line map and negative line map (our format) + num_junctions = junctions.shape[0] + line_map_pos = self.convert_line_map(line_pos, num_junctions) + line_map_neg = self.convert_line_map(line_neg, num_junctions) + + # Generate the line heatmap after post-processing + junctions_xy = np.flip(np.round(junctions).astype(np.int32), axis=1) + # Update image size + image_size = image.shape[:2] + heatmap_pos = get_line_heatmap(junctions_xy, line_map_pos, image_size) + heatmap_neg = get_line_heatmap(junctions_xy, line_map_neg, image_size) + # Declare default valid mask (all ones) + valid_mask = np.ones(image_size) + + # Optionally convert the image to grayscale + if self.config["gray_scale"]: + image = (color.rgb2gray(image) * 255.).astype(np.uint8) + + # Check if we need to apply augmentations + # In training mode => yes. 
+ # In homography adaptation mode (export mode) => No + if self.config["augmentation"]["photometric"]["enable"]: + photo_trans_lst = self.get_photo_transform() + ### Image transform ### + np.random.shuffle(photo_trans_lst) + image_transform = transforms.Compose( + photo_trans_lst + [photoaug.normalize_image()]) + else: + image_transform = photoaug.normalize_image() + image = image_transform(image) + + # Check homographic augmentation + if self.config["augmentation"]["homographic"]["enable"]: + homo_trans = self.get_homo_transform() + # Perform homographic transform + outputs_pos = homo_trans(image, junctions, line_map_pos) + outputs_neg = homo_trans(image, junctions, line_map_neg) + + # record the warped results + junctions = outputs_pos["junctions"] # Should be HW format + image = outputs_pos["warped_image"] + line_map_pos = outputs_pos["line_map"] + line_map_neg = outputs_neg["line_map"] + heatmap_pos = outputs_pos["warped_heatmap"] + heatmap_neg = outputs_neg["warped_heatmap"] + valid_mask = outputs_pos["valid_mask"] # Same for pos and neg + + junction_map = self.junc_to_junc_map(junctions, image_size) + + # Convert to tensor and return the results + to_tensor = transforms.ToTensor() + if not numpy: + return { + "image": to_tensor(image), + "junctions": to_tensor(junctions).to(torch.float32)[0, ...], + "junction_map": to_tensor(junction_map).to(torch.int), + "line_map_pos": to_tensor( + line_map_pos).to(torch.int32)[0, ...], + "line_map_neg": to_tensor( + line_map_neg).to(torch.int32)[0, ...], + "heatmap_pos": to_tensor(heatmap_pos).to(torch.int32), + "heatmap_neg": to_tensor(heatmap_neg).to(torch.int32), + "valid_mask": to_tensor(valid_mask).to(torch.int32) + } + else: + return { + "image": image, + "junctions": junctions.astype(np.float32), + "junction_map": junction_map.astype(np.int32), + "line_map_pos": line_map_pos.astype(np.int32), + "line_map_neg": line_map_neg.astype(np.int32), + "heatmap_pos": heatmap_pos.astype(np.int32), + "heatmap_neg": 
heatmap_neg.astype(np.int32), + "valid_mask": valid_mask.astype(np.int32) + } + + def train_preprocessing_exported( + self, data, numpy=False, disable_homoaug=False, + desc_training=False, H1=None, H1_scale=None, H2=None, scale=1., + h_crop=None, w_crop=None): + """ Train preprocessing for the exported labels. """ + data = copy.deepcopy(data) + # Fetch the corresponding entries + image = data["image"] + junctions = data["junctions"] + line_map = data["line_map"] + image_size = image.shape[:2] + + # Define the random crop for scaling if necessary + if h_crop is None or w_crop is None: + h_crop, w_crop = 0, 0 + if scale > 1: + H, W = self.config["preprocessing"]["resize"] + H_scale, W_scale = round(H * scale), round(W * scale) + if H_scale > H: + h_crop = np.random.randint(H_scale - H) + if W_scale > W: + w_crop = np.random.randint(W_scale - W) + + # Resize the image before photometric and homographical augmentations + if not(list(image_size) == self.config["preprocessing"]["resize"]): + # Resize the image and the point location. + size_old = list(image.shape)[:2] # Only H and W dimensions + + image = cv2.resize( + image, tuple(self.config['preprocessing']['resize'][::-1]), + interpolation=cv2.INTER_LINEAR) + image = np.array(image, dtype=np.uint8) + + # # In HW format + # junctions = (junctions * np.array( + # self.config['preprocessing']['resize'], np.float) + # / np.array(size_old, np.float)) + + # Generate the line heatmap after post-processing + junctions_xy = np.flip(np.round(junctions).astype(np.int32), axis=1) + image_size = image.shape[:2] + heatmap = get_line_heatmap(junctions_xy, line_map, image_size) + + # Optionally convert the image to grayscale + if self.config["gray_scale"]: + image = (color.rgb2gray(image) * 255.).astype(np.uint8) + + # Check if we need to apply augmentations + # In training mode => yes. 
+ # In homography adaptation mode (export mode) => No + if self.config["augmentation"]["photometric"]["enable"]: + photo_trans_lst = self.get_photo_transform() + ### Image transform ### + np.random.shuffle(photo_trans_lst) + image_transform = transforms.Compose( + photo_trans_lst + [photoaug.normalize_image()]) + else: + image_transform = photoaug.normalize_image() + image = image_transform(image) + + # Perform the random scaling + if scale != 1.: + image, junctions, line_map, valid_mask = random_scaling( + image, junctions, line_map, scale, + h_crop=h_crop, w_crop=w_crop) + else: + # Declare default valid mask (all ones) + valid_mask = np.ones(image_size) + + # Initialize the empty output dict + outputs = {} + # Convert to tensor and return the results + to_tensor = transforms.ToTensor() + + # Check homographic augmentation + warp = (self.config["augmentation"]["homographic"]["enable"] + and disable_homoaug == False) + if warp: + homo_trans = self.get_homo_transform() + # Perform homographic transform + if H1 is None: + homo_outputs = homo_trans( + image, junctions, line_map, valid_mask=valid_mask) + else: + homo_outputs = homo_trans( + image, junctions, line_map, homo=H1, scale=H1_scale, + valid_mask=valid_mask) + homography_mat = homo_outputs["homo"] + + # Give the warp of the other view + if H1 is None: + H1 = homo_outputs["homo"] + + # Sample points along each line segments for the descriptor + if desc_training: + line_points, line_indices = self.get_line_points( + junctions, line_map, H1=H1, H2=H2, + img_size=image_size, warp=warp) + + # Record the warped results + if warp: + junctions = homo_outputs["junctions"] # Should be HW format + image = homo_outputs["warped_image"] + line_map = homo_outputs["line_map"] + valid_mask = homo_outputs["valid_mask"] # Same for pos and neg + heatmap = homo_outputs["warped_heatmap"] + + # Optionally put warping information first. 
+ if not numpy: + outputs["homography_mat"] = to_tensor( + homography_mat).to(torch.float32)[0, ...] + else: + outputs["homography_mat"] = homography_mat.astype(np.float32) + + junction_map = self.junc_to_junc_map(junctions, image_size) + + if not numpy: + outputs.update({ + "image": to_tensor(image).to(torch.float32), + "junctions": to_tensor(junctions).to(torch.float32)[0, ...], + "junction_map": to_tensor(junction_map).to(torch.int), + "line_map": to_tensor(line_map).to(torch.int32)[0, ...], + "heatmap": to_tensor(heatmap).to(torch.int32), + "valid_mask": to_tensor(valid_mask).to(torch.int32) + }) + if desc_training: + outputs.update({ + "line_points": to_tensor( + line_points).to(torch.float32)[0], + "line_indices": torch.tensor(line_indices, + dtype=torch.int) + }) + else: + outputs.update({ + "image": image, + "junctions": junctions.astype(np.float32), + "junction_map": junction_map.astype(np.int32), + "line_map": line_map.astype(np.int32), + "heatmap": heatmap.astype(np.int32), + "valid_mask": valid_mask.astype(np.int32) + }) + if desc_training: + outputs.update({ + "line_points": line_points.astype(np.float32), + "line_indices": line_indices.astype(int) + }) + + return outputs + + def preprocessing_exported_paired_desc(self, data, numpy=False, scale=1.): + """ Train preprocessing for paired data for the exported labels + for descriptor training. 
""" + outputs = {} + + # Define the random crop for scaling if necessary + h_crop, w_crop = 0, 0 + if scale > 1: + H, W = self.config["preprocessing"]["resize"] + H_scale, W_scale = round(H * scale), round(W * scale) + if H_scale > H: + h_crop = np.random.randint(H_scale - H) + if W_scale > W: + w_crop = np.random.randint(W_scale - W) + + # Sample ref homography first + homo_config = self.config["augmentation"]["homographic"]["params"] + image_shape = self.config["preprocessing"]["resize"] + ref_H, ref_scale = homoaug.sample_homography(image_shape, + **homo_config) + + # Data for target view (All augmentation) + target_data = self.train_preprocessing_exported( + data, numpy=numpy, desc_training=True, H1=None, H2=ref_H, + scale=scale, h_crop=h_crop, w_crop=w_crop) + + # Data for reference view (No homographical augmentation) + ref_data = self.train_preprocessing_exported( + data, numpy=numpy, desc_training=True, H1=ref_H, + H1_scale=ref_scale, H2=target_data["homography_mat"].numpy(), + scale=scale, h_crop=h_crop, w_crop=w_crop) + + # Spread ref data + for key, val in ref_data.items(): + outputs["ref_" + key] = val + + # Spread target data + for key, val in target_data.items(): + outputs["target_" + key] = val + + return outputs + + def test_preprocessing(self, data, numpy=False): + """ Test preprocessing for GT data. """ + data = copy.deepcopy(data) + # Fetch the corresponding entries + image = data["image"] + junctions = data["junc"][:, :2] + line_pos = data["Lpos"] + line_neg = data["Lneg"] + image_size = image.shape[:2] + # Convert junctions to pixel coordinates (from 128x128) + junctions[:, 0] *= image_size[0] / 128 + junctions[:, 1] *= image_size[1] / 128 + + # Resize the image before photometric and homographical augmentations + if not(list(image_size) == self.config["preprocessing"]["resize"]): + # Resize the image and the point location. 
+ size_old = list(image.shape)[:2] # Only H and W dimensions + + image = cv2.resize( + image, tuple(self.config['preprocessing']['resize'][::-1]), + interpolation=cv2.INTER_LINEAR) + image = np.array(image, dtype=np.uint8) + + # In HW format + junctions = (junctions * np.array( + self.config['preprocessing']['resize'], np.float) + / np.array(size_old, np.float)) + + # Optionally convert the image to grayscale + if self.config["gray_scale"]: + image = (color.rgb2gray(image) * 255.).astype(np.uint8) + + # Still need to normalize image + image_transform = photoaug.normalize_image() + image = image_transform(image) + + # Convert to positive line map and negative line map (our format) + num_junctions = junctions.shape[0] + line_map_pos = self.convert_line_map(line_pos, num_junctions) + line_map_neg = self.convert_line_map(line_neg, num_junctions) + + # Generate the line heatmap after post-processing + junctions_xy = np.flip(np.round(junctions).astype(np.int32), axis=1) + # Update image size + image_size = image.shape[:2] + heatmap_pos = get_line_heatmap(junctions_xy, line_map_pos, image_size) + heatmap_neg = get_line_heatmap(junctions_xy, line_map_neg, image_size) + # Declare default valid mask (all ones) + valid_mask = np.ones(image_size) + + junction_map = self.junc_to_junc_map(junctions, image_size) + + # Convert to tensor and return the results + to_tensor = transforms.ToTensor() + if not numpy: + return { + "image": to_tensor(image), + "junctions": to_tensor(junctions).to(torch.float32)[0, ...], + "junction_map": to_tensor(junction_map).to(torch.int), + "line_map_pos": to_tensor( + line_map_pos).to(torch.int32)[0, ...], + "line_map_neg": to_tensor( + line_map_neg).to(torch.int32)[0, ...], + "heatmap_pos": to_tensor(heatmap_pos).to(torch.int32), + "heatmap_neg": to_tensor(heatmap_neg).to(torch.int32), + "valid_mask": to_tensor(valid_mask).to(torch.int32) + } + else: + return { + "image": image, + "junctions": junctions.astype(np.float32), + "junction_map": 
junction_map.astype(np.int32), + "line_map_pos": line_map_pos.astype(np.int32), + "line_map_neg": line_map_neg.astype(np.int32), + "heatmap_pos": heatmap_pos.astype(np.int32), + "heatmap_neg": heatmap_neg.astype(np.int32), + "valid_mask": valid_mask.astype(np.int32) + } + + def test_preprocessing_exported(self, data, numpy=False, scale=1.): + """ Test preprocessing for the exported labels. """ + data = copy.deepcopy(data) + # Fetch the corresponding entries + image = data["image"] + junctions = data["junctions"] + line_map = data["line_map"] + image_size = image.shape[:2] + + # Resize the image before photometric and homographical augmentations + if not(list(image_size) == self.config["preprocessing"]["resize"]): + # Resize the image and the point location. + size_old = list(image.shape)[:2] # Only H and W dimensions + + image = cv2.resize( + image, tuple(self.config['preprocessing']['resize'][::-1]), + interpolation=cv2.INTER_LINEAR) + image = np.array(image, dtype=np.uint8) + + # # In HW format + # junctions = (junctions * np.array( + # self.config['preprocessing']['resize'], np.float) + # / np.array(size_old, np.float)) + + # Optionally convert the image to grayscale + if self.config["gray_scale"]: + image = (color.rgb2gray(image) * 255.).astype(np.uint8) + + # Still need to normalize image + image_transform = photoaug.normalize_image() + image = image_transform(image) + + # Generate the line heatmap after post-processing + junctions_xy = np.flip(np.round(junctions).astype(np.int32), axis=1) + image_size = image.shape[:2] + heatmap = get_line_heatmap(junctions_xy, line_map, image_size) + + # Declare default valid mask (all ones) + valid_mask = np.ones(image_size) + + junction_map = self.junc_to_junc_map(junctions, image_size) + + # Convert to tensor and return the results + to_tensor = transforms.ToTensor() + if not numpy: + outputs = { + "image": to_tensor(image), + "junctions": to_tensor(junctions).to(torch.float32)[0, ...], + "junction_map": 
to_tensor(junction_map).to(torch.int), + "line_map": to_tensor(line_map).to(torch.int32)[0, ...], + "heatmap": to_tensor(heatmap).to(torch.int32), + "valid_mask": to_tensor(valid_mask).to(torch.int32) + } + else: + outputs = { + "image": image, + "junctions": junctions.astype(np.float32), + "junction_map": junction_map.astype(np.int32), + "line_map": line_map.astype(np.int32), + "heatmap": heatmap.astype(np.int32), + "valid_mask": valid_mask.astype(np.int32) + } + + return outputs + + def __len__(self): + return self.dataset_length + + def get_data_from_key(self, file_key): + """ Get data from file_key. """ + # Check key exists + if not file_key in self.filename_dataset.keys(): + raise ValueError("[Error] the specified key is not in the dataset.") + + # Get the data paths + data_path = self.filename_dataset[file_key] + # Read in the image and npz labels (but haven't applied any transform) + data = self.get_data_from_path(data_path) + + # Perform transform and augmentation + if self.mode == "train" or self.config["add_augmentation_to_all_splits"]: + data = self.train_preprocessing(data, numpy=True) + else: + data = self.test_preprocessing(data, numpy=True) + + # Add file key to the output + data["file_key"] = file_key + + return data + + def __getitem__(self, idx): + """Return data + file_key: str, keys used to retrieve data from the filename dataset. 
+ image: torch.float, C*H*W range 0~1, + junctions: torch.float, N*2, + junction_map: torch.int32, 1*H*W range 0 or 1, + line_map_pos: torch.int32, N*N range 0 or 1, + line_map_neg: torch.int32, N*N range 0 or 1, + heatmap_pos: torch.int32, 1*H*W range 0 or 1, + heatmap_neg: torch.int32, 1*H*W range 0 or 1, + valid_mask: torch.int32, 1*H*W range 0 or 1 + """ + # Get the corresponding datapoint and contents from filename dataset + file_key = self.datapoints[idx] + data_path = self.filename_dataset[file_key] + # Read in the image and npz labels (but haven't applied any transform) + data = self.get_data_from_path(data_path) + + # Also load the exported labels if not using the official ground truth + if not self.gt_source == "official": + with h5py.File(self.gt_source, "r") as f: + exported_label = parse_h5_data(f[file_key]) + + data["junctions"] = exported_label["junctions"] + data["line_map"] = exported_label["line_map"] + + # Perform transform and augmentation + return_type = self.config.get("return_type", "single") + if (self.mode == "train" + or self.config["add_augmentation_to_all_splits"]): + # Perform random scaling first + if self.config["augmentation"]["random_scaling"]["enable"]: + scale_range = self.config["augmentation"]["random_scaling"]["range"] + # Decide the scaling + scale = np.random.uniform(min(scale_range), max(scale_range)) + else: + scale = 1. 
def load_config(config_path):
    """ Load configurations from a given yaml file.

    Arguments:
        config_path: Path to the yaml configuration file.
    Returns:
        The parsed content of the file, as returned by `yaml.safe_load`.
    Raises:
        ValueError: If `config_path` does not point to an existing file.
    """
    # Check file exists. isfile (rather than exists) also rejects
    # directories, which would otherwise only fail later inside open().
    if not os.path.isfile(config_path):
        raise ValueError("[Error] The provided config path is not valid.")

    # Load the configuration
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    return config
def train(args, dataset_cfg, model_cfg, output_path):
    """ Training function.

    Writes the model and dataset configs to `output_path`, except when
    training is resumed from that very folder, then launches the training.
    """
    # When resuming in place the configs are not written again; the
    # realpath comparison treats different spellings of one folder as equal.
    resuming_in_place = args.resume and (
        os.path.realpath(output_path) == os.path.realpath(args.resume_path))
    if not resuming_in_place:
        record_config(model_cfg, dataset_cfg, output_path)

    # Launch the training
    train_net(args, dataset_cfg, model_cfg, output_path)
def set_random_seed(seed):
    """ Seed the global NumPy and PyTorch random number generators. """
    for seed_fn in (np.random.seed, torch.manual_seed):
        seed_fn(seed)
training from a pre-trained model.") + parser.add_argument("--resume_path", default=None, + help="Path from which to resume training.") + parser.add_argument("--pretrained_path", default=None, + help="Path to the pre-trained model.") + parser.add_argument("--checkpoint_name", default=None, + help="Name of the checkpoint to use.") + parser.add_argument("--export_dataset_mode", default=None, + help="'train' or 'test'.") + parser.add_argument("--export_batch_size", default=4, type=int, + help="Export batch size.") + + args = parser.parse_args() + + # Check if GPU is available + # Get the model + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + + # Check if dataset config and model config is given. + if (((args.dataset_config is None) or (args.model_config is None)) + and (not args.resume) and (args.mode == "train")): + raise ValueError( + "[Error] The dataset config and model config should be given in non-resume mode") + + # If resume, check if the resume path has been given + if args.resume and (args.resume_path is None): + raise ValueError( + "[Error] Missing resume path.") + + # [Training] Load the config file. + if args.mode == "train" and (not args.resume): + # Check the pretrained checkpoint_path exists + if args.pretrained: + checkpoint_folder = args.resume_path + checkpoint_path = os.path.join(args.pretrained_path, + args.checkpoint_name) + if not os.path.exists(checkpoint_path): + raise ValueError("[Error] Missing checkpoint: " + + checkpoint_path) + dataset_cfg = load_config(args.dataset_config) + model_cfg = load_config(args.model_config) + + # [resume Training, Test, Export] Load the config file. 
+ elif (args.mode == "train" and args.resume) or (args.mode == "export"): + # Check checkpoint path exists + checkpoint_folder = args.resume_path + checkpoint_path = os.path.join(args.resume_path, args.checkpoint_name) + if not os.path.exists(checkpoint_path): + raise ValueError("[Error] Missing checkpoint: " + checkpoint_path) + + # Load model_cfg from checkpoint folder if not provided + if args.model_config is None: + print("[Info] No model config provided. Loading from checkpoint folder.") + model_cfg_path = os.path.join(checkpoint_folder, "model_cfg.yaml") + if not os.path.exists(model_cfg_path): + raise ValueError( + "[Error] Missing model config in checkpoint path.") + model_cfg = load_config(model_cfg_path) + else: + model_cfg = load_config(args.model_config) + + # Load dataset_cfg from checkpoint folder if not provided + if args.dataset_config is None: + print("[Info] No dataset config provided. Loading from checkpoint folder.") + dataset_cfg_path = os.path.join(checkpoint_folder, + "dataset_cfg.yaml") + if not os.path.exists(dataset_cfg_path): + raise ValueError( + "[Error] Missing dataset config in checkpoint path.") + dataset_cfg = load_config(dataset_cfg_path) + else: + dataset_cfg = load_config(args.dataset_config) + + # Check the --export_dataset_mode flag + if (args.mode == "export") and (args.export_dataset_mode is None): + raise ValueError("[Error] Empty --export_dataset_mode flag.") + else: + raise ValueError("[Error] Unknown mode: " + args.mode) + + # Set the random seed + seed = dataset_cfg.get("random_seed", 0) + set_random_seed(seed) + + main(args, dataset_cfg, model_cfg, + export_dataset_mode=args.export_dataset_mode, device=device) diff --git a/third_party/SOLD2/sold2/export.py b/third_party/SOLD2/sold2/export.py new file mode 100644 index 0000000000000000000000000000000000000000..19683d982c6d7fd429b27868b620fd20562d1aa7 --- /dev/null +++ b/third_party/SOLD2/sold2/export.py @@ -0,0 +1,342 @@ +import numpy as np +import copy +import cv2 
def restore_weights(model, state_dict):
    """ Restore weights in compatible mode.

    A strict load is tried first. If it fails, the keys missing from the
    checkpoint are filled positionally from the checkpoint's unexpected
    keys (entries whose name contains "tracked" are skipped), i.e. the
    n-th missing key receives the n-th remaining unexpected key.

    Arguments:
        model: The torch module to load the weights into.
        state_dict: The checkpoint state dict.
    Returns:
        The same `model` object, with the weights loaded.
    Raises:
        RuntimeError: If the checkpoint does not provide enough candidate
            entries to fill the missing keys, or if torch rejects the
            weights (e.g. on a shape mismatch).
    """
    # Try to directly load state dict
    try:
        model.load_state_dict(state_dict)
    except RuntimeError:
        # load_state_dict reports key mismatches as RuntimeError; anything
        # else (e.g. KeyboardInterrupt) must not trigger the fallback.
        result = model.load_state_dict(state_dict, strict=False)
        # Missing keys are those in model but not in state_dict
        missing_keys = result.missing_keys
        # Unexpected keys are those in state_dict but not in model
        candidate_keys = [key for key in result.unexpected_keys
                          if "tracked" not in key]
        if len(candidate_keys) < len(missing_keys):
            raise RuntimeError(
                "[Error] Cannot restore weights: %d keys are missing but "
                "only %d unmatched checkpoint entries are available."
                % (len(missing_keys), len(candidate_keys)))

        # Load mismatched keys manually
        model_dict = model.state_dict()
        for missing_key, source_key in zip(missing_keys, candidate_keys):
            model_dict[missing_key] = state_dict[source_key]
        model.load_state_dict(model_dict)
    return model
""" + # Get the test configuration + test_cfg = model_cfg["test"] + + # Create the dataset and dataloader based on the export_dataset_mode + print("\t Initializing dataset and dataloader") + batch_size = 4 + export_dataset, collate_fn = get_dataset(export_dataset_mode, dataset_cfg) + export_loader = DataLoader(export_dataset, batch_size=batch_size, + num_workers=test_cfg.get("num_workers", 4), + shuffle=False, pin_memory=False, + collate_fn=collate_fn) + print("\t Successfully intialized dataset and dataloader.") + + # Initialize model and load the checkpoint + model = get_model(model_cfg, mode="test") + checkpoint = get_latest_checkpoint(args.resume_path, args.checkpoint_name) + model = restore_weights(model, checkpoint["model_state_dict"]) + model = model.cuda() + model.eval() + print("\t Successfully initialized model") + + # Start the export process + print("[Info] Start exporting predictions") + output_dataset_path = output_path + ".h5" + filename_idx = 0 + with h5py.File(output_dataset_path, "w", libver="latest", swmr=True) as f: + # Iterate through all the data in dataloader + for data in tqdm(export_loader, ascii=True): + # Fetch the data + junc_map = data["junction_map"] + heatmap = data["heatmap"] + valid_mask = data["valid_mask"] + input_images = data["image"].cuda() + + # Run the forward pass + with torch.no_grad(): + outputs = model(input_images) + + # Convert predictions + junc_np = convert_junc_predictions( + outputs["junctions"], model_cfg["grid_size"], + model_cfg["detection_thresh"], 300) + junc_map_np = junc_map.numpy().transpose(0, 2, 3, 1) + heatmap_np = softmax(outputs["heatmap"].detach(), + dim=1).cpu().numpy().transpose(0, 2, 3, 1) + heatmap_gt_np = heatmap.numpy().transpose(0, 2, 3, 1) + valid_mask_np = valid_mask.numpy().transpose(0, 2, 3, 1) + + # Data entries to save + current_batch_size = input_images.shape[0] + for batch_idx in range(current_batch_size): + output_data = { + "image": input_images.cpu().numpy().transpose(0, 2, 3, 
1)[batch_idx], + "junc_gt": junc_map_np[batch_idx], + "junc_pred": junc_np["junc_pred"][batch_idx], + "junc_pred_nms": junc_np["junc_pred_nms"][batch_idx].astype(np.float32), + "heatmap_gt": heatmap_gt_np[batch_idx], + "heatmap_pred": heatmap_np[batch_idx], + "valid_mask": valid_mask_np[batch_idx], + "junc_points": data["junctions"][batch_idx].numpy()[0].round().astype(np.int32), + "line_map": data["line_map"][batch_idx].numpy()[0].astype(np.int32) + } + + # Save data to h5 dataset + num_pad = math.ceil(math.log10(len(export_loader))) + 1 + output_key = get_padded_filename(num_pad, filename_idx) + f_group = f.create_group(output_key) + + # Store data + for key, output_data in output_data.items(): + f_group.create_dataset(key, data=output_data, + compression="gzip") + filename_idx += 1 + + +def export_homograpy_adaptation(args, dataset_cfg, model_cfg, output_path, + export_dataset_mode, device): + """ Export homography adaptation results. """ + # Check if the export_dataset_mode is supported + supported_modes = ["train", "test"] + if not export_dataset_mode in supported_modes: + raise ValueError( + "[Error] The specified export_dataset_mode is not supported.") + + # Get the test configuration + test_cfg = model_cfg["test"] + + # Get the homography adaptation configurations + homography_cfg = dataset_cfg.get("homography_adaptation", None) + if homography_cfg is None: + raise ValueError( + "[Error] Empty homography_adaptation entry in config.") + + # Create the dataset and dataloader based on the export_dataset_mode + print("\t Initializing dataset and dataloader") + batch_size = args.export_batch_size + + export_dataset, collate_fn = get_dataset(export_dataset_mode, dataset_cfg) + export_loader = DataLoader(export_dataset, batch_size=batch_size, + num_workers=test_cfg.get("num_workers", 4), + shuffle=False, pin_memory=False, + collate_fn=collate_fn) + print("\t Successfully intialized dataset and dataloader.") + + # Initialize model and load the checkpoint + model = 
def homography_adaptation(input_images, model, grid_size, homography_cfg):
    """ The homography adaptation process.

    For `num_iter` random homographies the images are warped, passed
    through the model, and the junction / heatmap probabilities are warped
    back and accumulated together with per-pixel visibility counts.

    Arguments:
        input_images: The images to be evaluated.
        model: The pytorch model in evaluation mode.
        grid_size: Grid size of the junction decoder.
        homography_cfg: Homography adaptation configurations. The keys
            read here are "num_iter", "valid_border_margin",
            "homographies" and "min_counts".
    Returns:
        Dict with "junc_probs_mean", "junc_probs_max", "junc_counts",
        "heatmap_probs_mean", "heatmap_probs_max" and "heatmap_counts".
    """
    # Get the device of the current model
    device = next(model.parameters()).device

    # Define some constants and placeholder
    batch_size, _, H, W = input_images.shape
    num_iter = homography_cfg["num_iter"]
    junc_probs = torch.zeros([batch_size, num_iter, H, W], device=device)
    junc_counts = torch.zeros([batch_size, 1, H, W], device=device)
    heatmap_probs = torch.zeros([batch_size, num_iter, H, W], device=device)
    heatmap_counts = torch.zeros([batch_size, 1, H, W], device=device)
    margin = homography_cfg["valid_border_margin"]
    # All-ones mask reused for every warp below (it is never modified).
    ones_mask = torch.ones([batch_size, 1, H, W], device=device)

    # Keep a config with no artifacts
    homography_cfg_no_artifacts = copy.copy(homography_cfg["homographies"])
    homography_cfg_no_artifacts["allow_artifacts"] = False

    for idx in range(num_iter):
        # The first iterations (idx <= num_iter // 5, roughly 20%) use
        # homographies without artifacts.
        if idx <= num_iter // 5:
            sampling_cfg = homography_cfg_no_artifacts
        else:
            sampling_cfg = homography_cfg["homographies"]
        H_mat_lst = [sample_homography([H, W], **sampling_cfg)[0][None]
                     for _ in range(batch_size)]

        H_mats = np.concatenate(H_mat_lst, axis=0)
        H_tensor = torch.tensor(H_mats, dtype=torch.float, device=device)
        H_inv_tensor = torch.inverse(H_tensor)

        # Perform the homography warp
        images_warped = warp_perspective(input_images, H_tensor, (H, W),
                                         flags="bilinear")

        # Warp the mask. The junction and heatmap masks come from the same
        # warp of the same all-ones tensor, so it is computed only once.
        masks_junc_warped = warp_perspective(
            ones_mask, H_tensor, (H, W), flags="nearest")
        masks_heatmap_warped = masks_junc_warped

        # Run the network forward pass
        with torch.no_grad():
            outputs = model(images_warped)

        # Unwarp and mask the junction prediction
        # (the last softmax channel is dropped before the pixel shuffle)
        junc_prob_warped = pixel_shuffle(softmax(
            outputs["junctions"], dim=1)[:, :-1, :, :], grid_size)
        junc_prob = warp_perspective(junc_prob_warped, H_inv_tensor,
                                     (H, W), flags="bilinear")

        # Create the out of boundary mask
        out_boundary_mask = warp_perspective(
            ones_mask, H_inv_tensor, (H, W), flags="nearest")
        out_boundary_mask = adjust_border(out_boundary_mask, device, margin)

        junc_prob = junc_prob * out_boundary_mask
        junc_count = warp_perspective(masks_junc_warped * out_boundary_mask,
                                      H_inv_tensor, (H, W), flags="nearest")

        # Unwarp the mask and heatmap prediction
        # Always fetch only one channel
        if outputs["heatmap"].shape[1] == 2:
            # Convert to single channel directly from here
            heatmap_prob_warped = softmax(outputs["heatmap"],
                                          dim=1)[:, 1:, :, :]
        else:
            heatmap_prob_warped = torch.sigmoid(outputs["heatmap"])

        heatmap_prob_warped = heatmap_prob_warped * masks_heatmap_warped
        heatmap_prob = warp_perspective(heatmap_prob_warped, H_inv_tensor,
                                        (H, W), flags="bilinear")
        heatmap_count = warp_perspective(masks_heatmap_warped, H_inv_tensor,
                                         (H, W), flags="nearest")

        # Record the results
        junc_probs[:, idx:idx+1, :, :] = junc_prob
        heatmap_probs[:, idx:idx+1, :, :] = heatmap_prob
        junc_counts += junc_count
        heatmap_counts += heatmap_count

    # Perform the accumulation operation
    if homography_cfg["min_counts"] > 0:
        min_counts = homography_cfg["min_counts"]
        junc_count_mask = (junc_counts < min_counts)
        heatmap_count_mask = (heatmap_counts < min_counts)
        junc_counts[junc_count_mask] = 0
        heatmap_counts[heatmap_count_mask] = 0
    else:
        # Empty masks must be torch tensors living on the same device as
        # the counts; a NumPy mask cannot be built from a CUDA tensor.
        junc_count_mask = torch.zeros_like(junc_counts, dtype=torch.bool)
        heatmap_count_mask = torch.zeros_like(heatmap_counts,
                                              dtype=torch.bool)

    # Compute the mean accumulation
    # NOTE: pixels whose count is zero divide by zero here; they are only
    # reset to 0 when they are part of the count mask (min_counts > 0).
    junc_probs_mean = torch.sum(junc_probs, dim=1, keepdim=True) / junc_counts
    junc_probs_mean[junc_count_mask] = 0.
    heatmap_probs_mean = (torch.sum(heatmap_probs, dim=1, keepdim=True)
                          / heatmap_counts)
    heatmap_probs_mean[heatmap_count_mask] = 0.

    # Compute the max accumulation
    junc_probs_max = torch.max(junc_probs, dim=1, keepdim=True)[0]
    junc_probs_max[junc_count_mask] = 0.
    heatmap_probs_max = torch.max(heatmap_probs, dim=1, keepdim=True)[0]
    heatmap_probs_max[heatmap_count_mask] = 0.

    return {"junc_probs_mean": junc_probs_mean,
            "junc_probs_max": junc_probs_max,
            "junc_counts": junc_counts,
            "heatmap_probs_mean": heatmap_probs_mean,
            "heatmap_probs_max": heatmap_probs_max,
            "heatmap_counts": heatmap_counts}


def adjust_border(input_masks, device, margin=3):
    """ Adjust the border of the counts and valid_mask.

    Each mask is eroded with an elliptical kernel of size
    (2 * margin, 2 * margin).

    Arguments:
        input_masks: Tensor whose axis 1 is a singleton channel axis.
        device: Device on which the returned tensor is created.
        margin: Erosion margin; a value <= 0 disables the erosion.
    Returns:
        The eroded masks, with the dtype and layout of `input_masks`.
    """
    # Nothing to erode: return a copy instead of asking OpenCV for a
    # zero-sized structuring element.
    if margin <= 0:
        return input_masks.detach().clone().to(device)

    # Convert the mask to numpy array
    dtype = input_masks.dtype
    masks_np = np.squeeze(input_masks.cpu().numpy(), axis=1)

    erosion_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                               (margin*2, margin*2))

    # Erode all the masks, restoring a leading batch axis on each of them
    output_mask_lst = [
        torch.tensor(cv2.erode(mask, erosion_kernel),
                     dtype=dtype, device=device)[None]
        for mask in masks_np]

    # Concat back along the batch dimension.
    output_masks = torch.cat(output_mask_lst, dim=0)
    return output_masks.unsqueeze(dim=1)
+""" +import os +import argparse +import cv2 +import numpy as np +import torch +from tqdm import tqdm + +from .experiment import load_config +from .model.line_matcher import LineMatcher + + +def export_descriptors(images_list, ckpt_path, config, device, extension, + output_folder, multiscale=False): + # Extract the image paths + with open(images_list, 'r') as f: + image_files = f.readlines() + image_files = [path.strip('\n') for path in image_files] + + # Initialize the line matcher + line_matcher = LineMatcher( + config["model_cfg"], ckpt_path, device, config["line_detector_cfg"], + config["line_matcher_cfg"], multiscale) + print("\t Successfully initialized model") + + # Run the inference on each image and write the output on disk + for img_path in tqdm(image_files): + img = cv2.imread(img_path, 0) + img = torch.tensor(img[None, None] / 255., dtype=torch.float, + device=device) + + # Run the line detection and description + ref_detection = line_matcher.line_detection(img) + ref_line_seg = ref_detection["line_segments"] + ref_descriptors = ref_detection["descriptor"][0].cpu().numpy() + + # Write the output on disk + img_name = os.path.splitext(os.path.basename(img_path))[0] + output_file = os.path.join(output_folder, img_name + extension) + np.savez_compressed(output_file, line_seg=ref_line_seg, + descriptors=ref_descriptors) + + +if __name__ == "__main__": + # Parse input arguments + parser = argparse.ArgumentParser() + parser.add_argument("--img_list", type=str, required=True, + help="List of input images in a text file.") + parser.add_argument("--output_folder", type=str, required=True, + help="Path to the output folder.") + parser.add_argument("--config", type=str, + default="config/export_line_features.yaml") + parser.add_argument("--checkpoint_path", type=str, + default="pretrained_models/sold2_wireframe.tar") + parser.add_argument("--multiscale", action="store_true", default=False) + parser.add_argument("--extension", type=str, default=None) + args = 
parser.parse_args() + + # Get the device + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + + # Get the model config, extension and checkpoint path + config = load_config(args.config) + ckpt_path = os.path.abspath(args.checkpoint_path) + extension = 'sold2' if args.extension is None else args.extension + extension = "." + extension + + export_descriptors(args.img_list, ckpt_path, config, device, extension, + args.output_folder, args.multiscale) diff --git a/third_party/SOLD2/sold2/misc/__init__.py b/third_party/SOLD2/sold2/misc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/SOLD2/sold2/misc/geometry_utils.py b/third_party/SOLD2/sold2/misc/geometry_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..50f0478062cd19ebac812bff62b6c3a3d5f124c2 --- /dev/null +++ b/third_party/SOLD2/sold2/misc/geometry_utils.py @@ -0,0 +1,81 @@ +import numpy as np +import torch + + +### Point-related utils + +# Warp a list of points using a homography +def warp_points(points, homography): + # Convert to homogeneous and in xy format + new_points = np.concatenate([points[..., [1, 0]], + np.ones_like(points[..., :1])], axis=-1) + # Warp + new_points = (homography @ new_points.T).T + # Convert back to inhomogeneous and hw format + new_points = new_points[..., [1, 0]] / new_points[..., 2:] + return new_points + + +# Mask out the points that are outside of img_size +def mask_points(points, img_size): + mask = ((points[..., 0] >= 0) + & (points[..., 0] < img_size[0]) + & (points[..., 1] >= 0) + & (points[..., 1] < img_size[1])) + return mask + + +# Convert a tensor [N, 2] or batched tensor [B, N, 2] of N keypoints into +# a grid in [-1, 1]² that can be used in torch.nn.functional.interpolate +def keypoints_to_grid(keypoints, img_size): + n_points = keypoints.size()[-2] + device = keypoints.device + grid_points = 
keypoints.float() * 2. / torch.tensor( + img_size, dtype=torch.float, device=device) - 1. + grid_points = grid_points[..., [1, 0]].view(-1, n_points, 1, 2) + return grid_points + + +# Return a 2D matrix indicating the local neighborhood of each point +# for a given threshold and two lists of corresponding keypoints +def get_dist_mask(kp0, kp1, valid_mask, dist_thresh): + b_size, n_points, _ = kp0.size() + dist_mask0 = torch.norm(kp0.unsqueeze(2) - kp0.unsqueeze(1), dim=-1) + dist_mask1 = torch.norm(kp1.unsqueeze(2) - kp1.unsqueeze(1), dim=-1) + dist_mask = torch.min(dist_mask0, dist_mask1) + dist_mask = dist_mask <= dist_thresh + dist_mask = dist_mask.repeat(1, 1, b_size).reshape(b_size * n_points, + b_size * n_points) + dist_mask = dist_mask[valid_mask, :][:, valid_mask] + return dist_mask + + +### Line-related utils + +# Sample n points along lines of shape (num_lines, 2, 2) +def sample_line_points(lines, n): + line_points_x = np.linspace(lines[:, 0, 0], lines[:, 1, 0], n, axis=-1) + line_points_y = np.linspace(lines[:, 0, 1], lines[:, 1, 1], n, axis=-1) + line_points = np.stack([line_points_x, line_points_y], axis=2) + return line_points + + +# Return a mask of the valid lines that are within a valid mask of an image +def mask_lines(lines, valid_mask): + h, w = valid_mask.shape + int_lines = np.clip(np.round(lines).astype(int), 0, [h - 1, w - 1]) + h_valid = valid_mask[int_lines[:, 0, 0], int_lines[:, 0, 1]] + w_valid = valid_mask[int_lines[:, 1, 0], int_lines[:, 1, 1]] + valid = h_valid & w_valid + return valid + + +# Return a 2D matrix indicating for each pair of points +# if they are on the same line or not +def get_common_line_mask(line_indices, valid_mask): + b_size, n_points = line_indices.shape + common_mask = line_indices[:, :, None] == line_indices[:, None, :] + common_mask = common_mask.repeat(1, 1, b_size).reshape(b_size * n_points, + b_size * n_points) + common_mask = common_mask[valid_mask, :][:, valid_mask] + return common_mask diff --git 
a/third_party/SOLD2/sold2/misc/train_utils.py b/third_party/SOLD2/sold2/misc/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d5ada35eea660df1f78b9f20d9bf7ed726eaee2c --- /dev/null +++ b/third_party/SOLD2/sold2/misc/train_utils.py @@ -0,0 +1,74 @@ +""" +This file contains some useful functions for train / val. +""" +import os +import numpy as np +import torch + + +################# +## image utils ## +################# +def convert_image(input_tensor, axis): + """ Convert single channel images to 3-channel images. """ + image_lst = [input_tensor for _ in range(3)] + outputs = np.concatenate(image_lst, axis) + return outputs + + +###################### +## checkpoint utils ## +###################### +def get_latest_checkpoint(checkpoint_root, checkpoint_name, + device=torch.device("cuda")): + """ Get the latest checkpoint or by filename. """ + # Load specific checkpoint + if checkpoint_name is not None: + checkpoint = torch.load( + os.path.join(checkpoint_root, checkpoint_name), + map_location=device) + # Load the latest checkpoint + else: + lastest_checkpoint = sorted(os.listdir(os.path.join( + checkpoint_root, "*.tar")))[-1] + checkpoint = torch.load(os.path.join( + checkpoint_root, lastest_checkpoint), map_location=device) + return checkpoint + + +def remove_old_checkpoints(checkpoint_root, max_ckpt=15): + """ Remove the outdated checkpoints. 
""" + # Get sorted list of checkpoints + checkpoint_list = sorted( + [_ for _ in os.listdir(os.path.join(checkpoint_root)) + if _.endswith(".tar")]) + + # Get the checkpoints to be removed + if len(checkpoint_list) > max_ckpt: + remove_list = checkpoint_list[:-max_ckpt] + for _ in remove_list: + full_name = os.path.join(checkpoint_root, _) + os.remove(full_name) + print("[Debug] Remove outdated checkpoint %s" % (full_name)) + + +def adapt_checkpoint(state_dict): + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith('module.'): + new_state_dict[k[7:]] = v + else: + new_state_dict[k] = v + return new_state_dict + + +################ +## HDF5 utils ## +################ +def parse_h5_data(h5_data): + """ Parse h5 dataset. """ + output_data = {} + for key in h5_data.keys(): + output_data[key] = np.array(h5_data[key]) + + return output_data diff --git a/third_party/SOLD2/sold2/misc/visualize_util.py b/third_party/SOLD2/sold2/misc/visualize_util.py new file mode 100644 index 0000000000000000000000000000000000000000..4aa46877f79724221b7caa423de6916acdc021f8 --- /dev/null +++ b/third_party/SOLD2/sold2/misc/visualize_util.py @@ -0,0 +1,526 @@ +""" Organize some frequently used visualization functions. """ +import cv2 +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import copy +import seaborn as sns + + +# Plot junctions onto the image (return a separate copy) +def plot_junctions(input_image, junctions, junc_size=3, color=None): + """ + input_image: can be 0~1 float or 0~255 uint8. + junctions: Nx2 or 2xN np array. + junc_size: the size of the plotted circles. + """ + # Create image copy + image = copy.copy(input_image) + # Make sure the image is converted to 255 uint8 + if image.dtype == np.uint8: + pass + # A float type image ranging from 0~1 + elif image.dtype in [np.float32, np.float64, np.float] and image.max() <= 2.: + image = (image * 255.).astype(np.uint8) + # A float type image ranging from 0.~255. 
+ elif image.dtype in [np.float32, np.float64, np.float] and image.mean() > 10.: + image = image.astype(np.uint8) + else: + raise ValueError("[Error] Unknown image data type. Expect 0~1 float or 0~255 uint8.") + + # Check whether the image is single channel + if len(image.shape) == 2 or ((len(image.shape) == 3) and (image.shape[-1] == 1)): + # Squeeze to H*W first + image = image.squeeze() + + # Stack to channle 3 + image = np.concatenate([image[..., None] for _ in range(3)], axis=-1) + + # Junction dimensions should be N*2 + if not len(junctions.shape) == 2: + raise ValueError("[Error] junctions should be 2-dim array.") + + # Always convert to N*2 + if junctions.shape[-1] != 2: + if junctions.shape[0] == 2: + junctions = junctions.T + else: + raise ValueError("[Error] At least one of the two dims should be 2.") + + # Round and convert junctions to int (and check the boundary) + H, W = image.shape[:2] + junctions = (np.round(junctions)).astype(np.int) + junctions[junctions < 0] = 0 + junctions[junctions[:, 0] >= H, 0] = H-1 # (first dim) max bounded by H-1 + junctions[junctions[:, 1] >= W, 1] = W-1 # (second dim) max bounded by W-1 + + # Iterate through all the junctions + num_junc = junctions.shape[0] + if color is None: + color = (0, 255., 0) + for idx in range(num_junc): + # Fetch one junction + junc = junctions[idx, :] + cv2.circle(image, tuple(np.flip(junc)), radius=junc_size, + color=color, thickness=3) + + return image + + +# Plot line segements given junctions and line adjecent map +def plot_line_segments(input_image, junctions, line_map, junc_size=3, + color=(0, 255., 0), line_width=1, plot_survived_junc=True): + """ + input_image: can be 0~1 float or 0~255 uint8. + junctions: Nx2 or 2xN np array. + line_map: NxN np array + junc_size: the size of the plotted circles. + color: color of the line segments (can be string "random") + line_width: width of the drawn segments. + plot_survived_junc: whether we only plot the survived junctions. 
+ """ + # Create image copy + image = copy.copy(input_image) + # Make sure the image is converted to 255 uint8 + if image.dtype == np.uint8: + pass + # A float type image ranging from 0~1 + elif image.dtype in [np.float32, np.float64, np.float] and image.max() <= 2.: + image = (image * 255.).astype(np.uint8) + # A float type image ranging from 0.~255. + elif image.dtype in [np.float32, np.float64, np.float] and image.mean() > 10.: + image = image.astype(np.uint8) + else: + raise ValueError("[Error] Unknown image data type. Expect 0~1 float or 0~255 uint8.") + + # Check whether the image is single channel + if len(image.shape) == 2 or ((len(image.shape) == 3) and (image.shape[-1] == 1)): + # Squeeze to H*W first + image = image.squeeze() + + # Stack to channle 3 + image = np.concatenate([image[..., None] for _ in range(3)], axis=-1) + + # Junction dimensions should be 2 + if not len(junctions.shape) == 2: + raise ValueError("[Error] junctions should be 2-dim array.") + + # Always convert to N*2 + if junctions.shape[-1] != 2: + if junctions.shape[0] == 2: + junctions = junctions.T + else: + raise ValueError("[Error] At least one of the two dims should be 2.") + + # line_map dimension should be 2 + if not len(line_map.shape) == 2: + raise ValueError("[Error] line_map should be 2-dim array.") + + # Color should be "random" or a list or tuple with length 3 + if color != "random": + if not (isinstance(color, tuple) or isinstance(color, list)): + raise ValueError("[Error] color should have type list or tuple.") + else: + if len(color) != 3: + raise ValueError("[Error] color should be a list or tuple with length 3.") + + # Make a copy of the line_map + line_map_tmp = copy.copy(line_map) + + # Parse line_map back to segment pairs + segments = np.zeros([0, 4]) + for idx in range(junctions.shape[0]): + # if no connectivity, just skip it + if line_map_tmp[idx, :].sum() == 0: + continue + # record the line segment + else: + for idx2 in np.where(line_map_tmp[idx, :] == 1)[0]: + 
p1 = np.flip(junctions[idx, :]) # Convert to xy format + p2 = np.flip(junctions[idx2, :]) # Convert to xy format + segments = np.concatenate((segments, np.array([p1[0], p1[1], p2[0], p2[1]])[None, ...]), axis=0) + + # Update line_map + line_map_tmp[idx, idx2] = 0 + line_map_tmp[idx2, idx] = 0 + + # Draw segment pairs + for idx in range(segments.shape[0]): + seg = np.round(segments[idx, :]).astype(np.int) + # Decide the color + if color != "random": + color = tuple(color) + else: + color = tuple(np.random.rand(3,)) + cv2.line(image, tuple(seg[:2]), tuple(seg[2:]), color=color, thickness=line_width) + + # Also draw the junctions + if not plot_survived_junc: + num_junc = junctions.shape[0] + for idx in range(num_junc): + # Fetch one junction + junc = junctions[idx, :] + cv2.circle(image, tuple(np.flip(junc)), radius=junc_size, + color=(0, 255., 0), thickness=3) + # Only plot the junctions which are part of a line segment + else: + for idx in range(segments.shape[0]): + seg = np.round(segments[idx, :]).astype(np.int) # Already in HW format. + cv2.circle(image, tuple(seg[:2]), radius=junc_size, + color=(0, 255., 0), thickness=3) + cv2.circle(image, tuple(seg[2:]), radius=junc_size, + color=(0, 255., 0), thickness=3) + + return image + + +# Plot line segments given Nx4 or Nx2x2 line segments +def plot_line_segments_from_segments(input_image, line_segments, junc_size=3, + color=(0, 255., 0), line_width=1): + # Create image copy + image = copy.copy(input_image) + # Make sure the image is converted to 255 uint8 + if image.dtype == np.uint8: + pass + # A float type image ranging from 0~1 + elif image.dtype in [np.float32, np.float64, np.float] and image.max() <= 2.: + image = (image * 255.).astype(np.uint8) + # A float type image ranging from 0.~255. + elif image.dtype in [np.float32, np.float64, np.float] and image.mean() > 10.: + image = image.astype(np.uint8) + else: + raise ValueError("[Error] Unknown image data type. 
Expect 0~1 float or 0~255 uint8.") + + # Check whether the image is single channel + if len(image.shape) == 2 or ((len(image.shape) == 3) and (image.shape[-1] == 1)): + # Squeeze to H*W first + image = image.squeeze() + + # Stack to channle 3 + image = np.concatenate([image[..., None] for _ in range(3)], axis=-1) + + # Check the if line_segments are in (1) Nx4, or (2) Nx2x2. + H, W, _ = image.shape + # (1) Nx4 format + if len(line_segments.shape) == 2 and line_segments.shape[-1] == 4: + # Round to int32 + line_segments = line_segments.astype(np.int32) + + # Clip H dimension + line_segments[:, 0] = np.clip(line_segments[:, 0], a_min=0, a_max=H-1) + line_segments[:, 2] = np.clip(line_segments[:, 2], a_min=0, a_max=H-1) + + # Clip W dimension + line_segments[:, 1] = np.clip(line_segments[:, 1], a_min=0, a_max=W-1) + line_segments[:, 3] = np.clip(line_segments[:, 3], a_min=0, a_max=W-1) + + # Convert to Nx2x2 format + line_segments = np.concatenate( + [np.expand_dims(line_segments[:, :2], axis=1), + np.expand_dims(line_segments[:, 2:], axis=1)], + axis=1 + ) + + # (2) Nx2x2 format + elif len(line_segments.shape) == 3 and line_segments.shape[-1] == 2: + # Round to int32 + line_segments = line_segments.astype(np.int32) + + # Clip H dimension + line_segments[:, :, 0] = np.clip(line_segments[:, :, 0], a_min=0, a_max=H-1) + line_segments[:, :, 1] = np.clip(line_segments[:, :, 1], a_min=0, a_max=W-1) + + else: + raise ValueError("[Error] line_segments should be either Nx4 or Nx2x2 in HW format.") + + # Draw segment pairs (all segments should be in HW format) + image = image.copy() + for idx in range(line_segments.shape[0]): + seg = np.round(line_segments[idx, :, :]).astype(np.int32) + # Decide the color + if color != "random": + color = tuple(color) + else: + color = tuple(np.random.rand(3,)) + cv2.line(image, tuple(np.flip(seg[0, :])), + tuple(np.flip(seg[1, :])), + color=color, thickness=line_width) + + # Also draw the junctions + cv2.circle(image, tuple(np.flip(seg[0, 
:])), radius=junc_size, color=(0, 255., 0), thickness=3) + cv2.circle(image, tuple(np.flip(seg[1, :])), radius=junc_size, color=(0, 255., 0), thickness=3) + + return image + + +# Additional functions to visualize multiple images at the same time, +# e.g. for line matching +def plot_images(imgs, titles=None, cmaps='gray', dpi=100, size=6, pad=.5): + """Plot a set of images horizontally. + Args: + imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W). + titles: a list of strings, as titles for each image. + cmaps: colormaps for monochrome images. + """ + n = len(imgs) + if not isinstance(cmaps, (list, tuple)): + cmaps = [cmaps] * n + figsize = (size*n, size*3/4) if size is not None else None + fig, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi) + if n == 1: + ax = [ax] + for i in range(n): + ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) + ax[i].get_yaxis().set_ticks([]) + ax[i].get_xaxis().set_ticks([]) + ax[i].set_axis_off() + for spine in ax[i].spines.values(): # remove frame + spine.set_visible(False) + if titles: + ax[i].set_title(titles[i]) + fig.tight_layout(pad=pad) + + +def plot_keypoints(kpts, colors='lime', ps=4): + """Plot keypoints for existing images. + Args: + kpts: list of ndarrays of size (N, 2). + colors: string, or list of list of tuples (one for each keypoints). + ps: size of the keypoints as float. + """ + if not isinstance(colors, list): + colors = [colors] * len(kpts) + axes = plt.gcf().axes + for a, k, c in zip(axes, kpts, colors): + a.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0) + + +def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.): + """Plot matches for a pair of existing images. + Args: + kpts0, kpts1: corresponding keypoints of size (N, 2). + color: color of each match, string or RGB tuple. Random if not given. + lw: width of the lines. + ps: size of the end points (no endpoint if ps=0) + indices: indices of the images to draw the matches on. + a: alpha opacity of the match lines. 
+ """ + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + ax0, ax1 = ax[indices[0]], ax[indices[1]] + fig.canvas.draw() + + assert len(kpts0) == len(kpts1) + if color is None: + color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist() + elif len(color) > 0 and not isinstance(color[0], (tuple, list)): + color = [color] * len(kpts0) + + if lw > 0: + # transform the points into the figure coordinate system + transFigure = fig.transFigure.inverted() + fkpts0 = transFigure.transform(ax0.transData.transform(kpts0)) + fkpts1 = transFigure.transform(ax1.transData.transform(kpts1)) + fig.lines += [matplotlib.lines.Line2D( + (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), + zorder=1, transform=fig.transFigure, c=color[i], linewidth=lw, + alpha=a) + for i in range(len(kpts0))] + + # freeze the axes to prevent the transform to change + ax0.autoscale(enable=False) + ax1.autoscale(enable=False) + + if ps > 0: + ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps, zorder=2) + ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps, zorder=2) + + +def plot_lines(lines, line_colors='orange', point_colors='cyan', + ps=4, lw=2, indices=(0, 1)): + """Plot lines and endpoints for existing images. + Args: + lines: list of ndarrays of size (N, 2, 2). + colors: string, or list of list of tuples (one for each keypoints). + ps: size of the keypoints as float pixels. + lw: line width as float pixels. + indices: indices of the images to draw the matches on. 
+ """ + if not isinstance(line_colors, list): + line_colors = [line_colors] * len(lines) + if not isinstance(point_colors, list): + point_colors = [point_colors] * len(lines) + + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + axes = [ax[i] for i in indices] + fig.canvas.draw() + + # Plot the lines and junctions + for a, l, lc, pc in zip(axes, lines, line_colors, point_colors): + for i in range(len(l)): + line = matplotlib.lines.Line2D((l[i, 0, 0], l[i, 1, 0]), + (l[i, 0, 1], l[i, 1, 1]), + zorder=1, c=lc, linewidth=lw) + a.add_line(line) + pts = l.reshape(-1, 2) + a.scatter(pts[:, 0], pts[:, 1], + c=pc, s=ps, linewidths=0, zorder=2) + + +def plot_line_matches(kpts0, kpts1, color=None, lw=1.5, indices=(0, 1), a=1.): + """Plot matches for a pair of existing images, parametrized by their middle point. + Args: + kpts0, kpts1: corresponding middle points of the lines of size (N, 2). + color: color of each match, string or RGB tuple. Random if not given. + lw: width of the lines. + indices: indices of the images to draw the matches on. + a: alpha opacity of the match lines. 
+ """ + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + ax0, ax1 = ax[indices[0]], ax[indices[1]] + fig.canvas.draw() + + assert len(kpts0) == len(kpts1) + if color is None: + color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist() + elif len(color) > 0 and not isinstance(color[0], (tuple, list)): + color = [color] * len(kpts0) + + if lw > 0: + # transform the points into the figure coordinate system + transFigure = fig.transFigure.inverted() + fkpts0 = transFigure.transform(ax0.transData.transform(kpts0)) + fkpts1 = transFigure.transform(ax1.transData.transform(kpts1)) + fig.lines += [matplotlib.lines.Line2D( + (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), + zorder=1, transform=fig.transFigure, c=color[i], linewidth=lw, + alpha=a) + for i in range(len(kpts0))] + + # freeze the axes to prevent the transform to change + ax0.autoscale(enable=False) + ax1.autoscale(enable=False) + + +def plot_color_line_matches(lines, correct_matches=None, + lw=2, indices=(0, 1)): + """Plot line matches for existing images with multiple colors. + Args: + lines: list of ndarrays of size (N, 2, 2). + correct_matches: bool array of size (N,) indicating correct matches. + lw: line width as float pixels. + indices: indices of the images to draw the matches on. 
+ """ + n_lines = len(lines[0]) + colors = sns.color_palette('husl', n_colors=n_lines) + np.random.shuffle(colors) + alphas = np.ones(n_lines) + # If correct_matches is not None, display wrong matches with a low alpha + if correct_matches is not None: + alphas[~np.array(correct_matches)] = 0.2 + + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + axes = [ax[i] for i in indices] + fig.canvas.draw() + + # Plot the lines + for a, l in zip(axes, lines): + # Transform the points into the figure coordinate system + transFigure = fig.transFigure.inverted() + endpoint0 = transFigure.transform(a.transData.transform(l[:, 0])) + endpoint1 = transFigure.transform(a.transData.transform(l[:, 1])) + fig.lines += [matplotlib.lines.Line2D( + (endpoint0[i, 0], endpoint1[i, 0]), + (endpoint0[i, 1], endpoint1[i, 1]), + zorder=1, transform=fig.transFigure, c=colors[i], + alpha=alphas[i], linewidth=lw) for i in range(n_lines)] + + +def plot_color_lines(lines, correct_matches, wrong_matches, + lw=2, indices=(0, 1)): + """Plot line matches for existing images with multiple colors: + green for correct matches, red for wrong ones, and blue for the rest. + Args: + lines: list of ndarrays of size (N, 2, 2). + correct_matches: list of bool arrays of size N with correct matches. + wrong_matches: list of bool arrays of size (N,) with correct matches. + lw: line width as float pixels. + indices: indices of the images to draw the matches on. 
+ """ + # palette = sns.color_palette() + palette = sns.color_palette("hls", 8) + blue = palette[5] # palette[0] + red = palette[0] # palette[3] + green = palette[2] # palette[2] + colors = [np.array([blue] * len(l)) for l in lines] + for i, c in enumerate(colors): + c[np.array(correct_matches[i])] = green + c[np.array(wrong_matches[i])] = red + + fig = plt.gcf() + ax = fig.axes + assert len(ax) > max(indices) + axes = [ax[i] for i in indices] + fig.canvas.draw() + + # Plot the lines + for a, l, c in zip(axes, lines, colors): + # Transform the points into the figure coordinate system + transFigure = fig.transFigure.inverted() + endpoint0 = transFigure.transform(a.transData.transform(l[:, 0])) + endpoint1 = transFigure.transform(a.transData.transform(l[:, 1])) + fig.lines += [matplotlib.lines.Line2D( + (endpoint0[i, 0], endpoint1[i, 0]), + (endpoint0[i, 1], endpoint1[i, 1]), + zorder=1, transform=fig.transFigure, c=c[i], + linewidth=lw) for i in range(len(l))] + + +def plot_subsegment_matches(lines, subsegments, lw=2, indices=(0, 1)): + """ Plot line matches for existing images with multiple colors and + highlight the actually matched subsegments. + Args: + lines: list of ndarrays of size (N, 2, 2). + subsegments: list of ndarrays of size (N, 2, 2). + lw: line width as float pixels. + indices: indices of the images to draw the matches on. 
class LineSegmentDetectionModule(object):
    """ Module extracting line segments from junctions and line heatmaps.

    Junctions are handled as (h, w) coordinates, i.e. in the same order
    that is used to index the heatmap.
    """
    def __init__(
        self, detect_thresh, num_samples=64, sampling_method="local_max",
        inlier_thresh=0., heatmap_low_thresh=0.15, heatmap_high_thresh=0.2,
        max_local_patch_radius=3, lambda_radius=2.,
        use_candidate_suppression=False, nms_dist_tolerance=3.,
        use_heatmap_refinement=False, heatmap_refine_cfg=None,
        use_junction_refinement=False, junction_refine_cfg=None):
        """
        Parameters:
            detect_thresh: The probability threshold for mean activation (0. ~ 1.)
            num_samples: Number of sampling locations along the line segments.
            sampling_method: Sampling method on locations ("bilinear" or "local_max").
            inlier_thresh: The min inlier ratio to satisfy (0. ~ 1.) => 0. means no threshold.
            heatmap_low_thresh: The lowest threshold for the pixel to be considered as candidate in junction recovery.
            heatmap_high_thresh: The higher threshold for NMS in junction recovery.
            max_local_patch_radius: The max patch to be considered in local maximum search.
            lambda_radius: The lambda factor in linear local maximum search formulation
            use_candidate_suppression: Apply candidate suppression to break long segments into short sub-segments.
            nms_dist_tolerance: The distance tolerance for nms. Decide whether the junctions are on the line.
            use_heatmap_refinement: Use heatmap refinement method or not.
            heatmap_refine_cfg: The configs for heatmap refinement methods.
            use_junction_refinement: Use junction refinement method or not.
            junction_refine_cfg: The configs for junction refinement methods.
        Raises:
            ValueError: if a refinement is enabled without its config.
        """
        # Line detection parameters
        self.detect_thresh = detect_thresh

        # Line sampling parameters
        self.num_samples = num_samples
        self.sampling_method = sampling_method
        self.inlier_thresh = inlier_thresh
        self.local_patch_radius = max_local_patch_radius
        self.lambda_radius = lambda_radius

        # Detecting junctions on the boundary parameters
        self.low_thresh = heatmap_low_thresh
        self.high_thresh = heatmap_high_thresh

        # Pre-compute the linspace sampler
        self.sampler = np.linspace(0, 1, self.num_samples)
        self.torch_sampler = torch.linspace(0, 1, self.num_samples)

        # Long line segment suppression configuration
        self.use_candidate_suppression = use_candidate_suppression
        self.nms_dist_tolerance = nms_dist_tolerance

        # Heatmap refinement configuration
        self.use_heatmap_refinement = use_heatmap_refinement
        self.heatmap_refine_cfg = heatmap_refine_cfg
        if self.use_heatmap_refinement and self.heatmap_refine_cfg is None:
            raise ValueError("[Error] Missing heatmap refinement config.")

        # Junction refinement configuration
        self.use_junction_refinement = use_junction_refinement
        self.junction_refine_cfg = junction_refine_cfg
        if self.use_junction_refinement and self.junction_refine_cfg is None:
            raise ValueError("[Error] Missing junction refinement config.")

    def convert_inputs(self, inputs, device):
        """ Convert inputs to a float32 torch tensor located on `device`.

        Raises:
            ValueError: if `inputs` is neither a numpy array nor a tensor.
        """
        if isinstance(inputs, np.ndarray):
            return torch.tensor(inputs, dtype=torch.float32, device=device)
        if isinstance(inputs, torch.Tensor):
            return inputs.to(torch.float32).to(device)
        raise ValueError(
            "[Error] Inputs must either be torch tensor or numpy ndarray.")

    def detect(self, junctions, heatmap, device=torch.device("cpu")):
        """ Main function performing line segment detection.

        Parameters:
            junctions: N x 2 array / tensor of junctions in (h, w) format.
            heatmap: H x W line heatmap (array or tensor).
            device: torch device on which the computation is performed.
        Returns:
            line_map_pred: symmetric junction adjacency map
                           (1 = detected segment between the two junctions).
            junctions: the junctions as a tensor (refined and re-indexed if
                       junction refinement is enabled).
            heatmap: the (optionally refined) heatmap as a tensor.
        Raises:
            ValueError: if the sampling method is unknown.
        """
        # Convert inputs to torch tensor
        junctions = self.convert_inputs(junctions, device=device)
        heatmap = self.convert_inputs(heatmap, device=device)

        # Perform the heatmap refinement
        if self.use_heatmap_refinement:
            refine_cfg = self.heatmap_refine_cfg
            if refine_cfg["mode"] == "global":
                heatmap = self.refine_heatmap(
                    heatmap, refine_cfg["ratio"], refine_cfg["valid_thresh"])
            elif refine_cfg["mode"] == "local":
                heatmap = self.refine_heatmap_local(
                    heatmap, refine_cfg["num_blocks"],
                    refine_cfg["overlap_ratio"], refine_cfg["ratio"],
                    refine_cfg["valid_thresh"])

        # Initialize empty line map
        num_junctions = junctions.shape[0]
        line_map_pred = torch.zeros([num_junctions, num_junctions],
                                    device=device, dtype=torch.int32)

        # Stop if there are not enough junctions
        if num_junctions < 2:
            return line_map_pred, junctions, heatmap

        # Generate the candidate map (every unordered pair of junctions)
        candidate_map = torch.triu(torch.ones(
            [num_junctions, num_junctions], device=device, dtype=torch.int32),
            diagonal=1)

        # Fetch the image boundary
        if len(heatmap.shape) > 2:
            H, W, _ = heatmap.shape
        else:
            H, W = heatmap.shape

        # Optionally perform candidate filtering
        if self.use_candidate_suppression:
            candidate_map = self.candidate_suppression(junctions,
                                                       candidate_map)

        # Fetch the candidates as an N x 2 list of junction indices
        candidate_index_map = torch.stack(torch.where(candidate_map), dim=-1)

        # Get the corresponding start and end junctions
        candidate_junc_start = junctions[candidate_index_map[:, 0], :]
        candidate_junc_end = junctions[candidate_index_map[:, 1], :]

        # Get the sampling locations (N x num_samples)
        sampler = self.torch_sampler.to(device)[None, ...]
        cand_samples_h = (candidate_junc_start[:, 0:1] * sampler
                          + candidate_junc_end[:, 0:1] * (1 - sampler))
        cand_samples_w = (candidate_junc_start[:, 1:2] * sampler
                          + candidate_junc_end[:, 1:2] * (1 - sampler))

        # Clip to image boundary
        cand_h = torch.clamp(cand_samples_h, min=0, max=H-1)
        cand_w = torch.clamp(cand_samples_w, min=0, max=W-1)

        # Local maximum search
        if self.sampling_method == "local_max":
            # Compute normalized segment lengths
            segments_length = torch.sqrt(torch.sum(
                (candidate_junc_start.to(torch.float32) -
                 candidate_junc_end.to(torch.float32)) ** 2, dim=-1))
            normalized_seg_length = (segments_length
                                     / (((H ** 2) + (W ** 2)) ** 0.5))

            # Perform local max search, in groups to bound the peak memory
            num_cand = cand_h.shape[0]
            group_size = 10000
            if num_cand > group_size:
                sampled_feat = torch.cat([
                    self.detect_local_max(
                        heatmap, cand_h[start:start + group_size],
                        cand_w[start:start + group_size], H, W,
                        normalized_seg_length[start:start + group_size],
                        device)
                    for start in range(0, num_cand, group_size)], dim=0)
            else:
                sampled_feat = self.detect_local_max(
                    heatmap, cand_h, cand_w, H, W,
                    normalized_seg_length, device)
        # Bilinear sampling
        elif self.sampling_method == "bilinear":
            sampled_feat = self.detect_bilinear(
                heatmap, cand_h, cand_w, H, W, device)
        else:
            raise ValueError("[Error] Unknown sampling method.")

        # [Simple threshold detection]
        # detection_results is a mask over all candidates
        detection_results = (torch.mean(sampled_feat, dim=-1)
                             > self.detect_thresh)

        # [Inlier threshold detection]
        if self.inlier_thresh > 0.:
            inlier_ratio = torch.sum(
                sampled_feat > self.detect_thresh,
                dim=-1).to(torch.float32) / self.num_samples
            detection_results_inlier = inlier_ratio >= self.inlier_thresh
            detection_results = detection_results & detection_results_inlier

        # Convert detection results back to line_map_pred
        detected_junc_indexes = candidate_index_map[detection_results, :]
        line_map_pred[detected_junc_indexes[:, 0],
                      detected_junc_indexes[:, 1]] = 1
        line_map_pred[detected_junc_indexes[:, 1],
                      detected_junc_indexes[:, 0]] = 1

        # Perform junction refinement
        if self.use_junction_refinement and len(detected_junc_indexes) > 0:
            junctions, line_map_pred = self.refine_junction_perturb(
                junctions, line_map_pred, heatmap, H, W, device)

        return line_map_pred, junctions, heatmap

    def refine_heatmap(self, heatmap, ratio=0.2, valid_thresh=1e-2):
        """ Global heatmap refinement method.

        The heatmap is divided by the mean of the top `ratio` fraction of
        the values above `valid_thresh`, then clamped to [0, 1].
        """
        valid_values = heatmap[heatmap > valid_thresh]
        num_valid = valid_values.shape[0]
        if num_valid == 0:
            # Nothing to normalize with: the mean of an empty selection is
            # NaN and would turn the whole heatmap into NaN.
            return torch.clamp(heatmap, min=0., max=1.)

        # Keep between 1 and num_valid values
        top_len = min(max(math.ceil(num_valid * ratio), 1), num_valid)
        reference = torch.mean(torch.topk(valid_values, top_len)[0])
        return torch.clamp(heatmap / reference, min=0., max=1.)

    def refine_heatmap_local(self, heatmap, num_blocks=5, overlap_ratio=0.5,
                             ratio=0.2, valid_thresh=2e-3):
        """ Local heatmap refinement method.

        The heatmap is split into num_blocks x num_blocks overlapping
        blocks, each block is refined with `refine_heatmap` and the
        overlapping results are averaged.
        """
        # Get the shape of the heatmap
        H, W = heatmap.shape
        increase_ratio = 1 - overlap_ratio
        h_block = round(H / (1 + (num_blocks - 1) * increase_ratio))
        w_block = round(W / (1 + (num_blocks - 1) * increase_ratio))

        count_map = torch.zeros(heatmap.shape, dtype=torch.int,
                                device=heatmap.device)
        heatmap_output = torch.zeros(heatmap.shape, dtype=torch.float,
                                     device=heatmap.device)
        # Iterate through each block
        for h_idx in range(num_blocks):
            for w_idx in range(num_blocks):
                # Fetch the heatmap (the last block extends to the border)
                h_start = round(h_idx * h_block * increase_ratio)
                w_start = round(w_idx * w_block * increase_ratio)
                h_end = h_start + h_block if h_idx < num_blocks - 1 else H
                w_end = w_start + w_block if w_idx < num_blocks - 1 else W

                subheatmap = heatmap[h_start:h_end, w_start:w_end]
                if subheatmap.max() > valid_thresh:
                    subheatmap = self.refine_heatmap(
                        subheatmap, ratio, valid_thresh=valid_thresh)

                # Aggregate it to the final heatmap
                heatmap_output[h_start:h_end, w_start:w_end] += subheatmap
                count_map[h_start:h_end, w_start:w_end] += 1
        heatmap_output = torch.clamp(heatmap_output / count_map,
                                     max=1., min=0.)

        return heatmap_output

    def candidate_suppression(self, junctions, candidate_map):
        """ Suppress overlapping long lines in the candidate segments.

        A candidate is removed when another junction lies on it, i.e. the
        junction projects inside the segment and is closer to it than
        `nms_dist_tolerance`. `candidate_map` is modified in place and
        returned.
        """
        # Define the distance tolerance
        dist_tolerance = self.nms_dist_tolerance

        # Compute distance between junction pairs
        # (num_junc x 1 x 2) - (1 x num_junc x 2) => num_junc x num_junc map
        line_dist_map = torch.sum((torch.unsqueeze(junctions, dim=1)
                                   - junctions[None, ...]) ** 2, dim=-1) ** 0.5

        # Fetch all the "detected lines"
        seg_indexes = torch.where(torch.triu(candidate_map, diagonal=1))
        start_point_idxs = seg_indexes[0]
        end_point_idxs = seg_indexes[1]
        start_points = junctions[start_point_idxs, :]
        end_points = junctions[end_point_idxs, :]

        # Fetch corresponding entries
        line_dists = line_dist_map[start_point_idxs, end_point_idxs]

        # Unit direction of every candidate segment
        dir_vecs = ((end_points - start_points)
                    / torch.norm(end_points - start_points,
                                 dim=-1)[..., None])
        # Vectors from every segment start to every junction
        cand_vecs = junctions[None, ...] - start_points.unsqueeze(dim=1)
        cand_vecs_norm = torch.norm(cand_vecs, dim=-1)
        # Length of the projection on the segment: num_segs x num_junc x 1
        proj_len = torch.einsum('bij,bjk->bik', cand_vecs,
                                dir_vecs[..., None])
        # Check whether they are projected directly onto the segment
        proj = proj_len / line_dists[..., None, None]
        proj_mask = (proj >= 0) & (proj <= 1)
        # Get the orthogonal distance. The cosine is clamped because float
        # rounding can push it slightly outside [-1, 1] for collinear
        # junctions, which would make acos return NaN and hide exactly the
        # junctions lying on the segment.
        cos_angles = torch.clamp(proj_len / cand_vecs_norm[..., None],
                                 min=-1., max=1.)
        cand_dists = (cand_vecs_norm[..., None]
                      * torch.sin(torch.acos(cos_angles)))
        junc_dist_mask = cand_dists <= dist_tolerance
        junc_mask = junc_dist_mask & proj_mask

        # Do not count the two endpoints of the segment itself
        num_segs = start_point_idxs.shape[0]
        seg_range = torch.arange(0, num_segs, device=junctions.device)
        junc_counts = torch.sum(junc_mask, dim=[1, 2])
        junc_counts -= junc_mask[..., 0][seg_range,
                                         start_point_idxs].to(torch.int)
        junc_counts -= junc_mask[..., 0][seg_range,
                                         end_point_idxs].to(torch.int)

        # Get the invalid candidate mask
        final_mask = junc_counts > 0
        candidate_map[start_point_idxs[final_mask],
                      end_point_idxs[final_mask]] = 0

        return candidate_map

    def refine_junction_perturb(self, junctions, line_map_pred,
                                heatmap, H, W, device):
        """ Refine the line endpoints in a similar way as in LSD.

        Every detected segment is replaced by the perturbed version of its
        two endpoints maximizing the mean bilinear heatmap response.

        Returns:
            junctions_new: the unique refined junctions.
            line_map_new: the line map indexed by `junctions_new`.
        """
        # Fetch refinement parameters
        junction_refine_cfg = self.junction_refine_cfg
        num_perturbs = junction_refine_cfg["num_perturbs"]
        perturb_interval = junction_refine_cfg["perturb_interval"]
        side_perturbs = (num_perturbs - 1) // 2

        # All the combinations of offsets applied to the 4 coordinates of a
        # segment. cartesian_prod enumerates them in the same order as a
        # flattened "ij" meshgrid, without the meshgrid deprecation warning.
        perturb_vec = torch.arange(
            start=-perturb_interval*side_perturbs,
            end=perturb_interval*(side_perturbs+1),
            step=perturb_interval, device=device)
        perturb_tensor_flat = torch.cartesian_prod(
            perturb_vec, perturb_vec, perturb_vec, perturb_vec
        ).reshape(-1, 2, 2)

        # Fetch all the detected lines
        detected_seg_indexes = torch.where(
            torch.triu(line_map_pred, diagonal=1))
        start_points = junctions[detected_seg_indexes[0], :]
        end_points = junctions[detected_seg_indexes[1], :]

        # num_segments x 2 x 2
        line_segments = torch.cat([start_points.unsqueeze(dim=1),
                                   end_points.unsqueeze(dim=1)], dim=1)

        # num_segments x num_perturbations x 2 x 2
        line_segment_candidates = (line_segments.unsqueeze(dim=1)
                                   + perturb_tensor_flat[None, ...])
        # Clip the boundaries
        line_segment_candidates[..., 0] = torch.clamp(
            line_segment_candidates[..., 0], min=0, max=H - 1)
        line_segment_candidates[..., 1] = torch.clamp(
            line_segment_candidates[..., 1], min=0, max=W - 1)

        # Iterate through all the segments (one at a time: the number of
        # perturbations grows as num_perturbs ** 4)
        sampler = self.torch_sampler.to(device)[None, ...]
        refined_segment_lst = []
        num_segments = line_segments.shape[0]
        for idx in range(num_segments):
            segment = line_segment_candidates[idx, ...]
            # Get the corresponding start and end junctions
            candidate_junc_start = segment[:, 0, :]
            candidate_junc_end = segment[:, 1, :]

            # Get the sampling locations (N x num_samples)
            cand_samples_h = (candidate_junc_start[:, 0:1] * sampler
                              + candidate_junc_end[:, 0:1] * (1 - sampler))
            cand_samples_w = (candidate_junc_start[:, 1:2] * sampler
                              + candidate_junc_end[:, 1:2] * (1 - sampler))

            # Clip to image boundary
            cand_h = torch.clamp(cand_samples_h, min=0, max=H - 1)
            cand_w = torch.clamp(cand_samples_w, min=0, max=W - 1)

            # Keep the perturbation with the best mean response
            segment_feat = self.detect_bilinear(
                heatmap, cand_h, cand_w, H, W, device)
            segment_results = torch.mean(segment_feat, dim=-1)
            max_idx = torch.argmax(segment_results)
            refined_segment_lst.append(segment[max_idx, ...][None, ...])

        # Concatenate back to segments
        refined_segments = torch.cat(refined_segment_lst, dim=0)

        # Convert back to junctions and line_map
        junctions_new = torch.cat(
            [refined_segments[:, 0, :], refined_segments[:, 1, :]], dim=0)
        junctions_new = torch.unique(junctions_new, dim=0)
        line_map_new = self.segments_to_line_map(junctions_new,
                                                 refined_segments)

        return junctions_new, line_map_new

    def segments_to_line_map(self, junctions, segments):
        """ Convert the list of segments to line map.

        Parameters:
            junctions: num_junctions x 2 tensor.
            segments: num_segments x 2 x 2 tensor whose endpoints are
                      matched against `junctions` by exact equality.
        Returns:
            A num_junctions x num_junctions symmetric line map.
        """
        # Create empty line map
        device = junctions.device
        num_junctions = junctions.shape[0]
        line_map = torch.zeros([num_junctions, num_junctions], device=device)

        # Iterate through every segment
        for idx in range(segments.shape[0]):
            # Get the junctions from a single segment
            seg = segments[idx, ...]
            junction1 = seg[0, :]
            junction2 = seg[1, :]

            # Get index (both coordinates have to match)
            idx_junction1 = torch.where(
                (junctions == junction1).sum(axis=1) == 2)[0]
            idx_junction2 = torch.where(
                (junctions == junction2).sum(axis=1) == 2)[0]

            # label the corresponding entries
            line_map[idx_junction1, idx_junction2] = 1
            line_map[idx_junction2, idx_junction1] = 1

        return line_map

    def detect_bilinear(self, heatmap, cand_h, cand_w, H, W, device):
        """ Detection by bilinear sampling.

        Parameters:
            heatmap: H x W tensor.
            cand_h, cand_w: sampling coordinates, expected to be already
                            clipped to the heatmap by the caller.
            H, W, device: unused, kept for interface compatibility.
        Returns:
            The interpolated heatmap values, same shape as `cand_h`.
        """
        # Get the floor and ceiling locations
        h_floor = torch.floor(cand_h)
        w_floor = torch.floor(cand_w)
        h_lo = h_floor.to(torch.long)
        w_lo = w_floor.to(torch.long)
        h_hi = torch.ceil(cand_h).to(torch.long)
        w_hi = torch.ceil(cand_w).to(torch.long)

        # Weights from the fractional part. Using (ceil - x) and (x - floor)
        # gives all four taps a zero weight whenever x is an integer
        # (ceil == floor); with the fractional part an integer coordinate
        # returns exactly the heatmap value at that pixel.
        h_frac = cand_h - h_floor
        w_frac = cand_w - w_floor

        # Perform the bilinear sampling
        cand_samples_feat = (
            heatmap[h_lo, w_lo] * (1 - h_frac) * (1 - w_frac)
            + heatmap[h_lo, w_hi] * (1 - h_frac) * w_frac
            + heatmap[h_hi, w_lo] * h_frac * (1 - w_frac)
            + heatmap[h_hi, w_hi] * h_frac * w_frac)

        return cand_samples_feat

    def detect_local_max(self, heatmap, cand_h, cand_w, H, W,
                         normalized_seg_length, device):
        """ Detection by local maximum search.

        For every sample the maximum of the heatmap is taken over the
        pixels of a circular patch around the rounded location that are
        closer to the sample than a length-dependent threshold.

        Parameters:
            heatmap: H x W tensor.
            cand_h, cand_w: N x num_samples sampling coordinates.
            normalized_seg_length: N segment lengths divided by the image
                                   diagonal.
        Returns:
            N x num_samples tensor of local maxima.
        """
        # No candidate: return an empty result with the right number of
        # samples, on the requested device
        if cand_h.shape[0] == 0:
            return torch.empty((0, cand_h.shape[1]), dtype=heatmap.dtype,
                               device=device)

        # Compute the distance threshold (one per candidate)
        dist_thresh = (0.5 * (2 ** 0.5)
                       + self.lambda_radius * normalized_seg_length)

        # Compute the candidate points
        cand_points = torch.cat([cand_h[..., None], cand_w[..., None]],
                                dim=-1)
        cand_points_round = torch.round(cand_points)  # N x num_samples x 2

        # Construct the local patch: all the pixels of a
        # (2 * radius + 1) x (2 * radius + 1) window...
        patch_mask = torch.zeros([int(2 * self.local_patch_radius + 1),
                                  int(2 * self.local_patch_radius + 1)],
                                 device=device)
        patch_center = torch.tensor(
            [[self.local_patch_radius, self.local_patch_radius]],
            device=device, dtype=torch.float32)
        H_patch_points, W_patch_points = torch.where(patch_mask >= 0)
        patch_points = torch.cat([H_patch_points[..., None],
                                  W_patch_points[..., None]], dim=-1)
        # ... restricted to the circle region
        patch_center_dist = torch.sqrt(torch.sum(
            (patch_points - patch_center) ** 2, dim=-1))
        patch_points = (patch_points[patch_center_dist
                                     <= self.local_patch_radius, :])
        # Shift [0, 0] to the center
        patch_points = patch_points - self.local_patch_radius

        # Construct local patch mask
        patch_points_shifted = (torch.unsqueeze(cand_points_round, dim=2)
                                + patch_points[None, None, ...])
        patch_dist = torch.sqrt(torch.sum((torch.unsqueeze(cand_points, dim=2)
                                           - patch_points_shifted) ** 2,
                                          dim=-1))
        # The per-candidate threshold broadcasts over samples and patch
        patch_dist_mask = patch_dist < dist_thresh[:, None, None]

        # Get all points => N x num_samples x num_patch_points
        points_H = torch.clamp(patch_points_shifted[:, :, :, 0], min=0,
                               max=H - 1).to(torch.long)
        points_W = torch.clamp(patch_points_shifted[:, :, :, 1], min=0,
                               max=W - 1).to(torch.long)

        # Sample the feature and filter using the valid mask
        sampled_feat = heatmap[points_H, points_W]
        sampled_feat = sampled_feat * patch_dist_mask.to(torch.float32)
        sampled_feat_lmax, _ = torch.max(sampled_feat, dim=-1)

        return sampled_feat_lmax
def __call__(self, input_image, valid_mask=None,
             return_heatmap=False, profile=False):
    """ Detect the line segments of a single image.

    Parameters:
        input_image: 4D torch tensor; only the first element of the
                     batch is used for the heatmap.
        valid_mask: optional mask multiplied with the junction map before
                    extracting the junctions.
        return_heatmap: add the line heatmap to the outputs.
        profile: add the elapsed time (in seconds) to the outputs.
    Returns:
        A dict with "line_segments" and, optionally, "heatmap" and "time".
    Raises:
        ValueError: if the input is not a 4D torch tensor.
    """
    # Now we restrict input_image to 4D torch tensor. The type is checked
    # first so that inputs without a `shape` attribute raise the
    # ValueError below instead of an AttributeError.
    if ((not isinstance(input_image, torch.Tensor))
            or (not len(input_image.shape) == 4)):
        raise ValueError(
            "[Error] the input image should be a 4D torch tensor.")

    # Move the input to corresponding device
    input_image = input_image.to(self.device)

    # Forward of the CNN backbone
    start_time = time.time()
    with torch.no_grad():
        net_outputs = self.model(input_image)

    junc_np = convert_junc_predictions(
        net_outputs["junctions"], self.grid_size,
        self.junc_detect_thresh, self.max_num_junctions)
    junc_pred = junc_np["junc_pred_nms"].squeeze()
    if valid_mask is not None:
        junc_pred = junc_pred * valid_mask
    junc_rows, junc_cols = np.where(junc_pred)[:2]
    junctions = np.stack([junc_rows, junc_cols], axis=-1)

    if net_outputs["heatmap"].shape[1] == 2:
        # Convert to single channel directly from here
        heatmap = softmax(net_outputs["heatmap"], dim=1)[:, 1:, :, :]
    else:
        heatmap = torch.sigmoid(net_outputs["heatmap"])
    heatmap = heatmap.cpu().numpy().transpose(0, 2, 3, 1)[0, :, :, 0]

    # Run the line detector.
    line_map, junctions, heatmap = self.line_detector.detect(
        junctions, heatmap, device=self.device)
    heatmap = heatmap.cpu().numpy()
    if isinstance(line_map, torch.Tensor):
        line_map = line_map.cpu().numpy()
    if isinstance(junctions, torch.Tensor):
        junctions = junctions.cpu().numpy()
    line_segments = line_map_to_segments(junctions, line_map)
    end_time = time.time()

    outputs = {"line_segments": line_segments}

    if return_heatmap:
        outputs["heatmap"] = heatmap
    if profile:
        outputs["time"] = end_time - start_time

    return outputs
def __init__(self, model_cfg, ckpt_path, device, line_detector_cfg,
             line_matcher_cfg, multiscale=False, scales=None):
    """ Load the network and set up the line detector and matcher.

    Parameters:
        model_cfg: config for CNN model
        ckpt_path: path to the weights
        device: torch device on which the model is run
        line_detector_cfg: keyword arguments of LineSegmentDetectionModule
        line_matcher_cfg: keyword arguments of WunschLineMatcher
        multiscale: run the detection at multiple scales
        scales: image scales of the multiscale detection
                (defaults to [1., 2.])
    """
    # Get loss weights if dynamic weighting
    _, loss_weights = get_loss_and_weights(model_cfg, device)
    self.device = device

    # Initialize the cnn backbone
    self.model = get_model(model_cfg, loss_weights)
    # NOTE: torch.load unpickles the file, only load trusted checkpoints
    checkpoint = torch.load(ckpt_path, map_location=self.device)
    checkpoint = adapt_checkpoint(checkpoint["model_state_dict"])
    self.model.load_state_dict(checkpoint)
    self.model = self.model.to(self.device)
    self.model = self.model.eval()

    self.grid_size = model_cfg["grid_size"]
    self.junc_detect_thresh = model_cfg["detection_thresh"]
    self.max_num_junctions = model_cfg.get("max_num_junctions", 300)

    # Initialize the line detector
    self.line_detector = LineSegmentDetectionModule(**line_detector_cfg)
    self.multiscale = multiscale
    # Every instance owns its list: a mutable default argument stored by
    # reference would be shared by all the instances relying on it.
    self.scales = [1., 2.] if scales is None else list(scales)

    # Initialize the line matcher
    self.line_matcher = WunschLineMatcher(**line_matcher_cfg)

    # Print some debug messages
    for key, val in line_detector_cfg.items():
        print(f"[Debug] {key}: {val}")
net_outputs["junctions"], self.grid_size, + self.junc_detect_thresh, self.max_num_junctions) + if valid_mask is None: + junctions = np.where(junc_np["junc_pred_nms"].squeeze()) + else: + junctions = np.where( + junc_np["junc_pred_nms"].squeeze() * valid_mask) + junctions = np.concatenate([junctions[0][..., None], + junctions[1][..., None]], axis=-1) + + if net_outputs["heatmap"].shape[1] == 2: + # Convert to single channel directly from here + heatmap = softmax( + net_outputs["heatmap"], + dim=1)[:, 1:, :, :].cpu().numpy().transpose(0, 2, 3, 1) + else: + heatmap = torch.sigmoid( + net_outputs["heatmap"]).cpu().numpy().transpose(0, 2, 3, 1) + heatmap = heatmap[0, :, :, 0] + + # Run the line detector. + line_map, junctions, heatmap = self.line_detector.detect( + junctions, heatmap, device=self.device) + if isinstance(line_map, torch.Tensor): + line_map = line_map.cpu().numpy() + if isinstance(junctions, torch.Tensor): + junctions = junctions.cpu().numpy() + outputs["heatmap"] = heatmap.cpu().numpy() + outputs["junctions"] = junctions + + # If it's a line map with multiple detect_thresh and inlier_thresh + if len(line_map.shape) > 2: + num_detect_thresh = line_map.shape[0] + num_inlier_thresh = line_map.shape[1] + line_segments = [] + for detect_idx in range(num_detect_thresh): + line_segments_inlier = [] + for inlier_idx in range(num_inlier_thresh): + line_map_tmp = line_map[detect_idx, inlier_idx, :, :] + line_segments_tmp = line_map_to_segments(junctions, line_map_tmp) + line_segments_inlier.append(line_segments_tmp) + line_segments.append(line_segments_inlier) + else: + line_segments = line_map_to_segments(junctions, line_map) + + outputs["line_segments"] = line_segments + + end_time = time.time() + + if profile: + outputs["time"] = end_time - start_time + + return outputs + + # Perform line detection and descriptor inference at multiple scales + def multiscale_line_detection(self, input_image, valid_mask=None, + desc_only=False, profile=False, + scales=[1., 2.], 
aggregation='mean'): + # Restrict input_image to 4D torch tensor + if ((not len(input_image.shape) == 4) + or (not isinstance(input_image, torch.Tensor))): + raise ValueError( + "[Error] the input image should be a 4D torch tensor") + + # Move the input to corresponding device + input_image = input_image.to(self.device) + img_size = input_image.shape[2:4] + desc_size = tuple(np.array(img_size) // 4) + + # Run the inference at multiple image scales + start_time = time.time() + junctions, heatmaps, descriptors = [], [], [] + for s in scales: + # Resize the image + resized_img = F.interpolate(input_image, scale_factor=s, + mode='bilinear') + + # Forward of the CNN backbone + with torch.no_grad(): + net_outputs = self.model(resized_img) + + descriptors.append(F.interpolate( + net_outputs["descriptors"], size=desc_size, mode="bilinear")) + + if not desc_only: + junc_prob = convert_junc_predictions( + net_outputs["junctions"], self.grid_size)["junc_pred"] + junctions.append(cv2.resize(junc_prob.squeeze(), + (img_size[1], img_size[0]), + interpolation=cv2.INTER_LINEAR)) + + if net_outputs["heatmap"].shape[1] == 2: + # Convert to single channel directly from here + heatmap = softmax(net_outputs["heatmap"], + dim=1)[:, 1:, :, :] + else: + heatmap = torch.sigmoid(net_outputs["heatmap"]) + heatmaps.append(F.interpolate(heatmap, size=img_size, + mode="bilinear")) + + # Aggregate the results + if aggregation == 'mean': + # Aggregation through the mean activation + descriptors = torch.stack(descriptors, dim=0).mean(0) + else: + # Aggregation through the max activation + descriptors = torch.stack(descriptors, dim=0).max(0)[0] + outputs = {"descriptor": descriptors} + + if not desc_only: + if aggregation == 'mean': + junctions = np.stack(junctions, axis=0).mean(0)[None] + heatmap = torch.stack(heatmaps, dim=0).mean(0)[0, 0, :, :] + heatmap = heatmap.cpu().numpy() + else: + junctions = np.stack(junctions, axis=0).max(0)[None] + heatmap = torch.stack(heatmaps, dim=0).max(0)[0][0, 0, 
def __call__(self, images, valid_masks=None, profile=False):
    """ Detect the lines of two images and match them.

    Parameters:
        images: pair of 4D image tensors.
        valid_masks: optional pair of masks (None entries = no mask).
        profile: add the detection and matching times to the outputs.
    Returns:
        A dict with "line_segments" (one entry per image), "matches" and,
        if `profile` is set, "line_detection_time" and
        "line_matching_time".
    """
    # No mutable default argument: resolve the "no mask" case here
    if valid_masks is None:
        valid_masks = (None, None)

    # Line detection and descriptor inference on both images
    forward_outputs = []
    for img_idx in range(2):
        if self.multiscale:
            forward_outputs.append(self.multiscale_line_detection(
                images[img_idx], valid_masks[img_idx], profile=profile,
                scales=self.scales))
        else:
            forward_outputs.append(self.line_detection(
                images[img_idx], valid_masks[img_idx], profile=profile))
    line_seg1 = forward_outputs[0]["line_segments"]
    line_seg2 = forward_outputs[1]["line_segments"]
    desc1 = forward_outputs[0]["descriptor"]
    desc2 = forward_outputs[1]["descriptor"]

    # Match the lines in both images
    start_time = time.time()
    matches = self.line_matcher.forward(line_seg1, line_seg2,
                                        desc1, desc2)
    end_time = time.time()

    outputs = {"line_segments": [line_seg1, line_seg2],
               "matches": matches}

    if profile:
        outputs["line_detection_time"] = (forward_outputs[0]["time"]
                                          + forward_outputs[1]["time"])
        outputs["line_matching_time"] = end_time - start_time

    return outputs
""" + def __init__(self, cross_check=True, num_samples=10, min_dist_pts=8, + top_k_candidates=10, grid_size=8, sampling="regular", + line_score=False): + self.cross_check = cross_check + self.num_samples = num_samples + self.min_dist_pts = min_dist_pts + self.top_k_candidates = top_k_candidates + self.grid_size = grid_size + self.line_score = line_score # True to compute saliency on a line + self.sampling_mode = sampling + if sampling not in ["regular", "d2_net", "asl_feat"]: + raise ValueError("Wrong sampling mode: " + sampling) + + def forward(self, line_seg1, line_seg2, desc1, desc2): + """ + Find the best matches between two sets of line segments + and their corresponding descriptors. + """ + img_size1 = (desc1.shape[2] * self.grid_size, + desc1.shape[3] * self.grid_size) + img_size2 = (desc2.shape[2] * self.grid_size, + desc2.shape[3] * self.grid_size) + device = desc1.device + + # Default case when an image has no lines + if len(line_seg1) == 0: + return np.empty((0), dtype=int) + if len(line_seg2) == 0: + return -np.ones(len(line_seg1), dtype=int) + + # Sample points regularly along each line + if self.sampling_mode == "regular": + line_points1, valid_points1 = self.sample_line_points(line_seg1) + line_points2, valid_points2 = self.sample_line_points(line_seg2) + else: + line_points1, valid_points1 = self.sample_salient_points( + line_seg1, desc1, img_size1, self.sampling_mode) + line_points2, valid_points2 = self.sample_salient_points( + line_seg2, desc2, img_size2, self.sampling_mode) + line_points1 = torch.tensor(line_points1.reshape(-1, 2), + dtype=torch.float, device=device) + line_points2 = torch.tensor(line_points2.reshape(-1, 2), + dtype=torch.float, device=device) + + # Extract the descriptors for each point + grid1 = keypoints_to_grid(line_points1, img_size1) + grid2 = keypoints_to_grid(line_points2, img_size2) + desc1 = F.normalize(F.grid_sample(desc1, grid1)[0, :, :, 0], dim=0) + desc2 = F.normalize(F.grid_sample(desc2, grid2)[0, :, :, 0], dim=0) 
+ + # Precompute the distance between line points for every pair of lines + # Assign a score of -1 for unvalid points + scores = desc1.t() @ desc2 + scores[~valid_points1.flatten()] = -1 + scores[:, ~valid_points2.flatten()] = -1 + scores = scores.reshape(len(line_seg1), self.num_samples, + len(line_seg2), self.num_samples) + scores = scores.permute(0, 2, 1, 3) + # scores.shape = (n_lines1, n_lines2, num_samples, num_samples) + + # Pre-filter the line candidates and find the best match for each line + matches = self.filter_and_match_lines(scores) + + # [Optionally] filter matches with mutual nearest neighbor filtering + if self.cross_check: + matches2 = self.filter_and_match_lines( + scores.permute(1, 0, 3, 2)) + mutual = matches2[matches] == np.arange(len(line_seg1)) + matches[~mutual] = -1 + + return matches + + def d2_net_saliency_score(self, desc): + """ Compute the D2-Net saliency score + on a 3D or 4D descriptor. """ + is_3d = len(desc.shape) == 3 + b_size = len(desc) + feat = F.relu(desc) + + # Compute the soft local max + exp = torch.exp(feat) + if is_3d: + sum_exp = 3 * F.avg_pool1d(exp, kernel_size=3, stride=1, + padding=1) + else: + sum_exp = 9 * F.avg_pool2d(exp, kernel_size=3, stride=1, + padding=1) + soft_local_max = exp / sum_exp + + # Compute the depth-wise maximum + depth_wise_max = torch.max(feat, dim=1)[0] + depth_wise_max = feat / depth_wise_max.unsqueeze(1) + + # Total saliency score + score = torch.max(soft_local_max * depth_wise_max, dim=1)[0] + normalization = torch.sum(score.reshape(b_size, -1), dim=1) + if is_3d: + normalization = normalization.reshape(b_size, 1) + else: + normalization = normalization.reshape(b_size, 1, 1) + score = score / normalization + return score + + def asl_feat_saliency_score(self, desc): + """ Compute the ASLFeat saliency score on a 3D or 4D descriptor. 
""" + is_3d = len(desc.shape) == 3 + b_size = len(desc) + + # Compute the soft local peakiness + if is_3d: + local_avg = F.avg_pool1d(desc, kernel_size=3, stride=1, padding=1) + else: + local_avg = F.avg_pool2d(desc, kernel_size=3, stride=1, padding=1) + soft_local_score = F.softplus(desc - local_avg) + + # Compute the depth-wise peakiness + depth_wise_mean = torch.mean(desc, dim=1).unsqueeze(1) + depth_wise_score = F.softplus(desc - depth_wise_mean) + + # Total saliency score + score = torch.max(soft_local_score * depth_wise_score, dim=1)[0] + normalization = torch.sum(score.reshape(b_size, -1), dim=1) + if is_3d: + normalization = normalization.reshape(b_size, 1) + else: + normalization = normalization.reshape(b_size, 1, 1) + score = score / normalization + return score + + def sample_salient_points(self, line_seg, desc, img_size, + saliency_type='d2_net'): + """ + Sample the most salient points along each line segments, with a + minimal distance between each point. Pad the remaining points. + Inputs: + line_seg: an Nx2x2 torch.Tensor. + desc: a NxDxHxW torch.Tensor. + image_size: the original image size. + saliency_type: 'd2_net' or 'asl_feat'. + Outputs: + line_points: an Nxnum_samplesx2 np.array. + valid_points: a boolean Nxnum_samples np.array. 
+ """ + device = desc.device + if not self.line_score: + # Compute the score map + if saliency_type == "d2_net": + score = self.d2_net_saliency_score(desc) + else: + score = self.asl_feat_saliency_score(desc) + + num_lines = len(line_seg) + line_lengths = np.linalg.norm(line_seg[:, 0] - line_seg[:, 1], axis=1) + + # The number of samples depends on the length of the line + num_samples_lst = np.clip(line_lengths // self.min_dist_pts, + 2, self.num_samples) + line_points = np.empty((num_lines, self.num_samples, 2), dtype=float) + valid_points = np.empty((num_lines, self.num_samples), dtype=bool) + + # Sample the score on a fixed number of points of each line + n_samples_per_region = 4 + for n in np.arange(2, self.num_samples + 1): + sample_rate = n * n_samples_per_region + # Consider all lines where we can fit up to n points + cur_mask = num_samples_lst == n + cur_line_seg = line_seg[cur_mask] + cur_num_lines = len(cur_line_seg) + if cur_num_lines == 0: + continue + line_points_x = np.linspace(cur_line_seg[:, 0, 0], + cur_line_seg[:, 1, 0], + sample_rate, axis=-1) + line_points_y = np.linspace(cur_line_seg[:, 0, 1], + cur_line_seg[:, 1, 1], + sample_rate, axis=-1) + cur_line_points = np.stack([line_points_x, line_points_y], + axis=-1).reshape(-1, 2) + # cur_line_points is of shape (n_cur_lines * sample_rate, 2) + cur_line_points = torch.tensor(cur_line_points, dtype=torch.float, + device=device) + grid_points = keypoints_to_grid(cur_line_points, img_size) + + if self.line_score: + # The saliency score is high when the activation are locally + # maximal along the line (and not in a square neigborhood) + line_desc = F.grid_sample(desc, grid_points).squeeze() + line_desc = line_desc.reshape(-1, cur_num_lines, sample_rate) + line_desc = line_desc.permute(1, 0, 2) + if saliency_type == "d2_net": + scores = self.d2_net_saliency_score(line_desc) + else: + scores = self.asl_feat_saliency_score(line_desc) + else: + scores = F.grid_sample(score.unsqueeze(1), + 
grid_points).squeeze() + + # Take the most salient point in n distinct regions + scores = scores.reshape(-1, n, n_samples_per_region) + best = torch.max(scores, dim=2, keepdim=True)[1].cpu().numpy() + cur_line_points = cur_line_points.reshape(-1, n, + n_samples_per_region, 2) + cur_line_points = np.take_along_axis( + cur_line_points, best[..., None], axis=2)[:, :, 0] + + # Pad + cur_valid_points = np.ones((cur_num_lines, self.num_samples), + dtype=bool) + cur_valid_points[:, n:] = False + cur_line_points = np.concatenate([ + cur_line_points, + np.zeros((cur_num_lines, self.num_samples - n, 2), dtype=float)], + axis=1) + + line_points[cur_mask] = cur_line_points + valid_points[cur_mask] = cur_valid_points + + return line_points, valid_points + + def sample_line_points(self, line_seg): + """ + Regularly sample points along each line segments, with a minimal + distance between each point. Pad the remaining points. + Inputs: + line_seg: an Nx2x2 torch.Tensor. + Outputs: + line_points: an Nxnum_samplesx2 np.array. + valid_points: a boolean Nxnum_samples np.array. 
+ """ + num_lines = len(line_seg) + line_lengths = np.linalg.norm(line_seg[:, 0] - line_seg[:, 1], axis=1) + + # Sample the points separated by at least min_dist_pts along each line + # The number of samples depends on the length of the line + num_samples_lst = np.clip(line_lengths // self.min_dist_pts, + 2, self.num_samples) + line_points = np.empty((num_lines, self.num_samples, 2), dtype=float) + valid_points = np.empty((num_lines, self.num_samples), dtype=bool) + for n in np.arange(2, self.num_samples + 1): + # Consider all lines where we can fit up to n points + cur_mask = num_samples_lst == n + cur_line_seg = line_seg[cur_mask] + line_points_x = np.linspace(cur_line_seg[:, 0, 0], + cur_line_seg[:, 1, 0], + n, axis=-1) + line_points_y = np.linspace(cur_line_seg[:, 0, 1], + cur_line_seg[:, 1, 1], + n, axis=-1) + cur_line_points = np.stack([line_points_x, line_points_y], axis=-1) + + # Pad + cur_num_lines = len(cur_line_seg) + cur_valid_points = np.ones((cur_num_lines, self.num_samples), + dtype=bool) + cur_valid_points[:, n:] = False + cur_line_points = np.concatenate([ + cur_line_points, + np.zeros((cur_num_lines, self.num_samples - n, 2), dtype=float)], + axis=1) + + line_points[cur_mask] = cur_line_points + valid_points[cur_mask] = cur_valid_points + + return line_points, valid_points + + def filter_and_match_lines(self, scores): + """ + Use the scores to keep the top k best lines, compute the Needleman- + Wunsch algorithm on each candidate pairs, and keep the highest score. + Inputs: + scores: a (N, M, n, n) torch.Tensor containing the pairwise scores + of the elements to match. 
+ Outputs: + matches: a (N) np.array containing the indices of the best match + """ + # Pre-filter the pairs and keep the top k best candidate lines + line_scores1 = scores.max(3)[0] + valid_scores1 = line_scores1 != -1 + line_scores1 = ((line_scores1 * valid_scores1).sum(2) + / valid_scores1.sum(2)) + line_scores2 = scores.max(2)[0] + valid_scores2 = line_scores2 != -1 + line_scores2 = ((line_scores2 * valid_scores2).sum(2) + / valid_scores2.sum(2)) + line_scores = (line_scores1 + line_scores2) / 2 + topk_lines = torch.argsort(line_scores, + dim=1)[:, -self.top_k_candidates:] + scores, topk_lines = scores.cpu().numpy(), topk_lines.cpu().numpy() + # topk_lines.shape = (n_lines1, top_k_candidates) + top_scores = np.take_along_axis(scores, topk_lines[:, :, None, None], + axis=1) + + # Consider the reversed line segments as well + top_scores = np.concatenate([top_scores, top_scores[..., ::-1]], + axis=1) + + # Compute the line distance matrix with Needleman-Wunsch algo and + # retrieve the closest line neighbor + n_lines1, top2k, n, m = top_scores.shape + top_scores = top_scores.reshape(n_lines1 * top2k, n, m) + nw_scores = self.needleman_wunsch(top_scores) + nw_scores = nw_scores.reshape(n_lines1, top2k) + matches = np.mod(np.argmax(nw_scores, axis=1), top2k // 2) + matches = topk_lines[np.arange(n_lines1), matches] + return matches + + def needleman_wunsch(self, scores): + """ + Batched implementation of the Needleman-Wunsch algorithm. + The cost of the InDel operation is set to 0 by subtracting the gap + penalty to the scores. + Inputs: + scores: a (B, N, M) np.array containing the pairwise scores + of the elements to match. 
+ """ + b, n, m = scores.shape + + # Recalibrate the scores to get a gap score of 0 + gap = 0.1 + nw_scores = scores - gap + + # Run the dynamic programming algorithm + nw_grid = np.zeros((b, n + 1, m + 1), dtype=float) + for i in range(n): + for j in range(m): + nw_grid[:, i + 1, j + 1] = np.maximum( + np.maximum(nw_grid[:, i + 1, j], nw_grid[:, i, j + 1]), + nw_grid[:, i, j] + nw_scores[:, i, j]) + + return nw_grid[:, -1, -1] + + def get_pairwise_distance(self, line_seg1, line_seg2, desc1, desc2): + """ + Compute the OPPOSITE of the NW score for pairs of line segments + and their corresponding descriptors. + """ + num_lines = len(line_seg1) + assert num_lines == len(line_seg2), "The same number of lines is required in pairwise score." + img_size1 = (desc1.shape[2] * self.grid_size, + desc1.shape[3] * self.grid_size) + img_size2 = (desc2.shape[2] * self.grid_size, + desc2.shape[3] * self.grid_size) + device = desc1.device + + # Sample points regularly along each line + line_points1, valid_points1 = self.sample_line_points(line_seg1) + line_points2, valid_points2 = self.sample_line_points(line_seg2) + line_points1 = torch.tensor(line_points1.reshape(-1, 2), + dtype=torch.float, device=device) + line_points2 = torch.tensor(line_points2.reshape(-1, 2), + dtype=torch.float, device=device) + + # Extract the descriptors for each point + grid1 = keypoints_to_grid(line_points1, img_size1) + grid2 = keypoints_to_grid(line_points2, img_size2) + desc1 = F.normalize(F.grid_sample(desc1, grid1)[0, :, :, 0], dim=0) + desc1 = desc1.reshape(-1, num_lines, self.num_samples) + desc2 = F.normalize(F.grid_sample(desc2, grid2)[0, :, :, 0], dim=0) + desc2 = desc2.reshape(-1, num_lines, self.num_samples) + + # Compute the distance between line points for every pair of lines + # Assign a score of -1 for unvalid points + scores = torch.einsum('dns,dnt->nst', desc1, desc2).cpu().numpy() + scores = scores.reshape(num_lines * self.num_samples, + self.num_samples) + 
scores[~valid_points1.flatten()] = -1 + scores = scores.reshape(num_lines, self.num_samples, self.num_samples) + scores = scores.transpose(1, 0, 2).reshape(self.num_samples, -1) + scores[:, ~valid_points2.flatten()] = -1 + scores = scores.reshape(self.num_samples, num_lines, self.num_samples) + scores = scores.transpose(1, 0, 2) + # scores.shape = (num_lines, num_samples, num_samples) + + # Compute the NW score for each pair of lines + pairwise_scores = np.array([self.needleman_wunsch(s) for s in scores]) + return -pairwise_scores diff --git a/third_party/SOLD2/sold2/model/loss.py b/third_party/SOLD2/sold2/model/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..aaad3c67f3fd59db308869901f8a56623901e318 --- /dev/null +++ b/third_party/SOLD2/sold2/model/loss.py @@ -0,0 +1,445 @@ +""" +Loss function implementations. +""" +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from kornia.geometry import warp_perspective + +from ..misc.geometry_utils import (keypoints_to_grid, get_dist_mask, + get_common_line_mask) + + +def get_loss_and_weights(model_cfg, device=torch.device("cuda")): + """ Get loss functions and either static or dynamic weighting. 
def _make_loss_weight(model_cfg, key, policy, loss_name):
    """ Build one loss weight from model_cfg[key] for the given policy. """
    if policy == "static":
        return torch.tensor(model_cfg[key], dtype=torch.float32)
    if policy == "dynamic":
        # Dynamic weights are learnable parameters
        return nn.Parameter(
            torch.tensor(model_cfg[key], dtype=torch.float32),
            requires_grad=True)
    raise ValueError(
        "[Error] Unknown weighting policy for {} loss weight.".format(
            loss_name))


def get_loss_and_weights(model_cfg, device=torch.device("cuda")):
    """ Get loss functions and either static or dynamic weighting.

    Inputs:
        model_cfg: dict-like model configuration.
        device: device the loss modules are moved to.
    Outputs:
        loss_func: dict with "junc_loss", "heatmap_loss" and, when
                   "descriptor_loss_func" is configured, "descriptor_loss".
        loss_weight: dict with "w_junc", "w_heatmap" and optionally "w_desc".
    """
    # Get the global weighting policy
    w_policy = model_cfg.get("weighting_policy", "static")
    if w_policy not in ["static", "dynamic"]:
        raise ValueError("[Error] Not supported weighting policy.")

    loss_func = {}
    loss_weight = {}
    # Get junction loss function and weight
    w_junc, junc_loss_func = get_junction_loss_and_weight(model_cfg, w_policy)
    loss_func["junc_loss"] = junc_loss_func.to(device)
    loss_weight["w_junc"] = w_junc

    # Get heatmap loss function and weight
    w_heatmap, heatmap_loss_func = get_heatmap_loss_and_weight(
        model_cfg, w_policy, device)
    loss_func["heatmap_loss"] = heatmap_loss_func.to(device)
    loss_weight["w_heatmap"] = w_heatmap

    # [Optionally] get descriptor loss function and weight
    if model_cfg.get("descriptor_loss_func", None) is not None:
        w_descriptor, descriptor_loss_func = get_descriptor_loss_and_weight(
            model_cfg, w_policy)
        loss_func["descriptor_loss"] = descriptor_loss_func.to(device)
        loss_weight["w_desc"] = w_descriptor

    return loss_func, loss_weight


def get_junction_loss_and_weight(model_cfg, global_w_policy):
    """ Get the junction loss function and weight.

    The policy in model_cfg["junction_loss_cfg"]["policy"], when present,
    overrides global_w_policy. Returns (w_junc, junc_loss_func). """
    junction_loss_cfg = model_cfg.get("junction_loss_cfg", {})

    # Get the junction loss weight
    w_policy = junction_loss_cfg.get("policy", global_w_policy)
    w_junc = _make_loss_weight(model_cfg, "w_junc", w_policy, "junction")

    # Get the junction loss function
    junc_loss_name = model_cfg.get("junction_loss_func", "superpoint")
    if junc_loss_name == "superpoint":
        junc_loss_func = JunctionDetectionLoss(model_cfg["grid_size"],
                                               model_cfg["keep_border_valid"])
    else:
        raise ValueError("[Error] Not supported junction loss function.")

    return w_junc, junc_loss_func


def get_heatmap_loss_and_weight(model_cfg, global_w_policy, device):
    """ Get the heatmap loss function and weight.

    The policy in model_cfg["heatmap_loss_cfg"]["policy"], when present,
    overrides global_w_policy. Returns (w_heatmap, heatmap_loss_func). """
    heatmap_loss_cfg = model_cfg.get("heatmap_loss_cfg", {})

    # Get the heatmap loss weight (the error now names the heatmap weight
    # instead of the copy-pasted junction one)
    w_policy = heatmap_loss_cfg.get("policy", global_w_policy)
    w_heatmap = _make_loss_weight(model_cfg, "w_heatmap", w_policy, "heatmap")

    # Get the corresponding heatmap loss based on the config
    heatmap_loss_name = model_cfg.get("heatmap_loss_func", "cross_entropy")
    if heatmap_loss_name == "cross_entropy":
        # Get the heatmap class weight (always static)
        heatmap_class_w = model_cfg.get("w_heatmap_class", 1.)
        class_weight = torch.tensor(
            np.array([1., heatmap_class_w])).to(torch.float).to(device)
        heatmap_loss_func = HeatmapLoss(class_weight=class_weight)
    else:
        raise ValueError("[Error] Not supported heatmap loss function.")

    return w_heatmap, heatmap_loss_func
def get_descriptor_loss_and_weight(model_cfg, global_w_policy):
    """ Get the descriptor loss function and weight.

    The policy in model_cfg["descriptor_loss_cfg"]["policy"], when present,
    overrides global_w_policy. Returns (w_descriptor, descriptor_loss_func).
    """
    descriptor_loss_cfg = model_cfg.get("descriptor_loss_cfg", {})

    # Get the descriptor loss weight
    w_policy = descriptor_loss_cfg.get("policy", global_w_policy)
    if w_policy == "static":
        w_descriptor = torch.tensor(model_cfg["w_desc"], dtype=torch.float32)
    elif w_policy == "dynamic":
        w_descriptor = nn.Parameter(torch.tensor(model_cfg["w_desc"],
            dtype=torch.float32), requires_grad=True)
    else:
        raise ValueError(
            "[Error] Unknown weighting policy for descriptor loss weight.")

    # Get the descriptor loss function
    descriptor_loss_name = model_cfg.get("descriptor_loss_func",
                                         "regular_sampling")
    if descriptor_loss_name == "regular_sampling":
        descriptor_loss_func = TripletDescriptorLoss(
            descriptor_loss_cfg["grid_size"],
            descriptor_loss_cfg["dist_threshold"],
            descriptor_loss_cfg["margin"])
    else:
        raise ValueError("[Error] Not supported descriptor loss function.")

    return w_descriptor, descriptor_loss_func


def space_to_depth(input_tensor, grid_size):
    """ Rearrange grid_size x grid_size spatial blocks into channels.

    Maps (N, C, H, W) to (N, C * grid_size^2, H // grid_size,
    W // grid_size). The position inside the block is the slowest-varying
    part of the output channel index and the input channel the fastest. """
    N, C, H, W = input_tensor.size()
    Hc, Wc = H // grid_size, W // grid_size
    # (N, C, H//bs, bs, W//bs, bs)
    x = input_tensor.view(N, C, Hc, grid_size, Wc, grid_size)
    # (N, bs, bs, C, H//bs, W//bs)
    x = x.permute(0, 3, 5, 1, 2, 4).contiguous()
    # (N, C*bs^2, H//bs, W//bs)
    return x.view(N, C * (grid_size ** 2), Hc, Wc)


def junction_detection_loss(junction_map, junc_predictions, valid_mask=None,
                            grid_size=8, keep_border=True):
    """ Junction detection loss.

    The junction map is folded into grid_size x grid_size cells and each
    cell is classified among its grid_size^2 positions plus one extra
    "no junction" bin, with a cross-entropy masked by valid_mask.
    Inputs:
        junction_map: ground truth map, folded with space_to_depth.
        junc_predictions: class scores in NCHW layout.
        valid_mask: optional mask with the layout of junction_map;
                    None means every pixel is valid.
        grid_size: the cell size.
        keep_border: if True a cell counts as soon as one of its pixels is
                     valid, otherwise all its pixels must be valid.
    Outputs:
        the loss averaged over the valid cells (0-dim tensor).
    """
    # Convert junc_map to channel tensor
    junc_map = space_to_depth(junction_map, grid_size)
    map_shape = junc_map.shape[-2:]
    batch_size = junc_map.shape[0]
    dust_bin_label = torch.ones(
        [batch_size, 1, map_shape[0],
         map_shape[1]]).to(junc_map.device).to(torch.int)
    # Junction channels are scaled to 2 so they beat the dust bin (1)
    junc_map = torch.cat([junc_map*2, dust_bin_label], dim=1)
    # The small uniform noise breaks ties between equal channels
    labels = torch.argmax(
        junc_map.to(torch.float) +
        torch.distributions.Uniform(0, 0.1).sample(junc_map.shape).to(junc_map.device),
        dim=1)

    # Also convert the valid mask to channel tensor. The default mask is
    # created on the device of the inputs so it can be combined with them.
    if valid_mask is None:
        valid_mask = torch.ones(junction_map.shape,
                                device=junction_map.device)
    valid_mask = space_to_depth(valid_mask, grid_size)

    # Compute junction loss on the border patch or not
    n_valid = torch.sum(valid_mask.to(torch.bool).to(torch.int),
                        dim=1, keepdim=True)
    if keep_border:
        valid_mask = n_valid > 0
    else:
        valid_mask = n_valid >= grid_size * grid_size

    # Compute the classification loss
    loss_func = nn.CrossEntropyLoss(reduction="none")
    # The loss still need NCHW format
    loss = loss_func(input=junc_predictions,
                     target=labels.to(torch.long))

    # Weighted sum by the valid mask
    mask = torch.squeeze(valid_mask.to(torch.float), dim=1)
    loss_ = torch.sum(loss * mask, dim=[0, 1, 2])
    loss_final = loss_ / torch.sum(mask)

    return loss_final


def heatmap_loss(heatmap_gt, heatmap_pred, valid_mask=None,
                 class_weight=None):
    """ Heatmap prediction loss.

    Inputs:
        heatmap_gt: ground truth labels; dimension 1 is squeezed before
                    being used as the cross-entropy target.
        heatmap_pred: class scores fed to the cross-entropy.
        valid_mask: optional mask, squeezed along dimension 1;
                    None means every pixel is valid.
        class_weight: optional per-class weights of the cross-entropy.
    Outputs:
        the loss averaged over the valid pixels (0-dim tensor).
    """
    # Compute the classification loss on each pixel
    if class_weight is None:
        loss_func = nn.CrossEntropyLoss(reduction="none")
    else:
        loss_func = nn.CrossEntropyLoss(class_weight, reduction="none")

    loss = loss_func(input=heatmap_pred,
                     target=torch.squeeze(heatmap_gt.to(torch.long), dim=1))

    # The default mask keeps every pixel (None used to crash here)
    if valid_mask is None:
        mask = torch.ones_like(loss)
    else:
        mask = torch.squeeze(valid_mask.to(torch.float), dim=1)

    # Weighted sum by the valid mask
    # Sum over H and W
    loss_spatial_sum = torch.sum(loss * mask, dim=[1, 2])
    valid_spatial_sum = torch.sum(mask, dim=[1, 2])
    # Mean to single scalar over batch dimension
    loss = torch.sum(loss_spatial_sum) / torch.sum(valid_spatial_sum)

    return loss


class JunctionDetectionLoss(nn.Module):
    """ Junction detection loss. """
    def __init__(self, grid_size, keep_border):
        super(JunctionDetectionLoss, self).__init__()
        self.grid_size = grid_size
        self.keep_border = keep_border

    def forward(self, prediction, target, valid_mask=None):
        return junction_detection_loss(target, prediction, valid_mask,
                                       self.grid_size, self.keep_border)


class HeatmapLoss(nn.Module):
    """ Heatmap prediction loss. """
    def __init__(self, class_weight):
        super(HeatmapLoss, self).__init__()
        self.class_weight = class_weight

    def forward(self, prediction, target, valid_mask=None):
        return heatmap_loss(target, prediction, valid_mask, self.class_weight)


class RegularizationLoss(nn.Module):
    """ Module for regularization loss.

    Sums the loss weights that are nn.Parameter (the dynamic ones). """
    def __init__(self):
        super(RegularizationLoss, self).__init__()
        self.name = "regularization_loss"
        # Kept for backward compatibility; forward never mutates it
        self.loss_init = torch.zeros([])

    def forward(self, loss_weights):
        # Start from a fresh zero on the device of the weights. Adding in
        # place to self.loss_init.to(device) would modify self.loss_init
        # itself when it is already on that device, and the value would
        # then keep growing from one call to the next.
        loss = torch.zeros([], device=loss_weights["w_junc"].device)
        for val in loss_weights.values():
            if isinstance(val, nn.Parameter):
                loss = loss + val

        return loss


def triplet_loss(desc_pred1, desc_pred2, points1, points2, line_indices,
                 epoch, grid_size=8, dist_threshold=8,
                 init_dist_threshold=64, margin=1):
    """ Regular triplet loss for descriptor learning.

    Inputs:
        desc_pred1, desc_pred2: 4D descriptor maps of the two images.
        points1, points2: corresponding keypoints in the two images.
        line_indices: per-point line index; points with index 0 are
                      treated as invalid.
        epoch: current epoch, used to shrink the distance threshold.
        grid_size: ratio between the image and descriptor map sizes.
        dist_threshold: final distance threshold.
        init_dist_threshold: initial distance threshold.
        margin: triplet margin.
    Outputs:
        a tuple (loss, grid1, grid2, valid_points). loss has one value per
        valid point. Without any valid point, loss is a 0-dim zero and
        both grids are None.
    """
    b_size, _, Hc, Wc = desc_pred1.size()
    img_size = (Hc * grid_size, Wc * grid_size)
    device = desc_pred1.device

    # Extract valid keypoints
    n_points = line_indices.size()[1]
    valid_points = line_indices.bool().flatten()
    n_correct_points = torch.sum(valid_points).item()
    if n_correct_points == 0:
        # Same tuple layout as the regular path, so that callers taking
        # element [0] do not fail on a bare 0-dim tensor
        zero_loss = torch.tensor(0., dtype=torch.float, device=device)
        return zero_loss, None, None, valid_points

    # Check which keypoints are too close to be matched
    # dist_threshold is decreased at each epoch for easier training
    dist_threshold = max(dist_threshold,
                         2 * init_dist_threshold // (epoch + 1))
    dist_mask = get_dist_mask(points1, points2, valid_points, dist_threshold)

    # Additionally ban negative mining along the same line
    common_line_mask = get_common_line_mask(line_indices, valid_points)
    dist_mask = dist_mask | common_line_mask

    # Convert the keypoints to a grid suitable for interpolation
    grid1 = keypoints_to_grid(points1, img_size)
    grid2 = keypoints_to_grid(points2, img_size)

    # Extract the descriptors
    desc1 = F.grid_sample(desc_pred1, grid1).permute(
        0, 2, 3, 1).reshape(b_size * n_points, -1)[valid_points]
    desc1 = F.normalize(desc1, dim=1)
    desc2 = F.grid_sample(desc_pred2, grid2).permute(
        0, 2, 3, 1).reshape(b_size * n_points, -1)[valid_points]
    desc2 = F.normalize(desc2, dim=1)
    desc_dists = 2 - 2 * (desc1 @ desc2.t())

    # Positive distance loss
    pos_dist = torch.diag(desc_dists)

    # Negative distance loss
    # Matching pairs and banned pairs get the maximal distance so they are
    # never picked as the hardest negative
    max_dist = torch.tensor(4., dtype=torch.float, device=device)
    diag_idx = torch.arange(n_correct_points, dtype=torch.long)
    desc_dists[diag_idx, diag_idx] = max_dist
    desc_dists[dist_mask] = max_dist
    neg_dist = torch.min(torch.min(desc_dists, dim=1)[0],
                         torch.min(desc_dists, dim=0)[0])

    loss = F.relu(margin + pos_dist - neg_dist)
    return loss, grid1, grid2, valid_points


class TripletDescriptorLoss(nn.Module):
    """ Triplet descriptor loss. """
    def __init__(self, grid_size, dist_threshold, margin):
        super(TripletDescriptorLoss, self).__init__()
        self.grid_size = grid_size
        self.init_dist_threshold = 64
        self.dist_threshold = dist_threshold
        self.margin = margin

    def forward(self, desc_pred1, desc_pred2, points1,
                points2, line_indices, epoch):
        return self.descriptor_loss(desc_pred1, desc_pred2, points1,
                                    points2, line_indices, epoch)

    # The descriptor loss based on regularly sampled points along the lines
    def descriptor_loss(self, desc_pred1, desc_pred2, points1,
                        points2, line_indices, epoch):
        return torch.mean(triplet_loss(
            desc_pred1, desc_pred2, points1, points2, line_indices, epoch,
            self.grid_size, self.dist_threshold, self.init_dist_threshold,
            self.margin)[0])
class TotalLoss(nn.Module):
    """ Total loss summing junction, heatmap, descriptor
        and regularization losses. """
    def __init__(self, loss_funcs, loss_weights, weighting_policy):
        super(TotalLoss, self).__init__()
        # Whether we need to compute the descriptor loss
        self.compute_descriptors = "descriptor_loss" in loss_funcs.keys()

        self.loss_funcs = loss_funcs
        self.loss_weights = loss_weights
        self.weighting_policy = weighting_policy

        # Always add regularization loss (it will return zero if not used).
        # RegularizationLoss registers no parameter or buffer, so a
        # `.cuda()` call on it has nothing to move and is not needed.
        self.loss_funcs["reg_loss"] = RegularizationLoss()

    @staticmethod
    def _merge_valid_masks(valid_mask1, valid_mask2, target1, target2):
        """ Concatenate both masks on the batch axis, filling missing ones. """
        # NOTE(review): a missing mask is replaced by ones shaped like the
        # matching junction target -- assumes masks share that layout.
        if valid_mask1 is None:
            valid_mask1 = torch.ones_like(target1, dtype=torch.float)
        if valid_mask2 is None:
            valid_mask2 = torch.ones_like(target2, dtype=torch.float)
        return torch.cat([valid_mask1, valid_mask2], dim=0)

    def _weight(self, key):
        """ Return the effective weight stored under key. """
        # Dynamic weights are learnable parameters used through exp(-w)
        weight = self.loss_weights[key]
        if isinstance(weight, nn.Parameter):
            return torch.exp(-weight)
        return weight

    def forward(self, junc_pred, junc_target, heatmap_pred,
                heatmap_target, valid_mask=None):
        """ Detection only loss.

        Returns a dict with "total_loss", "junc_loss" and "heatmap_loss";
        the dynamic policy adds "reg_loss", "w_junc" and "w_heatmap". """
        # Compute the junction loss
        junc_loss = self.loss_funcs["junc_loss"](junc_pred, junc_target,
                                                 valid_mask)
        # Compute the heatmap loss
        heatmap_loss = self.loss_funcs["heatmap_loss"](
            heatmap_pred, heatmap_target, valid_mask)

        # Compute the total loss.
        if self.weighting_policy == "dynamic":
            reg_loss = self.loss_funcs["reg_loss"](self.loss_weights)
            total_loss = junc_loss * torch.exp(-self.loss_weights["w_junc"]) + \
                         heatmap_loss * torch.exp(-self.loss_weights["w_heatmap"]) + \
                         reg_loss

            return {
                "total_loss": total_loss,
                "junc_loss": junc_loss,
                "heatmap_loss": heatmap_loss,
                "reg_loss": reg_loss,
                "w_junc": torch.exp(-self.loss_weights["w_junc"]).item(),
                "w_heatmap": torch.exp(-self.loss_weights["w_heatmap"]).item(),
            }

        elif self.weighting_policy == "static":
            total_loss = junc_loss * self.loss_weights["w_junc"] + \
                         heatmap_loss * self.loss_weights["w_heatmap"]

            return {
                "total_loss": total_loss,
                "junc_loss": junc_loss,
                "heatmap_loss": heatmap_loss
            }

        else:
            raise ValueError("[Error] Unknown weighting policy.")

    def forward_descriptors(self,
            junc_map_pred1, junc_map_pred2, junc_map_target1,
            junc_map_target2, heatmap_pred1, heatmap_pred2, heatmap_target1,
            heatmap_target2, line_points1, line_points2, line_indices,
            desc_pred1, desc_pred2, epoch, valid_mask1=None,
            valid_mask2=None):
        """ Loss for detection + description.

        The two images are stacked along the batch axis for the junction
        and heatmap losses. Returns a dict with each loss, the weight it
        was multiplied by, "reg_loss" and "total_loss". """
        # The default None masks cannot be concatenated directly
        valid_mask = self._merge_valid_masks(
            valid_mask1, valid_mask2, junc_map_target1, junc_map_target2)

        # Compute junction loss
        junc_loss = self.loss_funcs["junc_loss"](
            torch.cat([junc_map_pred1, junc_map_pred2], dim=0),
            torch.cat([junc_map_target1, junc_map_target2], dim=0),
            valid_mask
        )
        # Get junction loss weight (dynamic or not)
        w_junc = self._weight("w_junc")

        # Compute heatmap loss
        heatmap_loss = self.loss_funcs["heatmap_loss"](
            torch.cat([heatmap_pred1, heatmap_pred2], dim=0),
            torch.cat([heatmap_target1, heatmap_target2], dim=0),
            valid_mask
        )
        # Get heatmap loss weight (dynamic or not)
        w_heatmap = self._weight("w_heatmap")

        # Compute the descriptor loss
        descriptor_loss = self.loss_funcs["descriptor_loss"](
            desc_pred1, desc_pred2, line_points1,
            line_points2, line_indices, epoch)
        # Get descriptor loss weight (dynamic or not)
        w_descriptor = self._weight("w_desc")

        # Update the total loss
        total_loss = (junc_loss * w_junc
                      + heatmap_loss * w_heatmap
                      + descriptor_loss * w_descriptor)
        # The weights are reported as they were used. The former
        # isinstance(..., nn.Parameter) checks never matched, because the
        # result of torch.exp is a plain tensor.
        outputs = {
            "junc_loss": junc_loss,
            "heatmap_loss": heatmap_loss,
            "w_junc": w_junc,
            "w_heatmap": w_heatmap,
            "descriptor_loss": descriptor_loss,
            "w_desc": w_descriptor
        }

        # Compute the regularization loss
        reg_loss = self.loss_funcs["reg_loss"](self.loss_weights)
        total_loss += reg_loss
        outputs.update({
            "reg_loss": reg_loss,
            "total_loss": total_loss
        })

        return outputs
def get_lr_scheduler(lr_decay, lr_decay_cfg, optimizer):
    """ Get the learning rate scheduler according to the config.

    Inputs:
        lr_decay: whether learning rate decay is enabled. Any falsy value
                  (False, None, 0) disables it.
        lr_decay_cfg: dict with the "policy" name and its settings
                      ("gamma" for the "exp" policy), or None.
        optimizer: the optimizer to schedule.
    Outputs:
        the scheduler, or None when no decay is requested.
    Raises:
        ValueError: if the policy is not supported.
    """
    # If no lr_decay is specified => return None
    if not lr_decay or lr_decay_cfg is None:
        return None

    # Exponential decay
    if lr_decay_cfg["policy"] == "exp":
        return torch.optim.lr_scheduler.ExponentialLR(
            optimizer,
            gamma=lr_decay_cfg["gamma"]
        )

    # Unknown policy
    raise ValueError("[Error] Unknown learning rate decay policy!")
def __init__(self, detection_thresh, prob_thresh, grid_size,
             junc_metric_lst=None, heatmap_metric_lst=None,
             pr_metric_lst=None, desc_metric_lst=None):
    """ Select the metrics to compute and build their evaluators.

    A junction, heatmap or PR list left to None selects every supported
    metric of that family. For descriptors, None selects no metric and
    'all' selects every supported one. """
    # Metrics known to this calculator
    self.supported_junc_metrics = ["junc_precision", "junc_precision_nms",
                                   "junc_recall", "junc_recall_nms"]
    self.supported_heatmap_metrics = ["heatmap_precision",
                                      "heatmap_recall"]
    self.supported_pr_metrics = ["junc_pr", "junc_nms_pr"]
    self.supported_desc_metrics = ["matching_score"]

    # Fall back on the full list of a family when none is requested
    self.junc_metric_lst = (self.supported_junc_metrics
                            if junc_metric_lst is None
                            else junc_metric_lst)
    self.heatmap_metric_lst = (self.supported_heatmap_metrics
                               if heatmap_metric_lst is None
                               else heatmap_metric_lst)
    self.pr_metric_lst = (self.supported_pr_metrics
                          if pr_metric_lst is None else pr_metric_lst)

    # Descriptor metrics are opt-in
    if desc_metric_lst is None:
        self.desc_metric_lst = []
    elif desc_metric_lst == 'all':
        self.desc_metric_lst = self.supported_desc_metrics
    else:
        self.desc_metric_lst = desc_metric_lst

    if not self._check_metrics():
        raise ValueError(
            "[Error] Some elements in the metric_lst are invalid.")

    # Evaluator of each metric name
    self.metric_table = {
        "junc_precision": junction_precision(detection_thresh),
        "junc_precision_nms": junction_precision(detection_thresh),
        "junc_recall": junction_recall(detection_thresh),
        "junc_recall_nms": junction_recall(detection_thresh),
        "heatmap_precision": heatmap_precision(prob_thresh),
        "heatmap_recall": heatmap_recall(prob_thresh),
        "junc_pr": junction_pr(),
        "junc_nms_pr": junction_pr(),
        "matching_score": matching_score(grid_size)
    }

    # Every result starts at zero
    self.metric_results = dict.fromkeys(self.metric_table, 0.)
+ + def evaluate(self, junc_pred, junc_pred_nms, junc_gt, heatmap_pred, + heatmap_gt, valid_mask, line_points1=None, line_points2=None, + desc_pred1=None, desc_pred2=None, valid_points=None): + """ Perform evaluation. """ + for metric in self.junc_metric_lst: + # If nms metrics then use nms to compute it. + if "nms" in metric: + junc_pred_input = junc_pred_nms + # Use normal inputs instead. + else: + junc_pred_input = junc_pred + self.metric_results[metric] = self.metric_table[metric]( + junc_pred_input, junc_gt, valid_mask) + + for metric in self.heatmap_metric_lst: + self.metric_results[metric] = self.metric_table[metric]( + heatmap_pred, heatmap_gt, valid_mask) + + for metric in self.pr_metric_lst: + if "nms" in metric: + self.metric_results[metric] = self.metric_table[metric]( + junc_pred_nms, junc_gt, valid_mask) + else: + self.metric_results[metric] = self.metric_table[metric]( + junc_pred, junc_gt, valid_mask) + + for metric in self.desc_metric_lst: + self.metric_results[metric] = self.metric_table[metric]( + line_points1, line_points2, desc_pred1, + desc_pred2, valid_points) + + def _check_metrics(self): + """ Check if all input metrics are valid. 
class AverageMeter(object):
    """ Sample-weighted running average of evaluation metrics and losses.

    Scalar metrics and losses are accumulated as weighted sums in update()
    and divided by the total sample count in average(). Each precision-recall
    entry ("junc_pr", "junc_nms_pr") keeps one 50-element list per statistic;
    average() divides only the "precision" and "recall" lists by the count,
    the tp/tn/fp/fn lists are reported as accumulated.

    The *_metric_lst arguments are only validated against the supported
    names; accumulation always covers every supported metric.
    """
    def __init__(self, junc_metric_lst=None, heatmap_metric_lst=None,
                 is_training=True, desc_metric_lst=None):
        # List supported metrics
        self.supported_junc_metrics = ["junc_precision", "junc_precision_nms",
                                       "junc_recall", "junc_recall_nms"]
        self.supported_heatmap_metrics = ["heatmap_precision",
                                          "heatmap_recall"]
        self.supported_pr_metrics = ["junc_pr", "junc_nms_pr"]
        self.supported_desc_metrics = ["matching_score"]
        # Losses are tracked in both modes; they simply stay at 0 when
        # update() never receives a loss_dict.
        self.supported_loss = [
            "junc_loss", "heatmap_loss", "descriptor_loss", "total_loss"]

        self.is_training = is_training

        # If metric_lst is None, default to use all metrics
        if junc_metric_lst is None:
            self.junc_metric_lst = self.supported_junc_metrics
        else:
            self.junc_metric_lst = junc_metric_lst
        if heatmap_metric_lst is None:
            self.heatmap_metric_lst = self.supported_heatmap_metrics
        else:
            self.heatmap_metric_lst = heatmap_metric_lst
        # For the descriptors, the default None assumes no desc metric at all
        if desc_metric_lst is None:
            self.desc_metric_lst = []
        elif desc_metric_lst == 'all':
            self.desc_metric_lst = self.supported_desc_metrics
        else:
            self.desc_metric_lst = desc_metric_lst

        if not self._check_metrics():
            raise ValueError(
                "[Error] Some elements in the metric_lst are invalid.")

        # Initialize the results
        self.metric_results = {}
        for key in (self.supported_junc_metrics
                    + self.supported_heatmap_metrics
                    + self.supported_loss + self.supported_desc_metrics):
            self.metric_results[key] = 0.
        # update() accumulates in place, so every statistic needs its OWN
        # list; sharing one list would add all six statistics into it.
        num_intervals = 50
        pr_stats = ("tp", "tn", "fp", "fn", "precision", "recall")
        for key in self.supported_pr_metrics:
            self.metric_results[key] = {
                stat: [0] * num_intervals for stat in pr_stats}

        # Initialize total count
        self.count = 0

    def update(self, metrics, loss_dict=None, num_samples=1):
        """ Accumulate one evaluation result.

        Args:
            metrics: Object exposing a metric_results dict keyed by the
                supported metric names.
            loss_dict: Dict of loss values keyed by the supported loss names.
                Required in training mode, optional otherwise.
            num_samples: Weight (number of samples) of this result.

        Raises:
            ValueError: training mode without a loss_dict.
        """
        # loss should be given in the training mode
        if self.is_training and (loss_dict is None):
            raise ValueError(
                "[Error] loss info should be given in the training mode.")

        # update total counts
        self.count += num_samples

        # update all the metrics
        for met in (self.supported_junc_metrics
                    + self.supported_heatmap_metrics
                    + self.supported_desc_metrics):
            self.metric_results[met] += (num_samples
                                         * metrics.metric_results[met])

        # Update all the losses (there are none in evaluation mode)
        if loss_dict is not None:
            for loss, value in loss_dict.items():
                self.metric_results[loss] += num_samples * value

        # Update all pr counts
        for pr_met in self.supported_pr_metrics:
            pr_result = metrics.metric_results[pr_met]
            # A PR metric that was never evaluated is still a scalar
            # placeholder: nothing to accumulate for it.
            if not hasattr(pr_result, "keys"):
                continue
            # Update all tp, tn, fp, fn, precision, and recall.
            for key in pr_result.keys():
                accumulated = self.metric_results[pr_met][key]
                # Update each interval
                for idx in range(len(accumulated)):
                    accumulated[idx] += num_samples * pr_result[key][idx]

    def average(self):
        """ Return the averaged results as a new dict.

        Scalars are divided by the total count. For PR entries only the
        precision and recall lists are divided; tp/tn/fp/fn are copied as
        accumulated.
        """
        results = {}
        for met, value in self.metric_results.items():
            # Plain scalar metrics and losses
            if met not in self.supported_pr_metrics:
                results[met] = value / self.count
                continue
            # Only average precision and recall in pr metrics
            results[met] = {
                "tp": list(value["tp"]),
                "tn": list(value["tn"]),
                "fp": list(value["fp"]),
                "fn": list(value["fn"]),
                "precision": [p / self.count for p in value["precision"]],
                "recall": [r / self.count for r in value["recall"]],
            }

        return results

    def _check_metrics(self):
        """ Check if all input metrics are valid. """
        checks = (
            (self.junc_metric_lst, self.supported_junc_metrics),
            (self.heatmap_metric_lst, self.supported_heatmap_metrics),
            (self.desc_metric_lst, self.supported_desc_metrics),
        )
        return all(metric in supported
                   for requested, supported in checks
                   for metric in requested)
class junction_pr(object):
    """ Junction precision-recall info.

    Evaluates the junction detections at a series of decreasing thresholds
    (self.intervals, from about 0.4 down to 0.4 / num_threshold) and reports
    the confusion counts together with precision and recall per threshold.
    """
    def __init__(self, num_threshold=50):
        self.max = 0.4
        step = self.max / num_threshold
        self.min = step
        self.intervals = np.flip(np.arange(self.min, self.max + step, step))

    def __call__(self, junc_pred_raw, junc_gt, valid_mask):
        """ Compute the PR statistics.

        Args:
            junc_pred_raw: ndarray of junction scores.
            junc_gt: ndarray of 0/1 ground truth labels (squeezed before
                use).
            valid_mask: ndarray mask of the pixels to evaluate (squeezed
                before use).

        Returns:
            Dict with "tp", "tn", "fp", "fn", "precision" and "recall", each
            a 1-D ndarray with one value per threshold. Precision / recall
            are 0 where their denominator is 0.
        """
        tp_lst = []
        fp_lst = []
        tn_lst = []
        fn_lst = []
        precision_lst = []
        recall_lst = []

        valid_mask = valid_mask.squeeze()
        # Threshold-independent terms are computed once
        junc_gt = junc_gt.squeeze()
        gt_pos = (junc_gt == 1).astype(float)
        gt_neg = (junc_gt == 0).astype(float)

        # Iterate through all the thresholds
        for thresh in self.intervals:
            # Convert prediction to discrete detection (builtin int: the
            # np.int alias no longer exists in recent NumPy releases)
            junc_pred = (junc_pred_raw >= thresh).astype(int)
            junc_pred = junc_pred * valid_mask
            pred_pos = (junc_pred == 1).astype(float)
            pred_neg = (junc_pred == 0).astype(float)

            # Compute tp, fp, tn, fn
            tp = np.sum(junc_pred * junc_gt)
            tn = np.sum(pred_neg * gt_neg * valid_mask)
            fp = np.sum(pred_pos * gt_neg * valid_mask)
            fn = np.sum(pred_neg * gt_pos * valid_mask)

            tp_lst.append(tp)
            tn_lst.append(tn)
            fp_lst.append(fp)
            fn_lst.append(fn)
            # Report 0 instead of NaN when there is no detection / no ground
            # truth, so that averaged results stay finite.
            precision_lst.append(tp / (tp + fp) if (tp + fp) > 0 else 0.)
            recall_lst.append(tp / (tp + fn) if (tp + fn) > 0 else 0.)

        return {
            "tp": np.array(tp_lst),
            "tn": np.array(tn_lst),
            "fp": np.array(fp_lst),
            "fn": np.array(fn_lst),
            "precision": np.array(precision_lst),
            "recall": np.array(recall_lst)
        }
def super_nms(prob_predictions, dist_thresh, prob_thresh=0.01, top_k=0):
    """ Non-maximum suppression adapted from SuperPoint.

    Args:
        prob_predictions: ndarray of probabilities, iterated over its first
            (batch) axis; axes 1 and 2 are taken as height and width.
            NOTE(review): the score column is concatenated next to Nx1
            coordinate columns, so each map presumably carries a trailing
            singleton channel (N x H x W x 1) - confirm against the callers.
        dist_thresh: Suppression distance forwarded to nms_fast.
        prob_thresh: Only points with a probability >= prob_thresh are kept
            as candidates.
        top_k: Keep at most top_k points per map. 0 or None keeps them all.

    Returns:
        ndarray of shape [batch, H, W] holding the score of the surviving
        points and 0 everywhere else.
    """
    # Iterate through batch dimension
    im_h = prob_predictions.shape[1]
    im_w = prob_predictions.shape[2]
    output_lst = []
    for i in range(prob_predictions.shape[0]):
        prob_pred = prob_predictions[i, ...]
        # Filter the points using prob_thresh
        coord = np.where(prob_pred >= prob_thresh)  # HW format
        points = np.concatenate((coord[0][..., None], coord[1][..., None]),
                                axis=1)  # HW format

        # Get the probability score
        prob_score = prob_pred[points[:, 0], points[:, 1]]

        # Perform super nms
        # Modify the in_points to xy format (instead of HW format)
        in_points = np.concatenate((coord[1][..., None], coord[0][..., None],
                                    prob_score), axis=1).T
        keep_points_, _ = nms_fast(in_points, im_h, im_w, dist_thresh)
        # Remember to flip outputs back to HW format
        keep_points = np.round(np.flip(keep_points_[:2, :], axis=0).T)
        keep_score = keep_points_[-1, :].T

        # Whether we only keep the topk value. The None check must come
        # first: comparing None with an int raises on Python 3.
        if (top_k is not None) and (top_k > 0):
            k = min(keep_points.shape[0], top_k)
            keep_points = keep_points[:k, :]
            keep_score = keep_score[:k]

        # Re-compose the probability map (builtin int: the np.int alias no
        # longer exists in recent NumPy releases)
        output_map = np.zeros([im_h, im_w])
        output_map[keep_points[:, 0].astype(int),
                   keep_points[:, 1].astype(int)] = keep_score.squeeze()

        output_lst.append(output_map[None, ...])

    return np.concatenate(output_lst, axis=0)
It also assumes points are within image boundary. + + Inputs + in_corners - 3xN numpy array with corners [x_i, y_i, confidence_i]^T. + H - Image height. + W - Image width. + dist_thresh - Distance to suppress, measured as an infinite distance. + Returns + nmsed_corners - 3xN numpy matrix with surviving corners. + nmsed_inds - N length numpy vector with surviving corner indices. + """ + grid = np.zeros((H, W)).astype(int) # Track NMS data. + inds = np.zeros((H, W)).astype(int) # Store indices of points. + # Sort by confidence and round to nearest int. + inds1 = np.argsort(-in_corners[2, :]) + corners = in_corners[:, inds1] + rcorners = corners[:2, :].round().astype(int) # Rounded corners. + # Check for edge case of 0 or 1 corners. + if rcorners.shape[1] == 0: + return np.zeros((3, 0)).astype(int), np.zeros(0).astype(int) + if rcorners.shape[1] == 1: + out = np.vstack((rcorners, in_corners[2])).reshape(3, 1) + return out, np.zeros((1)).astype(int) + # Initialize the grid. + for i, rc in enumerate(rcorners.T): + grid[rcorners[1, i], rcorners[0, i]] = 1 + inds[rcorners[1, i], rcorners[0, i]] = i + # Pad the border of the grid, so that we can NMS points near the border. + pad = dist_thresh + grid = np.pad(grid, ((pad, pad), (pad, pad)), mode='constant') + # Iterate through points, highest to lowest conf, suppress neighborhood. + count = 0 + for i, rc in enumerate(rcorners.T): + # Account for top and left padding. + pt = (rc[0] + pad, rc[1] + pad) + if grid[pt[1], pt[0]] == 1: # If not yet suppressed. + grid[pt[1] - pad:pt[1] + pad + 1, pt[0] - pad:pt[0] + pad + 1] = 0 + grid[pt[1], pt[0]] = -1 + count += 1 + # Get all surviving -1's and return sorted array of remaining corners. 
def get_model(model_cfg=None, loss_weights=None, mode="train"):
    """ Build the network described by the model configuration.

    Args:
        model_cfg: Dict with at least "model_architecture" (only "simple" is
            available), "backbone", "junction_decoder" and "heatmap_decoder".
        loss_weights: Dict of loss weights; entries that are nn.Parameter
            instances are registered on the model. Required when mode is
            "train".
        mode: "train" triggers the loss weight registration.

    Returns:
        The instantiated SOLD2Net.

    Raises:
        ValueError: missing config, unsupported architecture, or missing
            loss weights in training mode.
    """
    # The config is mandatory
    if model_cfg is None:
        raise ValueError("[Error] The model config is required!")

    print("\n\n\t--------Initializing model----------")

    # Only the "simple" architecture (SOLD2Net) is available
    if model_cfg["model_architecture"] not in ("simple",):
        raise ValueError(
            "[Error] The model architecture is not in supported arch!")
    model = SOLD2Net(model_cfg)

    # Training: attach the learnable loss weights to the model
    if mode == "train":
        if loss_weights is None:
            raise ValueError(
                "[Error] the loss weights can not be None in dynamic weighting mode during training.")
        for weight_name, weight in loss_weights.items():
            if not isinstance(weight, nn.Parameter):
                continue
            print("\t [Debug] Adding %s with value %f to model"
                  % (weight_name, weight.item()))
            model.register_parameter(weight_name, weight)

    # Summary of the selected components
    summary = (
        ("\tModel architecture: %s", "model_architecture"),
        ("\tBackbone: %s", "backbone"),
        ("\tJunction decoder: %s", "junction_decoder"),
        ("\tHeatmap decoder: %s", "heatmap_decoder"),
    )
    for template, cfg_key in summary:
        print(template % model_cfg[cfg_key])
    print("\t-------------------------------------")

    return model
""" + if not self.cfg["backbone"] in self.supported_backbone: + raise ValueError( + "[Error] The backbone selection is not supported.") + + # lcnn backbone (stacked hourglass) + if self.cfg["backbone"] == "lcnn": + backbone_cfg = self.cfg["backbone_cfg"] + backbone = HourglassBackbone(**backbone_cfg) + feat_channel = 256 + + elif self.cfg["backbone"] == "superpoint": + backbone_cfg = self.cfg["backbone_cfg"] + backbone = SuperpointBackbone() + feat_channel = 128 + + else: + raise ValueError( + "[Error] The backbone selection is not supported.") + + return backbone, feat_channel + + def get_junction_decoder(self): + """ Get the junction decoder. """ + if (not self.cfg["junction_decoder"] + in self.supported_junction_decoder): + raise ValueError( + "[Error] The junction decoder selection is not supported.") + + # superpoint decoder + if self.cfg["junction_decoder"] == "superpoint_decoder": + decoder = SuperpointDecoder(self.feat_channel, + self.cfg["backbone"]) + else: + raise ValueError( + "[Error] The junction decoder selection is not supported.") + + return decoder + + def get_heatmap_decoder(self): + """ Get the heatmap decoder. 
def weight_init(m):
    """ Weight initialization function.

    Meant to be passed to nn.Module.apply: Conv2d and Linear weights get a
    Xavier-normal init and their biases a standard-normal init; BatchNorm2d
    weights are drawn from N(1, 0.02) and biases are zeroed. Parameters that
    do not exist (bias=False, affine=False) are skipped, and any other
    module type is left untouched.
    """
    # Conv2D
    if isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    # Batchnorm
    elif isinstance(m, nn.BatchNorm2d):
        # affine=False layers carry neither weight nor bias
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    # Linear
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        # bias=False layers have no bias to initialize
        if m.bias is not None:
            init.normal_(m.bias.data)
+ self.conv1a = torch.nn.Conv2d(1, c1, kernel_size=3, + stride=1, padding=1) + self.conv1b = torch.nn.Conv2d(c1, c1, kernel_size=3, + stride=1, padding=1) + self.conv2a = torch.nn.Conv2d(c1, c2, kernel_size=3, + stride=1, padding=1) + self.conv2b = torch.nn.Conv2d(c2, c2, kernel_size=3, + stride=1, padding=1) + self.conv3a = torch.nn.Conv2d(c2, c3, kernel_size=3, + stride=1, padding=1) + self.conv3b = torch.nn.Conv2d(c3, c3, kernel_size=3, + stride=1, padding=1) + self.conv4a = torch.nn.Conv2d(c3, c4, kernel_size=3, + stride=1, padding=1) + self.conv4b = torch.nn.Conv2d(c4, c4, kernel_size=3, + stride=1, padding=1) + + def forward(self, input_images): + # Shared Encoder. + x = self.relu(self.conv1a(input_images)) + x = self.relu(self.conv1b(x)) + x = self.pool(x) + x = self.relu(self.conv2a(x)) + x = self.relu(self.conv2b(x)) + x = self.pool(x) + x = self.relu(self.conv3a(x)) + x = self.relu(self.conv3b(x)) + x = self.pool(x) + x = self.relu(self.conv4a(x)) + x = self.relu(self.conv4b(x)) + + return x diff --git a/third_party/SOLD2/sold2/model/nets/descriptor_decoder.py b/third_party/SOLD2/sold2/model/nets/descriptor_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6ed4306fad764efab2c22ede9cae253c9b17d6c2 --- /dev/null +++ b/third_party/SOLD2/sold2/model/nets/descriptor_decoder.py @@ -0,0 +1,19 @@ +import torch +import torch.nn as nn + + +class SuperpointDescriptor(nn.Module): + """ Descriptor decoder based on the SuperPoint arcihtecture. 
class PixelShuffleDecoder(nn.Module):
    """ Pixel shuffle decoder.

    A stack of conv + batchnorm + ReLU blocks, each followed by a x2 pixel
    shuffle (which divides the channel count by 4), closed by a 1x1
    convolution producing output_channel maps. With num_upsample shuffles
    the spatial resolution grows by 2 ** num_upsample.

    Args:
        input_feat_dim: Number of channels of the input features.
        num_upsample: Number of x2 upsampling steps (2 or 3).
        output_channel: Number of channels of the output.

    Raises:
        ValueError: num_upsample is not supported.
    """
    def __init__(self, input_feat_dim=128, num_upsample=2, output_channel=2):
        super(PixelShuffleDecoder, self).__init__()
        # Get channel parameters
        self.channel_conf = self.get_channel_conf(num_upsample)

        # Define the pixel shuffle
        self.pixshuffle = nn.PixelShuffle(2)

        # Process the feature
        blocks = []
        # The input block
        blocks.append(
            nn.Sequential(
                nn.Conv2d(input_feat_dim, self.channel_conf[0],
                          kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(self.channel_conf[0]),
                nn.ReLU(inplace=True)
            ))

        # Intermediate block
        for channel in self.channel_conf[1:-1]:
            blocks.append(
                nn.Sequential(
                    nn.Conv2d(channel, channel, kernel_size=3,
                              stride=1, padding=1),
                    nn.BatchNorm2d(channel),
                    nn.ReLU(inplace=True)
                ))

        # Output block
        blocks.append(
            nn.Conv2d(self.channel_conf[-1], output_channel,
                      kernel_size=1, stride=1, padding=0)
        )
        self.conv_block_lst = nn.ModuleList(blocks)

    # Get num of channels based on number of upsampling.
    def get_channel_conf(self, num_upsample):
        """ Return the channel count before each pixel shuffle plus the
        input channel count of the output layer. """
        if num_upsample == 2:
            return [256, 64, 16]
        elif num_upsample == 3:
            return [256, 64, 16, 4]
        # Fail here with a clear message instead of returning None and
        # crashing later on a subscript of None.
        raise ValueError(
            "[Error] Unsupported number of upsampling: %s (expected 2 or 3)."
            % (num_upsample,))

    def forward(self, input_features):
        # Iterate til output block
        out = input_features
        for block in self.conv_block_lst[:-1]:
            out = block(out)
            out = self.pixshuffle(out)

        # Output layer
        out = self.conv_block_lst[-1](out)

        return out
class MultitaskHead(nn.Module):
    """ Set of small convolutional heads applied to the same features.

    Three heads (3x3 conv, ReLU, 1x1 conv) with 2, 1 and 2 output channels
    are evaluated on the input and concatenated along the channel axis,
    giving 5 output channels in total.

    Args:
        input_channels: Number of channels of the input features.
        num_class: Total number of output channels; must equal the sum of
            the head sizes (5).

    Raises:
        ValueError: num_class does not match the head sizes.
    """
    def __init__(self, input_channels, num_class):
        super(MultitaskHead, self).__init__()

        head_size = [[2], [1], [2]]
        head_channels = sum(head_size, [])
        # Validate before building anything, with a real exception: an
        # assert would vanish when Python runs with -O.
        if num_class != sum(head_channels):
            raise ValueError(
                "[Error] num_class (%s) should be %d, the sum of the head "
                "sizes." % (num_class, sum(head_channels)))

        # Width of the hidden layer of every head
        m = int(input_channels / 4)
        heads = []
        for output_channels in head_channels:
            heads.append(
                nn.Sequential(
                    nn.Conv2d(input_channels, m, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(m, output_channels, kernel_size=1),
                )
            )
        self.heads = nn.ModuleList(heads)

    def forward(self, x):
        # Concatenate the output of all heads along the channel axis
        return torch.cat([head(x) for head in self.heads], dim=1)
class Hourglass(nn.Module):
    """ Single recursive hourglass module.

    Every level owns three residual stacks (skip branch, pre-recursion and
    post-recursion); the innermost level (index 0) owns a fourth one that
    replaces the recursive call.
    """
    def __init__(self, block, num_blocks, planes, depth):
        super(Hourglass, self).__init__()
        self.depth = depth
        self.block = block
        self.hg = self._make_hour_glass(block, num_blocks, planes, depth)

    def _make_residual(self, block, num_blocks, planes):
        # Chain of num_blocks blocks keeping planes * expansion channels
        width = planes * block.expansion
        return nn.Sequential(
            *[block(width, planes) for _ in range(num_blocks)])

    def _make_hour_glass(self, block, num_blocks, planes, depth):
        levels = []
        for level in range(depth):
            # The innermost level gets one extra stack (the bottleneck)
            num_stacks = 4 if level == 0 else 3
            stacks = [self._make_residual(block, num_blocks, planes)
                      for _ in range(num_stacks)]
            levels.append(nn.ModuleList(stacks))
        return nn.ModuleList(levels)

    def _hour_glass_forward(self, n, x):
        stacks = self.hg[n - 1]

        # Skip branch at the current resolution
        up1 = stacks[0](x)

        # Down branch at half the resolution
        low1 = stacks[1](F.max_pool2d(x, 2, stride=2))
        if n > 1:
            low2 = self._hour_glass_forward(n - 1, low1)
        else:
            low2 = stacks[3](low1)
        low3 = stacks[2](low2)

        # Resize to the skip branch size (rather than a fixed x2 factor)
        # before merging the two branches
        up2 = F.interpolate(low3, size=up1.shape[2:])
        return up1 + up2

    def forward(self, x):
        return self._hour_glass_forward(self.depth, x)
super(HourglassNet, self).__init__() + + self.inplanes = 64 + self.num_feats = 128 + self.num_stacks = num_stacks + self.conv1 = nn.Conv2d(input_channels, self.inplanes, kernel_size=7, + stride=2, padding=3) + self.bn1 = nn.BatchNorm2d(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.layer1 = self._make_residual(block, self.inplanes, 1) + self.layer2 = self._make_residual(block, self.inplanes, 1) + self.layer3 = self._make_residual(block, self.num_feats, 1) + self.maxpool = nn.MaxPool2d(2, stride=2) + + # build hourglass modules + ch = self.num_feats * block.expansion + # vpts = [] + hg, res, fc, score, fc_, score_ = [], [], [], [], [], [] + for i in range(num_stacks): + hg.append(Hourglass(block, num_blocks, self.num_feats, depth)) + res.append(self._make_residual(block, self.num_feats, num_blocks)) + fc.append(self._make_fc(ch, ch)) + score.append(head(ch, num_classes)) + # vpts.append(VptsHead(ch)) + # vpts.append(nn.Linear(ch, 9)) + # score.append(nn.Conv2d(ch, num_classes, kernel_size=1)) + # score[i].bias.data[0] += 4.6 + # score[i].bias.data[2] += 4.6 + if i < num_stacks - 1: + fc_.append(nn.Conv2d(ch, ch, kernel_size=1)) + score_.append(nn.Conv2d(num_classes, ch, kernel_size=1)) + self.hg = nn.ModuleList(hg) + self.res = nn.ModuleList(res) + self.fc = nn.ModuleList(fc) + self.score = nn.ModuleList(score) + # self.vpts = nn.ModuleList(vpts) + self.fc_ = nn.ModuleList(fc_) + self.score_ = nn.ModuleList(score_) + + def _make_residual(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + ) + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _make_fc(self, inplanes, 
def hg(**kwargs):
    """Build an HourglassNet made of Bottleneck2D blocks.

    Required keyword arguments: ``depth``, ``num_stacks``, ``num_blocks``,
    ``num_classes`` and ``input_channels``. The optional ``head`` is a
    callable ``(in_channels, out_channels) -> nn.Module``; it defaults to a
    1x1 convolution.
    """
    model = HourglassNet(
        Bottleneck2D,
        # The class is spelled nn.Conv2d; nn.Conv2D does not exist and made
        # the default head fail as soon as it was called.
        head=kwargs.get("head",
                        lambda c_in, c_out: nn.Conv2d(c_in, c_out, 1)),
        depth=kwargs["depth"],
        num_stacks=kwargs["num_stacks"],
        num_blocks=kwargs["num_blocks"],
        num_classes=kwargs["num_classes"],
        input_channels=kwargs["input_channels"]
    )
    return model
def convert_raw_exported_predictions(input_data, grid_size=8,
                                     detect_thresh=1/65, topk=300):
    """ Bring exported junction / heatmap predictions into one format.

    Two input layouts are recognised:
      * homography adaptation exports, identified by the keys
        "junc_prob_mean" and "heatmap_prob_mean"; junctions are obtained by
        running super_nms on the mean junction probabilities and the valid
        mask is all ones;
      * single predictions, which already hold "junc_pred_nms",
        "heatmap_pred" and "valid_mask".

    Arguments:
        input_data: the raw data (dict) decoded from the hdf5 dataset.
        grid_size, detect_thresh, topk: forwarded to super_nms
            (homography adaptation layout only).
    Returns:
        dict with the entries:
            junctions_pred: Nx2 ndarray of junction indices taken from the
                first two axes of the junction map.
            heatmap_pred: ndarray with the predicted heatmap.
            valid_mask: ndarray with the valid mask.
    """
    def as_nx2(index_arrays):
        # Keep only the indices along the first two axes, one row per point.
        return np.stack([index_arrays[0], index_arrays[1]], axis=-1)

    is_homography_export = (("junc_prob_mean" in input_data.keys())
                            and ("heatmap_prob_mean" in input_data.keys()))

    if is_homography_export:
        # super_nms is fed a leading singleton dimension here.
        batched_prob = input_data["junc_prob_mean"][None, ...]
        nms_map = super_nms(batched_prob, grid_size, detect_thresh, topk)
        junc_points_pred = as_nx2(np.where(nms_map.squeeze()))

        heatmap_pred = input_data["heatmap_prob_mean"].squeeze()
        valid_mask = np.ones(heatmap_pred.shape, dtype=np.int32)
    else:
        junc_points_pred = as_nx2(np.where(input_data["junc_pred_nms"]))

        heatmap_pred = input_data["heatmap_pred"]
        valid_mask = input_data["valid_mask"]

    return {
        "junctions_pred": junc_points_pred,
        "heatmap_pred": heatmap_pred,
        "valid_mask": valid_mask
    }
if __name__ == "__main__":
    # Usage: <input_dataset> <output_dataset> <config>
    # Both dataset names are resolved relative to cfg.export_dataroot.
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dataset", type=str,
                        help="Name of the exported dataset.")
    parser.add_argument("output_dataset", type=str,
                        help="Name of the output dataset.")
    parser.add_argument("config", type=str,
                        help="Path to the model config.")
    args = parser.parse_args()

    # Define the path to the input exported dataset
    exported_dataset_path = os.path.join(cfg.export_dataroot,
                                         args.input_dataset)
    if not os.path.exists(exported_dataset_path):
        raise ValueError("Missing input dataset: " + exported_dataset_path)

    # Define the output path for the results
    output_dataset_path = os.path.join(cfg.export_dataroot,
                                       args.output_dataset)

    device = torch.device("cuda")

    # Read the config file
    if not os.path.exists(args.config):
        raise ValueError("Missing config file: " + args.config)
    with open(args.config, "r") as f:
        config = yaml.safe_load(f)
    model_cfg = config["model_cfg"]
    line_detector_cfg = config["line_detector_cfg"]

    # Initialize the line detection module
    line_detector = LineSegmentDetectionModule(**line_detector_cfg)

    # Both files are opened as context managers so the input handle is
    # released too, including when a sample raises mid-way.
    with h5py.File(exported_dataset_path, "r") as exported_dataset, \
            h5py.File(output_dataset_path, "w") as output_dataset:
        for output_key in tqdm(list(exported_dataset.keys()), ascii=True):
            # Get the data
            data = parse_h5_data(exported_dataset[output_key])

            # Preprocess the data
            converted_data = convert_raw_exported_predictions(
                data, grid_size=model_cfg["grid_size"],
                detect_thresh=model_cfg["detection_thresh"])

            # The heatmap returned by the detector is not stored.
            line_map_pred, junctions_pred, _ = line_detector.detect(
                converted_data["junctions_pred"],
                converted_data["heatmap_pred"], device=device)
            if isinstance(line_map_pred, torch.Tensor):
                line_map_pred = line_map_pred.cpu().numpy()
            if isinstance(junctions_pred, torch.Tensor):
                junctions_pred = junctions_pred.cpu().numpy()

            # Record it to the h5 dataset, one group per sample
            f_group = output_dataset.create_group(output_key)
            for name, array in (("junctions", junctions_pred),
                                ("line_map", line_map_pred)):
                f_group.create_dataset(name, data=array,
                                       compression="gzip")
def customized_collate_fn(batch):
    """ Collate samples, batching fixed-size fields and listing the others.

    "image", "junction_map", "heatmap" and "valid_mask" go through the
    default collate function; "junctions" and "line_map" are returned as
    plain per-sample lists.
    """
    batch_keys = ["image", "junction_map", "heatmap", "valid_mask"]
    list_keys = ["junctions", "line_map"]

    outputs = {
        key: torch_loader.default_collate([sample[key] for sample in batch])
        for key in batch_keys
    }
    outputs.update(
        {key: [sample[key] for sample in batch] for key in list_keys})

    return outputs


def restore_weights(model, state_dict, strict=True):
    """ Restore weights in compatible mode.

    The state dict is first loaded as is. If that raises, it is loaded
    non-strictly and every key missing from the model is filled, by
    position, with the checkpoint's unexpected keys (skipping keys that
    contain "tracked").

    Arguments:
        model: module to load the weights into (modified in place).
        state_dict: mapping from parameter names to tensors.
        strict: forwarded to the first load_state_dict attempt.
    Returns:
        The same model instance.
    Raises:
        RuntimeError: if the fallback has fewer usable checkpoint keys than
            missing model keys, or if load_state_dict itself still fails.
    """
    # Try to directly load state dict
    try:
        model.load_state_dict(state_dict, strict=strict)
    # load_state_dict reports key / shape mismatches as RuntimeError; other
    # exceptions (e.g. KeyboardInterrupt) must not trigger the fallback.
    except RuntimeError:
        incompatible = model.load_state_dict(state_dict, strict=False)

        # Missing keys are those in model but not in state_dict
        missing_keys = incompatible.missing_keys
        # Unexpected keys are those in state_dict but not in model
        candidate_keys = [key for key in incompatible.unexpected_keys
                          if "tracked" not in key]
        if len(candidate_keys) < len(missing_keys):
            raise RuntimeError(
                "Cannot restore weights: %d model keys are missing but only "
                "%d unmatched checkpoint keys are available."
                % (len(missing_keys), len(candidate_keys)))

        # Load mismatched keys manually, pairing them by position
        model_dict = model.state_dict()
        for model_key, ckpt_key in zip(missing_keys, candidate_keys):
            model_dict[model_key] = state_dict[ckpt_key]
        model.load_state_dict(model_dict)

    return model
def train_net(args, dataset_cfg, model_cfg, output_path):
    """ Main training function.

    Builds the datasets, model, optimizer and optional scheduler, then for
    every epoch runs training and validation, steps the scheduler and saves
    a checkpoint.

    Arguments:
        args: parsed options; reads `resume`, `resume_path`,
            `checkpoint_name`, `pretrained` and `pretrained_path`.
        dataset_cfg: dataset configuration forwarded to get_dataset.
        model_cfg: model configuration (dict). "weighting_policy" is set to
            "static" in place when it is absent.
        output_path: directory receiving the "log" folder and checkpoints.
    """
    # Add some version compatibility check
    if model_cfg.get("weighting_policy") is None:
        # Default to static
        model_cfg["weighting_policy"] = "static"

    # Get the train, val, test config
    train_cfg = model_cfg["train"]
    test_cfg = model_cfg["test"]

    # Create train and test dataset
    print("\t Initializing dataset...")
    train_dataset, train_collate_fn = get_dataset("train", dataset_cfg)
    test_dataset, test_collate_fn = get_dataset("test", dataset_cfg)

    # Create the dataloader
    # NOTE(review): the training loader hard-codes 8 workers while the test
    # loader reads "num_workers" from the config.
    train_loader = DataLoader(train_dataset,
                              batch_size=train_cfg["batch_size"],
                              num_workers=8,
                              shuffle=True, pin_memory=True,
                              collate_fn=train_collate_fn)
    test_loader = DataLoader(test_dataset,
                             batch_size=test_cfg.get("batch_size", 1),
                             num_workers=test_cfg.get("num_workers", 1),
                             shuffle=False, pin_memory=False,
                             collate_fn=test_collate_fn)
    print("\t Successfully intialized dataloaders.")


    # Get the loss function and weight first
    loss_funcs, loss_weights = get_loss_and_weights(model_cfg)

    # If resume.
    if args.resume:
        # Create model and load the state dict
        checkpoint = get_latest_checkpoint(args.resume_path,
                                           args.checkpoint_name)
        model = get_model(model_cfg, loss_weights)
        model = restore_weights(model, checkpoint["model_state_dict"])
        model = model.cuda()
        optimizer = torch.optim.Adam(
            [{"params": model.parameters(),
              "initial_lr": model_cfg["learning_rate"]}],
            model_cfg["learning_rate"],
            amsgrad=True)
        # The optimizer state is restored before the scheduler is created.
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        # Optionally get the learning rate scheduler
        scheduler = get_lr_scheduler(
            lr_decay=model_cfg.get("lr_decay", False),
            lr_decay_cfg=model_cfg.get("lr_decay_cfg", None),
            optimizer=optimizer)
        # If we start to use learning rate scheduler from the middle
        if ((scheduler is not None)
            and (checkpoint.get("scheduler_state_dict", None) is not None)):
            scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
        # Continue with the epoch following the one stored in the checkpoint
        start_epoch = checkpoint["epoch"] + 1
    # Initialize all the components.
    else:
        # Create model and optimizer
        model = get_model(model_cfg, loss_weights)
        # Optionally get the pretrained weights
        if args.pretrained:
            print("\t [Debug] Loading pretrained weights...")
            checkpoint = get_latest_checkpoint(args.pretrained_path,
                                               args.checkpoint_name)
            # If auto weighting restore from non-auto weighting
            model = restore_weights(model, checkpoint["model_state_dict"],
                                    strict=False)
            print("\t [Debug] Finished loading pretrained weights!")

        model = model.cuda()
        optimizer = torch.optim.Adam(
            [{"params": model.parameters(),
              "initial_lr": model_cfg["learning_rate"]}],
            model_cfg["learning_rate"],
            amsgrad=True)
        # Optionally get the learning rate scheduler
        scheduler = get_lr_scheduler(
            lr_decay=model_cfg.get("lr_decay", False),
            lr_decay_cfg=model_cfg.get("lr_decay_cfg", None),
            optimizer=optimizer)
        start_epoch = 0

    print("\t Successfully initialized model")

    # Define the total loss
    policy = model_cfg.get("weighting_policy", "static")
    loss_func = TotalLoss(loss_funcs, loss_weights, policy).cuda()
    # With a descriptor decoder the metrics take their grid size from the
    # descriptor loss config and also evaluate descriptor metrics.
    if "descriptor_decoder" in model_cfg:
        metric_func = Metrics(model_cfg["detection_thresh"],
                              model_cfg["prob_thresh"],
                              model_cfg["descriptor_loss_cfg"]["grid_size"],
                              desc_metric_lst='all')
    else:
        metric_func = Metrics(model_cfg["detection_thresh"],
                              model_cfg["prob_thresh"],
                              model_cfg["grid_size"])

    # Define the summary writer
    logdir = os.path.join(output_path, "log")
    writer = SummaryWriter(logdir=logdir)

    # Start the training loop
    for epoch in range(start_epoch, model_cfg["epochs"]):
        # Record the learning rate
        current_lr = optimizer.state_dict()["param_groups"][0]["lr"]
        writer.add_scalar("LR/lr", current_lr, epoch)

        # Train for one epochs
        print("\n\n================== Training ====================")
        train_single_epoch(
            model=model,
            model_cfg=model_cfg,
            optimizer=optimizer,
            loss_func=loss_func,
            metric_func=metric_func,
            train_loader=train_loader,
            writer=writer,
            epoch=epoch)

        # Do the validation (run on the test loader)
        print("\n\n================== Validation ==================")
        validate(
            model=model,
            model_cfg=model_cfg,
            loss_func=loss_func,
            metric_func=metric_func,
            val_loader=test_loader,
            writer=writer,
            epoch=epoch)

        # Update the scheduler
        if scheduler is not None:
            scheduler.step()

        # Save checkpoints
        file_name = os.path.join(output_path,
                                 "checkpoint-epoch%03d-end.tar"%(epoch))
        print("[Info] Saving checkpoint %s ..." % file_name)
        save_dict = {
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "model_cfg": model_cfg}
        if scheduler is not None:
            save_dict.update({"scheduler_state_dict": scheduler.state_dict()})
        torch.save(save_dict, file_name)

        # Remove the outdated checkpoints
        remove_old_checkpoints(output_path, model_cfg.get("max_ckpt", 15))
def train_single_epoch(model, model_cfg, optimizer, loss_func, metric_func,
                       train_loader, writer, epoch):
    """ Train for one epoch.

    Every batch is moved to the GPU, passed through the model and used for
    one optimizer step. Metrics are evaluated only on iterations that are a
    multiple of "disp_freq" or "summary_freq"; those iterations print the
    progress and / or write tensorboard summaries.

    Arguments:
        model: network to train; called on image batches.
        model_cfg: model configuration (dict); reads "disp_freq",
            "summary_freq", "grid_size", "detection_thresh" and "epochs".
        optimizer: optimizer stepped once per batch.
        loss_func: loss module; its `compute_descriptors` flag selects the
            paired (ref / target) data layout.
        metric_func: metric evaluator exposing `evaluate` and
            `metric_results`.
        train_loader: iterable of batches (dicts of tensors).
        writer: summary writer handed to record_train_summaries.
        epoch: index of the current epoch.
    """
    # Switch the model to training mode
    model.train()

    # Initialize the average meter
    compute_descriptors = loss_func.compute_descriptors
    if compute_descriptors:
        average_meter = AverageMeter(is_training=True, desc_metric_lst='all')
    else:
        average_meter = AverageMeter(is_training=True)

    # The training loop
    for idx, data in enumerate(train_loader):
        if compute_descriptors:
            # Paired samples: a reference view and a target view
            junc_map = data["ref_junction_map"].cuda()
            junc_map2 = data["target_junction_map"].cuda()
            heatmap = data["ref_heatmap"].cuda()
            heatmap2 = data["target_heatmap"].cuda()
            line_points = data["ref_line_points"].cuda()
            line_points2 = data["target_line_points"].cuda()
            line_indices = data["ref_line_indices"].cuda()
            valid_mask = data["ref_valid_mask"].cuda()
            valid_mask2 = data["target_valid_mask"].cuda()
            input_images = data["ref_image"].cuda()
            input_images2 = data["target_image"].cuda()

            # Run the forward pass
            outputs = model(input_images)
            outputs2 = model(input_images2)

            # Compute losses
            losses = loss_func.forward_descriptors(
                outputs["junctions"], outputs2["junctions"],
                junc_map, junc_map2, outputs["heatmap"], outputs2["heatmap"],
                heatmap, heatmap2, line_points, line_points2,
                line_indices, outputs['descriptors'], outputs2['descriptors'],
                epoch, valid_mask, valid_mask2)
        else:
            junc_map = data["junction_map"].cuda()
            heatmap = data["heatmap"].cuda()
            valid_mask = data["valid_mask"].cuda()
            input_images = data["image"].cuda()

            # Run the forward pass
            outputs = model(input_images)

            # Compute losses
            losses = loss_func(
                outputs["junctions"], junc_map,
                outputs["heatmap"], heatmap,
                valid_mask)

        total_loss = losses["total_loss"]

        # Update the model
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Compute the global step
        global_step = epoch * len(train_loader) + idx
        ############## Measure the metric error #########################
        # Only do this when needed
        if (((idx % model_cfg["disp_freq"]) == 0)
            or ((idx % model_cfg["summary_freq"]) == 0)):
            junc_np = convert_junc_predictions(
                outputs["junctions"], model_cfg["grid_size"],
                model_cfg["detection_thresh"], 300)
            # Move the channel axis last (NCHW -> NHWC)
            junc_map_np = junc_map.cpu().numpy().transpose(0, 2, 3, 1)

            # Always fetch only one channel (compatible with L1, L2, and CE)
            if outputs["heatmap"].shape[1] == 2:
                heatmap_np = softmax(outputs["heatmap"].detach(),
                                     dim=1).cpu().numpy()
                heatmap_np = heatmap_np.transpose(0, 2, 3, 1)[:, :, :, 1:]
            else:
                heatmap_np = torch.sigmoid(outputs["heatmap"].detach())
                heatmap_np = heatmap_np.cpu().numpy().transpose(0, 2, 3, 1)

            heatmap_gt_np = heatmap.cpu().numpy().transpose(0, 2, 3, 1)
            valid_mask_np = valid_mask.cpu().numpy().transpose(0, 2, 3, 1)

            # Evaluate metric results
            if compute_descriptors:
                metric_func.evaluate(
                    junc_np["junc_pred"], junc_np["junc_pred_nms"],
                    junc_map_np, heatmap_np, heatmap_gt_np, valid_mask_np,
                    line_points, line_points2, outputs["descriptors"],
                    outputs2["descriptors"], line_indices)
            else:
                metric_func.evaluate(
                    junc_np["junc_pred"], junc_np["junc_pred_nms"],
                    junc_map_np, heatmap_np, heatmap_gt_np, valid_mask_np)
            # Update average meter
            junc_loss = losses["junc_loss"].item()
            heatmap_loss = losses["heatmap_loss"].item()
            loss_dict = {
                "junc_loss": junc_loss,
                "heatmap_loss": heatmap_loss,
                "total_loss": total_loss.item()}
            if compute_descriptors:
                descriptor_loss = losses["descriptor_loss"].item()
                loss_dict["descriptor_loss"] = losses["descriptor_loss"].item()

            average_meter.update(metric_func, loss_dict, num_samples=junc_map.shape[0])

        # Display the progress
        if (idx % model_cfg["disp_freq"]) == 0:
            results = metric_func.metric_results
            average = average_meter.average()
            # Get gpu memory usage in GB
            gpu_mem_usage = torch.cuda.max_memory_allocated() / (1024 ** 3)
            # Each value is printed as "current (running average)"
            if compute_descriptors:
                print("Epoch [%d / %d] Iter [%d / %d] loss=%.4f (%.4f), junc_loss=%.4f (%.4f), heatmap_loss=%.4f (%.4f), descriptor_loss=%.4f (%.4f), gpu_mem=%.4fGB"
                      % (epoch, model_cfg["epochs"], idx, len(train_loader),
                         total_loss.item(), average["total_loss"], junc_loss,
                         average["junc_loss"], heatmap_loss,
                         average["heatmap_loss"], descriptor_loss,
                         average["descriptor_loss"], gpu_mem_usage))
            else:
                print("Epoch [%d / %d] Iter [%d / %d] loss=%.4f (%.4f), junc_loss=%.4f (%.4f), heatmap_loss=%.4f (%.4f), gpu_mem=%.4fGB"
                      % (epoch, model_cfg["epochs"], idx, len(train_loader),
                         total_loss.item(), average["total_loss"],
                         junc_loss, average["junc_loss"], heatmap_loss,
                         average["heatmap_loss"], gpu_mem_usage))
            print("\t Junction precision=%.4f (%.4f) / recall=%.4f (%.4f)"
                  % (results["junc_precision"], average["junc_precision"],
                     results["junc_recall"], average["junc_recall"]))
            print("\t Junction nms precision=%.4f (%.4f) / recall=%.4f (%.4f)"
                  % (results["junc_precision_nms"],
                     average["junc_precision_nms"],
                     results["junc_recall_nms"], average["junc_recall_nms"]))
            print("\t Heatmap precision=%.4f (%.4f) / recall=%.4f (%.4f)"
                  %(results["heatmap_precision"],
                    average["heatmap_precision"],
                    results["heatmap_recall"], average["heatmap_recall"]))
            if compute_descriptors:
                print("\t Descriptors matching score=%.4f (%.4f)"
                      %(results["matching_score"], average["matching_score"]))

        # Record summaries
        if (idx % model_cfg["summary_freq"]) == 0:
            results = metric_func.metric_results
            average = average_meter.average()
            # Add the shared losses
            scalar_summaries = {
                "junc_loss": junc_loss,
                "heatmap_loss": heatmap_loss,
                "total_loss": total_loss.detach().cpu().numpy(),
                "metrics": results,
                "average": average}
            # Add descriptor terms
            if compute_descriptors:
                scalar_summaries["descriptor_loss"] = descriptor_loss
                scalar_summaries["w_desc"] = losses["w_desc"]

            # Add weighting terms (even for static terms)
            scalar_summaries["w_junc"] = losses["w_junc"]
            scalar_summaries["w_heatmap"] = losses["w_heatmap"]
            scalar_summaries["reg_loss"] = losses["reg_loss"].item()

            # Only the first few samples of the batch are logged as images
            num_images = 3
            junc_pred_binary = (junc_np["junc_pred"][:num_images, ...]
                                > model_cfg["detection_thresh"])
            junc_pred_nms_binary = (junc_np["junc_pred_nms"][:num_images, ...]
                                    > model_cfg["detection_thresh"])
            image_summaries = {
                "image": input_images.cpu().numpy()[:num_images, ...],
                "valid_mask": valid_mask_np[:num_images, ...],
                "junc_map_pred": junc_pred_binary,
                "junc_map_pred_nms": junc_pred_nms_binary,
                "junc_map_gt": junc_map_np[:num_images, ...],
                "junc_prob_map": junc_np["junc_prob"][:num_images, ...],
                "heatmap_pred": heatmap_np[:num_images, ...],
                "heatmap_gt": heatmap_gt_np[:num_images, ...]}
            # Record the training summary
            record_train_summaries(
                writer, global_step, scalars=scalar_summaries,
                images=image_summaries)
def validate(model, model_cfg, loss_func, metric_func, val_loader, writer, epoch):
    """ Validation.

    Runs the model on every batch of `val_loader` without gradient tracking,
    evaluates losses and metrics on each batch, prints the progress every
    "disp_freq" iterations and finally writes the averaged results with
    record_test_summaries.

    Arguments:
        model: network to evaluate.
        model_cfg: model configuration (dict); reads "disp_freq",
            "grid_size" and "detection_thresh".
        loss_func: loss module; its `compute_descriptors` flag selects the
            paired (ref / target) data layout.
        metric_func: metric evaluator exposing `evaluate` and
            `metric_results`.
        val_loader: iterable of batches (dicts of tensors).
        writer: summary writer handed to record_test_summaries.
        epoch: index of the current epoch, used as the summary step.
    """
    # Switch the model to eval mode
    model.eval()

    # Initialize the average meter
    # NOTE(review): the meter is created with is_training=True although this
    # is the validation pass - confirm this is intended.
    compute_descriptors = loss_func.compute_descriptors
    if compute_descriptors:
        average_meter = AverageMeter(is_training=True, desc_metric_lst='all')
    else:
        average_meter = AverageMeter(is_training=True)

    # The validation loop
    for idx, data in enumerate(val_loader):
        if compute_descriptors:
            # Paired samples: a reference view and a target view
            junc_map = data["ref_junction_map"].cuda()
            junc_map2 = data["target_junction_map"].cuda()
            heatmap = data["ref_heatmap"].cuda()
            heatmap2 = data["target_heatmap"].cuda()
            line_points = data["ref_line_points"].cuda()
            line_points2 = data["target_line_points"].cuda()
            line_indices = data["ref_line_indices"].cuda()
            valid_mask = data["ref_valid_mask"].cuda()
            valid_mask2 = data["target_valid_mask"].cuda()
            input_images = data["ref_image"].cuda()
            input_images2 = data["target_image"].cuda()

            # Run the forward pass
            with torch.no_grad():
                outputs = model(input_images)
                outputs2 = model(input_images2)

                # Compute losses
                losses = loss_func.forward_descriptors(
                    outputs["junctions"], outputs2["junctions"],
                    junc_map, junc_map2, outputs["heatmap"],
                    outputs2["heatmap"], heatmap, heatmap2, line_points,
                    line_points2, line_indices, outputs['descriptors'],
                    outputs2['descriptors'], epoch, valid_mask, valid_mask2)
        else:
            junc_map = data["junction_map"].cuda()
            heatmap = data["heatmap"].cuda()
            valid_mask = data["valid_mask"].cuda()
            input_images = data["image"].cuda()

            # Run the forward pass
            with torch.no_grad():
                outputs = model(input_images)

                # Compute losses
                losses = loss_func(
                    outputs["junctions"], junc_map,
                    outputs["heatmap"], heatmap,
                    valid_mask)
        total_loss = losses["total_loss"]

        ############## Measure the metric error #########################
        junc_np = convert_junc_predictions(
            outputs["junctions"], model_cfg["grid_size"],
            model_cfg["detection_thresh"], 300)
        # Move the channel axis last (NCHW -> NHWC)
        junc_map_np = junc_map.cpu().numpy().transpose(0, 2, 3, 1)
        # Always fetch only one channel (compatible with L1, L2, and CE)
        if outputs["heatmap"].shape[1] == 2:
            heatmap_np = softmax(outputs["heatmap"].detach(),
                                 dim=1).cpu().numpy().transpose(0, 2, 3, 1)
            heatmap_np = heatmap_np[:, :, :, 1:]
        else:
            heatmap_np = torch.sigmoid(outputs["heatmap"].detach())
            heatmap_np = heatmap_np.cpu().numpy().transpose(0, 2, 3, 1)


        heatmap_gt_np = heatmap.cpu().numpy().transpose(0, 2, 3, 1)
        valid_mask_np = valid_mask.cpu().numpy().transpose(0, 2, 3, 1)

        # Evaluate metric results
        if compute_descriptors:
            metric_func.evaluate(
                junc_np["junc_pred"], junc_np["junc_pred_nms"],
                junc_map_np, heatmap_np, heatmap_gt_np, valid_mask_np,
                line_points, line_points2, outputs["descriptors"],
                outputs2["descriptors"], line_indices)
        else:
            metric_func.evaluate(
                junc_np["junc_pred"], junc_np["junc_pred_nms"], junc_map_np,
                heatmap_np, heatmap_gt_np, valid_mask_np)
        # Update average meter
        junc_loss = losses["junc_loss"].item()
        heatmap_loss = losses["heatmap_loss"].item()
        loss_dict = {
            "junc_loss": junc_loss,
            "heatmap_loss": heatmap_loss,
            "total_loss": total_loss.item()}
        if compute_descriptors:
            descriptor_loss = losses["descriptor_loss"].item()
            loss_dict["descriptor_loss"] = losses["descriptor_loss"].item()
        average_meter.update(metric_func, loss_dict, num_samples=junc_map.shape[0])

        # Display the progress
        if (idx % model_cfg["disp_freq"]) == 0:
            results = metric_func.metric_results
            average = average_meter.average()
            # Each value is printed as "current (running average)"
            if compute_descriptors:
                print("Iter [%d / %d] loss=%.4f (%.4f), junc_loss=%.4f (%.4f), heatmap_loss=%.4f (%.4f), descriptor_loss=%.4f (%.4f)"
                      % (idx, len(val_loader),
                         total_loss.item(), average["total_loss"],
                         junc_loss, average["junc_loss"],
                         heatmap_loss, average["heatmap_loss"],
                         descriptor_loss, average["descriptor_loss"]))
            else:
                print("Iter [%d / %d] loss=%.4f (%.4f), junc_loss=%.4f (%.4f), heatmap_loss=%.4f (%.4f)"
                      % (idx, len(val_loader),
                         total_loss.item(), average["total_loss"],
                         junc_loss, average["junc_loss"],
                         heatmap_loss, average["heatmap_loss"]))
            print("\t Junction precision=%.4f (%.4f) / recall=%.4f (%.4f)"
                  % (results["junc_precision"], average["junc_precision"],
                     results["junc_recall"], average["junc_recall"]))
            print("\t Junction nms precision=%.4f (%.4f) / recall=%.4f (%.4f)"
                  % (results["junc_precision_nms"],
                     average["junc_precision_nms"],
                     results["junc_recall_nms"], average["junc_recall_nms"]))
            print("\t Heatmap precision=%.4f (%.4f) / recall=%.4f (%.4f)"
                  % (results["heatmap_precision"],
                     average["heatmap_precision"],
                     results["heatmap_recall"], average["heatmap_recall"]))
            if compute_descriptors:
                print("\t Descriptors matching score=%.4f (%.4f)"
                      %(results["matching_score"], average["matching_score"]))

    # Record summaries
    average = average_meter.average()
    scalar_summaries = {"average": average}
    # Record the validation summary
    record_test_summaries(writer, epoch, scalar_summaries)
def convert_junc_predictions(predictions, grid_size,
                             detect_thresh=1/65, topk=300):
    """ Turn raw junction outputs into numpy arrays for evaluation.

    Arguments:
        predictions: torch tensor with the raw junction outputs; a softmax
            is taken over dimension 1.
        grid_size: upscale factor for pixel_shuffle, also given to super_nms.
        detect_thresh, topk: forwarded to super_nms.
    Returns:
        dict with:
            junc_pred: pixel-shuffled probabilities, last axis squeezed.
            junc_pred_nms: output of super_nms on those probabilities.
            junc_prob: per-cell sum of all channels except the last one.
    """
    # Convert to probability outputs first
    probs = softmax(predictions.detach(), dim=1).cpu()
    # The last channel is left out of everything below.
    kept_channels = probs[:, :-1, :, :]

    channels_last = probs.numpy().transpose(0, 2, 3, 1)
    junc_prob_np = np.sum(channels_last[:, :, :, :-1], axis=-1)

    dense_pred = pixel_shuffle(kept_channels, grid_size)
    junc_pred_np = dense_pred.cpu().numpy().transpose(0, 2, 3, 1)
    # NMS runs on the array that still has its trailing channel axis.
    junc_pred_np_nms = super_nms(junc_pred_np, grid_size, detect_thresh, topk)

    return {"junc_pred": junc_pred_np.squeeze(-1),
            "junc_pred_nms": junc_pred_np_nms,
            "junc_prob": junc_prob_np}
def record_train_summaries(writer, global_step, scalars, images):
    """ Write the training scalars and images for one step.

    Arguments:
        writer: summary writer exposing add_scalar and add_images.
        global_step: step index attached to every entry.
        scalars: dict with the current losses, optional "reg_loss",
            "descriptor_loss" and "w_*" weights, plus the nested dicts
            "metrics" (current values) and "average" (running averages).
        images: dict of numpy arrays ("image", "valid_mask", "heatmap_gt",
            "heatmap_pred", "junc_map_pred", "junc_map_pred_nms",
            "junc_map_gt", "junc_prob_map").
    """
    results = scalars["metrics"]
    average = scalars["average"]

    loss_names = ("junc_loss", "heatmap_loss", "total_loss")
    metric_names = ("junc_precision", "junc_precision_nms",
                    "junc_recall", "junc_recall_nms",
                    "heatmap_precision", "heatmap_recall")

    # GPU memory usage in GB
    writer.add_scalar("GPU/GPU_memory_usage",
                      torch.cuda.max_memory_allocated() / (1024 ** 3),
                      global_step)

    # Current losses
    for name in loss_names:
        writer.add_scalar("Train_loss/" + name, scalars[name], global_step)
    if "reg_loss" in scalars.keys():
        writer.add_scalar("Train_loss/reg_loss", scalars["reg_loss"],
                          global_step)
    if "descriptor_loss" in scalars.keys():
        writer.add_scalar("Train_loss/descriptor_loss",
                          scalars["descriptor_loss"], global_step)
        writer.add_scalar("Train_loss_average/descriptor_loss",
                          average["descriptor_loss"], global_step)

    # Loss weights
    for key in scalars.keys():
        if "w_" in key:
            writer.add_scalar("Train_weight/" + key, scalars[key],
                              global_step)

    # Smoothed losses
    for name in loss_names:
        writer.add_scalar("Train_loss_average/" + name, average[name],
                          global_step)
    if "descriptor_loss" in average.keys():
        writer.add_scalar("Train_loss_average/descriptor_loss",
                          average["descriptor_loss"], global_step)

    # Current metrics, then their running averages
    for prefix, values in (("Train_metrics/", results),
                           ("Train_metrics_average/", average)):
        for name in metric_names:
            writer.add_scalar(prefix + name, values[name], global_step)
        if "matching_score" in values.keys():
            writer.add_scalar(prefix + "matching_score",
                              values["matching_score"], global_step)

    # Input images and valid masks
    image_tensor = convert_image(images["image"], 1)
    valid_masks = convert_image(images["valid_mask"], -1)
    writer.add_images("Train/images", image_tensor, global_step,
                      dataformats="NCHW")
    writer.add_images("Train/valid_map", valid_masks, global_step,
                      dataformats="NHWC")

    # Heatmaps
    for name in ("heatmap_gt", "heatmap_pred"):
        writer.add_images("Train/" + name, convert_image(images[name], -1),
                          global_step, dataformats="NHWC")

    # Junction overlays (scaled from 0-255 down to 0-1)
    junc_plots = plot_junction_detection(
        image_tensor, images["junc_map_pred"],
        images["junc_map_pred_nms"], images["junc_map_gt"])
    for name in ("junc_gt", "junc_pred", "junc_pred_nms"):
        writer.add_images("Train/" + name,
                          junc_plots[name + "_plot"] / 255.,
                          global_step, dataformats="NHWC")
    writer.add_images(
        "Train/junc_prob_map",
        convert_image(images["junc_prob_map"][..., None], axis=-1),
        global_step, dataformats="NHWC")
def record_test_summaries(writer, epoch, scalars):
    """ Write the averaged validation losses and metrics for one epoch.

    Arguments:
        writer: summary writer exposing add_scalar.
        epoch: epoch index used as the step of every entry.
        scalars: dict whose "average" entry maps names to averaged values;
            "descriptor_loss" and "matching_score" are optional.
    """
    average = scalars["average"]

    # Average loss
    for name in ("junc_loss", "heatmap_loss", "total_loss"):
        writer.add_scalar("Val_loss/" + name, average[name], epoch)
    if "descriptor_loss" in average.keys():
        writer.add_scalar("Val_loss/descriptor_loss",
                          average["descriptor_loss"], epoch)

    # Average metrics
    for name in ("junc_precision", "junc_precision_nms",
                 "junc_recall", "junc_recall_nms",
                 "heatmap_precision", "heatmap_recall"):
        writer.add_scalar("Val_metrics/" + name, average[name], epoch)
    if "matching_score" in average.keys():
        writer.add_scalar("Val_metrics/matching_score",
                          average["matching_score"], epoch)
""" + # Get the batch_size + batch_size = image_tensor.shape[0] + + # Process through batch dimension + junc_pred_lst = [] + junc_pred_nms_lst = [] + junc_gt_lst = [] + for i in range(batch_size): + # Convert image to 255 uint8 + image = (image_tensor[i, :, :, :] + * 255.).astype(np.uint8).transpose(1,2,0) + + # Plot groundtruth onto image + junc_gt = junc_gt_tensor[i, ...] + coord_gt = np.where(junc_gt.squeeze() > 0) + points_gt = np.concatenate((coord_gt[0][..., None], + coord_gt[1][..., None]), + axis=1) + plot_gt = image.copy() + for id in range(points_gt.shape[0]): + cv2.circle(plot_gt, tuple(np.flip(points_gt[id, :])), 3, + color=(255, 0, 0), thickness=2) + junc_gt_lst.append(plot_gt[None, ...]) + + # Plot junc_pred + junc_pred = junc_pred_tensor[i, ...] + coord_pred = np.where(junc_pred > 0) + points_pred = np.concatenate((coord_pred[0][..., None], + coord_pred[1][..., None]), + axis=1) + plot_pred = image.copy() + for id in range(points_pred.shape[0]): + cv2.circle(plot_pred, tuple(np.flip(points_pred[id, :])), 3, + color=(0, 255, 0), thickness=2) + junc_pred_lst.append(plot_pred[None, ...]) + + # Plot junc_pred_nms + junc_pred_nms = junc_pred_nms_tensor[i, ...] 
+ coord_pred_nms = np.where(junc_pred_nms > 0) + points_pred_nms = np.concatenate((coord_pred_nms[0][..., None], + coord_pred_nms[1][..., None]), + axis=1) + plot_pred_nms = image.copy() + for id in range(points_pred_nms.shape[0]): + cv2.circle(plot_pred_nms, tuple(np.flip(points_pred_nms[id, :])), + 3, color=(0, 255, 0), thickness=2) + junc_pred_nms_lst.append(plot_pred_nms[None, ...]) + + return {"junc_gt_plot": np.concatenate(junc_gt_lst, axis=0), + "junc_pred_plot": np.concatenate(junc_pred_lst, axis=0), + "junc_pred_nms_plot": np.concatenate(junc_pred_nms_lst, axis=0)} diff --git a/third_party/TopicFM/.github/workflows/sync.yml b/third_party/TopicFM/.github/workflows/sync.yml new file mode 100644 index 0000000000000000000000000000000000000000..efbf881c64bdeac6916473e4391e23e87af5b69d --- /dev/null +++ b/third_party/TopicFM/.github/workflows/sync.yml @@ -0,0 +1,39 @@ +name: Upstream Sync + +permissions: + contents: write + +on: + schedule: + - cron: "0 0 * * *" # every day + workflow_dispatch: + +jobs: + sync_latest_from_upstream: + name: Sync latest commits from upstream repo + runs-on: ubuntu-latest + if: ${{ github.event.repository.fork }} + + steps: + # Step 1: run a standard checkout action + - name: Checkout target repo + uses: actions/checkout@v3 + + # Step 2: run the sync action + - name: Sync upstream changes + id: sync + uses: aormsby/Fork-Sync-With-Upstream-action@v3.4 + with: + upstream_sync_repo: TruongKhang/TopicFM + upstream_sync_branch: main + target_sync_branch: main + target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set + + # Set test_mode true to run tests instead of the true action!! + test_mode: false + + - name: Sync check + if: failure() + run: | + echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]." 
+ exit 1 diff --git a/third_party/TopicFM/.gitignore b/third_party/TopicFM/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7ed07d081a940b02ce92ceb6aa8fb66925e32224 --- /dev/null +++ b/third_party/TopicFM/.gitignore @@ -0,0 +1,130 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +.idea/ diff --git a/third_party/TopicFM/.gitmodules b/third_party/TopicFM/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..313403ddfa5b06a038a75467352c3821a19a78c4 --- /dev/null +++ b/third_party/TopicFM/.gitmodules @@ -0,0 +1,3 @@ +# [submodule "third_party/loftr"] +# path = third_party/loftr +# url = https://github.com/zju3dv/git diff --git a/third_party/TopicFM/LICENSE b/third_party/TopicFM/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/third_party/TopicFM/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/third_party/TopicFM/README.md b/third_party/TopicFM/README.md new file mode 100644 index 0000000000000000000000000000000000000000..be60b38c8c265deeef5d7827d9fae4f65e842868 --- /dev/null +++ b/third_party/TopicFM/README.md @@ -0,0 +1,130 @@ +# Submodule used in [hloc](https://github.com/Vincentqyw/Hierarchical-Localization) toolbox + +# [AAAI-23] TopicFM: Robust and Interpretable Topic-Assisted Feature Matching + +Our method first inferred the latent topics (high-level context information) for each image and then use them to explicitly learn robust feature representation for the matching task. Please check out the details in [our paper](https://arxiv.org/abs/2207.00328) + +![Alt Text](demo/topicfm.gif) + +**Overall Architecture:** + +![Alt Text](demo/architecture_v4.png) + +## TODO List + +- [x] Release training and evaluation code on MegaDepth and ScanNet +- [x] Evaluation on HPatches, Aachen Day&Night, and InLoc +- [x] Evaluation for Image Matching Challenge + +## Requirements + +All experiments in this paper are implemented on the Ubuntu environment +with a NVIDIA driver of at least 430.64 and CUDA 10.1. 
+ +First, create a virtual environment with Anaconda as follows: + + conda create -n topicfm python=3.8 + conda activate topicfm + conda install pytorch==1.8.1 torchvision==0.9.1 cudatoolkit=10.1 -c pytorch + pip install -r requirements.txt + # using pip to install any missing packages + +## Data Preparation + +The proposed method is trained on the MegaDepth dataset and evaluated on the MegaDepth test, ScanNet, HPatches, Aachen Day and Night (v1.1), and InLoc datasets. +All these datasets are large, so we cannot include them in this code. +The following descriptions help download these datasets. + +### MegaDepth + +This dataset is used for both training and evaluation (Li and Snavely 2018). +To use this dataset with our code, please follow the [instructions of LoFTR](https://github.com/zju3dv/LoFTR/blob/master/docs/TRAINING.md) (Sun et al. 2021) + +### ScanNet +We only use 1500 image pairs of ScanNet (Dai et al. 2017) for evaluation. +Please download and prepare [test data](https://drive.google.com/drive/folders/1DOcOPZb3-5cWxLqn256AhwUVjBPifhuf) of ScanNet +provided by [LoFTR](https://github.com/zju3dv/LoFTR/blob/master/docs/TRAINING.md). + +## Training + +To train our model, we recommend using as many GPUs as possible, each with at least 12GB of memory. +In our settings, we train on 4 GPUs, each of which is 12GB. +Please set up your hardware environment in `scripts/reproduce_train/outdoor.sh`. +And then run this command to start training. + + bash scripts/reproduce_train/outdoor.sh + + We then provide the trained model in `pretrained/model_best.ckpt` +## Evaluation + +### MegaDepth (relative pose estimation) + + bash scripts/reproduce_test/outdoor.sh + +### ScanNet (relative pose estimation) + + bash scripts/reproduce_test/indoor.sh + +### HPatches, Aachen v1.1, InLoc + +To evaluate on these datasets, we integrated our code into the image-matching-toolbox provided by Zhou et al. (2021). 
+The updated code is available [here](https://github.com/TruongKhang/image-matching-toolbox). +After cloning this code, please follow instructions of image-matching-toolbox to install all required packages and prepare data for evaluation. + +Then, run these commands to perform evaluation: (note that all hyperparameter settings are in `configs/topicfm.yml`) + +**HPatches (homography estimation)** + + python -m immatch.eval_hpatches --gpu 0 --config 'topicfm' --task 'both' --h_solver 'cv' --ransac_thres 3 --root_dir . --odir 'outputs/hpatches' + +**Aachen Day-Night v1.1 (visual localization)** + + python -m immatch.eval_aachen --gpu 0 --config 'topicfm' --colmap --benchmark_name 'aachen_v1.1' + +**InLoc (visual localization)** + + python -m immatch.eval_inloc --gpu 0 --config 'topicfm' + +### Image Matching Challenge 2022 (IMC-2022) +IMC-2022 was held on [Kaggle](https://www.kaggle.com/competitions/image-matching-challenge-2022/overview). +Most high ranking methods were achieved by using an ensemble method which combines the matching results of +various state-of-the-art methods including LoFTR, SuperPoint+SuperGlue, MatchFormer, or QuadTree Attention. + +In this evaluation, we only submit the results produced by our method (TopicFM) alone. Please refer to [this notebook](https://www.kaggle.com/code/khangtg09121995/topicfm-eval). +This table compares our results with the other methods such as LoFTR (ref. [here](https://www.kaggle.com/code/mcwema/imc-2022-kornia-loftr-score-plateau-0-726)), +SP+SuperGlue (ref. [here](https://www.kaggle.com/code/yufei12/superglue-baseline)). + +| | Public Score | Private Score | +|----------------|--------------|---------------| +| SP + SuperGlue | 0.678 | 0.677 | +| LoFTR | 0.726 | 0.736 | +| TopicFM (ours) | **0.804** | **0.811** | + + +### Runtime comparison + +The runtime reported in the paper is measured by averaging runtime of 1500 image pairs of the ScanNet evaluation dataset. 
+The image size can be changed at `configs/data/scannet_test_1500.py` + + python visualization.py --method --dataset_name "scannet" --measure_time --no_viz + # note that method_name is in ["topicfm", "loftr"] + +To measure time for LoFTR, please download the LoFTR's code as follows: + + git submodule update --init + # download pretrained models + mkdir third_party/loftr/pretrained + gdown --id 1M-VD35-qdB5Iw-AtbDBCKC7hPolFW9UY -O third_party/loftr/pretrained/outdoor_ds.ckpt + +## Citations +If you find this work useful, please cite this: + + @article{giang2022topicfm, + title={TopicFM: Robust and Interpretable Topic-assisted Feature Matching}, + author={Giang, Khang Truong and Song, Soohwan and Jo, Sungho}, + journal={arXiv preprint arXiv:2207.00328}, + year={2022} + } + +## Acknowledgement +This code is built based on [LoFTR](https://github.com/zju3dv/LoFTR). We thank the authors for their useful source code. diff --git a/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz new file mode 100644 index 0000000000000000000000000000000000000000..f4b1b79acff510aab203a8b604955dd89edffc45 --- /dev/null +++ b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d441df1d380b2ed34449b944d9f13127e695542fa275098d38a6298835672f22 +size 231253 diff --git a/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz new file mode 100644 index 0000000000000000000000000000000000000000..2b2de7bda22dc6e78e01e3f56ba1dafd46c1c581 --- /dev/null +++ b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f34b5231d04a84d84378c671dd26854869663b5eafeae2ebaf624a279325139 +size 231253 diff --git 
a/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz new file mode 100644 index 0000000000000000000000000000000000000000..5680f3747296a4d565dc9a95c719dce0472c7e63 --- /dev/null +++ b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba46e6b9ec291fc7271eb9741d5c75ca04b83d3d7281e049815de9cb9024f4d9 +size 272610 diff --git a/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz new file mode 100644 index 0000000000000000000000000000000000000000..79f5a30dd0a8cd8b60263fa721a4e5ef8394801c --- /dev/null +++ b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4465da174b96deba61e5328886e4f2e687d34b890efca69e0c838736f8ae12 +size 272610 diff --git a/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz new file mode 100644 index 0000000000000000000000000000000000000000..0c1315698e217f3be3dbcc85be72fcd16477b9dd --- /dev/null +++ b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684ae10f03001917c3ca0d12d441f372ce3c7e6637bd1277a3cda60df4207fe9 +size 272610 diff --git a/third_party/TopicFM/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt new file mode 100644 index 0000000000000000000000000000000000000000..85a2e16722183d3fe209a9ceb60c43d8315c32cf --- /dev/null +++ b/third_party/TopicFM/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt @@ -0,0 +1,5 @@ +0022_0.1_0.3 +0015_0.1_0.3 +0015_0.3_0.5 
+0022_0.3_0.5 +0022_0.5_0.7 \ No newline at end of file diff --git a/third_party/TopicFM/assets/scannet_sample_images/scene0711_00_frame-001680.jpg b/third_party/TopicFM/assets/scannet_sample_images/scene0711_00_frame-001680.jpg new file mode 100644 index 0000000000000000000000000000000000000000..352d91fbf3d08d2aef8bf75377a302419e1d5c59 --- /dev/null +++ b/third_party/TopicFM/assets/scannet_sample_images/scene0711_00_frame-001680.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373126837fbd4c6f202dbade2e87fd310df5a98ad493069beed4809bc78c6d07 +size 190290 diff --git a/third_party/TopicFM/assets/scannet_sample_images/scene0711_00_frame-001995.jpg b/third_party/TopicFM/assets/scannet_sample_images/scene0711_00_frame-001995.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bef3f16c0403c0884cfea5423ba8ed7972f964c0 --- /dev/null +++ b/third_party/TopicFM/assets/scannet_sample_images/scene0711_00_frame-001995.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6955a68c1f053682660c0c1f9c6ed84b76dc617199d966860c2e11edf0a0f782 +size 188834 diff --git a/third_party/TopicFM/assets/scannet_test_1500/intrinsics.npz b/third_party/TopicFM/assets/scannet_test_1500/intrinsics.npz new file mode 100644 index 0000000000000000000000000000000000000000..bcba553dab19a57fcea336e69abd77ca9e87bce1 --- /dev/null +++ b/third_party/TopicFM/assets/scannet_test_1500/intrinsics.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ac102c69e2e4e2f0ab9c0d64f4da2b815e0901630768bdfde30080ced3605c +size 23922 diff --git a/third_party/TopicFM/assets/scannet_test_1500/scannet_test.txt b/third_party/TopicFM/assets/scannet_test_1500/scannet_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..45cc7ffd9ca2fb5750ce3e545f58410674d7ab9d --- /dev/null +++ b/third_party/TopicFM/assets/scannet_test_1500/scannet_test.txt @@ -0,0 +1 @@ +test.npz \ No newline at end of file diff --git 
a/third_party/TopicFM/assets/scannet_test_1500/statistics.json b/third_party/TopicFM/assets/scannet_test_1500/statistics.json new file mode 100644 index 0000000000000000000000000000000000000000..0e3ff582943ac12711da7a392a55f0a42d3b4449 --- /dev/null +++ b/third_party/TopicFM/assets/scannet_test_1500/statistics.json @@ -0,0 +1,102 @@ +{ + "scene0707_00": 15, + "scene0708_00": 15, + "scene0709_00": 15, + "scene0710_00": 15, + "scene0711_00": 15, + "scene0712_00": 15, + "scene0713_00": 15, + "scene0714_00": 15, + "scene0715_00": 15, + "scene0716_00": 15, + "scene0717_00": 15, + "scene0718_00": 15, + "scene0719_00": 15, + "scene0720_00": 15, + "scene0721_00": 15, + "scene0722_00": 15, + "scene0723_00": 15, + "scene0724_00": 15, + "scene0725_00": 15, + "scene0726_00": 15, + "scene0727_00": 15, + "scene0728_00": 15, + "scene0729_00": 15, + "scene0730_00": 15, + "scene0731_00": 15, + "scene0732_00": 15, + "scene0733_00": 15, + "scene0734_00": 15, + "scene0735_00": 15, + "scene0736_00": 15, + "scene0737_00": 15, + "scene0738_00": 15, + "scene0739_00": 15, + "scene0740_00": 15, + "scene0741_00": 15, + "scene0742_00": 15, + "scene0743_00": 15, + "scene0744_00": 15, + "scene0745_00": 15, + "scene0746_00": 15, + "scene0747_00": 15, + "scene0748_00": 15, + "scene0749_00": 15, + "scene0750_00": 15, + "scene0751_00": 15, + "scene0752_00": 15, + "scene0753_00": 15, + "scene0754_00": 15, + "scene0755_00": 15, + "scene0756_00": 15, + "scene0757_00": 15, + "scene0758_00": 15, + "scene0759_00": 15, + "scene0760_00": 15, + "scene0761_00": 15, + "scene0762_00": 15, + "scene0763_00": 15, + "scene0764_00": 15, + "scene0765_00": 15, + "scene0766_00": 15, + "scene0767_00": 15, + "scene0768_00": 15, + "scene0769_00": 15, + "scene0770_00": 15, + "scene0771_00": 15, + "scene0772_00": 15, + "scene0773_00": 15, + "scene0774_00": 15, + "scene0775_00": 15, + "scene0776_00": 15, + "scene0777_00": 15, + "scene0778_00": 15, + "scene0779_00": 15, + "scene0780_00": 15, + "scene0781_00": 15, + 
"scene0782_00": 15, + "scene0783_00": 15, + "scene0784_00": 15, + "scene0785_00": 15, + "scene0786_00": 15, + "scene0787_00": 15, + "scene0788_00": 15, + "scene0789_00": 15, + "scene0790_00": 15, + "scene0791_00": 15, + "scene0792_00": 15, + "scene0793_00": 15, + "scene0794_00": 15, + "scene0795_00": 15, + "scene0796_00": 15, + "scene0797_00": 15, + "scene0798_00": 15, + "scene0799_00": 15, + "scene0800_00": 15, + "scene0801_00": 15, + "scene0802_00": 15, + "scene0803_00": 15, + "scene0804_00": 15, + "scene0805_00": 15, + "scene0806_00": 15 +} \ No newline at end of file diff --git a/third_party/TopicFM/assets/scannet_test_1500/test.npz b/third_party/TopicFM/assets/scannet_test_1500/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..d2011c2913a9ae1311d18b08c089bd999ba3ad30 --- /dev/null +++ b/third_party/TopicFM/assets/scannet_test_1500/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b982b9c1f762e7d31af552ecc1ccf1a6add013197f74ec69c84a6deaa6f580ad +size 71687 diff --git a/third_party/TopicFM/configs/data/__init__.py b/third_party/TopicFM/configs/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/TopicFM/configs/data/base.py b/third_party/TopicFM/configs/data/base.py new file mode 100644 index 0000000000000000000000000000000000000000..6cab7e67019a6fee2657c1a28609c8aca5b2a1d8 --- /dev/null +++ b/third_party/TopicFM/configs/data/base.py @@ -0,0 +1,37 @@ +""" +The data config will be the last one merged into the main config. +Setups in data configs will override all existed setups! 
"""
Base data config shared by the dataset-specific config modules.

The data config will be the last one merged into the main config.
Settings in data configs will override all existing settings!
"""

from yacs.config import CfgNode as CN
# Root node with two sub-nodes; dataset-specific modules import `cfg`
# (defined at the bottom) and assign the fields they need.
_CN = CN()
_CN.DATASET = CN()
_CN.TRAINER = CN()

# training data config (all unset here; filled in by dataset-specific configs)
_CN.DATASET.TRAIN_DATA_ROOT = None
_CN.DATASET.TRAIN_POSE_ROOT = None
_CN.DATASET.TRAIN_NPZ_ROOT = None
_CN.DATASET.TRAIN_LIST_PATH = None
_CN.DATASET.TRAIN_INTRINSIC_PATH = None
# validation set config
_CN.DATASET.VAL_DATA_ROOT = None
_CN.DATASET.VAL_POSE_ROOT = None
_CN.DATASET.VAL_NPZ_ROOT = None
_CN.DATASET.VAL_LIST_PATH = None
_CN.DATASET.VAL_INTRINSIC_PATH = None

# testing data config
_CN.DATASET.TEST_DATA_SOURCE = None
_CN.DATASET.TEST_DATA_ROOT = None
_CN.DATASET.TEST_POSE_ROOT = None
_CN.DATASET.TEST_NPZ_ROOT = None
_CN.DATASET.TEST_LIST_PATH = None
_CN.DATASET.TEST_INTRINSIC_PATH = None
_CN.DATASET.TEST_IMGSIZE = None

# dataset config: the only fields that get a non-None default in this module
_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4
_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0  # for both test and val

# Public alias: this is the name the dataset-specific config modules import.
cfg = _CN
configs.data.base import cfg + + +TRAIN_BASE_PATH = "data/megadepth/index" +cfg.DATASET.TRAINVAL_DATA_SOURCE = "MegaDepth" +cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train" +cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7" +cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt" +cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0 + +TEST_BASE_PATH = "data/megadepth/index" +cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" +cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" +cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}/scene_info_val_1500" +cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/trainvaltest_list/val_list.txt" +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val + +# 368 scenes in total for MegaDepth +# (with difficulty balanced (further split each scene to 3 sub-scenes)) +cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100 + +cfg.DATASET.MGDPT_IMG_RESIZE = 800 # for training on 11GB mem GPUs diff --git a/third_party/TopicFM/configs/data/scannet_test_1500.py b/third_party/TopicFM/configs/data/scannet_test_1500.py new file mode 100644 index 0000000000000000000000000000000000000000..ce3b0846b61c567b053d12fb636982ce02e21a5c --- /dev/null +++ b/third_party/TopicFM/configs/data/scannet_test_1500.py @@ -0,0 +1,12 @@ +from configs.data.base import cfg + +TEST_BASE_PATH = "assets/scannet_test_1500" + +cfg.DATASET.TEST_DATA_SOURCE = "ScanNet" +cfg.DATASET.TEST_DATA_ROOT = "data/scannet/test" +cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}" +cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/scannet_test.txt" +cfg.DATASET.TEST_INTRINSIC_PATH = f"{TEST_BASE_PATH}/intrinsics.npz" +cfg.DATASET.TEST_IMGSIZE = (640, 480) + +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 diff --git a/third_party/TopicFM/configs/model/indoor/debug/.gitignore b/third_party/TopicFM/configs/model/indoor/debug/.gitignore new file mode 100644 index 
0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/TopicFM/configs/model/indoor/debug/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/TopicFM/configs/model/indoor/model_cfg_test.py b/third_party/TopicFM/configs/model/indoor/model_cfg_test.py new file mode 100644 index 0000000000000000000000000000000000000000..8e8872d3b79de529aa375127ea5beb7e81d9d5b1 --- /dev/null +++ b/third_party/TopicFM/configs/model/indoor/model_cfg_test.py @@ -0,0 +1,4 @@ +from src.config.default import _CN as cfg + +cfg.MODEL.COARSE.N_SAMPLES = 5 +cfg.MODEL.MATCH_COARSE.THR = 0.3 diff --git a/third_party/TopicFM/configs/model/outdoor/debug/.gitignore b/third_party/TopicFM/configs/model/outdoor/debug/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/TopicFM/configs/model/outdoor/debug/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/TopicFM/configs/model/outdoor/model_cfg_test.py b/third_party/TopicFM/configs/model/outdoor/model_cfg_test.py new file mode 100644 index 0000000000000000000000000000000000000000..692497457c2a7b9ad823f94546e38f15732ca632 --- /dev/null +++ b/third_party/TopicFM/configs/model/outdoor/model_cfg_test.py @@ -0,0 +1,4 @@ +from src.config.default import _CN as cfg + +cfg.MODEL.COARSE.N_SAMPLES = 10 +cfg.MODEL.MATCH_COARSE.THR = 0.2 diff --git a/third_party/TopicFM/configs/model/outdoor/model_ds.py b/third_party/TopicFM/configs/model/outdoor/model_ds.py new file mode 100644 index 0000000000000000000000000000000000000000..2c090edbfbdcd66cea225c39af6f62da8feb50b9 --- /dev/null +++ b/third_party/TopicFM/configs/model/outdoor/model_ds.py @@ -0,0 +1,16 @@ +from src.config.default import _CN as cfg + +cfg.MODEL.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' +cfg.MODEL.COARSE.N_SAMPLES = 8 + +cfg.TRAINER.CANONICAL_LR = 1e-2 +cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs 
+cfg.TRAINER.WARMUP_RATIO = 0.1 +cfg.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12, 16, 20, 24, 28] + +# pose estimation +cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 + +cfg.TRAINER.OPTIMIZER = "adamw" +cfg.TRAINER.ADAMW_DECAY = 0.1 +cfg.MODEL.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 diff --git a/third_party/TopicFM/data/megadepth/index/.gitignore b/third_party/TopicFM/data/megadepth/index/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/TopicFM/data/megadepth/index/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/TopicFM/data/megadepth/test/.gitignore b/third_party/TopicFM/data/megadepth/test/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/TopicFM/data/megadepth/test/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/TopicFM/data/megadepth/train/.gitignore b/third_party/TopicFM/data/megadepth/train/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/TopicFM/data/megadepth/train/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/TopicFM/data/scannet/index/.gitignore b/third_party/TopicFM/data/scannet/index/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/TopicFM/data/scannet/index/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/TopicFM/data/scannet/intrinsics.npz b/third_party/TopicFM/data/scannet/intrinsics.npz new file mode 100644 index 0000000000000000000000000000000000000000..4d1fe65c8834ebc44b12870d36edbf57db216f08 --- 
/dev/null +++ b/third_party/TopicFM/data/scannet/intrinsics.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46db15f5ed21f34998613d07110e577205736a57eb5dfd04db96c189958d79f6 +size 343135 diff --git a/third_party/TopicFM/demo/architecture_v4.png b/third_party/TopicFM/demo/architecture_v4.png new file mode 100644 index 0000000000000000000000000000000000000000..8c99e3064caa21d208b393e61a2c1697a9902935 --- /dev/null +++ b/third_party/TopicFM/demo/architecture_v4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001c17a032f5ad1da63dc2dd4f63f4c74bb340356beeeabf3772bd18723f2c3e +size 472773 diff --git a/third_party/TopicFM/demo/demo_aachen.txt b/third_party/TopicFM/demo/demo_aachen.txt new file mode 100644 index 0000000000000000000000000000000000000000..3dd483efd19e2b6d3498672c16a9eb1434628ae4 --- /dev/null +++ b/third_party/TopicFM/demo/demo_aachen.txt @@ -0,0 +1,50 @@ +query/night/nexus5x/IMG_20161227_173141.jpg +db/4273.jpg +db/1967.jpg +db/1966.jpg +db/4247.jpg +db/1050.jpg +db/4240.jpg +db/4246.jpg +db/1785.jpg +db/1051.jpg +db/4218.jpg +db/1052.jpg +db/4244.jpg +db/4239.jpg +db/4272.jpg +db/4242.jpg +db/4274.jpg +db/1112.jpg +db/2493.jpg +db/4224.jpg +db/4213.jpg +db/4248.jpg +db/1114.jpg +db/1777.jpg +db/1049.jpg +db/4226.jpg +db/1048.jpg +db/4236.jpg +db/4225.jpg +db/4216.jpg +db/4243.jpg +db/4227.jpg +db/4241.jpg +db/388.jpg +db/4267.jpg +db/4238.jpg +db/4271.jpg +db/2021.jpg +db/1116.jpg +db/1759.jpg +db/1113.jpg +db/1040.jpg +sequences/nexus4_sequences/sequence_4/aachen_nexus4_seq4_0200.png +db/4223.jpg +db/4231.jpg +sequences/nexus4_sequences/sequence_4/aachen_nexus4_seq4_0196.png +db/4228.jpg +db/1760.jpg +db/1057.jpg +db/4211.jpg \ No newline at end of file diff --git a/third_party/TopicFM/demo/topicfm.gif b/third_party/TopicFM/demo/topicfm.gif new file mode 100644 index 0000000000000000000000000000000000000000..7b86a556a022ea7e120c9ba6ed648bcc0cca162e --- /dev/null +++ 
def feat_net_flops(feat_net, config, input):
    """Build a feature network and measure the cost of one forward pass.

    Args:
        feat_net: callable that builds the network from ``config``.
        config: configuration object handed to ``feat_net``.
        input: input passed both to the FLOP analysis and to the forward pass.

    Returns:
        ``(feat_c, gflops)``: the first of the two outputs of the network and
        the total FLOP count divided by 1e9.
    """
    net = feat_net(config)
    net.eval()
    analysis = FlopCountAnalysis(net, input)
    # Only the first output is needed; the second one is discarded.
    feat_c, _ = net(input)
    gflops = analysis.total() / 1e9
    return feat_c, gflops


def coarse_model_flops(coarse_model, config, inputs):
    """Build a coarse matching model and return its FLOP count in GFLOPs.

    Args:
        coarse_model: callable that builds the model from ``config``.
        config: configuration object handed to ``coarse_model``.
        inputs: inputs forwarded to the FLOP analysis.
    """
    net = coarse_model(config)
    net.eval()
    analysis = FlopCountAnalysis(net, inputs)
    return analysis.total() / 1e9


if __name__ == '__main__':
    sample_paths = (
        "assets/scannet_sample_images/scene0711_00_frame-001680.jpg",
        "assets/scannet_sample_images/scene0711_00_frame-001995.jpg",
    )
    # Load both sample images and add a leading batch dimension.
    img0, img1 = (read_scannet_gray(path).unsqueeze(0) for path in sample_paths)

    # LoFTR
    loftr_conf = dict(default_cfg)
    loftr_fpn_conf = loftr_conf["resnetfpn"]
    feat_c0, loftr_gflops0 = feat_net_flops(loftr_featnet, loftr_fpn_conf, img0)
    feat_c1, loftr_gflops1 = feat_net_flops(loftr_featnet, loftr_fpn_conf, img1)
    print("FLOPs of feature extraction in LoFTR: {} GFLOPs".format((loftr_gflops0 + loftr_gflops1)/2))
    # Flatten the spatial grid into a token sequence for the coarse model.
    tokens0 = rearrange(feat_c0, 'n c h w -> n (h w) c')
    tokens1 = rearrange(feat_c1, 'n c h w -> n (h w) c')
    loftr_coarse_gflops = coarse_model_flops(
        LocalFeatureTransformer, loftr_conf["coarse"], (tokens0, tokens1))
    print("FLOPs of coarse matching model in LoFTR: {} GFLOPs".format(loftr_coarse_gflops))

    # TopicFM
    topicfm_conf = get_model_cfg()
    topicfm_fpn_conf = topicfm_conf["fpn"]
    feat_c0, topicfm_gflops0 = feat_net_flops(topicfm_featnet, topicfm_fpn_conf, img0)
    feat_c1, topicfm_gflops1 = feat_net_flops(topicfm_featnet, topicfm_fpn_conf, img1)
    print("FLOPs of feature extraction in TopicFM: {} GFLOPs".format((topicfm_gflops0 + topicfm_gflops1) / 2))
    tokens0 = rearrange(feat_c0, 'n c h w -> n (h w) c')
    tokens1 = rearrange(feat_c1, 'n c h w -> n (h w) c')
    topicfm_coarse_gflops = coarse_model_flops(
        TopicFormer, topicfm_conf["coarse"], (tokens0, tokens1))
    print("FLOPs of coarse matching model in TopicFM: {} GFLOPs".format(topicfm_coarse_gflops))
+scikit-learn==1.1.2 +tqdm==4.62.3 +yacs==0.1.8 +torchmetrics==0.7.0 +gdown \ No newline at end of file diff --git a/third_party/TopicFM/scripts/reproduce_test/indoor.sh b/third_party/TopicFM/scripts/reproduce_test/indoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..76494f2e1734bfd3a2653ef3c96a557793b54f05 --- /dev/null +++ b/third_party/TopicFM/scripts/reproduce_test/indoor.sh @@ -0,0 +1,29 @@ +#!/bin/bash -l + +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +data_cfg_path="configs/data/scannet_test_1500.py" +main_cfg_path="configs/model/indoor/model_cfg_test.py" +ckpt_path="pretrained/model_best.ckpt" +dump_dir="dump/loftr_ds_indoor" +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +python -u ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark + diff --git a/third_party/TopicFM/scripts/reproduce_test/outdoor.sh b/third_party/TopicFM/scripts/reproduce_test/outdoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..e6217883a1ea9c17edf2ce0ff0ee97d26868b5d9 --- /dev/null +++ b/third_party/TopicFM/scripts/reproduce_test/outdoor.sh @@ -0,0 +1,29 @@ +#!/bin/bash -l + +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +data_cfg_path="configs/data/megadepth_test_1500.py" +main_cfg_path="configs/model/outdoor/model_cfg_test.py" +ckpt_path="pretrained/model_best.ckpt" +dump_dir="dump/loftr_ds_outdoor" +profiler_name="inference" +n_nodes=1 # mannually 
keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +python -u ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark + diff --git a/third_party/TopicFM/scripts/reproduce_train/debug/.gitignore b/third_party/TopicFM/scripts/reproduce_train/debug/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/TopicFM/scripts/reproduce_train/debug/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/TopicFM/scripts/reproduce_train/outdoor.sh b/third_party/TopicFM/scripts/reproduce_train/outdoor.sh new file mode 100644 index 0000000000000000000000000000000000000000..d30320f04e0b560f4b4de9ee68305a4e698b538b --- /dev/null +++ b/third_party/TopicFM/scripts/reproduce_train/outdoor.sh @@ -0,0 +1,32 @@ +#!/bin/bash -l + +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +# conda activate loftr +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +data_cfg_path="configs/data/megadepth_trainval.py" +main_cfg_path="configs/model/outdoor/model_ds.py" + +n_nodes=1 +n_gpus_per_node=4 +torch_num_workers=4 +batch_size=1 +pin_memory=true +exp_name="outdoor-bs=$(($n_gpus_per_node * $n_nodes * $batch_size))" + +python -u ./train.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --exp_name=${exp_name} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers} --pin_memory=${pin_memory} \ + --check_val_every_n_epoch=1 \ + --log_every_n_steps=30000 \ + --flush_logs_every_n_steps=30000 \ + --limit_val_batches=1. 
def lower_config(yacs_cfg):
    """Recursively convert a ``CfgNode`` into a plain dict with lower-cased keys.

    Anything that is not a ``CfgNode`` is returned unchanged, which also ends
    the recursion at leaf values.
    """
    if isinstance(yacs_cfg, CfgNode):
        lowered = {}
        for key, value in yacs_cfg.items():
            lowered[key.lower()] = lower_config(value)
        return lowered
    return yacs_cfg


def get_model_cfg():
    """Return the ``model`` section of the default config as a lower-cased dict."""
    # One pass is enough: the result is a plain dict, which a second
    # lower_config() call would hand back untouched.
    return lower_config(_CN)["model"]
Coarse-Matching config +_CN.MODEL.MATCH_COARSE = CN() +_CN.MODEL.MATCH_COARSE.THR = 0.2 +_CN.MODEL.MATCH_COARSE.BORDER_RM = 2 +_CN.MODEL.MATCH_COARSE.MATCH_TYPE = 'dual_softmax' +_CN.MODEL.MATCH_COARSE.DSMAX_TEMPERATURE = 0.1 +_CN.MODEL.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.2 # training tricks: save GPU memory +_CN.MODEL.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200 # training tricks: avoid DDP deadlock +_CN.MODEL.MATCH_COARSE.SPARSE_SPVS = True + +# 4. MODEL-fine module config +_CN.MODEL.FINE = CN() +_CN.MODEL.FINE.D_MODEL = 128 +_CN.MODEL.FINE.D_FFN = 128 +_CN.MODEL.FINE.NHEAD = 4 +_CN.MODEL.FINE.LAYER_NAMES = ['cross'] * 1 +_CN.MODEL.FINE.ATTENTION = 'linear' +_CN.MODEL.FINE.N_TOPICS = 1 + +# 5. MODEL Losses +# -- # coarse-level +_CN.MODEL.LOSS = CN() +_CN.MODEL.LOSS.COARSE_WEIGHT = 1.0 +# _CN.MODEL.LOSS.SPARSE_SPVS = False +# -- - -- # focal loss (coarse) +_CN.MODEL.LOSS.FOCAL_ALPHA = 0.25 +_CN.MODEL.LOSS.POS_WEIGHT = 1.0 +_CN.MODEL.LOSS.NEG_WEIGHT = 1.0 +# _CN.MODEL.LOSS.DUAL_SOFTMAX = False # whether coarse-level use dual-softmax or not. +# use `_CN.MODEL.MATCH_COARSE.MATCH_TYPE` + +# -- # fine-level +_CN.MODEL.LOSS.FINE_TYPE = 'l2_with_std' # ['l2_with_std', 'l2'] +_CN.MODEL.LOSS.FINE_WEIGHT = 1.0 +_CN.MODEL.LOSS.FINE_CORRECT_THR = 1.0 # for filtering valid fine-level gts (some gt matches might fall out of the fine-level window) + + +############## Dataset ############## +_CN.DATASET = CN() +# 1. 
data config +# training and validating +_CN.DATASET.TRAINVAL_DATA_SOURCE = None # options: ['ScanNet', 'MegaDepth'] +_CN.DATASET.TRAIN_DATA_ROOT = None +_CN.DATASET.TRAIN_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.TRAIN_NPZ_ROOT = None +_CN.DATASET.TRAIN_LIST_PATH = None +_CN.DATASET.TRAIN_INTRINSIC_PATH = None +_CN.DATASET.VAL_DATA_ROOT = None +_CN.DATASET.VAL_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.VAL_NPZ_ROOT = None +_CN.DATASET.VAL_LIST_PATH = None # None if val data from all scenes are bundled into a single npz file +_CN.DATASET.VAL_INTRINSIC_PATH = None +# testing +_CN.DATASET.TEST_DATA_SOURCE = None +_CN.DATASET.TEST_DATA_ROOT = None +_CN.DATASET.TEST_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.TEST_NPZ_ROOT = None +_CN.DATASET.TEST_LIST_PATH = None # None if test data from all scenes are bundled into a single npz file +_CN.DATASET.TEST_INTRINSIC_PATH = None +_CN.DATASET.TEST_IMGSIZE = None + +# 2. dataset config +# general options +_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4 # discard data with overlap_score < min_overlap_score +_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 +_CN.DATASET.AUGMENTATION_TYPE = None # options: [None, 'dark', 'mobile'] + +# MegaDepth options +_CN.DATASET.MGDPT_IMG_RESIZE = 640 # resize the longer side, zero-pad bottom-right to square. 
+_CN.DATASET.MGDPT_IMG_PAD = True # pad img to square with size = MGDPT_IMG_RESIZE +_CN.DATASET.MGDPT_DEPTH_PAD = True # pad depthmap to square with size = 2000 +_CN.DATASET.MGDPT_DF = 8 + +############## Trainer ############## +_CN.TRAINER = CN() +_CN.TRAINER.WORLD_SIZE = 1 +_CN.TRAINER.CANONICAL_BS = 64 +_CN.TRAINER.CANONICAL_LR = 6e-3 +_CN.TRAINER.SCALING = None # this will be calculated automatically +_CN.TRAINER.FIND_LR = False # use learning rate finder from pytorch-lightning + +# optimizer +_CN.TRAINER.OPTIMIZER = "adamw" # [adam, adamw] +_CN.TRAINER.TRUE_LR = None # this will be calculated automatically at runtime +_CN.TRAINER.ADAM_DECAY = 0. # ADAM: for adam +_CN.TRAINER.ADAMW_DECAY = 0.01 + +# step-based warm-up +_CN.TRAINER.WARMUP_TYPE = 'linear' # [linear, constant] +_CN.TRAINER.WARMUP_RATIO = 0. +_CN.TRAINER.WARMUP_STEP = 4800 + +# learning rate scheduler +_CN.TRAINER.SCHEDULER = 'MultiStepLR' # [MultiStepLR, CosineAnnealing, ExponentialLR] +_CN.TRAINER.SCHEDULER_INTERVAL = 'epoch' # [epoch, step] +_CN.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12] # MSLR: MultiStepLR +_CN.TRAINER.MSLR_GAMMA = 0.5 +_CN.TRAINER.COSA_TMAX = 30 # COSA: CosineAnnealing +_CN.TRAINER.ELR_GAMMA = 0.999992 # ELR: ExponentialLR, this value for 'step' interval + +# plotting related +_CN.TRAINER.ENABLE_PLOTTING = True +_CN.TRAINER.N_VAL_PAIRS_TO_PLOT = 32 # number of val/test paris for plotting +_CN.TRAINER.PLOT_MODE = 'evaluation' # ['evaluation', 'confidence'] +_CN.TRAINER.PLOT_MATCHES_ALPHA = 'dynamic' + +# geometric metrics and pose solver +_CN.TRAINER.EPI_ERR_THR = 5e-4 # recommendation: 5e-4 for ScanNet, 1e-4 for MegaDepth (from SuperGlue) +_CN.TRAINER.POSE_GEO_MODEL = 'E' # ['E', 'F', 'H'] +_CN.TRAINER.POSE_ESTIMATION_METHOD = 'RANSAC' # [RANSAC, DEGENSAC, MAGSAC] +_CN.TRAINER.RANSAC_PIXEL_THR = 0.5 +_CN.TRAINER.RANSAC_CONF = 0.99999 +_CN.TRAINER.RANSAC_MAX_ITERS = 10000 +_CN.TRAINER.USE_MAGSACPP = False + +# data sampler for train_dataloader +_CN.TRAINER.DATA_SAMPLER = 
'scene_balance' # options: ['scene_balance', 'random', 'normal'] +# 'scene_balance' config +_CN.TRAINER.N_SAMPLES_PER_SUBSET = 200 +_CN.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT = True # whether sample each scene with replacement or not +_CN.TRAINER.SB_SUBSET_SHUFFLE = True # after sampling from scenes, whether shuffle within the epoch or not +_CN.TRAINER.SB_REPEAT = 1 # repeat N times for training the sampled data +# 'random' config +_CN.TRAINER.RDM_REPLACEMENT = True +_CN.TRAINER.RDM_NUM_SAMPLES = None + +# gradient clipping +_CN.TRAINER.GRADIENT_CLIPPING = 0.5 + +# reproducibility +# This seed affects the data sampling. With the same seed, the data sampling is promised +# to be the same. When resume training from a checkpoint, it's better to use a different +# seed, otherwise the sampled data will be exactly the same as before resuming, which will +# cause less unique data items sampled during the entire training. +# Use of different seed values might affect the final training result, since not all data items +# are used during training on ScanNet. (60M pairs of images sampled during traing from 230M pairs in total.) 
class AachenDataset(Dataset):
    """Image-pair dataset read from a text file of space-separated name pairs.

    Each line of ``match_list_path`` holds two image names relative to
    ``img_path``. Items are dicts with both loaded images, their scales,
    the pair names and the dataset tag ``"AachenDayNight"``.
    """

    def __init__(self, img_path, match_list_path, img_resize=None, down_factor=16):
        self.img_path = img_path
        self.img_resize = img_resize
        self.down_factor = down_factor
        # Keep the raw lines (newline included); they are parsed lazily per item.
        with open(match_list_path, 'r') as pair_file:
            self.raw_pairs = list(pair_file)
        print("number of matching pairs: ", len(self.raw_pairs))

    def __len__(self):
        return len(self.raw_pairs)

    def __getitem__(self, idx):
        # Exactly two names are expected per line; anything else raises ValueError.
        image_name0, image_name1 = self.raw_pairs[idx].strip('\n').split(' ')
        loaded = [
            read_img_gray(os.path.join(self.img_path, name),
                          resize=self.img_resize,
                          down_factor=self.down_factor)
            for name in (image_name0, image_name1)
        ]
        (img0, scale0), (img1, scale1) = loaded
        sample = {
            "image0": img0,
            "image1": img1,
            "scale0": scale0,
            "scale1": scale1,
            "pair_names": (image_name0, image_name1),
            "dataset_name": "AachenDayNight",
        }
        return sample
class TestDataLoader(DataLoader):
    """``DataLoader`` over the test split selected by ``config.DATASET``.

    The dataset is chosen from ``config.DATASET.TEST_DATA_SOURCE``
    (compared case-insensitively): ``megadepth`` and ``scannet`` build one
    dataset per scene listed in ``TEST_LIST_PATH`` and concatenate them,
    while ``aachen_v1.1`` and ``inloc`` read a list of image pairs.
    The loader always uses batch size 1, no shuffling, 4 workers and
    pinned memory.

    Raises:
        ValueError: if the configured data source is not one of the above.
    """

    def __init__(self, config):

        # 1. data config
        self.test_data_source = config.DATASET.TEST_DATA_SOURCE
        dataset_name = str(self.test_data_source).lower()
        # testing
        self.test_data_root = config.DATASET.TEST_DATA_ROOT
        self.test_pose_root = config.DATASET.TEST_POSE_ROOT  # (optional)
        self.test_npz_root = config.DATASET.TEST_NPZ_ROOT
        self.test_list_path = config.DATASET.TEST_LIST_PATH
        self.test_intrinsic_path = config.DATASET.TEST_INTRINSIC_PATH

        # 2. dataset config
        # general options: pairs with overlap_score <= this value are omitted
        self.min_overlap_score_test = config.DATASET.MIN_OVERLAP_SCORE_TEST

        # MegaDepth options
        if dataset_name == 'megadepth':
            self.mgdpt_img_resize = config.DATASET.MGDPT_IMG_RESIZE
            self.mgdpt_img_pad = True
            self.mgdpt_depth_pad = True
            self.mgdpt_df = 8
            self.coarse_scale = 0.125
        if dataset_name == 'scannet':
            self.img_resize = config.DATASET.TEST_IMGSIZE

        if dataset_name in ('megadepth', 'scannet'):
            test_dataset = self._setup_dataset(
                self.test_data_root,
                self.test_npz_root,
                self.test_list_path,
                self.test_intrinsic_path,
                mode='test',
                min_overlap_score=self.min_overlap_score_test,
                pose_dir=self.test_pose_root)
        elif dataset_name == 'aachen_v1.1':
            test_dataset = AachenDataset(self.test_data_root, self.test_list_path,
                                         img_resize=config.DATASET.TEST_IMGSIZE)
        elif dataset_name == 'inloc':
            test_dataset = InLocDataset(self.test_data_root, self.test_list_path,
                                        img_resize=config.DATASET.TEST_IMGSIZE)
        else:
            # Raising a bare string is itself a TypeError in Python 3, which
            # hid the real problem; raise a proper exception naming the source.
            raise ValueError(f"unknown dataset: {self.test_data_source!r}")

        self.test_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': 4,
            'pin_memory': True
        }

        super(TestDataLoader, self).__init__(test_dataset, **self.test_loader_params)

    def _setup_dataset(self,
                       data_root,
                       split_npz_root,
                       scene_list_path,
                       intri_path,
                       mode='train',
                       min_overlap_score=0.,
                       pose_dir=None):
        """Read the scene list and build the concatenated per-scene dataset.

        The first whitespace-separated token of every non-blank line of
        ``scene_list_path`` is taken as a scene (npz) name.
        """
        with open(scene_list_path, 'r') as f:
            # Skip blank lines (e.g. a trailing newline) instead of crashing
            # with IndexError on ``split()[0]``.
            npz_names = [line.split()[0] for line in f if line.strip()]

        return self._build_concat_dataset(data_root, npz_names, split_npz_root, intri_path,
                                          mode=mode, min_overlap_score=min_overlap_score,
                                          pose_dir=pose_dir)

    def _build_concat_dataset(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None
    ):
        """Create one dataset per scene file and wrap them in a ``ConcatDataset``.

        Raises:
            NotImplementedError: if the data source is neither ScanNet nor MegaDepth.
        """
        datasets = []
        # Normalise once so that the checks below agree with the
        # case-insensitive dispatch done in ``__init__``.
        data_source = str(self.test_data_source).lower()
        if data_source == 'megadepth':
            npz_names = [f'{n}.npz' for n in npz_names]
        for npz_name in tqdm(npz_names):
            # `ScanNetDataset`/`MegaDepthDataset` load all data from npz_path
            # when initialized, which might take time.
            npz_path = osp.join(npz_dir, npz_name)
            if data_source == 'scannet':
                datasets.append(
                    ScanNetDataset(data_root,
                                   npz_path,
                                   intrinsic_path,
                                   mode=mode, img_resize=self.img_resize,
                                   min_overlap_score=min_overlap_score,
                                   pose_dir=pose_dir))
            elif data_source == 'megadepth':
                datasets.append(
                    MegaDepthDataset(data_root,
                                     npz_path,
                                     mode=mode,
                                     min_overlap_score=min_overlap_score,
                                     img_resize=self.mgdpt_img_resize,
                                     df=self.mgdpt_df,
                                     img_padding=self.mgdpt_img_pad,
                                     depth_padding=self.mgdpt_depth_pad,
                                     coarse_scale=self.coarse_scale))
            else:
                raise NotImplementedError(
                    f"no per-scene dataset for data source {self.test_data_source!r}")
        return ConcatDataset(datasets)
class MegaDepthDataset(Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='train',
                 min_overlap_score=0.4,
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 depth_padding=False,
                 augment_fn=None,
                 **kwargs):
        """
        Manage one scene(npz_path) of MegaDepth dataset.

        Args:
            root_dir (str): megadepth root directory that has `phoenix`.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['train', 'val', 'test']
            min_overlap_score (float): how much a pair should have in common. In range of [0, 1]. Set to 0 when testing.
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                                        This is useful during training with batches and testing with memory intensive algorithms.
                                        NOTE: in 'val' mode this value is overridden with 864.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            **kwargs: only ``coarse_scale`` (float, default 0.125) is read; it is the
                scale factor used to down-sample the padding masks.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        # NOTE: everything before the first '.' of the *whole path* is kept.
        self.scene_id = npz_path.split('.')[0]

        # prepare scene_info and pair_info
        if mode == 'test' and min_overlap_score != 0:
            logger.warning("You are using `min_overlap_score`!=0 in test mode. Set to 0.")
            min_overlap_score = 0
        scene_info = np.load(npz_path, allow_pickle=True)
        if not isinstance(scene_info, dict):
            # A real .npz archive is returned as a read-only NpzFile that does not
            # support item deletion; materialise it so the entry can be removed.
            scene_info = dict(scene_info)
        # Detach the (large) pair list from scene_info and keep only sufficiently
        # overlapping pairs; pair_info[1] is the overlap score.
        pair_infos = scene_info.pop('pair_infos')
        self.scene_info = scene_info
        self.pair_infos = [pair_info for pair_info in pair_infos if pair_info[1] > min_overlap_score]

        # parameters for image resizing, padding and depthmap padding
        if mode == 'train':
            assert img_resize is not None and img_padding and depth_padding
        self.img_resize = img_resize
        if mode == 'val':
            self.img_resize = 864
        self.df = df
        self.img_padding = img_padding
        self.depth_max_size = 2000 if depth_padding else None  # the upperbound of depthmaps size in megadepth.

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None
        # kwargs is a plain dict: it must be queried with .get(); getattr() on a
        # dict never finds the key and would always fall back to the default.
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)

    def __len__(self):
        """Return the number of image pairs kept for this scene."""
        return len(self.pair_infos)

    def __getitem__(self, idx):
        """Load images, depth maps, intrinsics and relative poses of pair ``idx``."""
        (idx0, idx1), overlap_score, central_matches = self.pair_infos[idx]

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, self.scene_info['image_paths'][idx0])
        img_name1 = osp.join(self.root_dir, self.scene_info['image_paths'][idx1])

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0, mask0, scale0 = read_megadepth_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None)
        image1, mask1, scale1 = read_megadepth_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None)

        # read depth. shape: (h, w)
        if self.mode in ['train', 'val']:
            depth0 = read_megadepth_depth(
                osp.join(self.root_dir, self.scene_info['depth_paths'][idx0]), pad_to=self.depth_max_size)
            depth1 = read_megadepth_depth(
                osp.join(self.root_dir, self.scene_info['depth_paths'][idx1]), pad_to=self.depth_max_size)
        else:
            # depth is not loaded in test mode; empty tensors are returned instead
            depth0 = depth1 = torch.tensor([])

        # read intrinsics of original size
        K_0 = torch.tensor(self.scene_info['intrinsics'][idx0].copy(), dtype=torch.float).reshape(3, 3)
        K_1 = torch.tensor(self.scene_info['intrinsics'][idx1].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T0 = self.scene_info['poses'][idx0]
        T1 = self.scene_info['poses'][idx1]
        T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'MegaDepth',
            'scene_id': self.scene_id,
            'pair_id': idx,
            'pair_names': (self.scene_info['image_paths'][idx0], self.scene_info['image_paths'][idx1]),
        }

        # for LoFTR training
        if mask0 is not None and self.coarse_scale:  # img_padding is True
            # Down-sample both padding masks together to the coarse resolution.
            # The update lives inside the guard so the mask names are never
            # referenced while unbound.
            [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                   scale_factor=self.coarse_scale,
                                                   mode='nearest',
                                                   recompute_scale_factor=False)[0].bool()
            data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data
class RandomConcatSampler(Sampler):
    """ Random sampler for ConcatDataset. At each epoch, `n_samples_per_subset` samples will be drawn from each subset
    in the ConcatDataset. If `subset_replacement` is ``True``, sampling within each subset will be done with replacement.
    However, it is impossible to sample data without replacement between epochs, unless building a stateful sampler
    that lives along the entire training phase.

    For the current implementation, the randomness of sampling is ensured no matter whether the sampler is recreated
    across epochs or not, and whether `torch.manual_seed()` is called or not.

    Args:
        data_source (ConcatDataset): the concatenated dataset to sample from.
        n_samples_per_subset (int): number of indices drawn from every sub-dataset per epoch.
        subset_replacement (bool): sample with replacement inside each sub-dataset.
        shuffle (bool): shuffle the random sampled indices across all sub-datasets.
        repeat (int): repeatedly use the sampled indices multiple times for training.
            [arXiv:1902.05509, arXiv:1901.09335]
        seed (int, optional): if given, `torch.manual_seed(seed)` is called (this re-seeds the *global* generator,
            which is then used for sampling). If ``None``, the global default generator is used as-is.

    NOTE: Don't re-initialize the sampler between epochs (will lead to repeated samples)
    NOTE: This sampler behaves differently with DistributedSampler.
          It assumes the dataset is split across ranks instead of replicated.
    TODO: Add a `set_epoch()` method to fulfill sampling without replacement across epochs.
    ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373
    """
    def __init__(self,
                 data_source: ConcatDataset,
                 n_samples_per_subset: int,
                 subset_replacement: bool = True,
                 shuffle: bool = True,
                 repeat: int = 1,
                 seed: int = None):
        if not isinstance(data_source, ConcatDataset):
            raise TypeError("data_source should be torch.utils.data.ConcatDataset")
        # validate before `repeat` is used to derive the epoch length
        assert repeat >= 1

        self.data_source = data_source
        self.n_subset = len(self.data_source.datasets)
        self.n_samples_per_subset = n_samples_per_subset
        self.n_samples = self.n_subset * self.n_samples_per_subset * repeat
        self.subset_replacement = subset_replacement
        self.repeat = repeat
        self.shuffle = shuffle
        # torch.manual_seed() rejects None, although None is the declared default:
        # fall back to the (un-reseeded) global generator in that case.
        if seed is None:
            self.generator = torch.default_generator
        else:
            self.generator = torch.manual_seed(seed)

    def __len__(self):
        """Number of indices yielded per epoch (subsets * samples per subset * repeat)."""
        return self.n_samples

    def __iter__(self):
        indices = []
        # sample from each sub-dataset
        for d_idx in range(self.n_subset):
            # [low, high) is the global index range of sub-dataset d_idx
            low = 0 if d_idx == 0 else self.data_source.cumulative_sizes[d_idx - 1]
            high = self.data_source.cumulative_sizes[d_idx]
            if self.subset_replacement:
                rand_tensor = torch.randint(low, high, (self.n_samples_per_subset, ),
                                            generator=self.generator, dtype=torch.int64)
            else:  # sample without replacement
                len_subset = len(self.data_source.datasets[d_idx])
                rand_tensor = torch.randperm(len_subset, generator=self.generator) + low
                if len_subset >= self.n_samples_per_subset:
                    rand_tensor = rand_tensor[:self.n_samples_per_subset]
                else:  # padding with replacement
                    rand_tensor_replacement = torch.randint(low, high, (self.n_samples_per_subset - len_subset, ),
                                                            generator=self.generator, dtype=torch.int64)
                    rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement])
            indices.append(rand_tensor)
        indices = torch.cat(indices)
        if self.shuffle:  # shuffle the sampled dataset (from multiple subsets)
            rand_tensor = torch.randperm(len(indices), generator=self.generator)
            indices = indices[rand_tensor]

        # repeat the sampled indices (can be used for RepeatAugmentation or pure RepeatSampling)
        if self.repeat > 1:
            repeat_indices = [indices.clone() for _ in range(self.repeat - 1)]
            if self.shuffle:
                # every repeated copy gets its own independent permutation
                repeat_indices = [copy[torch.randperm(len(copy), generator=self.generator)]
                                  for copy in repeat_indices]
            indices = torch.cat([indices, *repeat_indices], 0)

        assert indices.shape[0] == self.n_samples
        return iter(indices.tolist())
class ScanNetDataset(utils.data.Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 intrinsic_path,
                 mode='train',
                 min_overlap_score=0.4,
                 augment_fn=None,
                 pose_dir=None,
                 **kwargs):
        """Manage one scene of ScanNet Dataset.
        Args:
            root_dir (str): ScanNet root directory that contains scene folders.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            intrinsic_path (str): path to depth-camera intrinsic file.
            mode (str): options are ['train', 'val', 'test'].
            min_overlap_score (float): pairs whose 'score' is not above this value are dropped.
                Only applied when the npz has a 'score' entry and mode is neither 'val' nor 'test'.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            pose_dir (str): ScanNet root directory that contains all poses.
                (we use a separate (optional) pose_dir since we store images and poses separately.)
            **kwargs: only ``img_resize`` is read (default ``(640, 480)``).
        """
        super().__init__()
        self.root_dir = root_dir
        self.pose_dir = pose_dir if pose_dir is not None else root_dir
        self.mode = mode
        self.img_resize = kwargs.get('img_resize', (640, 480))

        # prepare data_names, intrinsics and extrinsics(T)
        with np.load(npz_path) as data:
            self.data_names = data['name']
            # `['val' or 'test']` evaluates to `['val']`, which wrongly filtered
            # the test split as well; both evaluation modes must be exempt.
            if 'score' in data.keys() and mode not in ('val', 'test'):
                kept_mask = data['score'] > min_overlap_score
                self.data_names = self.data_names[kept_mask]
        self.intrinsics = dict(np.load(intrinsic_path))

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None

    def __len__(self):
        """Return the number of image pairs kept for this scene list."""
        return len(self.data_names)

    def _read_abs_pose(self, scene_name, name):
        """Read the pose stored at ``<pose_dir>/<scene_name>/pose/<name>.txt``."""
        pth = osp.join(self.pose_dir,
                       scene_name,
                       'pose', f'{name}.txt')
        return read_scannet_pose(pth)

    def _compute_rel_pose(self, scene_name, name0, name1):
        """Return ``pose1 @ inv(pose0)`` for the two named frames of a scene."""
        pose0 = self._read_abs_pose(scene_name, name0)
        pose1 = self._read_abs_pose(scene_name, name1)

        return np.matmul(pose1, inv(pose0))  # (4, 4)

    def __getitem__(self, idx):
        """Load images, depth maps, intrinsics and relative poses of pair ``idx``."""
        data_name = self.data_names[idx]
        scene_name, scene_sub_name, stem_name_0, stem_name_1 = data_name
        scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'

        # read the grayscale image which will be resized to (1, 480, 640)
        img_name0 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_0}.jpg')
        img_name1 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_1}.jpg')

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0 = read_scannet_gray(img_name0, resize=self.img_resize, augment_fn=None)
        image1 = read_scannet_gray(img_name1, resize=self.img_resize, augment_fn=None)

        # read the depthmap which is stored as (480, 640)
        if self.mode in ['train', 'val']:
            depth0 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_0}.png'))
            depth1 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_1}.png'))
        else:
            # depth is not loaded in test mode; empty tensors are returned instead
            depth0 = depth1 = torch.tensor([])

        # read the intrinsic of depthmap (one matrix per scene, shared by both views)
        K_0 = K_1 = torch.tensor(self.intrinsics[scene_name].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1),
                              dtype=torch.float32)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,   # (1, h, w)
            'depth0': depth0,   # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,   # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'dataset_name': 'ScanNet',
            'scene_id': scene_name,
            'pair_id': idx,
            'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'),
                           osp.join(scene_name, 'color', f'{stem_name_1}.jpg'))
        }

        return data
class MultiSceneDataModule(pl.LightningDataModule):
    """
    For distributed training, each training process is assigned
    only a part of the training scenes to reduce memory overhead.
    """
    def __init__(self, args, config):
        """
        Args:
            args: run-time arguments; `batch_size` and `num_workers` are required,
                `pin_memory` and `parallel_load_data` are optional.
            config: configuration node providing the `DATASET`, `MODEL` and `TRAINER` sections read below.
        """
        super().__init__()

        # 1. data config
        # Train and Val should from the same data source
        self.trainval_data_source = config.DATASET.TRAINVAL_DATA_SOURCE
        self.test_data_source = config.DATASET.TEST_DATA_SOURCE
        # training and validating
        self.train_data_root = config.DATASET.TRAIN_DATA_ROOT
        self.train_pose_root = config.DATASET.TRAIN_POSE_ROOT  # (optional)
        self.train_npz_root = config.DATASET.TRAIN_NPZ_ROOT
        self.train_list_path = config.DATASET.TRAIN_LIST_PATH
        self.train_intrinsic_path = config.DATASET.TRAIN_INTRINSIC_PATH
        self.val_data_root = config.DATASET.VAL_DATA_ROOT
        self.val_pose_root = config.DATASET.VAL_POSE_ROOT  # (optional)
        self.val_npz_root = config.DATASET.VAL_NPZ_ROOT
        self.val_list_path = config.DATASET.VAL_LIST_PATH
        self.val_intrinsic_path = config.DATASET.VAL_INTRINSIC_PATH
        # testing
        self.test_data_root = config.DATASET.TEST_DATA_ROOT
        self.test_pose_root = config.DATASET.TEST_POSE_ROOT  # (optional)
        self.test_npz_root = config.DATASET.TEST_NPZ_ROOT
        self.test_list_path = config.DATASET.TEST_LIST_PATH
        self.test_intrinsic_path = config.DATASET.TEST_INTRINSIC_PATH

        # 2. dataset config
        # general options
        self.min_overlap_score_test = config.DATASET.MIN_OVERLAP_SCORE_TEST  # 0.4, omit data with overlap_score < min_overlap_score
        self.min_overlap_score_train = config.DATASET.MIN_OVERLAP_SCORE_TRAIN
        self.augment_fn = build_augmentor(config.DATASET.AUGMENTATION_TYPE)  # None, options: [None, 'dark', 'mobile']

        # MegaDepth options
        self.mgdpt_img_resize = config.DATASET.MGDPT_IMG_RESIZE  # 840
        self.mgdpt_img_pad = config.DATASET.MGDPT_IMG_PAD  # True
        self.mgdpt_depth_pad = config.DATASET.MGDPT_DEPTH_PAD  # True
        self.mgdpt_df = config.DATASET.MGDPT_DF  # 8
        self.coarse_scale = 1 / config.MODEL.RESOLUTION[0]  # 0.125. for training loftr.

        # 3.loader parameters
        self.train_loader_params = {
            'batch_size': args.batch_size,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.val_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.test_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': True
        }

        # 4. sampler
        self.data_sampler = config.TRAINER.DATA_SAMPLER
        self.n_samples_per_subset = config.TRAINER.N_SAMPLES_PER_SUBSET
        self.subset_replacement = config.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT
        self.shuffle = config.TRAINER.SB_SUBSET_SHUFFLE
        self.repeat = config.TRAINER.SB_REPEAT

        # (optional) RandomSampler for debugging

        # misc configurations
        self.parallel_load_data = getattr(args, 'parallel_load_data', False)
        self.seed = config.TRAINER.SEED  # 66

    def setup(self, stage=None):
        """
        Setup train / val / test dataset. This method will be called by PL automatically.
        Args:
            stage (str): 'fit' in training phase, and 'test' in testing phase.
        """

        assert stage in ['fit', 'test'], "stage must be either fit or test"

        try:
            self.world_size = dist.get_world_size()
            self.rank = dist.get_rank()
            logger.info(f"[rank:{self.rank}] world_size: {self.world_size}")
        except (AssertionError, RuntimeError, ValueError) as err:
            # Depending on the PyTorch version, querying an uninitialised default
            # process group raises AssertionError, RuntimeError or ValueError.
            # In every case fall back to single-process settings.
            self.world_size = 1
            self.rank = 0
            logger.warning(str(err) + " (set wolrd_size=1 and rank=0)")

        if stage == 'fit':
            self.train_dataset = self._setup_dataset(
                self.train_data_root,
                self.train_npz_root,
                self.train_list_path,
                self.train_intrinsic_path,
                mode='train',
                min_overlap_score=self.min_overlap_score_train,
                pose_dir=self.train_pose_root)
            # setup multiple (optional) validation subsets
            if isinstance(self.val_list_path, (list, tuple)):
                self.val_dataset = []
                if not isinstance(self.val_npz_root, (list, tuple)):
                    # a single npz root is shared by all validation lists
                    self.val_npz_root = [self.val_npz_root for _ in range(len(self.val_list_path))]
                for npz_list, npz_root in zip(self.val_list_path, self.val_npz_root):
                    self.val_dataset.append(self._setup_dataset(
                        self.val_data_root,
                        npz_root,
                        npz_list,
                        self.val_intrinsic_path,
                        mode='val',
                        min_overlap_score=self.min_overlap_score_test,
                        pose_dir=self.val_pose_root))
            else:
                self.val_dataset = self._setup_dataset(
                    self.val_data_root,
                    self.val_npz_root,
                    self.val_list_path,
                    self.val_intrinsic_path,
                    mode='val',
                    min_overlap_score=self.min_overlap_score_test,
                    pose_dir=self.val_pose_root)
            logger.info(f'[rank:{self.rank}] Train & Val Dataset loaded!')
        else:  # stage == 'test'
            self.test_dataset = self._setup_dataset(
                self.test_data_root,
                self.test_npz_root,
                self.test_list_path,
                self.test_intrinsic_path,
                mode='test',
                min_overlap_score=self.min_overlap_score_test,
                pose_dir=self.test_pose_root)
            logger.info(f'[rank:{self.rank}]: Test Dataset loaded!')

    def _setup_dataset(self,
                       data_root,
                       split_npz_root,
                       scene_list_path,
                       intri_path,
                       mode='train',
                       min_overlap_score=0.,
                       pose_dir=None):
        """ Setup train / val / test set"""
        with open(scene_list_path, 'r') as f:
            # first whitespace-separated token of every non-blank line
            npz_names = [line.split()[0] for line in f if line.strip()]

        if mode == 'train':
            # only the training scenes are split across ranks
            local_npz_names = get_local_split(npz_names, self.world_size, self.rank, self.seed)
        else:
            local_npz_names = npz_names
        logger.info(f'[rank {self.rank}]: {len(local_npz_names)} scene(s) assigned.')

        dataset_builder = self._build_concat_dataset_parallel \
            if self.parallel_load_data \
            else self._build_concat_dataset
        return dataset_builder(data_root, local_npz_names, split_npz_root, intri_path,
                               mode=mode, min_overlap_score=min_overlap_score, pose_dir=pose_dir)

    def _build_concat_dataset(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None
    ):
        """Build one dataset per scene sequentially and concatenate them.

        Raises:
            NotImplementedError: if the data source is neither 'ScanNet' nor 'MegaDepth'.
        """
        datasets = []
        augment_fn = self.augment_fn if mode == 'train' else None
        data_source = self.trainval_data_source if mode in ['train', 'val'] else self.test_data_source
        if str(data_source).lower() == 'megadepth':
            # MegaDepth scene lists omit the file extension
            npz_names = [f'{n}.npz' for n in npz_names]
        for npz_name in tqdm(npz_names,
                             desc=f'[rank:{self.rank}] loading {mode} datasets',
                             disable=int(self.rank) != 0):
            # `ScanNetDataset`/`MegaDepthDataset` load all data from npz_path when initialized, which might take time.
            npz_path = osp.join(npz_dir, npz_name)
            if data_source == 'ScanNet':
                datasets.append(
                    ScanNetDataset(data_root,
                                   npz_path,
                                   intrinsic_path,
                                   mode=mode,
                                   min_overlap_score=min_overlap_score,
                                   augment_fn=augment_fn,
                                   pose_dir=pose_dir))
            elif data_source == 'MegaDepth':
                datasets.append(
                    MegaDepthDataset(data_root,
                                     npz_path,
                                     mode=mode,
                                     min_overlap_score=min_overlap_score,
                                     img_resize=self.mgdpt_img_resize,
                                     df=self.mgdpt_df,
                                     img_padding=self.mgdpt_img_pad,
                                     depth_padding=self.mgdpt_depth_pad,
                                     augment_fn=augment_fn,
                                     coarse_scale=self.coarse_scale))
            else:
                raise NotImplementedError()
        return ConcatDataset(datasets)

    def _build_concat_dataset_parallel(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None,
    ):
        """Build one dataset per scene with joblib workers and concatenate them.

        Only ScanNet is supported here.

        Raises:
            NotImplementedError: for 'MegaDepth' (see the TODO below).
            ValueError: for any other data source.
        """
        augment_fn = self.augment_fn if mode == 'train' else None
        data_source = self.trainval_data_source if mode in ['train', 'val'] else self.test_data_source
        if str(data_source).lower() == 'megadepth':
            npz_names = [f'{n}.npz' for n in npz_names]
        with tqdm_joblib(tqdm(desc=f'[rank:{self.rank}] loading {mode} datasets',
                              total=len(npz_names), disable=int(self.rank) != 0)):
            if data_source == 'ScanNet':
                # use ~90% of the CPUs available to this process, shared between local ranks
                datasets = Parallel(n_jobs=math.floor(len(os.sched_getaffinity(0)) * 0.9 / comm.get_local_size()))(
                    delayed(lambda x: _build_dataset(
                        ScanNetDataset,
                        data_root,
                        osp.join(npz_dir, x),
                        intrinsic_path,
                        mode=mode,
                        min_overlap_score=min_overlap_score,
                        augment_fn=augment_fn,
                        pose_dir=pose_dir))(name)
                    for name in npz_names)
            elif data_source == 'MegaDepth':
                # TODO: _pickle.PicklingError: Could not pickle the task to send it to the workers.
                # (the unreachable parallel MegaDepth construction that used to follow
                # this raise has been dropped)
                raise NotImplementedError()
            else:
                raise ValueError(f'Unknown dataset: {data_source}')
        return ConcatDataset(datasets)

    def train_dataloader(self):
        """ Build training dataloader for ScanNet / MegaDepth. """
        assert self.data_sampler in ['scene_balance']
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Train Sampler and DataLoader re-init (should not re-init between epochs!).')
        if self.data_sampler == 'scene_balance':
            sampler = RandomConcatSampler(self.train_dataset,
                                          self.n_samples_per_subset,
                                          self.subset_replacement,
                                          self.shuffle, self.repeat, self.seed)
        else:
            sampler = None
        dataloader = DataLoader(self.train_dataset, sampler=sampler, **self.train_loader_params)
        return dataloader

    def val_dataloader(self):
        """ Build validation dataloader for ScanNet / MegaDepth. """
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Val Sampler and DataLoader re-init.')
        if not isinstance(self.val_dataset, abc.Sequence):
            sampler = DistributedSampler(self.val_dataset, shuffle=False)
            return DataLoader(self.val_dataset, sampler=sampler, **self.val_loader_params)
        else:
            # one loader per validation subset
            dataloaders = []
            for dataset in self.val_dataset:
                sampler = DistributedSampler(dataset, shuffle=False)
                dataloaders.append(DataLoader(dataset, sampler=sampler, **self.val_loader_params))
            return dataloaders

    def test_dataloader(self, *args, **kwargs):
        """ Build the test dataloader (batch size 1, no shuffling). """
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Test Sampler and DataLoader re-init.')
        sampler = DistributedSampler(self.test_dataset, shuffle=False)
        return DataLoader(self.test_dataset, sampler=sampler, **self.test_loader_params)


def _build_dataset(dataset: Dataset, *args, **kwargs):
    """Instantiate ``dataset`` (a dataset class) with the given arguments."""
    return dataset(*args, **kwargs)
class PL_Trainer(pl.LightningModule):
    """LightningModule wrapping the TopicFM matcher, its loss and its evaluation metrics."""

    def __init__(self, config, pretrained_ckpt=None, profiler=None, dump_dir=None):
        """
        Args:
            config: full configuration; the `TRAINER` section is read directly and a
                lower-cased copy is handed to the matcher and the loss.
            pretrained_ckpt (optional): checkpoint path; its 'state_dict' entry is loaded
                strictly into the matcher.
            profiler (optional): profiler object; a `PassThroughProfiler` is used when falsy.
            dump_dir (optional): when not None, per-pair test results are collected and
                saved there at the end of testing.

        TODO:
            - use the new version of PL logging API.
        """
        super().__init__()
        # Misc
        self.config = config  # full config
        _config = lower_config(self.config)
        self.model_cfg = lower_config(_config['model'])
        self.profiler = profiler or PassThroughProfiler()
        # number of validation pairs to plot per process (at least one)
        self.n_vals_plot = max(config.TRAINER.N_VAL_PAIRS_TO_PLOT // config.TRAINER.WORLD_SIZE, 1)

        # Matcher: TopicFM
        self.matcher = TopicFM(config=_config['model'])
        self.loss = TopicFMLoss(_config)

        # Pretrained weights
        if pretrained_ckpt:
            state_dict = torch.load(pretrained_ckpt, map_location='cpu')['state_dict']
            self.matcher.load_state_dict(state_dict, strict=True)
            logger.info(f"Load \'{pretrained_ckpt}\' as pretrained checkpoint")

        # Testing
        self.dump_dir = dump_dir

    def configure_optimizers(self):
        """Build the optimizer and scheduler from the config (Lightning hook)."""
        # FIXME: The scheduler did not work properly when `--resume_from_checkpoint`
        optimizer = build_optimizer(self, self.config)
        scheduler = build_scheduler(self.config, optimizer)
        return [optimizer], [scheduler]

    def optimizer_step(
            self, epoch, batch_idx, optimizer, optimizer_idx,
            optimizer_closure, on_tpu, using_native_amp, using_lbfgs):
        """Apply learning-rate warm-up, then step and zero the optimizer (Lightning hook).

        Raises:
            ValueError: if `TRAINER.WARMUP_TYPE` is neither 'linear' nor 'constant'
                while still inside the warm-up phase.
        """
        # learning rate warm up
        warmup_step = self.config.TRAINER.WARMUP_STEP
        if self.trainer.global_step < warmup_step:
            if self.config.TRAINER.WARMUP_TYPE == 'linear':
                # ramp linearly from WARMUP_RATIO * TRUE_LR towards TRUE_LR
                base_lr = self.config.TRAINER.WARMUP_RATIO * self.config.TRAINER.TRUE_LR
                lr = base_lr + \
                    (self.trainer.global_step / self.config.TRAINER.WARMUP_STEP) * \
                    abs(self.config.TRAINER.TRUE_LR - base_lr)
                for pg in optimizer.param_groups:
                    pg['lr'] = lr
            elif self.config.TRAINER.WARMUP_TYPE == 'constant':
                pass
            else:
                raise ValueError(f'Unknown lr warm-up strategy: {self.config.TRAINER.WARMUP_TYPE}')

        # update params
        optimizer.step(closure=optimizer_closure)
        optimizer.zero_grad()

    def _trainval_inference(self, batch):
        """Run supervision, matcher and loss on `batch`.

        Nothing is returned; later code reads results back out of `batch`
        (e.g. `batch['loss']`), so the helpers presumably write into it in place.
        """
        with self.profiler.profile("Compute coarse supervision"):
            compute_supervision_coarse(batch, self.config)

        with self.profiler.profile("TopicFM"):
            self.matcher(batch)

        with self.profiler.profile("Compute fine supervision"):
            compute_supervision_fine(batch, self.config)

        with self.profiler.profile("Compute losses"):
            self.loss(batch)

    def _compute_metrics(self, batch):
        """Compute epipolar and pose errors for `batch`.

        Returns:
            tuple: (`{'metrics': {...}}`, list of per-sample pair-name tuples).
        """
        with self.profiler.profile("Copmute metrics"):
            compute_symmetrical_epipolar_errors(batch)  # compute epi_errs for each match
            compute_pose_errors(batch, self.config)  # compute R_errs, t_errs, pose_errs for each pair

            # transpose the collated pair names into one tuple per sample
            rel_pair_names = list(zip(*batch['pair_names']))
            bs = batch['image0'].size(0)
            metrics = {
                # to filter duplicate pairs caused by DistributedSampler
                'identifiers': ['#'.join(rel_pair_names[b]) for b in range(bs)],
                'epi_errs': [batch['epi_errs'][batch['m_bids'] == b].cpu().numpy() for b in range(bs)],
                'R_errs': batch['R_errs'],
                't_errs': batch['t_errs'],
                'inliers': batch['inliers']}
            ret_dict = {'metrics': metrics}
        return ret_dict, rel_pair_names

    def training_step(self, batch, batch_idx):
        """Run one training step and log scalars / figures on rank 0 (Lightning hook)."""
        self._trainval_inference(batch)

        # logging
        if self.trainer.global_rank == 0 and self.global_step % self.trainer.log_every_n_steps == 0:
            # scalars
            for k, v in batch['loss_scalars'].items():
                self.logger.experiment.add_scalar(f'train/{k}', v, self.global_step)

            # figures
            if self.config.TRAINER.ENABLE_PLOTTING:
                compute_symmetrical_epipolar_errors(batch)  # compute epi_errs for each match
                figures = make_matching_figures(batch, self.config, self.config.TRAINER.PLOT_MODE)
                for k, v in figures.items():
                    self.logger.experiment.add_figure(f'train_match/{k}', v, self.global_step)

        return {'loss': batch['loss']}

    def training_epoch_end(self, outputs):
        """Log the mean training loss of the epoch on rank 0 (Lightning hook)."""
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        if self.trainer.global_rank == 0:
            self.logger.experiment.add_scalar(
                'train/avg_loss_on_epoch', avg_loss,
                global_step=self.current_epoch)

    def validation_step(self, batch, batch_idx):
        """Run one validation step; return metrics, loss scalars and optional figures."""
        self._trainval_inference(batch)

        ret_dict, _ = self._compute_metrics(batch)

        # plot roughly `n_vals_plot` evenly spaced batches
        val_plot_interval = max(self.trainer.num_val_batches[0] // self.n_vals_plot, 1)
        figures = {self.config.TRAINER.PLOT_MODE: []}
        if batch_idx % val_plot_interval == 0:
            figures = make_matching_figures(batch, self.config, mode=self.config.TRAINER.PLOT_MODE)

        return {
            **ret_dict,
            'loss_scalars': batch['loss_scalars'],
            'figures': figures,
        }

    def validation_epoch_end(self, outputs):
        """Gather validation outputs, log them on rank 0 and log `auc@{5,10,20}` on all ranks."""
        # handle multiple validation sets
        multi_outputs = [outputs] if not isinstance(outputs[0], (list, tuple)) else outputs
        multi_val_metrics = defaultdict(list)

        for valset_idx, outputs in enumerate(multi_outputs):
            # since pl performs sanity_check at the very begining of the training
            cur_epoch = self.trainer.current_epoch
            if not self.trainer.resume_from_checkpoint and self.trainer.running_sanity_check:
                cur_epoch = -1

            # 1. loss_scalars: dict of list, on cpu
            _loss_scalars = [o['loss_scalars'] for o in outputs]
            loss_scalars = {k: flattenList(all_gather([_ls[k] for _ls in _loss_scalars])) for k in _loss_scalars[0]}

            # 2. val metrics: dict of list, numpy
            _metrics = [o['metrics'] for o in outputs]
            metrics = {k: flattenList(all_gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}
            # NOTE: all ranks need to `aggregate_merics`, but only log at rank-0
            val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)
            for thr in [5, 10, 20]:
                multi_val_metrics[f'auc@{thr}'].append(val_metrics_4tb[f'auc@{thr}'])

            # 3. figures
            _figures = [o['figures'] for o in outputs]
            figures = {k: flattenList(gather(flattenList([_me[k] for _me in _figures]))) for k in _figures[0]}

            # tensorboard records only on rank 0
            if self.trainer.global_rank == 0:
                for k, v in loss_scalars.items():
                    mean_v = torch.stack(v).mean()
                    self.logger.experiment.add_scalar(f'val_{valset_idx}/avg_{k}', mean_v, global_step=cur_epoch)

                for k, v in val_metrics_4tb.items():
                    self.logger.experiment.add_scalar(f"metrics_{valset_idx}/{k}", v, global_step=cur_epoch)

                for k, v in figures.items():
                    # NOTE(review): this rank check repeats the enclosing one
                    if self.trainer.global_rank == 0:
                        for plot_idx, fig in enumerate(v):
                            self.logger.experiment.add_figure(
                                f'val_match_{valset_idx}/{k}/pair-{plot_idx}', fig, cur_epoch, close=True)
            plt.close('all')

        for thr in [5, 10, 20]:
            # log on all ranks for ModelCheckpoint callback to work properly
            self.log(f'auc@{thr}', torch.tensor(np.mean(multi_val_metrics[f'auc@{thr}'])))  # ckpt monitors on this

    def test_step(self, batch, batch_idx):
        """Run the matcher on a test batch, compute metrics and optionally collect dumps."""
        with self.profiler.profile("TopicFM"):
            self.matcher(batch)

        ret_dict, rel_pair_names = self._compute_metrics(batch)

        with self.profiler.profile("dump_results"):
            if self.dump_dir is not None:
                # dump results for further analysis
                keys_to_save = {'mkpts0_f', 'mkpts1_f', 'mconf', 'epi_errs'}
                pair_names = list(zip(*batch['pair_names']))
                bs = batch['image0'].shape[0]
                dumps = []
                for b_id in range(bs):
                    item = {}
                    # select the matches that belong to sample b_id
                    mask = batch['m_bids'] == b_id
                    item['pair_names'] = pair_names[b_id]
                    item['identifier'] = '#'.join(rel_pair_names[b_id])
                    for key in keys_to_save:
                        item[key] = batch[key][mask].cpu().numpy()
                    for key in ['R_errs', 't_errs', 'inliers']:
                        item[key] = batch[key][b_id]
                    dumps.append(item)
                ret_dict['dumps'] = dumps

        return ret_dict

    def test_epoch_end(self, outputs):
        """Gather test metrics (and dumps), then report and save them on rank 0."""
        # metrics: dict of list, numpy
        _metrics = [o['metrics'] for o in outputs]
        metrics = {k: flattenList(gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}

        # [{key: [{...}, *#bs]}, *#batch]
        if self.dump_dir is not None:
            Path(self.dump_dir).mkdir(parents=True, exist_ok=True)
            _dumps = flattenList([o['dumps'] for o in outputs])  # [{...}, #bs*#batch]
            dumps = flattenList(gather(_dumps))  # [{...}, #proc*#bs*#batch]
            logger.info(f'Prediction and evaluation results will be saved to: {self.dump_dir}')

        if self.trainer.global_rank == 0:
            print(self.profiler.summary())
            val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR)
            logger.info('\n' + pprint.pformat(val_metrics_4tb))
            if self.dump_dir is not None:
                np.save(Path(self.dump_dir) / 'TopicFM_pred_eval', dumps)
def sample_non_matches(pos_mask, match_ids=None, sampling_ratio=10):
    """Build a boolean mask of negative (non-matching) entries.

    Args:
        pos_mask (torch.Tensor): boolean mask of positives, indexed as [b, i, j].
        match_ids (tuple, optional): `(b_ids, i_ids, j_ids)` index tensors of the positive
            entries. When None (or empty) every non-positive entry is a negative.
        sampling_ratio (int): number of sampling rounds; each round marks one
            negative column per positive (rounds may select the same column again).

    Returns:
        torch.Tensor: boolean mask with the same shape as `pos_mask`.
    """
    if match_ids is None:
        return ~pos_mask

    b_ids, i_ids, j_ids = match_ids
    if len(b_ids) == 0:
        return ~pos_mask

    # j indexes the LAST dimension, so the wrap-around must use that size;
    # using shape[1] is only correct when both flattened grids have equal size.
    n_cols = pos_mask.shape[-1]
    n_offsets = (n_cols - 1) // 3
    if n_offsets < 1:
        # too few columns to draw a shifted column from: fall back to dense negatives
        return ~pos_mask

    neg_mask = torch.zeros_like(pos_mask)
    probs = torch.ones(n_offsets, device=pos_mask.device)
    for _ in range(sampling_ratio):
        d = torch.multinomial(probs, len(j_ids), replacement=True)
        # The shift d*3 + 1 lies in [1, n_cols - 3], so after the modulo the
        # sampled column can never coincide with the positive column j itself.
        sampled_j_ids = (j_ids + d * 3 + 1) % n_cols
        neg_mask[b_ids, i_ids, sampled_j_ids] = True

    return neg_mask
self.loss_config['pos_weight'] + self.c_neg_w = self.loss_config['neg_weight'] + # fine-level + self.fine_type = self.loss_config['fine_type'] + + def compute_coarse_loss(self, conf, topic_mat, conf_gt, match_ids=None, weight=None): + """ Point-wise CE / Focal Loss with 0 / 1 confidence as gt. + Args: + conf (torch.Tensor): (N, HW0, HW1) / (N, HW0+1, HW1+1) + conf_gt (torch.Tensor): (N, HW0, HW1) + weight (torch.Tensor): (N, HW0, HW1) + """ + pos_mask = conf_gt == 1 + neg_mask = sample_non_matches(pos_mask, match_ids=match_ids) + c_pos_w, c_neg_w = self.c_pos_w, self.c_neg_w + # corner case: no gt coarse-level match at all + if not pos_mask.any(): # assign a wrong gt + pos_mask[0, 0, 0] = True + if weight is not None: + weight[0, 0, 0] = 0. + c_pos_w = 0. + if not neg_mask.any(): + neg_mask[0, 0, 0] = True + if weight is not None: + weight[0, 0, 0] = 0. + c_neg_w = 0. + + conf = torch.clamp(conf, 1e-6, 1 - 1e-6) + alpha = self.loss_config['focal_alpha'] + + loss = 0.0 + if isinstance(topic_mat, torch.Tensor): + pos_topic = topic_mat[pos_mask] + loss_pos_topic = - alpha * (pos_topic + 1e-6).log() + neg_topic = topic_mat[neg_mask] + loss_neg_topic = - alpha * (1 - neg_topic + 1e-6).log() + if weight is not None: + loss_pos_topic = loss_pos_topic * weight[pos_mask] + loss_neg_topic = loss_neg_topic * weight[neg_mask] + loss = loss_pos_topic.mean() + loss_neg_topic.mean() + + pos_conf = conf[pos_mask] + loss_pos = - alpha * pos_conf.log() + # handle loss weights + if weight is not None: + # Different from dense-spvs, the loss w.r.t. padded regions aren't directly zeroed out, + # but only through manually setting corresponding regions in sim_matrix to '-inf'. 
+ loss_pos = loss_pos * weight[pos_mask] + + loss = loss + c_pos_w * loss_pos.mean() + + return loss + + def compute_fine_loss(self, expec_f, expec_f_gt): + if self.fine_type == 'l2_with_std': + return self._compute_fine_loss_l2_std(expec_f, expec_f_gt) + elif self.fine_type == 'l2': + return self._compute_fine_loss_l2(expec_f, expec_f_gt) + else: + raise NotImplementedError() + + def _compute_fine_loss_l2(self, expec_f, expec_f_gt): + """ + Args: + expec_f (torch.Tensor): [M, 2] + expec_f_gt (torch.Tensor): [M, 2] + """ + correct_mask = torch.linalg.norm(expec_f_gt, ord=float('inf'), dim=1) < self.correct_thr + if correct_mask.sum() == 0: + if self.training: # this seldomly happen when training, since we pad prediction with gt + logger.warning("assign a false supervision to avoid ddp deadlock") + correct_mask[0] = True + else: + return None + offset_l2 = ((expec_f_gt[correct_mask] - expec_f[correct_mask]) ** 2).sum(-1) + return offset_l2.mean() + + def _compute_fine_loss_l2_std(self, expec_f, expec_f_gt): + """ + Args: + expec_f (torch.Tensor): [M, 3] + expec_f_gt (torch.Tensor): [M, 2] + """ + # correct_mask tells you which pair to compute fine-loss + correct_mask = torch.linalg.norm(expec_f_gt, ord=float('inf'), dim=1) < self.correct_thr + + # use std as weight that measures uncertainty + std = expec_f[:, 2] + inverse_std = 1. / torch.clamp(std, min=1e-10) + weight = (inverse_std / torch.mean(inverse_std)).detach() # avoid minizing loss through increase std + + # corner case: no correct coarse match found + if not correct_mask.any(): + if self.training: # this seldomly happen during training, since we pad prediction with gt + # sometimes there is not coarse-level gt at all. + logger.warning("assign a false supervision to avoid ddp deadlock") + correct_mask[0] = True + weight[0] = 0. 
+ else: + return None + + # l2 loss with std + offset_l2 = ((expec_f_gt[correct_mask] - expec_f[correct_mask, :2]) ** 2).sum(-1) + loss = (offset_l2 * weight[correct_mask]).mean() + + return loss + + @torch.no_grad() + def compute_c_weight(self, data): + """ compute element-wise weights for computing coarse-level loss. """ + if 'mask0' in data: + c_weight = (data['mask0'].flatten(-2)[..., None] * data['mask1'].flatten(-2)[:, None]).float() + else: + c_weight = None + return c_weight + + def forward(self, data): + """ + Update: + data (dict): update{ + 'loss': [1] the reduced loss across a batch, + 'loss_scalars' (dict): loss scalars for tensorboard_record + } + """ + loss_scalars = {} + # 0. compute element-wise loss weight + c_weight = self.compute_c_weight(data) + + # 1. coarse-level loss + loss_c = self.compute_coarse_loss(data['conf_matrix'], data['topic_matrix'], + data['conf_matrix_gt'], match_ids=(data['spv_b_ids'], data['spv_i_ids'], data['spv_j_ids']), + weight=c_weight) + loss = loss_c * self.loss_config['coarse_weight'] + loss_scalars.update({"loss_c": loss_c.clone().detach().cpu()}) + + # 2. 
fine-level loss + loss_f = self.compute_fine_loss(data['expec_f'], data['expec_f_gt']) + if loss_f is not None: + loss += loss_f * self.loss_config['fine_weight'] + loss_scalars.update({"loss_f": loss_f.clone().detach().cpu()}) + else: + assert self.training is False + loss_scalars.update({'loss_f': torch.tensor(1.)}) # 1 is the upper bound + + loss_scalars.update({'loss': loss.clone().detach().cpu()}) + data.update({"loss": loss, "loss_scalars": loss_scalars}) diff --git a/third_party/TopicFM/src/models/__init__.py b/third_party/TopicFM/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9abdbdaebbf6c91a6fdc24e23d62c73003b204bf --- /dev/null +++ b/third_party/TopicFM/src/models/__init__.py @@ -0,0 +1 @@ +from .topic_fm import TopicFM diff --git a/third_party/TopicFM/src/models/backbone/__init__.py b/third_party/TopicFM/src/models/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..53f98db4e910b46716bed7cfc6ebbf8c8bfad399 --- /dev/null +++ b/third_party/TopicFM/src/models/backbone/__init__.py @@ -0,0 +1,5 @@ +from .fpn import FPN + + +def build_backbone(config): + return FPN(config['fpn']) diff --git a/third_party/TopicFM/src/models/backbone/fpn.py b/third_party/TopicFM/src/models/backbone/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..93cc475f57317f9dbb8132cdfe0297391972f9e2 --- /dev/null +++ b/third_party/TopicFM/src/models/backbone/fpn.py @@ -0,0 +1,109 @@ +import torch.nn as nn +import torch.nn.functional as F + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution without padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False) + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + + +class ConvBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1, 
bn=True): + super().__init__() + self.conv = conv3x3(in_planes, planes, stride) + self.bn = nn.BatchNorm2d(planes) if bn is True else None + self.act = nn.GELU() + + def forward(self, x): + y = self.conv(x) + if self.bn: + y = self.bn(y) #F.layer_norm(y, y.shape[1:]) + y = self.act(y) + return y + + +class FPN(nn.Module): + """ + ResNet+FPN, output resolution are 1/8 and 1/2. + Each block has 2 layers. + """ + + def __init__(self, config): + super().__init__() + # Config + block = ConvBlock + initial_dim = config['initial_dim'] + block_dims = config['block_dims'] + + # Class Variable + self.in_planes = initial_dim + + # Networks + self.conv1 = nn.Conv2d(1, initial_dim, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(initial_dim) + self.relu = nn.ReLU(inplace=True) + + self.layer1 = self._make_layer(block, block_dims[0], stride=1) # 1/2 + self.layer2 = self._make_layer(block, block_dims[1], stride=2) # 1/4 + self.layer3 = self._make_layer(block, block_dims[2], stride=2) # 1/8 + self.layer4 = self._make_layer(block, block_dims[3], stride=2) # 1/16 + + # 3. 
FPN upsample + self.layer3_outconv = conv1x1(block_dims[2], block_dims[3]) + self.layer3_outconv2 = nn.Sequential( + ConvBlock(block_dims[3], block_dims[2]), + conv3x3(block_dims[2], block_dims[2]), + ) + self.layer2_outconv = conv1x1(block_dims[1], block_dims[2]) + self.layer2_outconv2 = nn.Sequential( + ConvBlock(block_dims[2], block_dims[1]), + conv3x3(block_dims[1], block_dims[1]), + ) + self.layer1_outconv = conv1x1(block_dims[0], block_dims[1]) + self.layer1_outconv2 = nn.Sequential( + ConvBlock(block_dims[1], block_dims[0]), + conv3x3(block_dims[0], block_dims[0]), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, dim, stride=1): + layer1 = block(self.in_planes, dim, stride=stride) + layer2 = block(dim, dim, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + def forward(self, x): + # ResNet Backbone + x0 = self.relu(self.bn1(self.conv1(x))) + x1 = self.layer1(x0) # 1/2 + x2 = self.layer2(x1) # 1/4 + x3 = self.layer3(x2) # 1/8 + x4 = self.layer4(x3) # 1/16 + + # FPN + x4_out_2x = F.interpolate(x4, scale_factor=2., mode='bilinear', align_corners=True) + x3_out = self.layer3_outconv(x3) + x3_out = self.layer3_outconv2(x3_out+x4_out_2x) + + x3_out_2x = F.interpolate(x3_out, scale_factor=2., mode='bilinear', align_corners=True) + x2_out = self.layer2_outconv(x2) + x2_out = self.layer2_outconv2(x2_out+x3_out_2x) + + x2_out_2x = F.interpolate(x2_out, scale_factor=2., mode='bilinear', align_corners=True) + x1_out = self.layer1_outconv(x1) + x1_out = self.layer1_outconv2(x1_out+x2_out_2x) + + return [x3_out, x1_out] diff --git a/third_party/TopicFM/src/models/modules/__init__.py b/third_party/TopicFM/src/models/modules/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..59cf36da37104dcf080e1b2c119c8123fa8d147f --- /dev/null +++ b/third_party/TopicFM/src/models/modules/__init__.py @@ -0,0 +1,2 @@ +from .transformer import LocalFeatureTransformer, TopicFormer +from .fine_preprocess import FinePreprocess diff --git a/third_party/TopicFM/src/models/modules/fine_preprocess.py b/third_party/TopicFM/src/models/modules/fine_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..4c8d264c1895be8f4e124fc3982d4e0d3b876af3 --- /dev/null +++ b/third_party/TopicFM/src/models/modules/fine_preprocess.py @@ -0,0 +1,59 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops.einops import rearrange, repeat + + +class FinePreprocess(nn.Module): + def __init__(self, config): + super().__init__() + + self.config = config + self.cat_c_feat = config['fine_concat_coarse_feat'] + self.W = self.config['fine_window_size'] + + d_model_c = self.config['coarse']['d_model'] + d_model_f = self.config['fine']['d_model'] + self.d_model_f = d_model_f + if self.cat_c_feat: + self.down_proj = nn.Linear(d_model_c, d_model_f, bias=True) + self.merge_feat = nn.Linear(2*d_model_f, d_model_f, bias=True) + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.kaiming_normal_(p, mode="fan_out", nonlinearity="relu") + + def forward(self, feat_f0, feat_f1, feat_c0, feat_c1, data): + W = self.W + stride = data['hw0_f'][0] // data['hw0_c'][0] + + data.update({'W': W}) + if data['b_ids'].shape[0] == 0: + feat0 = torch.empty(0, self.W**2, self.d_model_f, device=feat_f0.device) + feat1 = torch.empty(0, self.W**2, self.d_model_f, device=feat_f0.device) + return feat0, feat1 + + # 1. 
unfold(crop) all local windows + feat_f0_unfold = F.unfold(feat_f0, kernel_size=(W, W), stride=stride, padding=W//2) + feat_f0_unfold = rearrange(feat_f0_unfold, 'n (c ww) l -> n l ww c', ww=W**2) + feat_f1_unfold = F.unfold(feat_f1, kernel_size=(W, W), stride=stride, padding=W//2) + feat_f1_unfold = rearrange(feat_f1_unfold, 'n (c ww) l -> n l ww c', ww=W**2) + + # 2. select only the predicted matches + feat_f0_unfold = feat_f0_unfold[data['b_ids'], data['i_ids']] # [n, ww, cf] + feat_f1_unfold = feat_f1_unfold[data['b_ids'], data['j_ids']] + + # option: use coarse-level feature as context: concat and linear + if self.cat_c_feat: + feat_c_win = self.down_proj(torch.cat([feat_c0[data['b_ids'], data['i_ids']], + feat_c1[data['b_ids'], data['j_ids']]], 0)) # [2n, c] + feat_cf_win = self.merge_feat(torch.cat([ + torch.cat([feat_f0_unfold, feat_f1_unfold], 0), # [2n, ww, cf] + repeat(feat_c_win, 'n c -> n ww c', ww=W**2), # [2n, ww, cf] + ], -1)) + feat_f0_unfold, feat_f1_unfold = torch.chunk(feat_cf_win, 2, dim=0) + + return feat_f0_unfold, feat_f1_unfold diff --git a/third_party/TopicFM/src/models/modules/linear_attention.py b/third_party/TopicFM/src/models/modules/linear_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..af6cd825033e98b7be15cc694ce28110ef84cc93 --- /dev/null +++ b/third_party/TopicFM/src/models/modules/linear_attention.py @@ -0,0 +1,81 @@ +""" +Linear Transformer proposed in "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention" +Modified from: https://github.com/idiap/fast-transformers/blob/master/fast_transformers/attention/linear_attention.py +""" + +import torch +from torch.nn import Module, Dropout + + +def elu_feature_map(x): + return torch.nn.functional.elu(x) + 1 + + +class LinearAttention(Module): + def __init__(self, eps=1e-6): + super().__init__() + self.feature_map = elu_feature_map + self.eps = eps + + def forward(self, queries, keys, values, q_mask=None, kv_mask=None): + """ 
class FullAttention(Module):
    """Multi-head scaled dot-product attention with optional padding masks."""

    def __init__(self, use_dropout=False, attention_dropout=0.1):
        """
        Args:
            use_dropout (bool): apply dropout to the attention weights.
            attention_dropout (float): dropout probability used when
                ``use_dropout`` is True.
        """
        super().__init__()
        self.use_dropout = use_dropout
        self.dropout = Dropout(attention_dropout)

    def forward(self, queries, keys, values, q_mask=None, kv_mask=None):
        """ Multi-head scaled dot-product attention, a.k.a full attention.
        Args:
            queries: [N, L, H, D]
            keys: [N, S, H, D]
            values: [N, S, H, D]
            q_mask: [N, L] (optional)
            kv_mask: [N, S] (optional)
        Returns:
            queried_values: (N, L, H, D)

        Masking is applied only when ``kv_mask`` is given. ``q_mask`` may
        then be omitted, in which case every query is treated as valid.
        """
        # Compute the unnormalized attention and apply the masks
        QK = torch.einsum("nlhd,nshd->nlsh", queries, keys)
        if kv_mask is not None:
            # [N, 1, S, 1], broadcast over queries and heads
            valid = kv_mask[:, None, :, None].bool()
            if q_mask is not None:
                # Previously q_mask was indexed unconditionally, which raised
                # TypeError for callers that only mask the key/value side.
                valid = valid & q_mask[:, :, None, None].bool()
            QK.masked_fill_(~valid, -1e9)

        # Compute the attention and the weighted average
        softmax_temp = 1. / queries.size(3)**.5  # 1 / sqrt(D)
        A = torch.softmax(softmax_temp * QK, dim=2)
        if self.use_dropout:
            A = self.dropout(A)

        queried_values = torch.einsum("nlsh,nshd->nlhd", A, values)

        return queried_values.contiguous()
(H, D)] + value = self.v_proj(value).view(bs, -1, self.nhead, self.dim) + message = self.attention(query, key, value, q_mask=x_mask, kv_mask=source_mask) # [N, L, (H, D)] + message = self.merge(message.view(bs, -1, self.nhead*self.dim)) # [N, L, C] + message = self.norm1(message) + + # feed-forward network + message = self.mlp(torch.cat([x, message], dim=2)) + message = self.norm2(message) + + return x + message + + +class TopicFormer(nn.Module): + """A Local Feature Transformer (LoFTR) module.""" + + def __init__(self, config): + super(TopicFormer, self).__init__() + + self.config = config + self.d_model = config['d_model'] + self.nhead = config['nhead'] + self.layer_names = config['layer_names'] + encoder_layer = LoFTREncoderLayer(config['d_model'], config['nhead'], config['attention']) + self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(len(self.layer_names))]) + + self.topic_transformers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(2*config['n_topic_transformers'])]) if config['n_samples'] > 0 else None #nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(2)]) + self.n_iter_topic_transformer = config['n_topic_transformers'] + + self.seed_tokens = nn.Parameter(torch.randn(config['n_topics'], config['d_model'])) + self.register_parameter('seed_tokens', self.seed_tokens) + self.n_samples = config['n_samples'] + + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def sample_topic(self, prob_topics, topics, L): + """ + Args: + topics (torch.Tensor): [N, L+S, K] + """ + prob_topics0, prob_topics1 = prob_topics[:, :L], prob_topics[:, L:] + topics0, topics1 = topics[:, :L], topics[:, L:] + + theta0 = F.normalize(prob_topics0.sum(dim=1), p=1, dim=-1) # [N, K] + theta1 = F.normalize(prob_topics1.sum(dim=1), p=1, dim=-1) + theta = F.normalize(theta0 * theta1, p=1, dim=-1) + if self.n_samples == 0: + return None + if self.training: + 
sampled_inds = torch.multinomial(theta, self.n_samples) + sampled_values = torch.gather(theta, dim=-1, index=sampled_inds) + else: + sampled_values, sampled_inds = torch.topk(theta, self.n_samples, dim=-1) + sampled_topics0 = torch.gather(topics0, dim=-1, index=sampled_inds.unsqueeze(1).repeat(1, topics0.shape[1], 1)) + sampled_topics1 = torch.gather(topics1, dim=-1, index=sampled_inds.unsqueeze(1).repeat(1, topics1.shape[1], 1)) + return sampled_topics0, sampled_topics1 + + def reduce_feat(self, feat, topick, N, C): + len_topic = topick.sum(dim=-1).int() + max_len = len_topic.max().item() + selected_ids = topick.bool() + resized_feat = torch.zeros((N, max_len, C), dtype=torch.float32, device=feat.device) + new_mask = torch.zeros_like(resized_feat[..., 0]).bool() + for i in range(N): + new_mask[i, :len_topic[i]] = True + resized_feat[new_mask, :] = feat[selected_ids, :] + return resized_feat, new_mask, selected_ids + + def forward(self, feat0, feat1, mask0=None, mask1=None): + """ + Args: + feat0 (torch.Tensor): [N, L, C] + feat1 (torch.Tensor): [N, S, C] + mask0 (torch.Tensor): [N, L] (optional) + mask1 (torch.Tensor): [N, S] (optional) + """ + + assert self.d_model == feat0.shape[2], "the feature number of src and transformer must be equal" + N, L, S, C, K = feat0.shape[0], feat0.shape[1], feat1.shape[1], feat0.shape[2], self.config['n_topics'] + + seeds = self.seed_tokens.unsqueeze(0).repeat(N, 1, 1) + + feat = torch.cat((feat0, feat1), dim=1) + if mask0 is not None: + mask = torch.cat((mask0, mask1), dim=-1) + else: + mask = None + + for layer, name in zip(self.layers, self.layer_names): + if name == 'seed': + # seeds = layer(seeds, feat0, None, mask0) + # seeds = layer(seeds, feat1, None, mask1) + seeds = layer(seeds, feat, None, mask) + elif name == 'feat': + feat0 = layer(feat0, seeds, mask0, None) + feat1 = layer(feat1, seeds, mask1, None) + + dmatrix = torch.einsum("nmd,nkd->nmk", feat, seeds) + prob_topics = F.softmax(dmatrix, dim=-1) + + feat_topics = 
torch.zeros_like(dmatrix).scatter_(-1, torch.argmax(dmatrix, dim=-1, keepdim=True), 1.0) + + if mask is not None: + feat_topics = feat_topics * mask.unsqueeze(-1) + prob_topics = prob_topics * mask.unsqueeze(-1) + + if (feat_topics.detach().sum(dim=1).sum(dim=0) > 100).sum() <= 3: + logger.warning("topic distribution is highly sparse!") + sampled_topics = self.sample_topic(prob_topics.detach(), feat_topics, L) + if sampled_topics is not None: + updated_feat0, updated_feat1 = torch.zeros_like(feat0), torch.zeros_like(feat1) + s_topics0, s_topics1 = sampled_topics + for k in range(s_topics0.shape[-1]): + topick0, topick1 = s_topics0[..., k], s_topics1[..., k] # [N, L+S] + if (topick0.sum() > 0) and (topick1.sum() > 0): + new_feat0, new_mask0, selected_ids0 = self.reduce_feat(feat0, topick0, N, C) + new_feat1, new_mask1, selected_ids1 = self.reduce_feat(feat1, topick1, N, C) + for idt in range(self.n_iter_topic_transformer): + new_feat0 = self.topic_transformers[idt*2](new_feat0, new_feat0, new_mask0, new_mask0) + new_feat1 = self.topic_transformers[idt*2](new_feat1, new_feat1, new_mask1, new_mask1) + new_feat0 = self.topic_transformers[idt*2+1](new_feat0, new_feat1, new_mask0, new_mask1) + new_feat1 = self.topic_transformers[idt*2+1](new_feat1, new_feat0, new_mask1, new_mask0) + updated_feat0[selected_ids0, :] = new_feat0[new_mask0, :] + updated_feat1[selected_ids1, :] = new_feat1[new_mask1, :] + + feat0 = (1 - s_topics0.sum(dim=-1, keepdim=True)) * feat0 + updated_feat0 + feat1 = (1 - s_topics1.sum(dim=-1, keepdim=True)) * feat1 + updated_feat1 + + conf_matrix = torch.einsum("nlc,nsc->nls", feat0, feat1) / C**.5 #(C * temperature) + if self.training: + topic_matrix = torch.einsum("nlk,nsk->nls", prob_topics[:, :L], prob_topics[:, L:]) + outlier_mask = torch.einsum("nlk,nsk->nls", feat_topics[:, :L], feat_topics[:, L:]) + else: + topic_matrix = {"img0": feat_topics[:, :L], "img1": feat_topics[:, L:]} + outlier_mask = torch.ones_like(conf_matrix) + if mask0 is not 
None: + outlier_mask = (outlier_mask * mask0[..., None] * mask1[:, None]) #.bool() + conf_matrix.masked_fill_(~outlier_mask.bool(), -1e9) + conf_matrix = F.softmax(conf_matrix, 1) * F.softmax(conf_matrix, 2) # * topic_matrix + + return feat0, feat1, conf_matrix, topic_matrix + + +class LocalFeatureTransformer(nn.Module): + """A Local Feature Transformer (LoFTR) module.""" + + def __init__(self, config): + super(LocalFeatureTransformer, self).__init__() + + self.config = config + self.d_model = config['d_model'] + self.nhead = config['nhead'] + self.layer_names = config['layer_names'] + encoder_layer = LoFTREncoderLayer(config['d_model'], config['nhead'], config['attention']) + self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(2)]) #len(self.layer_names))]) + self._reset_parameters() + + def _reset_parameters(self): + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + def forward(self, feat0, feat1, mask0=None, mask1=None): + """ + Args: + feat0 (torch.Tensor): [N, L, C] + feat1 (torch.Tensor): [N, S, C] + mask0 (torch.Tensor): [N, L] (optional) + mask1 (torch.Tensor): [N, S] (optional) + """ + + assert self.d_model == feat0.shape[2], "the feature number of src and transformer must be equal" + + feat0 = self.layers[0](feat0, feat1, mask0, mask1) + feat1 = self.layers[1](feat1, feat0, mask1, mask0) + + return feat0, feat1 diff --git a/third_party/TopicFM/src/models/topic_fm.py b/third_party/TopicFM/src/models/topic_fm.py new file mode 100644 index 0000000000000000000000000000000000000000..95cd22f9b66d08760382fe4cd22c4df918cc9f68 --- /dev/null +++ b/third_party/TopicFM/src/models/topic_fm.py @@ -0,0 +1,79 @@ +import torch +import torch.nn as nn +from einops.einops import rearrange + +from .backbone import build_backbone +from .modules import LocalFeatureTransformer, FinePreprocess, TopicFormer +from .utils.coarse_matching import CoarseMatching +from .utils.fine_matching import FineMatching + + +class 
TopicFM(nn.Module): + def __init__(self, config): + super().__init__() + # Misc + self.config = config + + # Modules + self.backbone = build_backbone(config) + + self.loftr_coarse = TopicFormer(config['coarse']) + self.coarse_matching = CoarseMatching(config['match_coarse']) + self.fine_preprocess = FinePreprocess(config) + self.loftr_fine = LocalFeatureTransformer(config["fine"]) + self.fine_matching = FineMatching() + + def forward(self, data): + """ + Update: + data (dict): { + 'image0': (torch.Tensor): (N, 1, H, W) + 'image1': (torch.Tensor): (N, 1, H, W) + 'mask0'(optional) : (torch.Tensor): (N, H, W) '0' indicates a padded position + 'mask1'(optional) : (torch.Tensor): (N, H, W) + } + """ + # 1. Local Feature CNN + data.update({ + 'bs': data['image0'].size(0), + 'hw0_i': data['image0'].shape[2:], 'hw1_i': data['image1'].shape[2:] + }) + + if data['hw0_i'] == data['hw1_i']: # faster & better BN convergence + feats_c, feats_f = self.backbone(torch.cat([data['image0'], data['image1']], dim=0)) + (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(data['bs']), feats_f.split(data['bs']) + else: # handle different input shapes + (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(data['image0']), self.backbone(data['image1']) + + data.update({ + 'hw0_c': feat_c0.shape[2:], 'hw1_c': feat_c1.shape[2:], + 'hw0_f': feat_f0.shape[2:], 'hw1_f': feat_f1.shape[2:] + }) + + # 2. coarse-level loftr module + feat_c0 = rearrange(feat_c0, 'n c h w -> n (h w) c') + feat_c1 = rearrange(feat_c1, 'n c h w -> n (h w) c') + + mask_c0 = mask_c1 = None # mask is useful in training + if 'mask0' in data: + mask_c0, mask_c1 = data['mask0'].flatten(-2), data['mask1'].flatten(-2) + + feat_c0, feat_c1, conf_matrix, topic_matrix = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1) + data.update({"conf_matrix": conf_matrix, "topic_matrix": topic_matrix}) ###### + + # 3. match coarse-level + self.coarse_matching(data) + + # 4. 
fine-level refinement + feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(feat_f0, feat_f1, feat_c0.detach(), feat_c1.detach(), data) + if feat_f0_unfold.size(0) != 0: # at least one coarse level predicted + feat_f0_unfold, feat_f1_unfold = self.loftr_fine(feat_f0_unfold, feat_f1_unfold) + + # 5. match fine-level + self.fine_matching(feat_f0_unfold, feat_f1_unfold, data) + + def load_state_dict(self, state_dict, *args, **kwargs): + for k in list(state_dict.keys()): + if k.startswith('matcher.'): + state_dict[k.replace('matcher.', '', 1)] = state_dict.pop(k) + return super().load_state_dict(state_dict, *args, **kwargs) diff --git a/third_party/TopicFM/src/models/utils/coarse_matching.py b/third_party/TopicFM/src/models/utils/coarse_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..75adbb5cc465220e759a044f96f86c08da2d7a50 --- /dev/null +++ b/third_party/TopicFM/src/models/utils/coarse_matching.py @@ -0,0 +1,217 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops.einops import rearrange + +INF = 1e9 + +def mask_border(m, b: int, v): + """ Mask borders with value + Args: + m (torch.Tensor): [N, H0, W0, H1, W1] + b (int) + v (m.dtype) + """ + if b <= 0: + return + + m[:, :b] = v + m[:, :, :b] = v + m[:, :, :, :b] = v + m[:, :, :, :, :b] = v + m[:, -b:] = v + m[:, :, -b:] = v + m[:, :, :, -b:] = v + m[:, :, :, :, -b:] = v + + +def mask_border_with_padding(m, bd, v, p_m0, p_m1): + if bd <= 0: + return + + m[:, :bd] = v + m[:, :, :bd] = v + m[:, :, :, :bd] = v + m[:, :, :, :, :bd] = v + + h0s, w0s = p_m0.sum(1).max(-1)[0].int(), p_m0.sum(-1).max(-1)[0].int() + h1s, w1s = p_m1.sum(1).max(-1)[0].int(), p_m1.sum(-1).max(-1)[0].int() + for b_idx, (h0, w0, h1, w1) in enumerate(zip(h0s, w0s, h1s, w1s)): + m[b_idx, h0 - bd:] = v + m[b_idx, :, w0 - bd:] = v + m[b_idx, :, :, h1 - bd:] = v + m[b_idx, :, :, :, w1 - bd:] = v + + +def compute_max_candidates(p_m0, p_m1): + """Compute the max candidates of all pairs 
class CoarseMatching(nn.Module):
    """Turn a coarse-level confidence matrix into a set of coarse matches.

    The matches are written back into the shared ``data`` dict by
    :meth:`forward`; the selection itself happens in :meth:`get_coarse_match`.
    """

    def __init__(self, config):
        """Read thresholds and matching options from ``config``.

        Args:
            config (dict-like): must provide 'thr', 'border_rm',
                'train_coarse_percent', 'train_pad_num_gt_min' and
                'match_type', plus the keys specific to the chosen match type
                ('dsmax_temperature' or the 'skh_*' keys).

        Raises:
            ImportError: 'sinkhorn' was requested but ``superglue.py`` is missing.
            NotImplementedError: unknown 'match_type'.
        """
        super().__init__()
        self.config = config
        # general config
        self.thr = config['thr']
        self.border_rm = config['border_rm']
        # -- # for training fine-level LoFTR
        self.train_coarse_percent = config['train_coarse_percent']
        self.train_pad_num_gt_min = config['train_pad_num_gt_min']

        # we provide 2 options for differentiable matching
        self.match_type = config['match_type']
        if self.match_type == 'dual_softmax':
            self.temperature = config['dsmax_temperature']
        elif self.match_type == 'sinkhorn':
            # imported lazily so that superglue.py is only needed for this option
            try:
                from .superglue import log_optimal_transport
            except ImportError:
                raise ImportError("download superglue.py first!")
            self.log_optimal_transport = log_optimal_transport
            self.bin_score = nn.Parameter(
                torch.tensor(config['skh_init_bin_score'], requires_grad=True))
            self.skh_iters = config['skh_iters']
            self.skh_prefilter = config['skh_prefilter']
        else:
            raise NotImplementedError()

    def forward(self, data):
        """Select coarse matches from ``data['conf_matrix']`` and store them in ``data``.

        Args:
            data (dict): must contain 'conf_matrix' plus the keys read by
                :meth:`get_coarse_match`.
        Update:
            data (dict): {
                'b_ids' (torch.Tensor): [M'],
                'i_ids' (torch.Tensor): [M'],
                'j_ids' (torch.Tensor): [M'],
                'gt_mask' (torch.Tensor): [M'],
                'm_bids' (torch.Tensor): [M],
                'mkpts0_c' (torch.Tensor): [M, 2],
                'mkpts1_c' (torch.Tensor): [M, 2],
                'mconf' (torch.Tensor): [M]}
            NOTE: M' != M during training.
        """
        conf_matrix = data['conf_matrix']
        # predict coarse matches from conf_matrix
        data.update(**self.get_coarse_match(conf_matrix, data))

    @torch.no_grad()
    def get_coarse_match(self, conf_matrix, data):
        """Threshold, border-filter and mutual-nearest-neighbour the confidence matrix.

        In training mode the predicted matches are additionally subsampled and
        padded with ground-truth matches taken from ``data['spv_*_ids']``;
        padded entries get confidence 0, which is how they are told apart below.

        Args:
            conf_matrix (torch.Tensor): [N, L, S]
            data (dict): with keys ['hw0_i', 'hw1_i', 'hw0_c', 'hw1_c'];
                optionally 'mask0'/'mask1', 'scale0'/'scale1', and (training)
                'spv_b_ids', 'spv_i_ids', 'spv_j_ids'.
        Returns:
            coarse_matches (dict): {
                'b_ids' (torch.Tensor): [M'],
                'i_ids' (torch.Tensor): [M'],
                'j_ids' (torch.Tensor): [M'],
                'gt_mask' (torch.Tensor): [M'],
                'm_bids' (torch.Tensor): [M],
                'mkpts0_c' (torch.Tensor): [M, 2],
                'mkpts1_c' (torch.Tensor): [M, 2],
                'mconf' (torch.Tensor): [M]}
        """
        axes_lengths = {
            'h0c': data['hw0_c'][0],
            'w0c': data['hw0_c'][1],
            'h1c': data['hw1_c'][0],
            'w1c': data['hw1_c'][1]
        }
        _device = conf_matrix.device
        # 1. confidence thresholding
        mask = conf_matrix > self.thr
        mask = rearrange(mask, 'b (h0c w0c) (h1c w1c) -> b h0c w0c h1c w1c',
                         **axes_lengths)
        # NOTE(review): the return value of both helpers is discarded, so they
        # presumably edit `mask` in place -- confirm against their definitions.
        if 'mask0' not in data:
            mask_border(mask, self.border_rm, False)
        else:
            mask_border_with_padding(mask, self.border_rm, False,
                                     data['mask0'], data['mask1'])
        mask = rearrange(mask, 'b h0c w0c h1c w1c -> b (h0c w0c) (h1c w1c)',
                         **axes_lengths)

        # 2. mutual nearest: keep entries that are the maximum of both their row and column
        mask = mask \
            * (conf_matrix == conf_matrix.max(dim=2, keepdim=True)[0]) \
            * (conf_matrix == conf_matrix.max(dim=1, keepdim=True)[0])

        # 3. find all valid coarse matches
        # this only works when at most one `True` in each row
        mask_v, all_j_ids = mask.max(dim=2)
        b_ids, i_ids = torch.where(mask_v)
        j_ids = all_j_ids[b_ids, i_ids]
        mconf = conf_matrix[b_ids, i_ids, j_ids]

        # 4. Random sampling of training samples for fine-level LoFTR
        # (optional) pad samples with gt coarse-level matches
        if self.training:
            # NOTE:
            # The sampling is performed across all pairs in a batch without manually balancing
            # #samples for fine-level increases w.r.t. batch_size
            if 'mask0' not in data:
                num_candidates_max = mask.size(0) * max(
                    mask.size(1), mask.size(2))
            else:
                num_candidates_max = compute_max_candidates(
                    data['mask0'], data['mask1'])
            num_matches_train = int(num_candidates_max *
                                    self.train_coarse_percent)
            num_matches_pred = len(b_ids)
            assert self.train_pad_num_gt_min < num_matches_train, "min-num-gt-pad should be less than num-train-matches"

            # pred_indices is to select from prediction
            if num_matches_pred <= num_matches_train - self.train_pad_num_gt_min:
                pred_indices = torch.arange(num_matches_pred, device=_device)
            else:
                # too many predictions: sample (with replacement) down to the budget
                pred_indices = torch.randint(
                    num_matches_pred,
                    (num_matches_train - self.train_pad_num_gt_min, ),
                    device=_device)

            # gt_pad_indices is to select from gt padding. e.g. max(3787-4800, 200)
            gt_pad_indices = torch.randint(
                len(data['spv_b_ids']),
                (max(num_matches_train - num_matches_pred,
                     self.train_pad_num_gt_min), ),
                device=_device)
            mconf_gt = torch.zeros(len(data['spv_b_ids']), device=_device)  # set conf of gt paddings to all zero

            # concatenate the sampled predictions with the sampled gt padding, field by field
            b_ids, i_ids, j_ids, mconf = map(
                lambda x, y: torch.cat([x[pred_indices], y[gt_pad_indices]],
                                       dim=0),
                *zip([b_ids, data['spv_b_ids']], [i_ids, data['spv_i_ids']],
                     [j_ids, data['spv_j_ids']], [mconf, mconf_gt]))

        # These matches select patches that feed into fine-level network
        coarse_matches = {'b_ids': b_ids, 'i_ids': i_ids, 'j_ids': j_ids}

        # 5. Update with matches in original image resolution
        scale = data['hw0_i'][0] / data['hw0_c'][0]
        scale0 = scale * data['scale0'][b_ids] if 'scale0' in data else scale
        scale1 = scale * data['scale1'][b_ids] if 'scale1' in data else scale
        # flat index -> (x, y) = (index % width, index // width), then scale up
        mkpts0_c = torch.stack(
            [i_ids % data['hw0_c'][1], i_ids // data['hw0_c'][1]],
            dim=1) * scale0
        mkpts1_c = torch.stack(
            [j_ids % data['hw1_c'][1], j_ids // data['hw1_c'][1]],
            dim=1) * scale1

        # These matches are the current prediction (for visualization)
        coarse_matches.update({
            'gt_mask': mconf == 0,
            'm_bids': b_ids[mconf != 0],  # mconf == 0 => gt matches
            'mkpts0_c': mkpts0_c[mconf != 0],
            'mkpts1_c': mkpts1_c[mconf != 0],
            'mconf': mconf[mconf != 0]
        })

        return coarse_matches
found in coarse-level.') + data.update({ + 'expec_f': torch.empty(0, 3, device=feat_f0.device), + 'mkpts0_f': data['mkpts0_c'], + 'mkpts1_f': data['mkpts1_c'], + }) + return + + feat_f0_picked = feat_f0[:, WW//2, :] + + sim_matrix = torch.einsum('mc,mrc->mr', feat_f0_picked, feat_f1) + softmax_temp = 1. / C**.5 + heatmap = torch.softmax(softmax_temp * sim_matrix, dim=1) + feat_f1_picked = (feat_f1 * heatmap.unsqueeze(-1)).sum(dim=1) # [M, C] + heatmap = heatmap.view(-1, W, W) + + # compute coordinates from heatmap + coords1_normalized = dsnt.spatial_expectation2d(heatmap[None], True)[0] # [M, 2] + grid_normalized = create_meshgrid(W, W, True, heatmap.device).reshape(1, -1, 2) # [1, WW, 2] + + # compute std over + var = torch.sum(grid_normalized**2 * heatmap.view(-1, WW, 1), dim=1) - coords1_normalized**2 # [M, 2] + std = torch.sum(torch.sqrt(torch.clamp(var, min=1e-10)), -1) # [M] clamp needed for numerical stability + + # for fine-level supervision + data.update({'expec_f': torch.cat([coords1_normalized, std.unsqueeze(1)], -1), + 'descriptors0': feat_f0_picked.detach(), 'descriptors1': feat_f1_picked.detach()}) + + # compute absolute kpt coords + self.get_fine_match(coords1_normalized, data) + + @torch.no_grad() + def get_fine_match(self, coords1_normed, data): + W, WW, C, scale = self.W, self.WW, self.C, self.scale + + # mkpts0_f and mkpts1_f + # scale0 = scale * data['scale0'][data['b_ids']] if 'scale0' in data else scale + mkpts0_f = data['mkpts0_c'] # + (coords0_normed * (W // 2) * scale0 )[:len(data['mconf'])] + scale1 = scale * data['scale1'][data['b_ids']] if 'scale1' in data else scale + mkpts1_f = data['mkpts1_c'] + (coords1_normed * (W // 2) * scale1)[:len(data['mconf'])] + + data.update({ + "mkpts0_f": mkpts0_f, + "mkpts1_f": mkpts1_f + }) diff --git a/third_party/TopicFM/src/models/utils/geometry.py b/third_party/TopicFM/src/models/utils/geometry.py new file mode 100644 index 
@torch.no_grad()
def warp_kpts(kpts0, depth0, depth1, T_0to1, K0, K1):
    """Reproject keypoints of image 0 into image 1 using depth and pose.

    A keypoint is reported as valid when its source depth is non-zero, its
    projection falls strictly inside image 1, and the depth sampled in image 1
    agrees with the transformed depth (relative error below the hard-coded 0.2).

    Args:
        kpts0 (torch.Tensor): [N, L, 2] - (x, y) pixel coordinates
        depth0 (torch.Tensor): [N, H, W]
        depth1 (torch.Tensor): [N, H, W]
        T_0to1 (torch.Tensor): [N, 3, 4]
        K0 (torch.Tensor): [N, 3, 3]
        K1 (torch.Tensor): [N, 3, 3]
    Returns:
        calculable_mask (torch.Tensor): [N, L]
        warped_keypoints0 (torch.Tensor): [N, L, 2]
    """

    def sample_depth(depth_maps, pixels):
        # per-sample lookup depth[y, x], stacked back into (N, L)
        return torch.stack(
            [dmap[pix[:, 1], pix[:, 0]] for dmap, pix in zip(depth_maps, pixels)],
            dim=0,
        )

    # depth at the (rounded) source keypoints; 0 means "no depth available"
    src_pixels = kpts0.round().long()
    src_depth = sample_depth(depth0, src_pixels)  # (N, L)
    has_depth = src_depth != 0

    # back-project to camera-0 coordinates
    ones = torch.ones_like(kpts0[:, :, [0]])
    homog = torch.cat([kpts0, ones], dim=-1) * src_depth[..., None]  # (N, L, 3)
    pts_cam0 = K0.inverse() @ homog.transpose(2, 1)  # (N, 3, L)

    # move into camera-1 coordinates
    rotation = T_0to1[:, :3, :3]
    translation = T_0to1[:, :3, [3]]
    pts_cam1 = rotation @ pts_cam0 + translation  # (N, 3, L)
    depth_in_cam1 = pts_cam1[:, 2, :]

    # project into image 1; +1e-4 to avoid zero depth
    projected = (K1 @ pts_cam1).transpose(2, 1)  # (N, L, 3)
    w_kpts0 = projected[:, :, :2] / (projected[:, :, [2]] + 1e-4)  # (N, L, 2)

    # keep only projections strictly inside image 1
    height, width = depth1.shape[1:3]
    xs, ys = w_kpts0[:, :, 0], w_kpts0[:, :, 1]
    inside = (xs > 0) * (xs < width - 1) * (ys > 0) * (ys < height - 1)

    # out-of-image points are redirected to pixel (0, 0) so the lookup is legal
    dst_pixels = w_kpts0.long()
    dst_pixels[~inside, :] = 0
    dst_depth = sample_depth(depth1, dst_pixels)  # (N, L)

    depth_agrees = ((dst_depth - depth_in_cam1) / dst_depth).abs() < 0.2
    valid_mask = has_depth * inside * depth_agrees

    return valid_mask, w_kpts0
warp grids + # create kpts in meshgrid and resize them to image resolution + grid_pt0_c = create_meshgrid(h0, w0, False, device).reshape(1, h0*w0, 2).repeat(N, 1, 1) # [N, hw, 2] + grid_pt0_i = scale0 * grid_pt0_c + grid_pt1_c = create_meshgrid(h1, w1, False, device).reshape(1, h1*w1, 2).repeat(N, 1, 1) + grid_pt1_i = scale1 * grid_pt1_c + + # mask padded region to (0, 0), so no need to manually mask conf_matrix_gt + if 'mask0' in data: + grid_pt0_i = mask_pts_at_padded_regions(grid_pt0_i, data['mask0']) + grid_pt1_i = mask_pts_at_padded_regions(grid_pt1_i, data['mask1']) + + # warp kpts bi-directionally and resize them to coarse-level resolution + # (no depth consistency check, since it leads to worse results experimentally) + # (unhandled edge case: points with 0-depth will be warped to the left-up corner) + _, w_pt0_i = warp_kpts(grid_pt0_i, data['depth0'], data['depth1'], data['T_0to1'], data['K0'], data['K1']) + _, w_pt1_i = warp_kpts(grid_pt1_i, data['depth1'], data['depth0'], data['T_1to0'], data['K1'], data['K0']) + w_pt0_c = w_pt0_i / scale1 + w_pt1_c = w_pt1_i / scale0 + + # 3. check if mutual nearest neighbor + w_pt0_c_round = w_pt0_c[:, :, :].round().long() + nearest_index1 = w_pt0_c_round[..., 0] + w_pt0_c_round[..., 1] * w1 + w_pt1_c_round = w_pt1_c[:, :, :].round().long() + nearest_index0 = w_pt1_c_round[..., 0] + w_pt1_c_round[..., 1] * w0 + + # corner case: out of boundary + def out_bound_mask(pt, w, h): + return (pt[..., 0] < 0) + (pt[..., 0] >= w) + (pt[..., 1] < 0) + (pt[..., 1] >= h) + nearest_index1[out_bound_mask(w_pt0_c_round, w1, h1)] = 0 + nearest_index0[out_bound_mask(w_pt1_c_round, w0, h0)] = 0 + + loop_back = torch.stack([nearest_index0[_b][_i] for _b, _i in enumerate(nearest_index1)], dim=0) + correct_0to1 = loop_back == torch.arange(h0*w0, device=device)[None].repeat(N, 1) + correct_0to1[:, 0] = False # ignore the top-left corner + + # 4. 
@torch.no_grad()
def spvs_fine(data, config):
    """Compute the fine-level ground-truth offsets for the selected coarse matches.

    For every coarse match the offset between the warped point of image 0 and
    the matched grid point of image 1 is expressed in units of the fine window
    radius.

    Args:
        data (dict): reads 'spv_w_pt0_i', 'spv_pt1_i', 'b_ids', 'i_ids',
            'j_ids' and, when present, 'scale1'.
        config (dict): reads ``config['MODEL']['RESOLUTION'][1]`` and
            ``config['MODEL']['FINE_WINDOW_SIZE']``.
    Update:
        data (dict):{
            "expec_f_gt": [M, 2]}
    """
    # 1. misc
    w_pt0_i, pt1_i = data['spv_w_pt0_i'], data['spv_pt1_i']
    scale = config['MODEL']['RESOLUTION'][1]
    radius = config['MODEL']['FINE_WINDOW_SIZE'] // 2

    # 2. get coarse prediction
    b_ids, i_ids, j_ids = data['b_ids'], data['i_ids'], data['j_ids']

    # 3. compute gt
    # The offset lives in image 1, so guard on the key that is actually read
    # ('scale1'); the previous guard tested 'scale0'.
    scale = scale * data['scale1'][b_ids] if 'scale1' in data else scale
    # `expec_f_gt` might exceed the window, i.e. abs(*) > 1, which would be filtered later
    expec_f_gt = (w_pt0_i[b_ids, i_ids] - pt1_i[b_ids, j_ids]) / scale / radius  # [M, 2]
    data.update({"expec_f_gt": expec_f_gt})
def build_augmentor(method=None, **kwargs):
    """Return the augmentor for ``method``, or ``None`` when no augmentation is requested.

    Augmentation is currently switched off: every non-``None`` method is
    rejected up front, so the named augmentors below are not reachable yet.

    Args:
        method (str, optional): 'dark', 'mobile' or None.
        **kwargs: accepted for interface compatibility; unused.

    Raises:
        NotImplementedError: for any ``method`` other than None.
    """
    if method is not None:
        raise NotImplementedError('Using of augmentation functions are not supported yet!')

    # Selection logic kept for when the guard above is lifted.
    if method is None:
        return None
    if method == 'dark':
        return DarkAug()
    if method == 'mobile':
        return MobileAug()
    raise ValueError(f'Invalid augmentation method: {method}')
def synchronize():
    """
    Barrier across all processes of a distributed run.

    Does nothing when torch.distributed is unavailable, not initialized, or
    the world consists of a single process.
    """
    if not (dist.is_available() and dist.is_initialized()):
        return
    if dist.get_world_size() != 1:
        dist.barrier()
def all_gather(data, group=None):
    """
    Collect one picklable object from every rank and return them as a list.

    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: list of data gathered from each rank
    """
    # single-process runs need no communication at all
    if get_world_size() == 1:
        return [data]
    group = _get_global_gloo_group() if group is None else group
    if dist.get_world_size(group) == 1:
        return [data]

    # serialize, then pad to a common length: all_gather requires equal shapes
    payload = _serialize_to_tensor(data, group)
    sizes, payload = _pad_to_largest_tensor(payload, group)
    largest = max(sizes)

    # one receive buffer per rank
    recv_buffers = [
        torch.empty((largest,), dtype=torch.uint8, device=payload.device) for _ in sizes
    ]
    dist.all_gather(recv_buffers, payload, group=group)

    # strip each rank's padding before unpickling
    return [
        pickle.loads(buf.cpu().numpy().tobytes()[:n_bytes])
        for n_bytes, buf in zip(sizes, recv_buffers)
    ]
def shared_random_seed():
    """
    Draw a random seed and agree on it across workers.

    Every worker draws its own number; the one gathered first in the list is
    returned everywhere, so all workers end up with the same value.

    Returns:
        int: a random number that is the same across all workers.

    All workers must call this function, otherwise it will deadlock.
    """
    local_seed = np.random.randint(2 ** 31)
    return all_gather(local_seed)[0]
def get_local_split(items: list, world_size: int, rank: int, seed: int):
    """Return the slice of ``items`` assigned to ``rank``.

    The items are shuffled with a seeded RNG and, when their number is not a
    multiple of ``world_size``, padded with randomly re-drawn items so that
    every rank receives the same number of elements.
    """
    shuffled = np.random.RandomState(seed).permutation(items)

    remainder = len(items) % world_size
    if remainder:
        # top up to the next multiple of world_size with repeated items
        padding = np.random.RandomState(seed).choice(
            items, world_size - remainder, replace=True)
        shuffled = np.concatenate([shuffled, padding])
        assert len(shuffled) % world_size == 0, \
            f'len(padded_items): {len(shuffled)}; world_size: {world_size}; len(padding): {len(padding)}'

    per_rank = len(shuffled) // world_size
    start = per_rank * rank
    return shuffled[start:start + per_rank]
def pad_bottom_right(inp, pad_size, ret_mask=False):
    """Zero-pad the last two axes of ``inp`` to ``pad_size`` x ``pad_size``.

    The input is placed in the top-left corner of the output.

    Args:
        inp (np.ndarray): 2-D (h, w) or 3-D (c, h, w) array.
        pad_size (int): target edge length; must be >= max(h, w).
        ret_mask (bool): also return a boolean mask that is True where the
            output holds input data.
    Returns:
        padded (np.ndarray): same dtype as ``inp``.
        mask (np.ndarray or None): None unless ``ret_mask`` is set.
    Raises:
        NotImplementedError: for inputs that are neither 2-D nor 3-D.
    """
    assert isinstance(pad_size, int) and pad_size >= max(inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}"
    if inp.ndim not in (2, 3):
        raise NotImplementedError()

    height, width = inp.shape[-2:]
    # leading (channel) axis, if any, is carried through unchanged
    out_shape = tuple(inp.shape[:-2]) + (pad_size, pad_size)

    padded = np.zeros(out_shape, dtype=inp.dtype)
    padded[..., :height, :width] = inp

    mask = None
    if ret_mask:
        mask = np.zeros(out_shape, dtype=bool)
        mask[..., :height, :width] = True
    return padded, mask
def read_megadepth_depth(path, pad_to=None):
    """Load a MegaDepth depth map as a float tensor.

    Args:
        path: location of the HDF5 depth file; paths starting with 's3://'
            are fetched through ``load_array_from_s3``.
        pad_to (int, optional): when given, the depth map is zero-padded at
            the bottom/right to a ``pad_to`` x ``pad_to`` square.
    Returns:
        depth (torch.Tensor): (h, w)
    """
    if str(path).startswith('s3://'):
        depth = load_array_from_s3(path, MEGADEPTH_CLIENT, None, use_h5py=True)
    else:
        # close the file as soon as the dataset is copied out; leaving it open
        # leaks one handle per sample
        with h5py.File(path, 'r') as h5_file:
            depth = np.array(h5_file['depth'])
    if pad_to is not None:
        depth, _ = pad_bottom_right(depth, pad_to, ret_mask=False)
    depth = torch.from_numpy(depth).float()  # (h, w)
    return depth
def read_scannet_pose(path):
    """ Load a ScanNet pose file and return its matrix inverse.

    The file stores a space-delimited Camera2World matrix; inverting it gives
    the World2Camera pose.

    Returns:
        pose_w2c (np.ndarray): (4, 4)
    """
    return np.linalg.inv(np.loadtxt(path, delimiter=' '))
def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0):
    """Angular errors (in degrees) between an estimated and a ground-truth pose.

    Args:
        T_0to1 (np.ndarray): ground-truth transform; rotation is read from
            ``[:3, :3]`` and translation from ``[:3, 3]``.
        R (np.ndarray): estimated 3x3 rotation.
        t (np.ndarray): estimated translation direction.
        ignore_gt_t_thr (float): when the ground-truth translation is shorter
            than this, the translation error is reported as 0.
    Returns:
        (t_err, R_err): translation and rotation angle errors in degrees.
    """
    gt_rotation = T_0to1[:3, :3]
    gt_translation = T_0to1[:3, 3]
    gt_t_norm = np.linalg.norm(gt_translation)

    # translation: angle between the two direction vectors
    denom = np.linalg.norm(t) * gt_t_norm
    cos_t = np.clip(np.dot(t, gt_translation) / denom, -1.0, 1.0)
    t_err = np.rad2deg(np.arccos(cos_t))
    t_err = np.minimum(t_err, 180 - t_err)  # handle E ambiguity
    if gt_t_norm < ignore_gt_t_thr:  # pure rotation is challenging
        t_err = 0

    # rotation: angle of the relative rotation R^T R_gt, from its trace
    cos_r = (np.trace(np.dot(R.T, gt_rotation)) - 1) / 2
    cos_r = np.clip(cos_r, -1., 1.)  # handle numerical errors
    R_err = np.rad2deg(np.abs(np.arccos(cos_r)))

    return t_err, R_err
def compute_pose_errors(data, config=None, ransac_thr=0.5, ransac_conf=0.99999):
    """Estimate the relative pose of every batch item and record its errors.

    For each batch index the matches belonging to it (selected through
    ``data['m_bids']``) are passed to ``estimate_pose``; the result is
    compared against ``data['T_0to1']`` with ``relative_pose_error``.

    Args:
        data (dict): batch dictionary holding the tensors ``m_bids``,
            ``mkpts0_f``, ``mkpts1_f``, ``K0``, ``K1`` and ``T_0to1``.
        config: optional config; when given, ``TRAINER.RANSAC_PIXEL_THR`` and
            ``TRAINER.RANSAC_CONF`` override the two keyword arguments.
        ransac_thr (float): RANSAC pixel threshold used when ``config`` is None.
        ransac_conf (float): RANSAC confidence used when ``config`` is None.

    Update:
        data (dict):{
            "R_errs" List[float]: [N]
            "t_errs" List[float]: [N]
            "inliers" List[np.ndarray]: [N]
        }
    """
    if config is not None:
        pixel_thr = config.TRAINER.RANSAC_PIXEL_THR
        conf = config.TRAINER.RANSAC_CONF
    else:
        pixel_thr = ransac_thr  # 0.5
        conf = ransac_conf  # 0.99999
    data.update({'R_errs': [], 't_errs': [], 'inliers': []})

    m_bids = data['m_bids'].cpu().numpy()
    pts0 = data['mkpts0_f'].cpu().numpy()
    pts1 = data['mkpts1_f'].cpu().numpy()
    K0 = data['K0'].cpu().numpy()
    K1 = data['K1'].cpu().numpy()
    T_0to1 = data['T_0to1'].cpu().numpy()

    for bs in range(K0.shape[0]):
        mask = m_bids == bs
        ret = estimate_pose(pts0[mask], pts1[mask], K0[bs], K1[bs], pixel_thr, conf=conf)

        if ret is None:
            # Pose estimation failed: record infinite errors and an empty
            # inlier mask. The builtin `bool` is used because the `np.bool`
            # alias was removed in NumPy 1.24 and raises AttributeError there.
            data['R_errs'].append(np.inf)
            data['t_errs'].append(np.inf)
            data['inliers'].append(np.array([]).astype(bool))
        else:
            R, t, inliers = ret
            t_err, R_err = relative_pose_error(T_0to1[bs], R, t, ignore_gt_t_thr=0.0)
            data['R_errs'].append(R_err)
            data['t_errs'].append(t_err)
            data['inliers'].append(inliers)
def aggregate_metrics(metrics, epi_err_thr=5e-4):
    """Aggregate per-item metrics into dataset-level numbers.

    (This method should be called once per dataset.)
    1. AUC of the pose error (angular) at the thresholds [5, 10, 20]
    2. Mean matching precision at ``epi_err_thr``
       (5e-4 for ScanNet, 1e-4 for MegaDepth)

    Args:
        metrics (dict): holds the parallel sequences ``identifiers``,
            ``R_errs``, ``t_errs`` and ``epi_errs``.
        epi_err_thr (float): epipolar-error threshold for the precision.

    Returns:
        dict: the AUC entries merged with the precision entries.
    """
    # De-duplicate by identifier: a repeated identifier keeps the slot of its
    # first appearance but stores the index of its last occurrence.
    last_index_of = OrderedDict()
    for position, identifier in enumerate(metrics['identifiers']):
        last_index_of[identifier] = position
    keep = list(last_index_of.values())
    logger.info(f'Aggregating metrics over {len(keep)} unique items...')

    # Pose AUC: the per-item error is the larger of rotation/translation error.
    stacked_errs = np.stack([metrics['R_errs'], metrics['t_errs']])
    worst_errs = stacked_errs.max(axis=0)[keep]
    result = dict(error_auc(worst_errs, [5, 10, 20]))  # (auc@5, auc@10, auc@20)

    # Matching precision; items may have different lengths, so object dtype.
    per_item_epi_errs = np.array(metrics['epi_errs'], dtype=object)[keep]
    result.update(epidist_prec(per_item_epi_errs, [epi_err_thr], True))  # (prec@err_thr)

    return result
def setup_gpus(gpus: Union[str, int]) -> int:
    """Resolve the ``gpus`` argument to a GPU count (pytorch-lightning 1.3.x workaround).

    Args:
        gpus: either a single integer (as int or str), where ``-1`` means
            "all devices reported by ``torch.cuda.device_count()``", or a
            comma-separated list of device ids such as ``"0,1"``.

    Returns:
        int: the number of GPUs to use.

    Side effects:
        For a comma-separated list, ``CUDA_DEVICE_ORDER`` and
        ``CUDA_VISIBLE_DEVICES`` are set unless ``CUDA_VISIBLE_DEVICES`` is
        already defined; a warning is logged either way.
    """
    gpus = str(gpus)

    if ',' not in gpus:
        n_gpus = int(gpus)
        return n_gpus if n_gpus != -1 else torch.cuda.device_count()

    # Strip each entry BEFORE discarding empties, so whitespace-only items
    # (e.g. the trailing one in "0,1, ") are not counted as devices.
    gpu_ids = [gpu_id for gpu_id in (part.strip() for part in gpus.split(',')) if gpu_id]

    # setup environment variables
    visible_devices = os.getenv('CUDA_VISIBLE_DEVICES')
    if visible_devices is None:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu_ids)
        visible_devices = os.getenv('CUDA_VISIBLE_DEVICES')
        logger.warning(f'[Temporary Fix] manually set CUDA_VISIBLE_DEVICES when specifying gpus to use: {visible_devices}')
    else:
        logger.warning('[Temporary Fix] CUDA_VISIBLE_DEVICES already set by user or the main process.')
    return len(gpu_ids)
def _compute_conf_thresh(data):
    """Return the epipolar-error threshold for the dataset of this batch.

    The dataset is identified by the first entry of ``data['dataset_name']``,
    compared case-insensitively.

    Raises:
        ValueError: if the dataset name is neither 'scannet' nor 'megadepth'.
    """
    thresholds = {'scannet': 5e-4, 'megadepth': 1e-4}
    dataset_name = data['dataset_name'][0].lower()
    if dataset_name not in thresholds:
        raise ValueError(f'Unknown dataset: {dataset_name}')
    return thresholds[dataset_name]
def _make_evaluation_figure(data, b_id, alpha='dynamic'):
    """Build the evaluation figure for one sample of a batch.

    Matches of sample ``b_id`` are coloured by their epipolar error relative
    to the dataset threshold, and precision/recall are written on the figure.

    Args:
        data (dict): batch dictionary; reads ``m_bids``, ``image0``/``image1``,
            ``mkpts0_f``/``mkpts1_f``, ``epi_errs``, ``conf_matrix_gt`` and,
            when present, ``scale0``/``scale1``.
        b_id (int): index of the sample inside the batch.
        alpha: match-line opacity, or ``'dynamic'`` to derive it from the
            number of matches.

    Returns:
        Whatever ``make_matching_figure`` returns for the assembled inputs.
    """
    def _to_numpy(tensor):
        return tensor.cpu().numpy()

    in_sample = data['m_bids'] == b_id
    err_thr = _compute_conf_thresh(data)

    # First channel of each image, scaled by 255 and rounded to int32.
    img0, img1 = [
        (_to_numpy(data[key][b_id][0]) * 255).round().astype(np.int32)
        for key in ('image0', 'image1')
    ]
    kpts0, kpts1 = [_to_numpy(data[key][in_sample]) for key in ('mkpts0_f', 'mkpts1_f')]

    # for megadepth, we visualize matches on the resized image
    if 'scale0' in data:
        kpts0 = kpts0 / _to_numpy(data['scale0'][b_id])[[1, 0]]
        kpts1 = kpts1 / _to_numpy(data['scale1'][b_id])[[1, 0]]

    epi_errs = _to_numpy(data['epi_errs'][in_sample])
    is_correct = epi_errs < err_thr
    n_matches = len(is_correct)
    precision = np.mean(is_correct) if n_matches > 0 else 0
    n_correct = np.sum(is_correct)
    n_gt_matches = int(data['conf_matrix_gt'][b_id].sum().cpu())
    # recall might be larger than 1, since the calculation of conf_matrix_gt
    # uses groundtruth depths and camera poses, but epipolar distance is used here.
    if n_gt_matches == 0:
        recall = 0
    else:
        recall = n_correct / (n_gt_matches)

    # matching info
    if alpha == 'dynamic':
        alpha = dynamic_alpha(n_matches)
    color = error_colormap(epi_errs, err_thr, alpha=alpha)

    text = [
        f'#Matches {len(kpts0)}',
        f'Precision({err_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}',
        f'Recall({err_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}'
    ]

    # make the figure
    return make_matching_figure(img0, img1, kpts0, kpts1, color, text=text)
def dynamic_alpha(n_matches,
                  milestones=[0, 300, 1000, 2000],
                  alphas=[1.0, 0.8, 0.4, 0.2]):
    """Pick a line opacity that decreases as the number of matches grows.

    ``milestones`` and ``alphas`` define a piecewise-linear curve: inside a
    segment the value is interpolated between the segment's two alphas; in
    the last segment the final alpha is returned unchanged.

    Args:
        n_matches (int): number of matches to be drawn.
        milestones (list): ascending match counts delimiting the segments.
        alphas (list): alpha value at each milestone.

    Returns:
        float: the opacity to use (1.0 when there are no matches).
    """
    if n_matches == 0:
        return 1.0

    # Alpha at the far end of every segment; the last segment has none.
    next_alphas = alphas[1:] + [None]
    seg = bisect.bisect_right(milestones, n_matches) - 1
    alpha_start, alpha_end = alphas[seg], next_alphas[seg]
    if alpha_end is None:
        return alpha_start

    seg_lo, seg_hi = milestones[seg], milestones[seg + 1]
    remaining = (seg_hi - n_matches) / (seg_hi - seg_lo)
    return alpha_end + remaining * (alpha_start - alpha_end)
theta0 /= theta0.sum().float() + theta1 = topic1[0].sum(dim=0) + theta1 /= theta1.sum().float() + # top_topic0 = torch.argsort(theta0, descending=True)[:show_n_topics] + # top_topic1 = torch.argsort(theta1, descending=True)[:show_n_topics] + top_topics = torch.argsort(theta0*theta1, descending=True)[:show_n_topics] + # print(sum_topic0, sum_topic1) + + topic0 = topic0[0].argmax(dim=-1, keepdim=True) #.float() / (n_topics - 1) #* 255 + 1 # + # topic0[~mask0_nonzero] = -1 + topic1 = topic1[0].argmax(dim=-1, keepdim=True) #.float() / (n_topics - 1) #* 255 + 1 + # topic1[~mask1_nonzero] = -1 + label_img0, label_img1 = torch.zeros_like(topic0) - 1, torch.zeros_like(topic1) - 1 + for i, k in enumerate(top_topics): + label_img0[topic0 == k] = color_map[k] + label_img1[topic1 == k] = color_map[k] + +# print(hw0_c, scale0) +# print(hw1_c, scale1) + # map_topic0 = F.fold(label_img0.unsqueeze(0), hw0_i, kernel_size=scale0, stride=scale0) + map_topic0 = label_img0.float().view(hw0_c).cpu().numpy() #map_topic0.squeeze(0).squeeze(0).cpu().numpy() + map_topic0 = cv2.resize(map_topic0, (int(hw0_c[1] * scale0[0]), int(hw0_c[0] * scale0[1]))) + # map_topic1 = F.fold(label_img1.unsqueeze(0), hw1_i, kernel_size=scale1, stride=scale1) + map_topic1 = label_img1.float().view(hw1_c).cpu().numpy() #map_topic1.squeeze(0).squeeze(0).cpu().numpy() + map_topic1 = cv2.resize(map_topic1, (int(hw1_c[1] * scale1[0]), int(hw1_c[0] * scale1[1]))) + + + # show image0 + if saved_name is None: + return map_topic0, map_topic1 + + if not os.path.exists(saved_folder): + os.makedirs(saved_folder) + path_saved_img0 = os.path.join(saved_folder, "{}_0.png".format(saved_name)) + plt.imshow(img0) + masked_map_topic0 = np.ma.masked_where(map_topic0 < 0, map_topic0) + plt.imshow(masked_map_topic0, cmap=plt.cm.jet, vmin=0, vmax=n_topics-1, alpha=.3, interpolation='bilinear') + # plt.show() + plt.axis('off') + plt.savefig(path_saved_img0, bbox_inches='tight', pad_inches=0, dpi=250) + plt.close() + + path_saved_img1 
def draw_topicfm_demo(data, img0, img1, mkpts0, mkpts1, mcolor, text, show_n_topics=8,
                      topic_alpha=0.3, margin=5, path=None, opencv_display=False, opencv_title=''):
    """Compose a 2x2 demo canvas: topic overlays on top, matches below.

    The top row shows both images blended with their topic maps; the bottom
    row shows the plain images with match lines and end-point circles drawn
    across them. The text lines are then written onto the canvas.

    Args:
        data: batch dictionary forwarded to ``draw_topics``.
        img0, img1: the two images. They are multiplied by 255 before being
            written to the uint8 canvas — presumably float images in [0, 1]
            with 3 channels; TODO confirm against the caller.
        mkpts0, mkpts1: matched keypoints as (x, y) pairs; rounded to ints here.
        mcolor: per-match colours; columns (2, 1, 0) are taken and scaled by
            255 — presumably an RGB(A) -> BGR reorder for OpenCV; verify.
        text: iterable of strings drawn on the canvas.
        show_n_topics: forwarded to ``draw_topics``.
        topic_alpha: weight of the topic overlay in the blend.
        margin: gap in pixels between the panels.
        path: if not None, the canvas is written there with ``cv2.imwrite``.
        opencv_display: if true, the canvas is shown with ``cv2.imshow``.
        opencv_title: window title used when displaying.

    Returns:
        np.ndarray: the uint8 canvas of shape (h, w, 3).
    """
    topic_map0, topic_map1 = draw_topics(data, img0, img1, show_n_topics=show_n_topics)

    # Pixels with a negative label carry no displayed topic and keep the image.
    mask_tm0, mask_tm1 = np.expand_dims(topic_map0 >= 0, axis=-1), np.expand_dims(topic_map1 >= 0, axis=-1)

    # Labels are divided by 99 before the colormap lookup.
    topic_cm0, topic_cm1 = cm.jet(topic_map0 / 99.), cm.jet(topic_map1 / 99.)
    topic_cm0 = cv2.cvtColor(topic_cm0[..., :3].astype(np.float32), cv2.COLOR_RGB2BGR)
    topic_cm1 = cv2.cvtColor(topic_cm1[..., :3].astype(np.float32), cv2.COLOR_RGB2BGR)
    overlay0 = (mask_tm0 * topic_cm0 + (1 - mask_tm0) * img0).astype(np.float32)
    overlay1 = (mask_tm1 * topic_cm1 + (1 - mask_tm1) * img1).astype(np.float32)

    # Blend in place: overlay = topic_alpha * overlay + (1 - topic_alpha) * image.
    cv2.addWeighted(overlay0, topic_alpha, img0, 1 - topic_alpha, 0, overlay0)
    cv2.addWeighted(overlay1, topic_alpha, img1, 1 - topic_alpha, 0, overlay1)

    overlay0, overlay1 = (overlay0 * 255).astype(np.uint8), (overlay1 * 255).astype(np.uint8)

    h0, w0 = img0.shape[:2]
    h1, w1 = img1.shape[:2]
    # NOTE(review): the canvas size is derived from img0 only (w0 * 2 + margin),
    # so this looks like it assumes w1 <= w0 — confirm with the callers.
    h, w = h0 * 2 + margin * 2, w0 * 2 + margin
    out_fig = 255 * np.ones((h, w, 3), dtype=np.uint8)
    out_fig[:h0, :w0] = overlay0
    if h0 >= h1:
        # Second image is not taller: centre it vertically in its panel.
        start = (h0 - h1) // 2
        out_fig[start:(start+h1), (w0+margin):(w0+margin+w1)] = overlay1
    else:
        # Second image is taller: keep its central h0 rows.
        start = (h1 - h0) // 2
        out_fig[:h0, (w0+margin):(w0+margin+w1)] = overlay1[start:(start+h0)]

    # Row offset of the bottom panels (plain images with matches).
    step_h = h0 + margin * 2
    out_fig[step_h:step_h+h0, :w0] = (img0 * 255).astype(np.uint8)
    if h0 >= h1:
        start = step_h + (h0 - h1) // 2
        out_fig[start:start+h1, (w0+margin):(w0+margin+w1)] = (img1 * 255).astype(np.uint8)
    else:
        start = (h1 - h0) // 2
        out_fig[step_h:step_h+h0, (w0+margin):(w0+margin+w1)] = (img1[start:start+h0] * 255).astype(np.uint8)

    # draw matching lines, this is inspired by https://raw.githubusercontent.com/magicleap/SuperGluePretrainedNetwork/master/models/utils.py
    mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int)
    mcolor = (np.array(mcolor[:, [2, 1, 0]]) * 255).astype(int)

    # NOTE(review): the vertical shift (h0-h1)//2 is applied to the right-hand
    # points in both cases; when h1 > h0 and the difference is odd, floor
    # division makes it differ by one row from the crop offset used above.
    for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, mcolor):
        c = c.tolist()
        cv2.line(out_fig, (x0, y0+step_h), (x1+margin+w0, y1+step_h+(h0-h1)//2),
                 color=c, thickness=1, lineType=cv2.LINE_AA)
        # display line end-points as circles
        cv2.circle(out_fig, (x0, y0+step_h), 2, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out_fig, (x1+margin+w0, y1+step_h+(h0-h1)//2), 2, c, -1, lineType=cv2.LINE_AA)

    # Scale factor for consistent visualization across scales.
    sc = min(h / 960., 2.0)

    # Big text.
    Ht = int(30 * sc)  # text height
    txt_color_fg = (255, 255, 255)
    txt_color_bg = (0, 0, 0)
    # Each string is drawn twice (thick dark pass, then thin light pass) to
    # get an outline; entry i is placed step_h * i pixels further down.
    for i, t in enumerate(text):
        cv2.putText(out_fig, t, (int(8 * sc), Ht + step_h*i), cv2.FONT_HERSHEY_DUPLEX,
                    1.0 * sc, txt_color_bg, 2, cv2.LINE_AA)
        cv2.putText(out_fig, t, (int(8 * sc), Ht + step_h*i), cv2.FONT_HERSHEY_DUPLEX,
                    1.0 * sc, txt_color_fg, 1, cv2.LINE_AA)

    if path is not None:
        cv2.imwrite(str(path), out_fig)

    if opencv_display:
        cv2.imshow(opencv_title, out_fig)
        cv2.waitKey(1)  # 1 ms wait so the window refreshes without blocking

    return out_fig
def build_profiler(name):
    """Create the profiler selected by ``name``.

    Args:
        name: ``'inference'`` for ``InferenceProfiler``, ``'pytorch'`` for
            Lightning's ``PyTorchProfiler`` (imported lazily), or ``None``
            for a ``PassThroughProfiler``.

    Raises:
        ValueError: for any other value.
    """
    if name == 'inference':
        return InferenceProfiler()

    if name == 'pytorch':
        # Imported only on demand, exactly as the original code does.
        from pytorch_lightning.profiler import PyTorchProfiler
        return PyTorchProfiler(use_cuda=True, profile_memory=True, row_limit=100)

    if name is None:
        return PassThroughProfiler()

    raise ValueError(f'Invalid profiler: {name}')
if __name__ == '__main__':
    # Entry point of the test script: build the config, model and data
    # module, then run the Lightning test loop.

    # parse arguments and echo them for the run log
    args = parse_args()
    pprint.pprint(vars(args))

    # init default-cfg and merge it with the main- and data-cfg
    # (the data config is merged last, so it wins on overlapping keys)
    config = get_cfg_defaults()
    config.merge_from_file(args.main_cfg_path)
    config.merge_from_file(args.data_cfg_path)
    pl.seed_everything(config.TRAINER.SEED)  # reproducibility

    # tune when testing: optional override of the coarse matching threshold
    if args.thr is not None:
        config.MODEL.MATCH_COARSE.THR = args.thr

    loguru_logger.info(f"Args and config initialized!")

    # lightning module (the profiler is shared with the model)
    profiler = build_profiler(args.profiler_name)
    model = PL_Trainer(config, pretrained_ckpt=args.ckpt_path, profiler=profiler, dump_dir=args.dump_dir)
    loguru_logger.info(f"Model-lightning initialized!")

    # lightning data
    data_module = MultiSceneDataModule(args, config)
    loguru_logger.info(f"DataModule initialized!")

    # lightning trainer: keep the project's own sampler, no experiment logger
    trainer = pl.Trainer.from_argparse_args(args, replace_sampler_ddp=False, logger=False)

    loguru_logger.info(f"Start testing!")
    trainer.test(model, datamodule=data_module, verbose=False)
strtobool +from pathlib import Path +from loguru import logger as loguru_logger + +import pytorch_lightning as pl +from pytorch_lightning.utilities import rank_zero_only +from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor +from pytorch_lightning.plugins import DDPPlugin + +from src.config.default import get_cfg_defaults +from src.utils.misc import get_rank_zero_only_logger, setup_gpus +from src.utils.profiler import build_profiler +from src.lightning_trainer.data import MultiSceneDataModule +from src.lightning_trainer.trainer import PL_Trainer + +loguru_logger = get_rank_zero_only_logger(loguru_logger) + + +def parse_args(): + # init a costum parser which will be added into pl.Trainer parser + # check documentation: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-flags + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + 'data_cfg_path', type=str, help='data config path') + parser.add_argument( + 'main_cfg_path', type=str, help='main config path') + parser.add_argument( + '--exp_name', type=str, default='default_exp_name') + parser.add_argument( + '--batch_size', type=int, default=4, help='batch_size per gpu') + parser.add_argument( + '--num_workers', type=int, default=4) + parser.add_argument( + '--pin_memory', type=lambda x: bool(strtobool(x)), + nargs='?', default=True, help='whether loading data to pinned memory or not') + parser.add_argument( + '--ckpt_path', type=str, default=None, + help='pretrained checkpoint path, helpful for using a pre-trained coarse-only LoFTR') + parser.add_argument( + '--disable_ckpt', action='store_true', + help='disable checkpoint saving (useful for debugging).') + parser.add_argument( + '--profiler_name', type=str, default=None, + help='options: [inference, pytorch], or leave it unset') + parser.add_argument( + '--parallel_load_data', action='store_true', + 
help='load datasets in with multiple processes.') + + parser = pl.Trainer.add_argparse_args(parser) + return parser.parse_args() + + +def main(): + # parse arguments + args = parse_args() + rank_zero_only(pprint.pprint)(vars(args)) + + # init default-cfg and merge it with the main- and data-cfg + config = get_cfg_defaults() + config.merge_from_file(args.main_cfg_path) + config.merge_from_file(args.data_cfg_path) + pl.seed_everything(config.TRAINER.SEED) # reproducibility + # TODO: Use different seeds for each dataloader workers + # This is needed for data augmentation + + # scale lr and warmup-step automatically + args.gpus = _n_gpus = setup_gpus(args.gpus) + config.TRAINER.WORLD_SIZE = _n_gpus * args.num_nodes + config.TRAINER.TRUE_BATCH_SIZE = config.TRAINER.WORLD_SIZE * args.batch_size + _scaling = config.TRAINER.TRUE_BATCH_SIZE / config.TRAINER.CANONICAL_BS + config.TRAINER.SCALING = _scaling + config.TRAINER.TRUE_LR = config.TRAINER.CANONICAL_LR * _scaling + config.TRAINER.WARMUP_STEP = math.floor(config.TRAINER.WARMUP_STEP / _scaling) + + # lightning module + profiler = build_profiler(args.profiler_name) + model = PL_Trainer(config, pretrained_ckpt=args.ckpt_path, profiler=profiler) + loguru_logger.info(f"Model LightningModule initialized!") + + # lightning data + data_module = MultiSceneDataModule(args, config) + loguru_logger.info(f"Model DataModule initialized!") + + # TensorBoard Logger + logger = TensorBoardLogger(save_dir='logs/tb_logs', name=args.exp_name, default_hp_metric=False) + ckpt_dir = Path(logger.log_dir) / 'checkpoints' + + # Callbacks + # TODO: update ModelCheckpoint to monitor multiple metrics + ckpt_callback = ModelCheckpoint(monitor='auc@10', verbose=True, save_top_k=5, mode='max', + save_last=True, + dirpath=str(ckpt_dir), + filename='{epoch}-{auc@5:.3f}-{auc@10:.3f}-{auc@20:.3f}') + lr_monitor = LearningRateMonitor(logging_interval='step') + callbacks = [lr_monitor] + if not args.disable_ckpt: + callbacks.append(ckpt_callback) + + # 
class DemoDataset(Dataset):
    """Dataset of single grey-scale images for the demo mode.

    Images are either every ``*.*`` file of ``dataset_dir`` (sorted by path)
    or the files named, one per line, in ``img_file`` (relative to
    ``dataset_dir``).

    Args:
        dataset_dir: directory containing the images.
        img_file: optional text file listing image names, one per line.
        resize: longest-side limit forwarded to ``read_img_gray``; ``None``
            or a non-positive value means "do not downscale".
        down_factor: forwarded to ``read_img_gray``.
    """

    def __init__(self, dataset_dir, img_file=None, resize=0, down_factor=16):
        self.dataset_dir = dataset_dir
        if img_file is None:
            self.list_img_files = sorted(glob.glob(os.path.join(dataset_dir, "*.*")))
        else:
            with open(img_file) as f:
                # Skip blank lines: joining an empty name would yield the
                # directory itself as an "image" path.
                self.list_img_files = [
                    os.path.join(dataset_dir, line.strip())
                    for line in f
                    if line.strip()
                ]
        # read_img_gray only skips resizing when `resize is None`; a literal
        # 0 (the default here) would give a scale of 0 and a zero-sized image.
        self.resize = resize if (resize is not None and resize > 0) else None
        self.down_factor = down_factor

    def __len__(self):
        return len(self.list_img_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it with its index and path."""
        img_path = self.list_img_files[idx]
        # The scale returned by read_img_gray is not needed in the demo.
        img, _ = read_img_gray(img_path, resize=self.resize, down_factor=self.down_factor)
        return {"img": img, "id": idx, "img_path": img_path}
data_cfg.merge_from_list(["DATASET.TEST_DATA_SOURCE", "inloc", + "DATASET.TEST_DATA_ROOT", args.dataset_dir, + "DATASET.TEST_LIST_PATH", args.pair_dir, + "DATASET.TEST_IMGSIZE", model_cfg["imsize"]]) + + has_ground_truth = str(data_cfg.DATASET.TEST_DATA_SOURCE).lower() in ["megadepth", "scannet"] + dataloader = TestDataLoader(data_cfg) + with torch.no_grad(): + for data_dict in tqdm(dataloader): + for k, v in data_dict.items(): + if isinstance(v, torch.Tensor): + data_dict[k] = v.cuda() if torch.cuda.is_available() else v + img_root_dir = data_cfg.DATASET.TEST_DATA_ROOT + model.match_and_draw(data_dict, root_dir=img_root_dir, ground_truth=has_ground_truth, + measure_time=args.measure_time, viz_matches=(not args.no_viz)) + + if args.measure_time: + print("Running time for each image is {} miliseconds".format(model.measure_time())) + if args.compute_eval_metrics and has_ground_truth: + model.compute_eval_metrics() + else: + demo_dataset = DemoDataset(args.dataset_dir, img_file=args.pair_dir, resize=640) + sampler = SequentialSampler(demo_dataset) + dataloader = DataLoader(demo_dataset, batch_size=1, sampler=sampler) + + writer = cv2.VideoWriter('topicfm_demo.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, (640 * 2 + 5, 480 * 2 + 10)) + + model.run_demo(iter(dataloader), writer) #, output_dir="demo", no_display=True) diff --git a/third_party/TopicFM/viz/__init__.py b/third_party/TopicFM/viz/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f0efac33299da6fb8195ce70bcb9eb210f6cf658 --- /dev/null +++ b/third_party/TopicFM/viz/__init__.py @@ -0,0 +1,3 @@ +from .methods.patch2pix import VizPatch2Pix +from .methods.loftr import VizLoFTR +from .methods.topicfm import VizTopicFM diff --git a/third_party/TopicFM/viz/configs/__init__.py b/third_party/TopicFM/viz/configs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/TopicFM/viz/configs/loftr.yml 
b/third_party/TopicFM/viz/configs/loftr.yml new file mode 100644 index 0000000000000000000000000000000000000000..776d625ac8ad5a0b4e4a4e65e2b99f62662bc3fc --- /dev/null +++ b/third_party/TopicFM/viz/configs/loftr.yml @@ -0,0 +1,18 @@ +default: &default + class: 'VizLoFTR' + ckpt: 'third_party/loftr/pretrained/outdoor_ds.ckpt' + match_threshold: 0.2 +megadepth: + <<: *default +scannet: + <<: *default +hpatch: + <<: *default +inloc: + <<: *default + imsize: 1024 + match_threshold: 0.3 +aachen_v1.1: + <<: *default + imsize: 1024 + match_threshold: 0.3 diff --git a/third_party/TopicFM/viz/configs/patch2pix.yml b/third_party/TopicFM/viz/configs/patch2pix.yml new file mode 100644 index 0000000000000000000000000000000000000000..5e3efa7889098425aaf586bd7b88fc28feb74778 --- /dev/null +++ b/third_party/TopicFM/viz/configs/patch2pix.yml @@ -0,0 +1,19 @@ +default: &default + class: 'VizPatch2Pix' + ckpt: 'third_party/patch2pix/pretrained/patch2pix_pretrained.pth' + ksize: 2 + imsize: 1024 + match_threshold: 0.25 +megadepth: + <<: *default + imsize: 1200 +scannet: + <<: *default + imsize: [640, 480] +hpatch: + <<: *default +inloc: + <<: *default +aachen_v1.1: + <<: *default + imsize: 1024 diff --git a/third_party/TopicFM/viz/configs/topicfm.yml b/third_party/TopicFM/viz/configs/topicfm.yml new file mode 100644 index 0000000000000000000000000000000000000000..7a8071a6fcd8def21dbfec5b9b2b10200f494eee --- /dev/null +++ b/third_party/TopicFM/viz/configs/topicfm.yml @@ -0,0 +1,29 @@ +default: &default + class: 'VizTopicFM' + ckpt: 'pretrained/model_best.ckpt' + match_threshold: 0.2 + n_sampling_topics: 4 + show_n_topics: 4 +megadepth: + <<: *default + n_sampling_topics: 10 + show_n_topics: 6 +scannet: + <<: *default + match_threshold: 0.3 + n_sampling_topics: 5 + show_n_topics: 4 +hpatch: + <<: *default +inloc: + <<: *default + imsize: 1024 + match_threshold: 0.3 + n_sampling_topics: 8 + show_n_topics: 4 +aachen_v1.1: + <<: *default + imsize: 1024 + match_threshold: 0.3 + 
n_sampling_topics: 6 + show_n_topics: 6 diff --git a/third_party/TopicFM/viz/methods/__init__.py b/third_party/TopicFM/viz/methods/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/TopicFM/viz/methods/base.py b/third_party/TopicFM/viz/methods/base.py new file mode 100644 index 0000000000000000000000000000000000000000..377e95134f339459bff3c5a0d30b3bfbc122d978 --- /dev/null +++ b/third_party/TopicFM/viz/methods/base.py @@ -0,0 +1,59 @@ +import pprint +from abc import ABCMeta, abstractmethod +import torch +from itertools import chain + +from src.utils.plotting import make_matching_figure, error_colormap +from src.utils.metrics import aggregate_metrics + + +def flatten_list(x): + return list(chain(*x)) + + +class Viz(metaclass=ABCMeta): + def __init__(self): + super().__init__() + self.device = torch.device('cuda:{}'.format(0) if torch.cuda.is_available() else 'cpu') + torch.set_grad_enabled(False) + + # for evaluation metrics of MegaDepth and ScanNet + self.eval_stats = [] + self.time_stats = [] + + def draw_matches(self, mkpts0, mkpts1, img0, img1, conf, path=None, **kwargs): + thr = 5e-4 + # mkpts0 = pe['mkpts0_f'].cpu().numpy() + # mkpts1 = pe['mkpts1_f'].cpu().numpy() + if "conf_thr" in kwargs: + thr = kwargs["conf_thr"] + color = error_colormap(conf, thr, alpha=0.1) + + text = [ + f"{self.name}", + f"#Matches: {len(mkpts0)}", + ] + if 'R_errs' in kwargs: + text.append(f"$\\Delta$R:{kwargs['R_errs']:.2f}°, $\\Delta$t:{kwargs['t_errs']:.2f}°",) + + if path: + make_matching_figure(img0, img1, mkpts0, mkpts1, color, text=text, path=path, dpi=150) + else: + return make_matching_figure(img0, img1, mkpts0, mkpts1, color, text=text) + + @abstractmethod + def match_and_draw(self, data_dict, **kwargs): + pass + + def compute_eval_metrics(self, epi_err_thr=5e-4): + # metrics: dict of list, numpy + _metrics = [o['metrics'] for o in self.eval_stats] + metrics = {k: 
flatten_list([_me[k] for _me in _metrics]) for k in _metrics[0]} + + val_metrics_4tb = aggregate_metrics(metrics, epi_err_thr) + print('\n' + pprint.pformat(val_metrics_4tb)) + + def measure_time(self): + if len(self.time_stats) == 0: + return 0 + return sum(self.time_stats) / len(self.time_stats) diff --git a/third_party/TopicFM/viz/methods/loftr.py b/third_party/TopicFM/viz/methods/loftr.py new file mode 100644 index 0000000000000000000000000000000000000000..53d0c00c1a067cee10bf1587197e4780ac8b2eda --- /dev/null +++ b/third_party/TopicFM/viz/methods/loftr.py @@ -0,0 +1,85 @@ +from argparse import Namespace +import os +import torch +import cv2 + +from .base import Viz +from src.utils.metrics import compute_symmetrical_epipolar_errors, compute_pose_errors + +from third_party.loftr.src.loftr import LoFTR, default_cfg + + +class VizLoFTR(Viz): + def __init__(self, args): + super().__init__() + if type(args) == dict: + args = Namespace(**args) + + self.match_threshold = args.match_threshold + + # Load model + conf = dict(default_cfg) + conf['match_coarse']['thr'] = self.match_threshold + print(conf) + self.model = LoFTR(config=conf) + ckpt_dict = torch.load(args.ckpt) + self.model.load_state_dict(ckpt_dict['state_dict']) + self.model = self.model.eval().to(self.device) + + # Name the method + # self.ckpt_name = args.ckpt.split('/')[-1].split('.')[0] + self.name = 'LoFTR' + + print(f'Initialize {self.name}') + + def match_and_draw(self, data_dict, root_dir=None, ground_truth=False, measure_time=False, viz_matches=True): + if measure_time: + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + self.model(data_dict) + if measure_time: + torch.cuda.synchronize() + end.record() + torch.cuda.synchronize() + self.time_stats.append(start.elapsed_time(end)) + + kpts0 = data_dict['mkpts0_f'].cpu().numpy() + kpts1 = data_dict['mkpts1_f'].cpu().numpy() + + img_name0, img_name1 = 
list(zip(*data_dict['pair_names']))[0] + img0 = cv2.imread(os.path.join(root_dir, img_name0)) + img1 = cv2.imread(os.path.join(root_dir, img_name1)) + if str(data_dict["dataset_name"][0]).lower() == 'scannet': + img0 = cv2.resize(img0, (640, 480)) + img1 = cv2.resize(img1, (640, 480)) + + if viz_matches: + saved_name = "_".join([img_name0.split('/')[-1].split('.')[0], img_name1.split('/')[-1].split('.')[0]]) + folder_matches = os.path.join(root_dir, "{}_viz_matches".format(self.name)) + if not os.path.exists(folder_matches): + os.makedirs(folder_matches) + path_to_save_matches = os.path.join(folder_matches, "{}.png".format(saved_name)) + if ground_truth: + compute_symmetrical_epipolar_errors(data_dict) # compute epi_errs for each match + compute_pose_errors(data_dict) # compute R_errs, t_errs, pose_errs for each pair + epi_errors = data_dict['epi_errs'].cpu().numpy() + R_errors, t_errors = data_dict['R_errs'][0], data_dict['t_errs'][0] + + self.draw_matches(kpts0, kpts1, img0, img1, epi_errors, path=path_to_save_matches, + R_errs=R_errors, t_errs=t_errors) + + rel_pair_names = list(zip(*data_dict['pair_names'])) + bs = data_dict['image0'].size(0) + metrics = { + # to filter duplicate pairs caused by DistributedSampler + 'identifiers': ['#'.join(rel_pair_names[b]) for b in range(bs)], + 'epi_errs': [data_dict['epi_errs'][data_dict['m_bids'] == b].cpu().numpy() for b in range(bs)], + 'R_errs': data_dict['R_errs'], + 't_errs': data_dict['t_errs'], + 'inliers': data_dict['inliers']} + self.eval_stats.append({'metrics': metrics}) + else: + m_conf = 1 - data_dict["mconf"].cpu().numpy() + self.draw_matches(kpts0, kpts1, img0, img1, m_conf, path=path_to_save_matches, conf_thr=0.4) diff --git a/third_party/TopicFM/viz/methods/patch2pix.py b/third_party/TopicFM/viz/methods/patch2pix.py new file mode 100644 index 0000000000000000000000000000000000000000..14a1d345881e2021be97dc5dde91d8bbe1cd18fa --- /dev/null +++ b/third_party/TopicFM/viz/methods/patch2pix.py @@ -0,0 +1,80 @@ 
+from argparse import Namespace +import os, sys +import torch +import cv2 +from pathlib import Path + +from .base import Viz +from src.utils.metrics import compute_symmetrical_epipolar_errors, compute_pose_errors + +patch2pix_path = Path(__file__).parent / '../../third_party/patch2pix' +sys.path.append(str(patch2pix_path)) +from third_party.patch2pix.utils.eval.model_helper import load_model, estimate_matches + + +class VizPatch2Pix(Viz): + def __init__(self, args): + super().__init__() + + if type(args) == dict: + args = Namespace(**args) + self.imsize = args.imsize + self.match_threshold = args.match_threshold + self.ksize = args.ksize + self.model = load_model(args.ckpt, method='patch2pix') + self.name = 'Patch2Pix' + print(f'Initialize {self.name} with image size {self.imsize}') + + def match_and_draw(self, data_dict, root_dir=None, ground_truth=False, measure_time=False, viz_matches=True): + img_name0, img_name1 = list(zip(*data_dict['pair_names']))[0] + path_img0 = os.path.join(root_dir, img_name0) + path_img1 = os.path.join(root_dir, img_name1) + img0, img1 = cv2.imread(path_img0), cv2.imread(path_img1) + return_m_upscale = True + if str(data_dict["dataset_name"][0]).lower() == 'scannet': + # self.imsize = 640 + img0 = cv2.resize(img0, tuple(self.imsize)) # (640, 480)) + img1 = cv2.resize(img1, tuple(self.imsize)) # (640, 480)) + return_m_upscale = False + outputs = estimate_matches(self.model, path_img0, path_img1, + ksize=self.ksize, io_thres=self.match_threshold, + eval_type='fine', imsize=self.imsize, + return_upscale=return_m_upscale, measure_time=measure_time) + if measure_time: + self.time_stats.append(outputs[-1]) + matches, mconf = outputs[0], outputs[1] + kpts0 = matches[:, :2] + kpts1 = matches[:, 2:4] + + if viz_matches: + saved_name = "_".join([img_name0.split('/')[-1].split('.')[0], img_name1.split('/')[-1].split('.')[0]]) + folder_matches = os.path.join(root_dir, "{}_viz_matches".format(self.name)) + if not os.path.exists(folder_matches): + 
os.makedirs(folder_matches) + path_to_save_matches = os.path.join(folder_matches, "{}.png".format(saved_name)) + + if ground_truth: + data_dict["mkpts0_f"] = torch.from_numpy(matches[:, :2]).float().to(self.device) + data_dict["mkpts1_f"] = torch.from_numpy(matches[:, 2:4]).float().to(self.device) + data_dict["m_bids"] = torch.zeros(matches.shape[0], device=self.device, dtype=torch.float32) + compute_symmetrical_epipolar_errors(data_dict) # compute epi_errs for each match + compute_pose_errors(data_dict) # compute R_errs, t_errs, pose_errs for each pair + epi_errors = data_dict['epi_errs'].cpu().numpy() + R_errors, t_errors = data_dict['R_errs'][0], data_dict['t_errs'][0] + + self.draw_matches(kpts0, kpts1, img0, img1, epi_errors, path=path_to_save_matches, + R_errs=R_errors, t_errs=t_errors) + + rel_pair_names = list(zip(*data_dict['pair_names'])) + bs = data_dict['image0'].size(0) + metrics = { + # to filter duplicate pairs caused by DistributedSampler + 'identifiers': ['#'.join(rel_pair_names[b]) for b in range(bs)], + 'epi_errs': [data_dict['epi_errs'][data_dict['m_bids'] == b].cpu().numpy() for b in range(bs)], + 'R_errs': data_dict['R_errs'], + 't_errs': data_dict['t_errs'], + 'inliers': data_dict['inliers']} + self.eval_stats.append({'metrics': metrics}) + else: + m_conf = 1 - mconf + self.draw_matches(kpts0, kpts1, img0, img1, m_conf, path=path_to_save_matches, conf_thr=0.4) diff --git a/third_party/TopicFM/viz/methods/topicfm.py b/third_party/TopicFM/viz/methods/topicfm.py new file mode 100644 index 0000000000000000000000000000000000000000..cd8b1485d5296947a38480cc031c5d7439bf163d --- /dev/null +++ b/third_party/TopicFM/viz/methods/topicfm.py @@ -0,0 +1,198 @@ +from argparse import Namespace +import os +import torch +import cv2 +from time import time +from pathlib import Path +import matplotlib.cm as cm +import numpy as np + +from src.models.topic_fm import TopicFM +from src import get_model_cfg +from .base import Viz +from src.utils.metrics import 
compute_symmetrical_epipolar_errors, compute_pose_errors +from src.utils.plotting import draw_topics, draw_topicfm_demo, error_colormap + + +class VizTopicFM(Viz): + def __init__(self, args): + super().__init__() + if type(args) == dict: + args = Namespace(**args) + + self.match_threshold = args.match_threshold + self.n_sampling_topics = args.n_sampling_topics + self.show_n_topics = args.show_n_topics + + # Load model + conf = dict(get_model_cfg()) + conf['match_coarse']['thr'] = self.match_threshold + conf['coarse']['n_samples'] = self.n_sampling_topics + print("model config: ", conf) + self.model = TopicFM(config=conf) + ckpt_dict = torch.load(args.ckpt) + self.model.load_state_dict(ckpt_dict['state_dict']) + self.model = self.model.eval().to(self.device) + + # Name the method + # self.ckpt_name = args.ckpt.split('/')[-1].split('.')[0] + self.name = 'TopicFM' + + print(f'Initialize {self.name}') + + def match_and_draw(self, data_dict, root_dir=None, ground_truth=False, measure_time=False, viz_matches=True): + if measure_time: + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + self.model(data_dict) + if measure_time: + torch.cuda.synchronize() + end.record() + torch.cuda.synchronize() + self.time_stats.append(start.elapsed_time(end)) + + kpts0 = data_dict['mkpts0_f'].cpu().numpy() + kpts1 = data_dict['mkpts1_f'].cpu().numpy() + + img_name0, img_name1 = list(zip(*data_dict['pair_names']))[0] + img0 = cv2.imread(os.path.join(root_dir, img_name0)) + img1 = cv2.imread(os.path.join(root_dir, img_name1)) + if str(data_dict["dataset_name"][0]).lower() == 'scannet': + img0 = cv2.resize(img0, (640, 480)) + img1 = cv2.resize(img1, (640, 480)) + + if viz_matches: + saved_name = "_".join([img_name0.split('/')[-1].split('.')[0], img_name1.split('/')[-1].split('.')[0]]) + folder_matches = os.path.join(root_dir, "{}_viz_matches".format(self.name)) + if not os.path.exists(folder_matches): + 
os.makedirs(folder_matches) + path_to_save_matches = os.path.join(folder_matches, "{}.png".format(saved_name)) + + if ground_truth: + compute_symmetrical_epipolar_errors(data_dict) # compute epi_errs for each match + compute_pose_errors(data_dict) # compute R_errs, t_errs, pose_errs for each pair + epi_errors = data_dict['epi_errs'].cpu().numpy() + R_errors, t_errors = data_dict['R_errs'][0], data_dict['t_errs'][0] + + self.draw_matches(kpts0, kpts1, img0, img1, epi_errors, path=path_to_save_matches, + R_errs=R_errors, t_errs=t_errors) + + # compute evaluation metrics + rel_pair_names = list(zip(*data_dict['pair_names'])) + bs = data_dict['image0'].size(0) + metrics = { + # to filter duplicate pairs caused by DistributedSampler + 'identifiers': ['#'.join(rel_pair_names[b]) for b in range(bs)], + 'epi_errs': [data_dict['epi_errs'][data_dict['m_bids'] == b].cpu().numpy() for b in range(bs)], + 'R_errs': data_dict['R_errs'], + 't_errs': data_dict['t_errs'], + 'inliers': data_dict['inliers']} + self.eval_stats.append({'metrics': metrics}) + else: + m_conf = 1 - data_dict["mconf"].cpu().numpy() + self.draw_matches(kpts0, kpts1, img0, img1, m_conf, path=path_to_save_matches, conf_thr=0.4) + if self.show_n_topics > 0: + folder_topics = os.path.join(root_dir, "{}_viz_topics".format(self.name)) + if not os.path.exists(folder_topics): + os.makedirs(folder_topics) + draw_topics(data_dict, img0, img1, saved_folder=folder_topics, show_n_topics=self.show_n_topics, + saved_name=saved_name) + + def run_demo(self, dataloader, writer=None, output_dir=None, no_display=False, skip_frames=1): + data_dict = next(dataloader) + + frame_id = 0 + last_image_id = 0 + img0 = np.array(cv2.imread(str(data_dict["img_path"][0])), dtype=np.float32) / 255 + frame_tensor = data_dict["img"].to(self.device) + pair_data = {'image0': frame_tensor} + last_frame = cv2.resize(img0, (frame_tensor.shape[-1], frame_tensor.shape[-2]), cv2.INTER_LINEAR) + + if output_dir is not None: + print('==> Will write 
outputs to {}'.format(output_dir)) + Path(output_dir).mkdir(exist_ok=True) + + # Create a window to display the demo. + if not no_display: + window_name = 'Topic-assisted Feature Matching' + cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) + cv2.resizeWindow(window_name, (640 * 2, 480 * 2)) + else: + print('Skipping visualization, will not show a GUI.') + + # Print the keyboard help menu. + print('==> Keyboard control:\n' + '\tn: select the current frame as the reference image (left)\n' + '\tq: quit') + + # vis_range = [kwargs["bottom_k"], kwargs["top_k"]] + + while True: + frame_id += 1 + if frame_id == len(dataloader): + print('Finished demo_loftr.py') + break + data_dict = next(dataloader) + if frame_id % skip_frames != 0: + # print("Skipping frame.") + continue + + stem0, stem1 = last_image_id, data_dict["id"][0].item() - 1 + frame = np.array(cv2.imread(str(data_dict["img_path"][0])), dtype=np.float32) / 255 + + frame_tensor = data_dict["img"].to(self.device) + frame = cv2.resize(frame, (frame_tensor.shape[-1], frame_tensor.shape[-2]), interpolation=cv2.INTER_LINEAR) + pair_data = {**pair_data, 'image1': frame_tensor} + self.model(pair_data) + + total_n_matches = len(pair_data['mkpts0_f']) + mkpts0 = pair_data['mkpts0_f'].cpu().numpy() # [vis_range[0]:vis_range[1]] + mkpts1 = pair_data['mkpts1_f'].cpu().numpy() # [vis_range[0]:vis_range[1]] + mconf = pair_data['mconf'].cpu().numpy() # [vis_range[0]:vis_range[1]] + + # Normalize confidence. 
+ if len(mconf) > 0: + mconf = 1 - mconf + + # alpha = 0 + # color = cm.jet(mconf, alpha=alpha) + color = error_colormap(mconf, thr=0.4, alpha=0.1) + + text = [ + f'Topics', + '#Matches: {}'.format(total_n_matches), + ] + + out = draw_topicfm_demo(pair_data, last_frame, frame, mkpts0, mkpts1, color, text, + show_n_topics=4, path=None) + + if not no_display: + if writer is not None: + writer.write(out) + cv2.imshow('TopicFM Matches', out) + key = chr(cv2.waitKey(10) & 0xFF) + if key == 'q': + if writer is not None: + writer.release() + print('Exiting...') + break + elif key == 'n': + pair_data['image0'] = frame_tensor + last_frame = frame + last_image_id = (data_dict["id"][0].item() - 1) + frame_id_left = frame_id + + elif output_dir is not None: + stem = 'matches_{:06}_{:06}'.format(stem0, stem1) + out_file = str(Path(output_dir, stem + '.png')) + print('\nWriting image to {}'.format(out_file)) + cv2.imwrite(out_file, out) + else: + raise ValueError("output_dir is required when no display is given.") + + cv2.destroyAllWindows() + if writer is not None: + writer.release() + diff --git a/third_party/d2net/.gitignore b/third_party/d2net/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..fda64312542ac8b636532f580c7648708dd0c1ba --- /dev/null +++ b/third_party/d2net/.gitignore @@ -0,0 +1,13 @@ +__pycache__ +.vscode +checkpoints* +train_vis +log.txt +hpatches_sequences/hseq.pdf +hpatches_sequences/hseq-top.pdf +hpatches_sequences/hpatches-sequences-release* +hpatches_sequences/cache +hpatches_sequences/cache-top +.ipynb_checkpoints +vlfeat +*.d2-net diff --git a/third_party/d2net/LICENSE b/third_party/d2net/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..5d50329f25f288161a596172f69c84b9dc465b27 --- /dev/null +++ b/third_party/d2net/LICENSE @@ -0,0 +1,33 @@ +The Clear BSD License + +Copyright (c) 2019 Mihai Dusmanu +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted (subject to the limitations in the disclaimer +below) provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the copyright holders nor the names of the + contributors nor the names of their institutions may be used to endorse + or promote products derived from this software without specific prior + written permission. + +NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY +THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/third_party/d2net/README.md b/third_party/d2net/README.md new file mode 100644 index 0000000000000000000000000000000000000000..741c88dffcea55fc482d823d585421fbe0996cea --- /dev/null +++ b/third_party/d2net/README.md @@ -0,0 +1,121 @@ +# D2-Net: A Trainable CNN for Joint Detection and Description of Local Features + +This repository contains the implementation of the following paper: + +```text +"D2-Net: A Trainable CNN for Joint Detection and Description of Local Features". +M. Dusmanu, I. Rocco, T. Pajdla, M. Pollefeys, J. Sivic, A. Torii, and T. Sattler. CVPR 2019. +``` + +[Paper on arXiv](https://arxiv.org/abs/1905.03561), [Project page](https://dsmn.ml/publications/d2-net.html) + +## Getting started + +Python 3.6+ is recommended for running our code. [Conda](https://docs.conda.io/en/latest/) can be used to install the required packages: + +```bash +conda install pytorch torchvision cudatoolkit=10.0 -c pytorch +conda install h5py imageio imagesize matplotlib numpy scipy tqdm +``` + +## Downloading the models + +The off-the-shelf **Caffe VGG16** weights and their tuned counterpart can be downloaded by running: + +```bash +mkdir models +wget https://dsmn.ml/files/d2-net/d2_ots.pth -O models/d2_ots.pth +wget https://dsmn.ml/files/d2-net/d2_tf.pth -O models/d2_tf.pth +wget https://dsmn.ml/files/d2-net/d2_tf_no_phototourism.pth -O models/d2_tf_no_phototourism.pth +``` + +**Update - 23 May 2019** We have added a new set of weights trained on MegaDepth without the PhotoTourism scenes (sagrada_familia - 0019, lincoln_memorial_statue - 0021, british_museum - 0024, london_bridge - 0025, us_capitol - 0078, mount_rushmore - 1589). Our initial results show similar performance. In order to use these weights at test time, you should add `--model_file models/d2_tf_no_phototourism.pth`. + +## Feature extraction + +`extract_features.py` can be used to extract D2 features for a given list of images. 
The singlescale features require less than 6GB of VRAM for 1200x1600 images. The `--multiscale` flag can be used to extract multiscale features - for this, we recommend at least 12GB of VRAM. + +The output format can be either [`npz`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.savez.html) or `mat`. In either case, the feature files encapsulate three arrays: + +- `keypoints` [`N x 3`] array containing the positions of keypoints `x, y` and the scales `s`. The positions follow the COLMAP format, with the `X` axis pointing to the right and the `Y` axis to the bottom. +- `scores` [`N`] array containing the activations of keypoints (higher is better). +- `descriptors` [`N x 512`] array containing the L2 normalized descriptors. + +```bash +python extract_features.py --image_list_file images.txt (--multiscale) +``` + +## Feature extraction with kapture datasets + +Kapture is a pivot file format, based on text and binary files, used to describe SfM (Structure From Motion) and more generally sensor-acquired data. + +It is available at https://github.com/naver/kapture. +It contains conversion tools for popular formats and several popular datasets are directly available in kapture. + +It can be installed with: +```bash +pip install kapture +``` + +Datasets can be downloaded with: +```bash +kapture_download_dataset.py update +kapture_download_dataset.py list +# e.g.: install mapping and query of Extended-CMU-Seasons_slice22 +kapture_download_dataset.py install "Extended-CMU-Seasons_slice22_*" +``` +If you want to convert your own dataset into kapture, please find some examples [here](https://github.com/naver/kapture/blob/master/doc/datasets.adoc). + +Once installed, you can extract keypoints for your kapture dataset with: +```bash +python extract_kapture.py --kapture-root pathto/yourkapturedataset (--multiscale) +``` + +Run `python extract_kapture.py --help` for more information on the extraction parameters. 
+ +## Tuning on MegaDepth + +The training pipeline provided here is a PyTorch implementation of the TensorFlow code that was used to train the model available to download above. + +**Update - 05 June 2019** We have fixed a bug in the dataset preprocessing - retraining now yields similar results to the original TensorFlow implementation. + +**Update - 07 August 2019** We have released an updated, more accurate version of the training dataset - training is more stable and significantly faster for equal performance. + +### Downloading and preprocessing the MegaDepth dataset + +For this part, [COLMAP](https://colmap.github.io/) should be installed. Please refer to the official website for installation instructions. + +After downloading the entire [MegaDepth](http://www.cs.cornell.edu/projects/megadepth/) dataset (including SfM models), the first step is generating the undistorted reconstructions. This can be done by calling `undistort_reconstructions.py` as follows: + +```bash +python undistort_reconstructions.py --colmap_path /path/to/colmap/executable --base_path /path/to/megadepth +``` + +Next, `preprocess_undistorted_megadepth.sh` can be used to retrieve the camera parameters and compute the overlap between images for all scenes. + +```bash +bash preprocess_undistorted_megadepth.sh /path/to/megadepth /path/to/output/folder +``` + +In case you prefer downloading the undistorted reconstructions and aggregated scene information folder directly, you can find them [here - Google Drive](https://drive.google.com/open?id=1hxpOsqOZefdrba_BqnW490XpNX_LgXPB). You will still need to download the depth maps ("MegaDepth v1 Dataset") from the MegaDepth website. 
+ +### Training + +After downloading and preprocessing MegaDepth, the training can be started right away: + +```bash +python train.py --use_validation --dataset_path /path/to/megadepth --scene_info_path /path/to/preprocessing/output +``` + +## BibTeX + +If you use this code in your project, please cite the following paper: + +```bibtex +@InProceedings{Dusmanu2019CVPR, + author = {Dusmanu, Mihai and Rocco, Ignacio and Pajdla, Tomas and Pollefeys, Marc and Sivic, Josef and Torii, Akihiko and Sattler, Torsten}, + title = {{D2-Net: A Trainable CNN for Joint Detection and Description of Local Features}}, + booktitle = {Proceedings of the 2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + year = {2019}, +} +``` diff --git a/third_party/d2net/extract_features.py b/third_party/d2net/extract_features.py new file mode 100644 index 0000000000000000000000000000000000000000..628463a7d042a90b5cadea8a317237cde86f5ae4 --- /dev/null +++ b/third_party/d2net/extract_features.py @@ -0,0 +1,156 @@ +import argparse + +import numpy as np + +import imageio + +import torch + +from tqdm import tqdm + +import scipy +import scipy.io +import scipy.misc + +from lib.model_test import D2Net +from lib.utils import preprocess_image +from lib.pyramid import process_multiscale + +# CUDA +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if use_cuda else "cpu") + +# Argument parsing +parser = argparse.ArgumentParser(description='Feature extraction script') + +parser.add_argument( + '--image_list_file', type=str, required=True, + help='path to a file containing a list of images to process' +) + +parser.add_argument( + '--preprocessing', type=str, default='caffe', + help='image preprocessing (caffe or torch)' +) +parser.add_argument( + '--model_file', type=str, default='models/d2_tf.pth', + help='path to the full model' +) + +parser.add_argument( + '--max_edge', type=int, default=1600, + help='maximum image size at network input' +) +parser.add_argument( + 
'--max_sum_edges', type=int, default=2800, + help='maximum sum of image sizes at network input' +) + +parser.add_argument( + '--output_extension', type=str, default='.d2-net', + help='extension for the output' +) +parser.add_argument( + '--output_type', type=str, default='npz', + help='output file type (npz or mat)' +) + +parser.add_argument( + '--multiscale', dest='multiscale', action='store_true', + help='extract multiscale features' +) +parser.set_defaults(multiscale=False) + +parser.add_argument( + '--no-relu', dest='use_relu', action='store_false', + help='remove ReLU after the dense feature extraction module' +) +parser.set_defaults(use_relu=True) + +args = parser.parse_args() + +print(args) + +# Creating CNN model +model = D2Net( + model_file=args.model_file, + use_relu=args.use_relu, + use_cuda=use_cuda +) + +# Process the file +with open(args.image_list_file, 'r') as f: + lines = f.readlines() +for line in tqdm(lines, total=len(lines)): + path = line.strip() + + image = imageio.imread(path) + if len(image.shape) == 2: + image = image[:, :, np.newaxis] + image = np.repeat(image, 3, -1) + + # TODO: switch to PIL.Image due to deprecation of scipy.misc.imresize. 
+ resized_image = image + if max(resized_image.shape) > args.max_edge: + resized_image = scipy.misc.imresize( + resized_image, + args.max_edge / max(resized_image.shape) + ).astype('float') + if sum(resized_image.shape[: 2]) > args.max_sum_edges: + resized_image = scipy.misc.imresize( + resized_image, + args.max_sum_edges / sum(resized_image.shape[: 2]) + ).astype('float') + + fact_i = image.shape[0] / resized_image.shape[0] + fact_j = image.shape[1] / resized_image.shape[1] + + input_image = preprocess_image( + resized_image, + preprocessing=args.preprocessing + ) + with torch.no_grad(): + if args.multiscale: + keypoints, scores, descriptors = process_multiscale( + torch.tensor( + input_image[np.newaxis, :, :, :].astype(np.float32), + device=device + ), + model + ) + else: + keypoints, scores, descriptors = process_multiscale( + torch.tensor( + input_image[np.newaxis, :, :, :].astype(np.float32), + device=device + ), + model, + scales=[1] + ) + + # Input image coordinates + keypoints[:, 0] *= fact_i + keypoints[:, 1] *= fact_j + # i, j -> u, v + keypoints = keypoints[:, [1, 0, 2]] + + if args.output_type == 'npz': + with open(path + args.output_extension, 'wb') as output_file: + np.savez( + output_file, + keypoints=keypoints, + scores=scores, + descriptors=descriptors + ) + elif args.output_type == 'mat': + with open(path + args.output_extension, 'wb') as output_file: + scipy.io.savemat( + output_file, + { + 'keypoints': keypoints, + 'scores': scores, + 'descriptors': descriptors + } + ) + else: + raise ValueError('Unknown output type.') diff --git a/third_party/d2net/extract_hesaff.m b/third_party/d2net/extract_hesaff.m new file mode 100644 index 0000000000000000000000000000000000000000..5f544a49512640304df006e6704de5aaa14b0e6c --- /dev/null +++ b/third_party/d2net/extract_hesaff.m @@ -0,0 +1,25 @@ +fid = fopen('image_list_hpatches_sequences.txt'); + +tline = fgetl(fid); +while ischar(tline) + disp(tline); + I = im2single(imread(tline)); + if size(I, 3) > 1 + I = 
rgb2gray(I); + end + + [F, D, info] = vl_covdet(I, 'Method', 'Hessian', ... + 'EstimateAffineShape', true, ... + 'EstimateOrientation', true, ... + 'DoubleImage', false, ... + 'peakThreshold', 14 / 256^2); + keypoints = F'; + scores = info.peakScores; + descriptors = D'; + + save([tline '.hesaff'], 'keypoints', 'scores', 'descriptors'); + + tline = fgetl(fid); +end + +fclose(fid); diff --git a/third_party/d2net/extract_kapture.py b/third_party/d2net/extract_kapture.py new file mode 100644 index 0000000000000000000000000000000000000000..23198b978229c699dbe24cd3bc0400d62bcab030 --- /dev/null +++ b/third_party/d2net/extract_kapture.py @@ -0,0 +1,248 @@ +import argparse +import numpy as np +from PIL import Image +import torch +import math +from tqdm import tqdm +from os import path + +# Kapture is a pivot file format, based on text and binary files, used to describe SfM (Structure From Motion) and more generally sensor-acquired data +# it can be installed with +# pip install kapture +# for more information check out https://github.com/naver/kapture +import kapture +from kapture.io.records import get_image_fullpath +from kapture.io.csv import kapture_from_dir, get_all_tar_handlers +from kapture.io.csv import get_feature_csv_fullpath, keypoints_to_file, descriptors_to_file +from kapture.io.features import get_keypoints_fullpath, keypoints_check_dir, image_keypoints_to_file +from kapture.io.features import get_descriptors_fullpath, descriptors_check_dir, image_descriptors_to_file + +from lib.model_test import D2Net +from lib.utils import preprocess_image +from lib.pyramid import process_multiscale + +# import imageio + +# CUDA +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if use_cuda else "cpu") + +# Argument parsing +parser = argparse.ArgumentParser(description='Feature extraction script') + +parser.add_argument( + '--kapture-root', type=str, required=True, + help='path to kapture root directory' +) + +parser.add_argument( + '--preprocessing', 
type=str, default='caffe', + help='image preprocessing (caffe or torch)' +) +parser.add_argument( + '--model_file', type=str, default='models/d2_tf.pth', + help='path to the full model' +) +parser.add_argument( + '--keypoints-type', type=str, default=None, + help='keypoint type_name, default is filename of model' +) +parser.add_argument( + '--descriptors-type', type=str, default=None, + help='descriptors type_name, default is filename of model' +) + +parser.add_argument( + '--max_edge', type=int, default=1600, + help='maximum image size at network input' +) +parser.add_argument( + '--max_sum_edges', type=int, default=2800, + help='maximum sum of image sizes at network input' +) + +parser.add_argument( + '--multiscale', dest='multiscale', action='store_true', + help='extract multiscale features' +) +parser.set_defaults(multiscale=False) + +parser.add_argument( + '--no-relu', dest='use_relu', action='store_false', + help='remove ReLU after the dense feature extraction module' +) +parser.set_defaults(use_relu=True) + +parser.add_argument("--max-keypoints", type=int, default=float("+inf"), + help='max number of keypoints save to disk') + +args = parser.parse_args() + +print(args) +with get_all_tar_handlers(args.kapture_root, + mode={kapture.Keypoints: 'a', + kapture.Descriptors: 'a', + kapture.GlobalFeatures: 'r', + kapture.Matches: 'r'}) as tar_handlers: + kdata = kapture_from_dir(args.kapture_root, + skip_list=[kapture.GlobalFeatures, + kapture.Matches, + kapture.Points3d, + kapture.Observations], + tar_handlers=tar_handlers) + if kdata.keypoints is None: + kdata.keypoints = {} + if kdata.descriptors is None: + kdata.descriptors = {} + + assert kdata.records_camera is not None + image_list = [filename for _, _, filename in kapture.flatten(kdata.records_camera)] + if args.keypoints_type is None: + args.keypoints_type = path.splitext(path.basename(args.model_file))[0] + print(f'keypoints_type set to {args.keypoints_type}') + if args.descriptors_type is None: + 
args.descriptors_type = path.splitext(path.basename(args.model_file))[0] + print(f'descriptors_type set to {args.descriptors_type}') + if args.keypoints_type in kdata.keypoints and args.descriptors_type in kdata.descriptors: + image_list = [name + for name in image_list + if name not in kdata.keypoints[args.keypoints_type] or + name not in kdata.descriptors[args.descriptors_type]] + + if len(image_list) == 0: + print('All features were already extracted') + exit(0) + else: + print(f'Extracting d2net features for {len(image_list)} images') + + # Creating CNN model + model = D2Net( + model_file=args.model_file, + use_relu=args.use_relu, + use_cuda=use_cuda + ) + + if args.keypoints_type not in kdata.keypoints: + keypoints_dtype = None + keypoints_dsize = None + else: + keypoints_dtype = kdata.keypoints[args.keypoints_type].dtype + keypoints_dsize = kdata.keypoints[args.keypoints_type].dsize + if args.descriptors_type not in kdata.descriptors: + descriptors_dtype = None + descriptors_dsize = None + else: + descriptors_dtype = kdata.descriptors[args.descriptors_type].dtype + descriptors_dsize = kdata.descriptors[args.descriptors_type].dsize + + # Process the files + for image_name in tqdm(image_list, total=len(image_list)): + img_path = get_image_fullpath(args.kapture_root, image_name) + image = Image.open(img_path).convert('RGB') + + width, height = image.size + + resized_image = image + resized_width = width + resized_height = height + + max_edge = args.max_edge + max_sum_edges = args.max_sum_edges + if max(resized_width, resized_height) > max_edge: + scale_multiplier = max_edge / max(resized_width, resized_height) + resized_width = math.floor(resized_width * scale_multiplier) + resized_height = math.floor(resized_height * scale_multiplier) + resized_image = image.resize((resized_width, resized_height)) + if resized_width + resized_height > max_sum_edges: + scale_multiplier = max_sum_edges / (resized_width + resized_height) + resized_width = math.floor(resized_width 
* scale_multiplier) + resized_height = math.floor(resized_height * scale_multiplier) + resized_image = image.resize((resized_width, resized_height)) + + fact_i = width / resized_width + fact_j = height / resized_height + + resized_image = np.array(resized_image).astype('float') + + input_image = preprocess_image( + resized_image, + preprocessing=args.preprocessing + ) + + with torch.no_grad(): + if args.multiscale: + keypoints, scores, descriptors = process_multiscale( + torch.tensor( + input_image[np.newaxis, :, :, :].astype(np.float32), + device=device + ), + model + ) + else: + keypoints, scores, descriptors = process_multiscale( + torch.tensor( + input_image[np.newaxis, :, :, :].astype(np.float32), + device=device + ), + model, + scales=[1] + ) + + # Input image coordinates + keypoints[:, 0] *= fact_i + keypoints[:, 1] *= fact_j + # i, j -> u, v + keypoints = keypoints[:, [1, 0, 2]] + + if args.max_keypoints != float("+inf"): + # keep the last (the highest) indexes + idx_keep = scores.argsort()[-min(len(keypoints), args.max_keypoints):] + keypoints = keypoints[idx_keep] + descriptors = descriptors[idx_keep] + + if keypoints_dtype is None or descriptors_dtype is None: + keypoints_dtype = keypoints.dtype + descriptors_dtype = descriptors.dtype + + keypoints_dsize = keypoints.shape[1] + descriptors_dsize = descriptors.shape[1] + + kdata.keypoints[args.keypoints_type] = kapture.Keypoints('d2net', keypoints_dtype, keypoints_dsize) + kdata.descriptors[args.descriptors_type] = kapture.Descriptors('d2net', descriptors_dtype, + descriptors_dsize, + args.keypoints_type, 'L2') + + keypoints_config_absolute_path = get_feature_csv_fullpath(kapture.Keypoints, + args.keypoints_type, + args.kapture_root) + descriptors_config_absolute_path = get_feature_csv_fullpath(kapture.Descriptors, + args.descriptors_type, + args.kapture_root) + + keypoints_to_file(keypoints_config_absolute_path, kdata.keypoints[args.keypoints_type]) + descriptors_to_file(descriptors_config_absolute_path, 
kdata.descriptors[args.descriptors_type]) + else: + assert kdata.keypoints[args.keypoints_type].dtype == keypoints.dtype + assert kdata.descriptors[args.descriptors_type].dtype == descriptors.dtype + assert kdata.keypoints[args.keypoints_type].dsize == keypoints.shape[1] + assert kdata.descriptors[args.descriptors_type].dsize == descriptors.shape[1] + assert kdata.descriptors[args.descriptors_type].keypoints_type == args.keypoints_type + assert kdata.descriptors[args.descriptors_type].metric_type == 'L2' + + keypoints_fullpath = get_keypoints_fullpath(args.keypoints_type, args.kapture_root, + image_name, tar_handlers) + print(f"Saving {keypoints.shape[0]} keypoints to {keypoints_fullpath}") + image_keypoints_to_file(keypoints_fullpath, keypoints) + kdata.keypoints[args.keypoints_type].add(image_name) + + descriptors_fullpath = get_descriptors_fullpath(args.descriptors_type, args.kapture_root, + image_name, tar_handlers) + print(f"Saving {descriptors.shape[0]} descriptors to {descriptors_fullpath}") + image_descriptors_to_file(descriptors_fullpath, descriptors) + kdata.descriptors[args.descriptors_type].add(image_name) + + if not keypoints_check_dir(kdata.keypoints[args.keypoints_type], args.keypoints_type, + args.kapture_root, tar_handlers) or \ + not descriptors_check_dir(kdata.descriptors[args.descriptors_type], args.descriptors_type, + args.kapture_root, tar_handlers): + print('local feature extraction ended successfully but not all files were saved') diff --git a/third_party/d2net/hpatches_sequences/HPatches-Sequences-Matching-Benchmark.ipynb b/third_party/d2net/hpatches_sequences/HPatches-Sequences-Matching-Benchmark.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..bb9c93165c3325c70d22290cc53f55a34b28c1f3 --- /dev/null +++ b/third_party/d2net/hpatches_sequences/HPatches-Sequences-Matching-Benchmark.ipynb @@ -0,0 +1,441 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": 
[ + "import matplotlib\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import numpy as np\n", + "\n", + "import os\n", + "\n", + "import torch\n", + "\n", + "from scipy.io import loadmat\n", + "\n", + "from tqdm import tqdm_notebook as tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "use_cuda = torch.cuda.is_available()\n", + "device = torch.device('cuda:0' if use_cuda else 'cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Add new methods here.\n", + "# methods = ['hesaff', 'hesaffnet', 'delf', 'delf-new', 'superpoint', 'd2-net', 'd2-net-trained']\n", + "# names = ['Hes. Aff. + Root-SIFT', 'HAN + HN++', 'DELF', 'DELF New', 'SuperPoint', 'D2-Net', 'D2-Net Trained']\n", + "# colors = ['black', 'orange', 'red', 'red', 'blue', 'purple', 'purple']\n", + "# linestyles = ['-', '-', '-', '--', '-', '-', '--']\n", + "methods = ['hesaff', 'hesaffnet', 'delf', 'delf-new', 'superpoint', 'lf-net', 'd2-net', 'd2-net-ms', 'd2-net-trained', 'd2-net-trained-ms']\n", + "names = ['Hes. Aff. 
+ Root-SIFT', 'HAN + HN++', 'DELF', 'DELF New', 'SuperPoint', 'LF-Net', 'D2-Net', 'D2-Net MS', 'D2-Net Trained', 'D2-Net Trained MS']\n", + "colors = ['black', 'orange', 'red', 'red', 'blue', 'brown', 'purple', 'green', 'purple', 'green']\n", + "linestyles = ['-', '-', '-', '--', '-', '-', '-', '-', '--', '--']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Change here if you want to use top K or all features.\n", + "# top_k = 2000\n", + "top_k = None " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "n_i = 52\n", + "n_v = 56" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_path = 'hpatches-sequences-release'" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "lim = [1, 15]\n", + "rng = np.arange(lim[0], lim[1] + 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def mnn_matcher(descriptors_a, descriptors_b):\n", + " device = descriptors_a.device\n", + " sim = descriptors_a @ descriptors_b.t()\n", + " nn12 = torch.max(sim, dim=1)[1]\n", + " nn21 = torch.max(sim, dim=0)[1]\n", + " ids1 = torch.arange(0, sim.shape[0], device=device)\n", + " mask = (ids1 == nn21[nn12])\n", + " matches = torch.stack([ids1[mask], nn12[mask]])\n", + " return matches.t().data.cpu().numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def benchmark_features(read_feats):\n", + " seq_names = sorted(os.listdir(dataset_path))\n", + "\n", + " n_feats = []\n", + " n_matches = []\n", + " seq_type = []\n", + " i_err = {thr: 0 for thr in rng}\n", + " v_err = {thr: 0 for thr in rng}\n", + "\n", + " for seq_idx, seq_name in tqdm(enumerate(seq_names), total=len(seq_names)):\n", + " 
keypoints_a, descriptors_a = read_feats(seq_name, 1)\n", + " n_feats.append(keypoints_a.shape[0])\n", + "\n", + " for im_idx in range(2, 7):\n", + " keypoints_b, descriptors_b = read_feats(seq_name, im_idx)\n", + " n_feats.append(keypoints_b.shape[0])\n", + "\n", + " matches = mnn_matcher(\n", + " torch.from_numpy(descriptors_a).to(device=device), \n", + " torch.from_numpy(descriptors_b).to(device=device)\n", + " )\n", + " \n", + " homography = np.loadtxt(os.path.join(dataset_path, seq_name, \"H_1_\" + str(im_idx)))\n", + " \n", + " pos_a = keypoints_a[matches[:, 0], : 2] \n", + " pos_a_h = np.concatenate([pos_a, np.ones([matches.shape[0], 1])], axis=1)\n", + " pos_b_proj_h = np.transpose(np.dot(homography, np.transpose(pos_a_h)))\n", + " pos_b_proj = pos_b_proj_h[:, : 2] / pos_b_proj_h[:, 2 :]\n", + "\n", + " pos_b = keypoints_b[matches[:, 1], : 2]\n", + "\n", + " dist = np.sqrt(np.sum((pos_b - pos_b_proj) ** 2, axis=1))\n", + "\n", + " n_matches.append(matches.shape[0])\n", + " seq_type.append(seq_name[0])\n", + " \n", + " if dist.shape[0] == 0:\n", + " dist = np.array([float(\"inf\")])\n", + " \n", + " for thr in rng:\n", + " if seq_name[0] == 'i':\n", + " i_err[thr] += np.mean(dist <= thr)\n", + " else:\n", + " v_err[thr] += np.mean(dist <= thr)\n", + " \n", + " seq_type = np.array(seq_type)\n", + " n_feats = np.array(n_feats)\n", + " n_matches = np.array(n_matches)\n", + " \n", + " return i_err, v_err, [seq_type, n_feats, n_matches]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def summary(stats):\n", + " seq_type, n_feats, n_matches = stats\n", + " print('# Features: {:f} - [{:d}, {:d}]'.format(np.mean(n_feats), np.min(n_feats), np.max(n_feats)))\n", + " print('# Matches: Overall {:f}, Illumination {:f}, Viewpoint {:f}'.format(\n", + " np.sum(n_matches) / ((n_i + n_v) * 5), \n", + " np.sum(n_matches[seq_type == 'i']) / (n_i * 5), \n", + " np.sum(n_matches[seq_type == 'v']) / (n_v * 5))\n", + 
" )" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_read_function(method, extension='ppm'):\n", + " def read_function(seq_name, im_idx):\n", + " aux = np.load(os.path.join(dataset_path, seq_name, '%d.%s.%s' % (im_idx, extension, method)))\n", + " if top_k is None:\n", + " return aux['keypoints'], aux['descriptors']\n", + " else:\n", + " assert('scores' in aux)\n", + " ids = np.argsort(aux['scores'])[-top_k :]\n", + " return aux['keypoints'][ids, :], aux['descriptors'][ids, :]\n", + " return read_function" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "def sift_to_rootsift(descriptors):\n", + " return np.sqrt(descriptors / np.expand_dims(np.sum(np.abs(descriptors), axis=1), axis=1) + 1e-16)\n", + "def parse_mat(mat):\n", + " keypoints = mat['keypoints'][:, : 2]\n", + " raw_descriptors = mat['descriptors']\n", + " l2_norm_descriptors = raw_descriptors / np.expand_dims(np.sum(raw_descriptors ** 2, axis=1), axis=1)\n", + " descriptors = sift_to_rootsift(l2_norm_descriptors)\n", + " if top_k is None:\n", + " return keypoints, descriptors\n", + " else:\n", + " assert('scores' in mat)\n", + " ids = np.argsort(mat['scores'][0])[-top_k :]\n", + " return keypoints[ids, :], descriptors[ids, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "if top_k is None:\n", + " cache_dir = 'cache'\n", + "else:\n", + " cache_dir = 'cache-top'\n", + "if not os.path.isdir(cache_dir):\n", + " os.mkdir(cache_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "errors = {}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hesaff\n", + "Loading precomputed errors...\n", + "# Features: 6710.137346 - [296, 
26021]\n", + "# Matches: Overall 2851.679630, Illumination 1585.803846, Viewpoint 4027.135714\n", + "hesaffnet\n", + "Loading precomputed errors...\n", + "# Features: 3860.754630 - [89, 16326]\n", + "# Matches: Overall 1959.996296, Illumination 1098.419231, Viewpoint 2760.032143\n", + "delf\n", + "Loading precomputed errors...\n", + "# Features: 4608.236111 - [1196, 10939]\n", + "# Matches: Overall 1912.400000, Illumination 1973.100000, Viewpoint 1856.035714\n", + "delf-new\n", + "Loading precomputed errors...\n", + "# Features: 4590.001543 - [953, 12696]\n", + "# Matches: Overall 1940.288889, Illumination 2031.873077, Viewpoint 1855.246429\n", + "superpoint\n", + "Loading precomputed errors...\n", + "# Features: 1562.611111 - [90, 6422]\n", + "# Matches: Overall 883.440741, Illumination 667.830769, Viewpoint 1083.650000\n", + "lf-net\n", + "Loading precomputed errors...\n", + "# Features: 500.000000 - [500, 500]\n", + "# Matches: Overall 177.475926, Illumination 183.073077, Viewpoint 172.278571\n", + "d2-net\n", + "Loading precomputed errors...\n", + "# Features: 2994.067901 - [641, 9337]\n", + "# Matches: Overall 1182.574074, Illumination 964.588462, Viewpoint 1384.989286\n", + "d2-net-ms\n", + "Loading precomputed errors...\n", + "# Features: 4928.163580 - [1009, 15230]\n", + "# Matches: Overall 1698.377778, Illumination 1384.215385, Viewpoint 1990.100000\n", + "d2-net-trained\n", + "Loading precomputed errors...\n", + "# Features: 5965.117284 - [1309, 18974]\n", + "# Matches: Overall 2495.900000, Illumination 2033.250000, Viewpoint 2925.503571\n", + "d2-net-trained-ms\n", + "Loading precomputed errors...\n", + "# Features: 8254.473765 - [1797, 26880]\n", + "# Matches: Overall 2831.638889, Illumination 2313.957692, Viewpoint 3312.342857\n" + ] + } + ], + "source": [ + "for method in methods:\n", + " output_file = os.path.join(cache_dir, method + '.npy')\n", + " print(method)\n", + " if method == 'hesaff':\n", + " read_function = lambda seq_name, im_idx: 
parse_mat(loadmat(os.path.join(dataset_path, seq_name, '%d.ppm.hesaff' % im_idx), appendmat=False))\n", + " else:\n", + " if method == 'delf' or method == 'delf-new':\n", + " read_function = generate_read_function(method, extension='png')\n", + " else:\n", + " read_function = generate_read_function(method)\n", + " if os.path.exists(output_file):\n", + " print('Loading precomputed errors...')\n", + " errors[method] = np.load(output_file, allow_pickle=True)\n", + " else:\n", + " errors[method] = benchmark_features(read_function)\n", + " np.save(output_file, errors[method])\n", + " summary(errors[method][-1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "plt_lim = [1, 10]\n", + "plt_rng = np.arange(plt_lim[0], plt_lim[1] + 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.rc('axes', titlesize=25)\n", + "plt.rc('axes', labelsize=25)\n", + "\n", + "plt.figure(figsize=(15, 5))\n", + "\n", + "plt.subplot(1, 3, 1)\n", + "for method, name, color, ls in zip(methods, names, colors, linestyles):\n", + " i_err, v_err, _ = errors[method]\n", + " plt.plot(plt_rng, [(i_err[thr] + v_err[thr]) / ((n_i + n_v) * 5) for thr in plt_rng], color=color, ls=ls, linewidth=3, label=name)\n", + "plt.title('Overall')\n", + "plt.xlim(plt_lim)\n", + "plt.xticks(plt_rng)\n", + "plt.ylabel('MMA')\n", + "plt.ylim([0, 1])\n", + "plt.grid()\n", + "plt.tick_params(axis='both', which='major', labelsize=20)\n", + "plt.legend()\n", + "\n", + "plt.subplot(1, 3, 2)\n", + "for method, name, color, ls in zip(methods, names, colors, linestyles):\n", + " i_err, v_err, _ = errors[method]\n", + " plt.plot(plt_rng, [i_err[thr] / (n_i * 5) for thr in plt_rng], color=color, ls=ls, linewidth=3, label=name)\n", + "plt.title('Illumination')\n", + "plt.xlabel('threshold [px]')\n", + "plt.xlim(plt_lim)\n", + "plt.xticks(plt_rng)\n", + "plt.ylim([0, 1])\n", + "plt.gca().axes.set_yticklabels([])\n", + "plt.grid()\n", + "plt.tick_params(axis='both', which='major', labelsize=20)\n", + "\n", + "plt.subplot(1, 3, 3)\n", + "for method, name, color, ls in zip(methods, names, colors, linestyles):\n", + " i_err, v_err, _ = errors[method]\n", + " plt.plot(plt_rng, [v_err[thr] / (n_v * 5) for thr in plt_rng], color=color, ls=ls, linewidth=3, label=name)\n", + "plt.title('Viewpoint')\n", + "plt.xlim(plt_lim)\n", + "plt.xticks(plt_rng)\n", + "plt.ylim([0, 1])\n", + "plt.gca().axes.set_yticklabels([])\n", + "plt.grid()\n", + "plt.tick_params(axis='both', which='major', labelsize=20)\n", + "\n", + "if top_k is None:\n", + " plt.savefig('hseq.pdf', bbox_inches='tight', dpi=300)\n", + "else:\n", + " plt.savefig('hseq-top.pdf', bbox_inches='tight', dpi=300)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/third_party/d2net/hpatches_sequences/README.md b/third_party/d2net/hpatches_sequences/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2a0b5e0f154d1717087c35f93cd02a0f54fc6027 --- /dev/null +++ b/third_party/d2net/hpatches_sequences/README.md @@ -0,0 +1,22 @@ +# HPatches Sequences / Image Pairs Matching Benchmark + +Please check the [official repository](https://github.com/hpatches/hpatches-dataset) for more information regarding references. + +The dataset can be downloaded by running `bash download.sh` - this script downloads and extracts the HPatches Sequences dataset and removes the sequences containing high resolution images (`> 1600x1200`) as mentioned in the D2-Net paper. You can also download the cache with results for all methods from the D2-Net paper by running `bash download_cache.sh`. + +New methods can be added in cell 4 of the notebook. 
The local features are supposed to be stored in the [`npz`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.savez.html) format with three fields: + +- `keypoints` - `N x 2` matrix with `x, y` coordinates of each keypoint in COLMAP format (the `X` axis points to the right, the `Y` axis to the bottom) + +- `scores` - `N` array with detection scores for each keypoint (higher is better) - only required for the "top K" version of the benchmark + +- `descriptors` - `N x D` matrix with the descriptors (L2 normalized if you plan on using the provided mutual nearest neighbors matcher) + +Moreover, the `npz` files are supposed to be saved alongside their corresponding images, with the `method` name appended to the image filename as an extra extension (e.g. if `method = d2-net`, the features for the image `hpatches-sequences-release/i_ajuntament/1.ppm` should be in the file `hpatches-sequences-release/i_ajuntament/1.ppm.d2-net`). + +We provide a simple script to extract Hessian Affine keypoints with SIFT descriptors (`extract_hesaff.m`); this script requires MATLAB and [VLFeat](http://www.vlfeat.org/). + +D2-Net features can be extracted by running: +``` +python extract_features.py --image_list_file image_list_hpatches_sequences.txt +``` diff --git a/third_party/d2net/hpatches_sequences/convert_to_png.sh b/third_party/d2net/hpatches_sequences/convert_to_png.sh new file mode 100644 index 0000000000000000000000000000000000000000..5b82fff606b4ef60bad32cfef463a601cbfd4586 --- /dev/null +++ b/third_party/d2net/hpatches_sequences/convert_to_png.sh @@ -0,0 +1,9 @@ +# DELF Extraction script doesn't support .ppm images. 
+current_dir=`pwd` +echo $current_dir +for dir in `ls hpatches-sequences-release`; do + echo $dir + cd hpatches-sequences-release/$dir + mogrify -format png *.ppm + cd $current_dir +done diff --git a/third_party/d2net/hpatches_sequences/download.sh b/third_party/d2net/hpatches_sequences/download.sh new file mode 100644 index 0000000000000000000000000000000000000000..80eb0e3c9f24345c17177cb9d3ab0834f8d58a27 --- /dev/null +++ b/third_party/d2net/hpatches_sequences/download.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +# Download the dataset +wget http://icvl.ee.ic.ac.uk/vbalnt/hpatches/hpatches-sequences-release.tar.gz + +# Extract the dataset +tar xvzf hpatches-sequences-release.tar.gz + +# Remove the high-resolution sequences +cd hpatches-sequences-release +rm -rf i_contruction i_crownnight i_dc i_pencils i_whitebuilding v_artisans v_astronautis v_talent +cd .. diff --git a/third_party/d2net/hpatches_sequences/download_cache.sh b/third_party/d2net/hpatches_sequences/download_cache.sh new file mode 100644 index 0000000000000000000000000000000000000000..7a5a34acc75af5c2f398d3ec8cea367be404cdeb --- /dev/null +++ b/third_party/d2net/hpatches_sequences/download_cache.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +wget https://dsmn.ml/files/d2-net/hpatches-sequences-cache.tar.gz +tar xvzf hpatches-sequences-cache.tar.gz +rm -rf hpatches-sequences-cache.tar.gz + +wget https://dsmn.ml/files/d2-net/hpatches-sequences-cache-top.tar.gz +tar xvzf hpatches-sequences-cache-top.tar.gz +rm -rf hpatches-sequences-cache-top.tar.gz + diff --git a/third_party/d2net/image_list_hpatches_sequences.txt b/third_party/d2net/image_list_hpatches_sequences.txt new file mode 100644 index 0000000000000000000000000000000000000000..edee04fef9a4bdadba7b10015a3f0e20cd3e10fc --- /dev/null +++ b/third_party/d2net/image_list_hpatches_sequences.txt @@ -0,0 +1,648 @@ +hpatches_sequences/hpatches-sequences-release/v_vitro/5.ppm +hpatches_sequences/hpatches-sequences-release/v_vitro/2.ppm 
+hpatches_sequences/hpatches-sequences-release/v_vitro/4.ppm +hpatches_sequences/hpatches-sequences-release/v_vitro/1.ppm +hpatches_sequences/hpatches-sequences-release/v_vitro/3.ppm +hpatches_sequences/hpatches-sequences-release/v_vitro/6.ppm +hpatches_sequences/hpatches-sequences-release/v_apprentices/5.ppm +hpatches_sequences/hpatches-sequences-release/v_apprentices/2.ppm +hpatches_sequences/hpatches-sequences-release/v_apprentices/4.ppm +hpatches_sequences/hpatches-sequences-release/v_apprentices/1.ppm +hpatches_sequences/hpatches-sequences-release/v_apprentices/3.ppm +hpatches_sequences/hpatches-sequences-release/v_apprentices/6.ppm +hpatches_sequences/hpatches-sequences-release/i_miniature/5.ppm +hpatches_sequences/hpatches-sequences-release/i_miniature/2.ppm +hpatches_sequences/hpatches-sequences-release/i_miniature/4.ppm +hpatches_sequences/hpatches-sequences-release/i_miniature/1.ppm +hpatches_sequences/hpatches-sequences-release/i_miniature/3.ppm +hpatches_sequences/hpatches-sequences-release/i_miniature/6.ppm +hpatches_sequences/hpatches-sequences-release/v_churchill/5.ppm +hpatches_sequences/hpatches-sequences-release/v_churchill/2.ppm +hpatches_sequences/hpatches-sequences-release/v_churchill/4.ppm +hpatches_sequences/hpatches-sequences-release/v_churchill/1.ppm +hpatches_sequences/hpatches-sequences-release/v_churchill/3.ppm +hpatches_sequences/hpatches-sequences-release/v_churchill/6.ppm +hpatches_sequences/hpatches-sequences-release/v_soldiers/5.ppm +hpatches_sequences/hpatches-sequences-release/v_soldiers/2.ppm +hpatches_sequences/hpatches-sequences-release/v_soldiers/4.ppm +hpatches_sequences/hpatches-sequences-release/v_soldiers/1.ppm +hpatches_sequences/hpatches-sequences-release/v_soldiers/3.ppm +hpatches_sequences/hpatches-sequences-release/v_soldiers/6.ppm +hpatches_sequences/hpatches-sequences-release/i_nijmegen/5.ppm +hpatches_sequences/hpatches-sequences-release/i_nijmegen/2.ppm 
+hpatches_sequences/hpatches-sequences-release/i_nijmegen/4.ppm +hpatches_sequences/hpatches-sequences-release/i_nijmegen/1.ppm +hpatches_sequences/hpatches-sequences-release/i_nijmegen/3.ppm +hpatches_sequences/hpatches-sequences-release/i_nijmegen/6.ppm +hpatches_sequences/hpatches-sequences-release/v_wapping/5.ppm +hpatches_sequences/hpatches-sequences-release/v_wapping/2.ppm +hpatches_sequences/hpatches-sequences-release/v_wapping/4.ppm +hpatches_sequences/hpatches-sequences-release/v_wapping/1.ppm +hpatches_sequences/hpatches-sequences-release/v_wapping/3.ppm +hpatches_sequences/hpatches-sequences-release/v_wapping/6.ppm +hpatches_sequences/hpatches-sequences-release/v_bip/5.ppm +hpatches_sequences/hpatches-sequences-release/v_bip/2.ppm +hpatches_sequences/hpatches-sequences-release/v_bip/4.ppm +hpatches_sequences/hpatches-sequences-release/v_bip/1.ppm +hpatches_sequences/hpatches-sequences-release/v_bip/3.ppm +hpatches_sequences/hpatches-sequences-release/v_bip/6.ppm +hpatches_sequences/hpatches-sequences-release/i_fog/5.ppm +hpatches_sequences/hpatches-sequences-release/i_fog/2.ppm +hpatches_sequences/hpatches-sequences-release/i_fog/4.ppm +hpatches_sequences/hpatches-sequences-release/i_fog/1.ppm +hpatches_sequences/hpatches-sequences-release/i_fog/3.ppm +hpatches_sequences/hpatches-sequences-release/i_fog/6.ppm +hpatches_sequences/hpatches-sequences-release/i_nescafe/5.ppm +hpatches_sequences/hpatches-sequences-release/i_nescafe/2.ppm +hpatches_sequences/hpatches-sequences-release/i_nescafe/4.ppm +hpatches_sequences/hpatches-sequences-release/i_nescafe/1.ppm +hpatches_sequences/hpatches-sequences-release/i_nescafe/3.ppm +hpatches_sequences/hpatches-sequences-release/i_nescafe/6.ppm +hpatches_sequences/hpatches-sequences-release/i_village/5.ppm +hpatches_sequences/hpatches-sequences-release/i_village/2.ppm +hpatches_sequences/hpatches-sequences-release/i_village/4.ppm +hpatches_sequences/hpatches-sequences-release/i_village/1.ppm 
+hpatches_sequences/hpatches-sequences-release/i_village/3.ppm +hpatches_sequences/hpatches-sequences-release/i_village/6.ppm +hpatches_sequences/hpatches-sequences-release/i_table/5.ppm +hpatches_sequences/hpatches-sequences-release/i_table/2.ppm +hpatches_sequences/hpatches-sequences-release/i_table/4.ppm +hpatches_sequences/hpatches-sequences-release/i_table/1.ppm +hpatches_sequences/hpatches-sequences-release/i_table/3.ppm +hpatches_sequences/hpatches-sequences-release/i_table/6.ppm +hpatches_sequences/hpatches-sequences-release/v_calder/5.ppm +hpatches_sequences/hpatches-sequences-release/v_calder/2.ppm +hpatches_sequences/hpatches-sequences-release/v_calder/4.ppm +hpatches_sequences/hpatches-sequences-release/v_calder/1.ppm +hpatches_sequences/hpatches-sequences-release/v_calder/3.ppm +hpatches_sequences/hpatches-sequences-release/v_calder/6.ppm +hpatches_sequences/hpatches-sequences-release/i_partyfood/5.ppm +hpatches_sequences/hpatches-sequences-release/i_partyfood/2.ppm +hpatches_sequences/hpatches-sequences-release/i_partyfood/4.ppm +hpatches_sequences/hpatches-sequences-release/i_partyfood/1.ppm +hpatches_sequences/hpatches-sequences-release/i_partyfood/3.ppm +hpatches_sequences/hpatches-sequences-release/i_partyfood/6.ppm +hpatches_sequences/hpatches-sequences-release/i_bridger/5.ppm +hpatches_sequences/hpatches-sequences-release/i_bridger/2.ppm +hpatches_sequences/hpatches-sequences-release/i_bridger/4.ppm +hpatches_sequences/hpatches-sequences-release/i_bridger/1.ppm +hpatches_sequences/hpatches-sequences-release/i_bridger/3.ppm +hpatches_sequences/hpatches-sequences-release/i_bridger/6.ppm +hpatches_sequences/hpatches-sequences-release/v_dirtywall/5.ppm +hpatches_sequences/hpatches-sequences-release/v_dirtywall/2.ppm +hpatches_sequences/hpatches-sequences-release/v_dirtywall/4.ppm +hpatches_sequences/hpatches-sequences-release/v_dirtywall/1.ppm +hpatches_sequences/hpatches-sequences-release/v_dirtywall/3.ppm 
+hpatches_sequences/hpatches-sequences-release/v_dirtywall/6.ppm +hpatches_sequences/hpatches-sequences-release/i_parking/5.ppm +hpatches_sequences/hpatches-sequences-release/i_parking/2.ppm +hpatches_sequences/hpatches-sequences-release/i_parking/4.ppm +hpatches_sequences/hpatches-sequences-release/i_parking/1.ppm +hpatches_sequences/hpatches-sequences-release/i_parking/3.ppm +hpatches_sequences/hpatches-sequences-release/i_parking/6.ppm +hpatches_sequences/hpatches-sequences-release/v_wormhole/5.ppm +hpatches_sequences/hpatches-sequences-release/v_wormhole/2.ppm +hpatches_sequences/hpatches-sequences-release/v_wormhole/4.ppm +hpatches_sequences/hpatches-sequences-release/v_wormhole/1.ppm +hpatches_sequences/hpatches-sequences-release/v_wormhole/3.ppm +hpatches_sequences/hpatches-sequences-release/v_wormhole/6.ppm +hpatches_sequences/hpatches-sequences-release/v_tempera/5.ppm +hpatches_sequences/hpatches-sequences-release/v_tempera/2.ppm +hpatches_sequences/hpatches-sequences-release/v_tempera/4.ppm +hpatches_sequences/hpatches-sequences-release/v_tempera/1.ppm +hpatches_sequences/hpatches-sequences-release/v_tempera/3.ppm +hpatches_sequences/hpatches-sequences-release/v_tempera/6.ppm +hpatches_sequences/hpatches-sequences-release/i_greenhouse/5.ppm +hpatches_sequences/hpatches-sequences-release/i_greenhouse/2.ppm +hpatches_sequences/hpatches-sequences-release/i_greenhouse/4.ppm +hpatches_sequences/hpatches-sequences-release/i_greenhouse/1.ppm +hpatches_sequences/hpatches-sequences-release/i_greenhouse/3.ppm +hpatches_sequences/hpatches-sequences-release/i_greenhouse/6.ppm +hpatches_sequences/hpatches-sequences-release/v_adam/5.ppm +hpatches_sequences/hpatches-sequences-release/v_adam/2.ppm +hpatches_sequences/hpatches-sequences-release/v_adam/4.ppm +hpatches_sequences/hpatches-sequences-release/v_adam/1.ppm +hpatches_sequences/hpatches-sequences-release/v_adam/3.ppm +hpatches_sequences/hpatches-sequences-release/v_adam/6.ppm 
+hpatches_sequences/hpatches-sequences-release/i_smurf/5.ppm +hpatches_sequences/hpatches-sequences-release/i_smurf/2.ppm +hpatches_sequences/hpatches-sequences-release/i_smurf/4.ppm +hpatches_sequences/hpatches-sequences-release/i_smurf/1.ppm +hpatches_sequences/hpatches-sequences-release/i_smurf/3.ppm +hpatches_sequences/hpatches-sequences-release/i_smurf/6.ppm +hpatches_sequences/hpatches-sequences-release/v_posters/5.ppm +hpatches_sequences/hpatches-sequences-release/v_posters/2.ppm +hpatches_sequences/hpatches-sequences-release/v_posters/4.ppm +hpatches_sequences/hpatches-sequences-release/v_posters/1.ppm +hpatches_sequences/hpatches-sequences-release/v_posters/3.ppm +hpatches_sequences/hpatches-sequences-release/v_posters/6.ppm +hpatches_sequences/hpatches-sequences-release/v_cartooncity/5.ppm +hpatches_sequences/hpatches-sequences-release/v_cartooncity/2.ppm +hpatches_sequences/hpatches-sequences-release/v_cartooncity/4.ppm +hpatches_sequences/hpatches-sequences-release/v_cartooncity/1.ppm +hpatches_sequences/hpatches-sequences-release/v_cartooncity/3.ppm +hpatches_sequences/hpatches-sequences-release/v_cartooncity/6.ppm +hpatches_sequences/hpatches-sequences-release/i_melon/5.ppm +hpatches_sequences/hpatches-sequences-release/i_melon/2.ppm +hpatches_sequences/hpatches-sequences-release/i_melon/4.ppm +hpatches_sequences/hpatches-sequences-release/i_melon/1.ppm +hpatches_sequences/hpatches-sequences-release/i_melon/3.ppm +hpatches_sequences/hpatches-sequences-release/i_melon/6.ppm +hpatches_sequences/hpatches-sequences-release/i_resort/5.ppm +hpatches_sequences/hpatches-sequences-release/i_resort/2.ppm +hpatches_sequences/hpatches-sequences-release/i_resort/4.ppm +hpatches_sequences/hpatches-sequences-release/i_resort/1.ppm +hpatches_sequences/hpatches-sequences-release/i_resort/3.ppm +hpatches_sequences/hpatches-sequences-release/i_resort/6.ppm +hpatches_sequences/hpatches-sequences-release/v_coffeehouse/5.ppm 
+hpatches_sequences/hpatches-sequences-release/v_coffeehouse/2.ppm +hpatches_sequences/hpatches-sequences-release/v_coffeehouse/4.ppm +hpatches_sequences/hpatches-sequences-release/v_coffeehouse/1.ppm +hpatches_sequences/hpatches-sequences-release/v_coffeehouse/3.ppm +hpatches_sequences/hpatches-sequences-release/v_coffeehouse/6.ppm +hpatches_sequences/hpatches-sequences-release/v_colors/5.ppm +hpatches_sequences/hpatches-sequences-release/v_colors/2.ppm +hpatches_sequences/hpatches-sequences-release/v_colors/4.ppm +hpatches_sequences/hpatches-sequences-release/v_colors/1.ppm +hpatches_sequences/hpatches-sequences-release/v_colors/3.ppm +hpatches_sequences/hpatches-sequences-release/v_colors/6.ppm +hpatches_sequences/hpatches-sequences-release/v_underground/5.ppm +hpatches_sequences/hpatches-sequences-release/v_underground/2.ppm +hpatches_sequences/hpatches-sequences-release/v_underground/4.ppm +hpatches_sequences/hpatches-sequences-release/v_underground/1.ppm +hpatches_sequences/hpatches-sequences-release/v_underground/3.ppm +hpatches_sequences/hpatches-sequences-release/v_underground/6.ppm +hpatches_sequences/hpatches-sequences-release/v_pomegranate/5.ppm +hpatches_sequences/hpatches-sequences-release/v_pomegranate/2.ppm +hpatches_sequences/hpatches-sequences-release/v_pomegranate/4.ppm +hpatches_sequences/hpatches-sequences-release/v_pomegranate/1.ppm +hpatches_sequences/hpatches-sequences-release/v_pomegranate/3.ppm +hpatches_sequences/hpatches-sequences-release/v_pomegranate/6.ppm +hpatches_sequences/hpatches-sequences-release/v_eastsouth/5.ppm +hpatches_sequences/hpatches-sequences-release/v_eastsouth/2.ppm +hpatches_sequences/hpatches-sequences-release/v_eastsouth/4.ppm +hpatches_sequences/hpatches-sequences-release/v_eastsouth/1.ppm +hpatches_sequences/hpatches-sequences-release/v_eastsouth/3.ppm +hpatches_sequences/hpatches-sequences-release/v_eastsouth/6.ppm +hpatches_sequences/hpatches-sequences-release/v_tabletop/5.ppm 
+hpatches_sequences/hpatches-sequences-release/v_tabletop/2.ppm +hpatches_sequences/hpatches-sequences-release/v_tabletop/4.ppm +hpatches_sequences/hpatches-sequences-release/v_tabletop/1.ppm +hpatches_sequences/hpatches-sequences-release/v_tabletop/3.ppm +hpatches_sequences/hpatches-sequences-release/v_tabletop/6.ppm +hpatches_sequences/hpatches-sequences-release/i_crownday/5.ppm +hpatches_sequences/hpatches-sequences-release/i_crownday/2.ppm +hpatches_sequences/hpatches-sequences-release/i_crownday/4.ppm +hpatches_sequences/hpatches-sequences-release/i_crownday/1.ppm +hpatches_sequences/hpatches-sequences-release/i_crownday/3.ppm +hpatches_sequences/hpatches-sequences-release/i_crownday/6.ppm +hpatches_sequences/hpatches-sequences-release/i_leuven/5.ppm +hpatches_sequences/hpatches-sequences-release/i_leuven/2.ppm +hpatches_sequences/hpatches-sequences-release/i_leuven/4.ppm +hpatches_sequences/hpatches-sequences-release/i_leuven/1.ppm +hpatches_sequences/hpatches-sequences-release/i_leuven/3.ppm +hpatches_sequences/hpatches-sequences-release/i_leuven/6.ppm +hpatches_sequences/hpatches-sequences-release/i_tools/5.ppm +hpatches_sequences/hpatches-sequences-release/i_tools/2.ppm +hpatches_sequences/hpatches-sequences-release/i_tools/4.ppm +hpatches_sequences/hpatches-sequences-release/i_tools/1.ppm +hpatches_sequences/hpatches-sequences-release/i_tools/3.ppm +hpatches_sequences/hpatches-sequences-release/i_tools/6.ppm +hpatches_sequences/hpatches-sequences-release/i_ski/5.ppm +hpatches_sequences/hpatches-sequences-release/i_ski/2.ppm +hpatches_sequences/hpatches-sequences-release/i_ski/4.ppm +hpatches_sequences/hpatches-sequences-release/i_ski/1.ppm +hpatches_sequences/hpatches-sequences-release/i_ski/3.ppm +hpatches_sequences/hpatches-sequences-release/i_ski/6.ppm +hpatches_sequences/hpatches-sequences-release/i_ktirio/5.ppm +hpatches_sequences/hpatches-sequences-release/i_ktirio/2.ppm +hpatches_sequences/hpatches-sequences-release/i_ktirio/4.ppm 
+hpatches_sequences/hpatches-sequences-release/i_ktirio/1.ppm +hpatches_sequences/hpatches-sequences-release/i_ktirio/3.ppm +hpatches_sequences/hpatches-sequences-release/i_ktirio/6.ppm +hpatches_sequences/hpatches-sequences-release/i_duda/5.ppm +hpatches_sequences/hpatches-sequences-release/i_duda/2.ppm +hpatches_sequences/hpatches-sequences-release/i_duda/4.ppm +hpatches_sequences/hpatches-sequences-release/i_duda/1.ppm +hpatches_sequences/hpatches-sequences-release/i_duda/3.ppm +hpatches_sequences/hpatches-sequences-release/i_duda/6.ppm +hpatches_sequences/hpatches-sequences-release/i_pool/5.ppm +hpatches_sequences/hpatches-sequences-release/i_pool/2.ppm +hpatches_sequences/hpatches-sequences-release/i_pool/4.ppm +hpatches_sequences/hpatches-sequences-release/i_pool/1.ppm +hpatches_sequences/hpatches-sequences-release/i_pool/3.ppm +hpatches_sequences/hpatches-sequences-release/i_pool/6.ppm +hpatches_sequences/hpatches-sequences-release/v_woman/5.ppm +hpatches_sequences/hpatches-sequences-release/v_woman/2.ppm +hpatches_sequences/hpatches-sequences-release/v_woman/4.ppm +hpatches_sequences/hpatches-sequences-release/v_woman/1.ppm +hpatches_sequences/hpatches-sequences-release/v_woman/3.ppm +hpatches_sequences/hpatches-sequences-release/v_woman/6.ppm +hpatches_sequences/hpatches-sequences-release/i_lionnight/5.ppm +hpatches_sequences/hpatches-sequences-release/i_lionnight/2.ppm +hpatches_sequences/hpatches-sequences-release/i_lionnight/4.ppm +hpatches_sequences/hpatches-sequences-release/i_lionnight/1.ppm +hpatches_sequences/hpatches-sequences-release/i_lionnight/3.ppm +hpatches_sequences/hpatches-sequences-release/i_lionnight/6.ppm +hpatches_sequences/hpatches-sequences-release/i_pinard/5.ppm +hpatches_sequences/hpatches-sequences-release/i_pinard/2.ppm +hpatches_sequences/hpatches-sequences-release/i_pinard/4.ppm +hpatches_sequences/hpatches-sequences-release/i_pinard/1.ppm +hpatches_sequences/hpatches-sequences-release/i_pinard/3.ppm 
+hpatches_sequences/hpatches-sequences-release/i_pinard/6.ppm +hpatches_sequences/hpatches-sequences-release/v_wall/5.ppm +hpatches_sequences/hpatches-sequences-release/v_wall/2.ppm +hpatches_sequences/hpatches-sequences-release/v_wall/4.ppm +hpatches_sequences/hpatches-sequences-release/v_wall/1.ppm +hpatches_sequences/hpatches-sequences-release/v_wall/3.ppm +hpatches_sequences/hpatches-sequences-release/v_wall/6.ppm +hpatches_sequences/hpatches-sequences-release/v_sunseason/5.ppm +hpatches_sequences/hpatches-sequences-release/v_sunseason/2.ppm +hpatches_sequences/hpatches-sequences-release/v_sunseason/4.ppm +hpatches_sequences/hpatches-sequences-release/v_sunseason/1.ppm +hpatches_sequences/hpatches-sequences-release/v_sunseason/3.ppm +hpatches_sequences/hpatches-sequences-release/v_sunseason/6.ppm +hpatches_sequences/hpatches-sequences-release/v_bees/5.ppm +hpatches_sequences/hpatches-sequences-release/v_bees/2.ppm +hpatches_sequences/hpatches-sequences-release/v_bees/4.ppm +hpatches_sequences/hpatches-sequences-release/v_bees/1.ppm +hpatches_sequences/hpatches-sequences-release/v_bees/3.ppm +hpatches_sequences/hpatches-sequences-release/v_bees/6.ppm +hpatches_sequences/hpatches-sequences-release/i_brooklyn/5.ppm +hpatches_sequences/hpatches-sequences-release/i_brooklyn/2.ppm +hpatches_sequences/hpatches-sequences-release/i_brooklyn/4.ppm +hpatches_sequences/hpatches-sequences-release/i_brooklyn/1.ppm +hpatches_sequences/hpatches-sequences-release/i_brooklyn/3.ppm +hpatches_sequences/hpatches-sequences-release/i_brooklyn/6.ppm +hpatches_sequences/hpatches-sequences-release/v_strand/5.ppm +hpatches_sequences/hpatches-sequences-release/v_strand/2.ppm +hpatches_sequences/hpatches-sequences-release/v_strand/4.ppm +hpatches_sequences/hpatches-sequences-release/v_strand/1.ppm +hpatches_sequences/hpatches-sequences-release/v_strand/3.ppm +hpatches_sequences/hpatches-sequences-release/v_strand/6.ppm +hpatches_sequences/hpatches-sequences-release/i_dome/5.ppm 
+hpatches_sequences/hpatches-sequences-release/i_dome/2.ppm +hpatches_sequences/hpatches-sequences-release/i_dome/4.ppm +hpatches_sequences/hpatches-sequences-release/i_dome/1.ppm +hpatches_sequences/hpatches-sequences-release/i_dome/3.ppm +hpatches_sequences/hpatches-sequences-release/i_dome/6.ppm +hpatches_sequences/hpatches-sequences-release/v_samples/5.ppm +hpatches_sequences/hpatches-sequences-release/v_samples/2.ppm +hpatches_sequences/hpatches-sequences-release/v_samples/4.ppm +hpatches_sequences/hpatches-sequences-release/v_samples/1.ppm +hpatches_sequences/hpatches-sequences-release/v_samples/3.ppm +hpatches_sequences/hpatches-sequences-release/v_samples/6.ppm +hpatches_sequences/hpatches-sequences-release/v_bricks/5.ppm +hpatches_sequences/hpatches-sequences-release/v_bricks/2.ppm +hpatches_sequences/hpatches-sequences-release/v_bricks/4.ppm +hpatches_sequences/hpatches-sequences-release/v_bricks/1.ppm +hpatches_sequences/hpatches-sequences-release/v_bricks/3.ppm +hpatches_sequences/hpatches-sequences-release/v_bricks/6.ppm +hpatches_sequences/hpatches-sequences-release/v_home/5.ppm +hpatches_sequences/hpatches-sequences-release/v_home/2.ppm +hpatches_sequences/hpatches-sequences-release/v_home/4.ppm +hpatches_sequences/hpatches-sequences-release/v_home/1.ppm +hpatches_sequences/hpatches-sequences-release/v_home/3.ppm +hpatches_sequences/hpatches-sequences-release/v_home/6.ppm +hpatches_sequences/hpatches-sequences-release/v_beyus/5.ppm +hpatches_sequences/hpatches-sequences-release/v_beyus/2.ppm +hpatches_sequences/hpatches-sequences-release/v_beyus/4.ppm +hpatches_sequences/hpatches-sequences-release/v_beyus/1.ppm +hpatches_sequences/hpatches-sequences-release/v_beyus/3.ppm +hpatches_sequences/hpatches-sequences-release/v_beyus/6.ppm +hpatches_sequences/hpatches-sequences-release/i_porta/5.ppm +hpatches_sequences/hpatches-sequences-release/i_porta/2.ppm +hpatches_sequences/hpatches-sequences-release/i_porta/4.ppm 
+hpatches_sequences/hpatches-sequences-release/i_porta/1.ppm +hpatches_sequences/hpatches-sequences-release/i_porta/3.ppm +hpatches_sequences/hpatches-sequences-release/i_porta/6.ppm +hpatches_sequences/hpatches-sequences-release/v_weapons/5.ppm +hpatches_sequences/hpatches-sequences-release/v_weapons/2.ppm +hpatches_sequences/hpatches-sequences-release/v_weapons/4.ppm +hpatches_sequences/hpatches-sequences-release/v_weapons/1.ppm +hpatches_sequences/hpatches-sequences-release/v_weapons/3.ppm +hpatches_sequences/hpatches-sequences-release/v_weapons/6.ppm +hpatches_sequences/hpatches-sequences-release/v_abstract/5.ppm +hpatches_sequences/hpatches-sequences-release/v_abstract/2.ppm +hpatches_sequences/hpatches-sequences-release/v_abstract/4.ppm +hpatches_sequences/hpatches-sequences-release/v_abstract/1.ppm +hpatches_sequences/hpatches-sequences-release/v_abstract/3.ppm +hpatches_sequences/hpatches-sequences-release/v_abstract/6.ppm +hpatches_sequences/hpatches-sequences-release/v_gardens/5.ppm +hpatches_sequences/hpatches-sequences-release/v_gardens/2.ppm +hpatches_sequences/hpatches-sequences-release/v_gardens/4.ppm +hpatches_sequences/hpatches-sequences-release/v_gardens/1.ppm +hpatches_sequences/hpatches-sequences-release/v_gardens/3.ppm +hpatches_sequences/hpatches-sequences-release/v_gardens/6.ppm +hpatches_sequences/hpatches-sequences-release/i_veggies/5.ppm +hpatches_sequences/hpatches-sequences-release/i_veggies/2.ppm +hpatches_sequences/hpatches-sequences-release/i_veggies/4.ppm +hpatches_sequences/hpatches-sequences-release/i_veggies/1.ppm +hpatches_sequences/hpatches-sequences-release/i_veggies/3.ppm +hpatches_sequences/hpatches-sequences-release/i_veggies/6.ppm +hpatches_sequences/hpatches-sequences-release/v_circus/5.ppm +hpatches_sequences/hpatches-sequences-release/v_circus/2.ppm +hpatches_sequences/hpatches-sequences-release/v_circus/4.ppm +hpatches_sequences/hpatches-sequences-release/v_circus/1.ppm 
+hpatches_sequences/hpatches-sequences-release/v_circus/3.ppm +hpatches_sequences/hpatches-sequences-release/v_circus/6.ppm +hpatches_sequences/hpatches-sequences-release/i_santuario/5.ppm +hpatches_sequences/hpatches-sequences-release/i_santuario/2.ppm +hpatches_sequences/hpatches-sequences-release/i_santuario/4.ppm +hpatches_sequences/hpatches-sequences-release/i_santuario/1.ppm +hpatches_sequences/hpatches-sequences-release/i_santuario/3.ppm +hpatches_sequences/hpatches-sequences-release/i_santuario/6.ppm +hpatches_sequences/hpatches-sequences-release/i_lionday/5.ppm +hpatches_sequences/hpatches-sequences-release/i_lionday/2.ppm +hpatches_sequences/hpatches-sequences-release/i_lionday/4.ppm +hpatches_sequences/hpatches-sequences-release/i_lionday/1.ppm +hpatches_sequences/hpatches-sequences-release/i_lionday/3.ppm +hpatches_sequences/hpatches-sequences-release/i_lionday/6.ppm +hpatches_sequences/hpatches-sequences-release/v_boat/5.ppm +hpatches_sequences/hpatches-sequences-release/v_boat/2.ppm +hpatches_sequences/hpatches-sequences-release/v_boat/4.ppm +hpatches_sequences/hpatches-sequences-release/v_boat/1.ppm +hpatches_sequences/hpatches-sequences-release/v_boat/3.ppm +hpatches_sequences/hpatches-sequences-release/v_boat/6.ppm +hpatches_sequences/hpatches-sequences-release/i_salon/5.ppm +hpatches_sequences/hpatches-sequences-release/i_salon/2.ppm +hpatches_sequences/hpatches-sequences-release/i_salon/4.ppm +hpatches_sequences/hpatches-sequences-release/i_salon/1.ppm +hpatches_sequences/hpatches-sequences-release/i_salon/3.ppm +hpatches_sequences/hpatches-sequences-release/i_salon/6.ppm +hpatches_sequences/hpatches-sequences-release/i_steps/5.ppm +hpatches_sequences/hpatches-sequences-release/i_steps/2.ppm +hpatches_sequences/hpatches-sequences-release/i_steps/4.ppm +hpatches_sequences/hpatches-sequences-release/i_steps/1.ppm +hpatches_sequences/hpatches-sequences-release/i_steps/3.ppm +hpatches_sequences/hpatches-sequences-release/i_steps/6.ppm 
+hpatches_sequences/hpatches-sequences-release/i_ajuntament/5.ppm +hpatches_sequences/hpatches-sequences-release/i_ajuntament/2.ppm +hpatches_sequences/hpatches-sequences-release/i_ajuntament/4.ppm +hpatches_sequences/hpatches-sequences-release/i_ajuntament/1.ppm +hpatches_sequences/hpatches-sequences-release/i_ajuntament/3.ppm +hpatches_sequences/hpatches-sequences-release/i_ajuntament/6.ppm +hpatches_sequences/hpatches-sequences-release/v_fest/5.ppm +hpatches_sequences/hpatches-sequences-release/v_fest/2.ppm +hpatches_sequences/hpatches-sequences-release/v_fest/4.ppm +hpatches_sequences/hpatches-sequences-release/v_fest/1.ppm +hpatches_sequences/hpatches-sequences-release/v_fest/3.ppm +hpatches_sequences/hpatches-sequences-release/v_fest/6.ppm +hpatches_sequences/hpatches-sequences-release/i_kions/5.ppm +hpatches_sequences/hpatches-sequences-release/i_kions/2.ppm +hpatches_sequences/hpatches-sequences-release/i_kions/4.ppm +hpatches_sequences/hpatches-sequences-release/i_kions/1.ppm +hpatches_sequences/hpatches-sequences-release/i_kions/3.ppm +hpatches_sequences/hpatches-sequences-release/i_kions/6.ppm +hpatches_sequences/hpatches-sequences-release/v_wounded/5.ppm +hpatches_sequences/hpatches-sequences-release/v_wounded/2.ppm +hpatches_sequences/hpatches-sequences-release/v_wounded/4.ppm +hpatches_sequences/hpatches-sequences-release/v_wounded/1.ppm +hpatches_sequences/hpatches-sequences-release/v_wounded/3.ppm +hpatches_sequences/hpatches-sequences-release/v_wounded/6.ppm +hpatches_sequences/hpatches-sequences-release/i_indiana/5.ppm +hpatches_sequences/hpatches-sequences-release/i_indiana/2.ppm +hpatches_sequences/hpatches-sequences-release/i_indiana/4.ppm +hpatches_sequences/hpatches-sequences-release/i_indiana/1.ppm +hpatches_sequences/hpatches-sequences-release/i_indiana/3.ppm +hpatches_sequences/hpatches-sequences-release/i_indiana/6.ppm +hpatches_sequences/hpatches-sequences-release/v_yuri/5.ppm +hpatches_sequences/hpatches-sequences-release/v_yuri/2.ppm 
+hpatches_sequences/hpatches-sequences-release/v_yuri/4.ppm +hpatches_sequences/hpatches-sequences-release/v_yuri/1.ppm +hpatches_sequences/hpatches-sequences-release/v_yuri/3.ppm +hpatches_sequences/hpatches-sequences-release/v_yuri/6.ppm +hpatches_sequences/hpatches-sequences-release/i_boutique/5.ppm +hpatches_sequences/hpatches-sequences-release/i_boutique/2.ppm +hpatches_sequences/hpatches-sequences-release/i_boutique/4.ppm +hpatches_sequences/hpatches-sequences-release/i_boutique/1.ppm +hpatches_sequences/hpatches-sequences-release/i_boutique/3.ppm +hpatches_sequences/hpatches-sequences-release/i_boutique/6.ppm +hpatches_sequences/hpatches-sequences-release/v_birdwoman/5.ppm +hpatches_sequences/hpatches-sequences-release/v_birdwoman/2.ppm +hpatches_sequences/hpatches-sequences-release/v_birdwoman/4.ppm +hpatches_sequences/hpatches-sequences-release/v_birdwoman/1.ppm +hpatches_sequences/hpatches-sequences-release/v_birdwoman/3.ppm +hpatches_sequences/hpatches-sequences-release/v_birdwoman/6.ppm +hpatches_sequences/hpatches-sequences-release/v_grace/5.ppm +hpatches_sequences/hpatches-sequences-release/v_grace/2.ppm +hpatches_sequences/hpatches-sequences-release/v_grace/4.ppm +hpatches_sequences/hpatches-sequences-release/v_grace/1.ppm +hpatches_sequences/hpatches-sequences-release/v_grace/3.ppm +hpatches_sequences/hpatches-sequences-release/v_grace/6.ppm +hpatches_sequences/hpatches-sequences-release/v_man/5.ppm +hpatches_sequences/hpatches-sequences-release/v_man/2.ppm +hpatches_sequences/hpatches-sequences-release/v_man/4.ppm +hpatches_sequences/hpatches-sequences-release/v_man/1.ppm +hpatches_sequences/hpatches-sequences-release/v_man/3.ppm +hpatches_sequences/hpatches-sequences-release/v_man/6.ppm +hpatches_sequences/hpatches-sequences-release/i_kurhaus/5.ppm +hpatches_sequences/hpatches-sequences-release/i_kurhaus/2.ppm +hpatches_sequences/hpatches-sequences-release/i_kurhaus/4.ppm +hpatches_sequences/hpatches-sequences-release/i_kurhaus/1.ppm 
+hpatches_sequences/hpatches-sequences-release/i_kurhaus/3.ppm +hpatches_sequences/hpatches-sequences-release/i_kurhaus/6.ppm +hpatches_sequences/hpatches-sequences-release/v_busstop/5.ppm +hpatches_sequences/hpatches-sequences-release/v_busstop/2.ppm +hpatches_sequences/hpatches-sequences-release/v_busstop/4.ppm +hpatches_sequences/hpatches-sequences-release/v_busstop/1.ppm +hpatches_sequences/hpatches-sequences-release/v_busstop/3.ppm +hpatches_sequences/hpatches-sequences-release/v_busstop/6.ppm +hpatches_sequences/hpatches-sequences-release/v_machines/5.ppm +hpatches_sequences/hpatches-sequences-release/v_machines/2.ppm +hpatches_sequences/hpatches-sequences-release/v_machines/4.ppm +hpatches_sequences/hpatches-sequences-release/v_machines/1.ppm +hpatches_sequences/hpatches-sequences-release/v_machines/3.ppm +hpatches_sequences/hpatches-sequences-release/v_machines/6.ppm +hpatches_sequences/hpatches-sequences-release/i_castle/5.ppm +hpatches_sequences/hpatches-sequences-release/i_castle/2.ppm +hpatches_sequences/hpatches-sequences-release/i_castle/4.ppm +hpatches_sequences/hpatches-sequences-release/i_castle/1.ppm +hpatches_sequences/hpatches-sequences-release/i_castle/3.ppm +hpatches_sequences/hpatches-sequences-release/i_castle/6.ppm +hpatches_sequences/hpatches-sequences-release/i_bologna/5.ppm +hpatches_sequences/hpatches-sequences-release/i_bologna/2.ppm +hpatches_sequences/hpatches-sequences-release/i_bologna/4.ppm +hpatches_sequences/hpatches-sequences-release/i_bologna/1.ppm +hpatches_sequences/hpatches-sequences-release/i_bologna/3.ppm +hpatches_sequences/hpatches-sequences-release/i_bologna/6.ppm +hpatches_sequences/hpatches-sequences-release/v_blueprint/5.ppm +hpatches_sequences/hpatches-sequences-release/v_blueprint/2.ppm +hpatches_sequences/hpatches-sequences-release/v_blueprint/4.ppm +hpatches_sequences/hpatches-sequences-release/v_blueprint/1.ppm +hpatches_sequences/hpatches-sequences-release/v_blueprint/3.ppm 
+hpatches_sequences/hpatches-sequences-release/v_blueprint/6.ppm +hpatches_sequences/hpatches-sequences-release/i_troulos/5.ppm +hpatches_sequences/hpatches-sequences-release/i_troulos/2.ppm +hpatches_sequences/hpatches-sequences-release/i_troulos/4.ppm +hpatches_sequences/hpatches-sequences-release/i_troulos/1.ppm +hpatches_sequences/hpatches-sequences-release/i_troulos/3.ppm +hpatches_sequences/hpatches-sequences-release/i_troulos/6.ppm +hpatches_sequences/hpatches-sequences-release/i_gonnenberg/5.ppm +hpatches_sequences/hpatches-sequences-release/i_gonnenberg/2.ppm +hpatches_sequences/hpatches-sequences-release/i_gonnenberg/4.ppm +hpatches_sequences/hpatches-sequences-release/i_gonnenberg/1.ppm +hpatches_sequences/hpatches-sequences-release/i_gonnenberg/3.ppm +hpatches_sequences/hpatches-sequences-release/i_gonnenberg/6.ppm +hpatches_sequences/hpatches-sequences-release/v_war/5.ppm +hpatches_sequences/hpatches-sequences-release/v_war/2.ppm +hpatches_sequences/hpatches-sequences-release/v_war/4.ppm +hpatches_sequences/hpatches-sequences-release/v_war/1.ppm +hpatches_sequences/hpatches-sequences-release/v_war/3.ppm +hpatches_sequences/hpatches-sequences-release/v_war/6.ppm +hpatches_sequences/hpatches-sequences-release/i_autannes/5.ppm +hpatches_sequences/hpatches-sequences-release/i_autannes/2.ppm +hpatches_sequences/hpatches-sequences-release/i_autannes/4.ppm +hpatches_sequences/hpatches-sequences-release/i_autannes/1.ppm +hpatches_sequences/hpatches-sequences-release/i_autannes/3.ppm +hpatches_sequences/hpatches-sequences-release/i_autannes/6.ppm +hpatches_sequences/hpatches-sequences-release/v_bird/5.ppm +hpatches_sequences/hpatches-sequences-release/v_bird/2.ppm +hpatches_sequences/hpatches-sequences-release/v_bird/4.ppm +hpatches_sequences/hpatches-sequences-release/v_bird/1.ppm +hpatches_sequences/hpatches-sequences-release/v_bird/3.ppm +hpatches_sequences/hpatches-sequences-release/v_bird/6.ppm +hpatches_sequences/hpatches-sequences-release/v_london/5.ppm 
+hpatches_sequences/hpatches-sequences-release/v_london/2.ppm +hpatches_sequences/hpatches-sequences-release/v_london/4.ppm +hpatches_sequences/hpatches-sequences-release/v_london/1.ppm +hpatches_sequences/hpatches-sequences-release/v_london/3.ppm +hpatches_sequences/hpatches-sequences-release/v_london/6.ppm +hpatches_sequences/hpatches-sequences-release/i_fenis/5.ppm +hpatches_sequences/hpatches-sequences-release/i_fenis/2.ppm +hpatches_sequences/hpatches-sequences-release/i_fenis/4.ppm +hpatches_sequences/hpatches-sequences-release/i_fenis/1.ppm +hpatches_sequences/hpatches-sequences-release/i_fenis/3.ppm +hpatches_sequences/hpatches-sequences-release/i_fenis/6.ppm +hpatches_sequences/hpatches-sequences-release/v_graffiti/5.ppm +hpatches_sequences/hpatches-sequences-release/v_graffiti/2.ppm +hpatches_sequences/hpatches-sequences-release/v_graffiti/4.ppm +hpatches_sequences/hpatches-sequences-release/v_graffiti/1.ppm +hpatches_sequences/hpatches-sequences-release/v_graffiti/3.ppm +hpatches_sequences/hpatches-sequences-release/v_graffiti/6.ppm +hpatches_sequences/hpatches-sequences-release/i_zion/5.ppm +hpatches_sequences/hpatches-sequences-release/i_zion/2.ppm +hpatches_sequences/hpatches-sequences-release/i_zion/4.ppm +hpatches_sequences/hpatches-sequences-release/i_zion/1.ppm +hpatches_sequences/hpatches-sequences-release/i_zion/3.ppm +hpatches_sequences/hpatches-sequences-release/i_zion/6.ppm +hpatches_sequences/hpatches-sequences-release/i_toy/5.ppm +hpatches_sequences/hpatches-sequences-release/i_toy/2.ppm +hpatches_sequences/hpatches-sequences-release/i_toy/4.ppm +hpatches_sequences/hpatches-sequences-release/i_toy/1.ppm +hpatches_sequences/hpatches-sequences-release/i_toy/3.ppm +hpatches_sequences/hpatches-sequences-release/i_toy/6.ppm +hpatches_sequences/hpatches-sequences-release/i_objects/5.ppm +hpatches_sequences/hpatches-sequences-release/i_objects/2.ppm +hpatches_sequences/hpatches-sequences-release/i_objects/4.ppm 
+hpatches_sequences/hpatches-sequences-release/i_objects/1.ppm +hpatches_sequences/hpatches-sequences-release/i_objects/3.ppm +hpatches_sequences/hpatches-sequences-release/i_objects/6.ppm +hpatches_sequences/hpatches-sequences-release/v_charing/5.ppm +hpatches_sequences/hpatches-sequences-release/v_charing/2.ppm +hpatches_sequences/hpatches-sequences-release/v_charing/4.ppm +hpatches_sequences/hpatches-sequences-release/v_charing/1.ppm +hpatches_sequences/hpatches-sequences-release/v_charing/3.ppm +hpatches_sequences/hpatches-sequences-release/v_charing/6.ppm +hpatches_sequences/hpatches-sequences-release/v_maskedman/5.ppm +hpatches_sequences/hpatches-sequences-release/v_maskedman/2.ppm +hpatches_sequences/hpatches-sequences-release/v_maskedman/4.ppm +hpatches_sequences/hpatches-sequences-release/v_maskedman/1.ppm +hpatches_sequences/hpatches-sequences-release/v_maskedman/3.ppm +hpatches_sequences/hpatches-sequences-release/v_maskedman/6.ppm +hpatches_sequences/hpatches-sequences-release/i_chestnuts/5.ppm +hpatches_sequences/hpatches-sequences-release/i_chestnuts/2.ppm +hpatches_sequences/hpatches-sequences-release/i_chestnuts/4.ppm +hpatches_sequences/hpatches-sequences-release/i_chestnuts/1.ppm +hpatches_sequences/hpatches-sequences-release/i_chestnuts/3.ppm +hpatches_sequences/hpatches-sequences-release/i_chestnuts/6.ppm +hpatches_sequences/hpatches-sequences-release/i_school/5.ppm +hpatches_sequences/hpatches-sequences-release/i_school/2.ppm +hpatches_sequences/hpatches-sequences-release/i_school/4.ppm +hpatches_sequences/hpatches-sequences-release/i_school/1.ppm +hpatches_sequences/hpatches-sequences-release/i_school/3.ppm +hpatches_sequences/hpatches-sequences-release/i_school/6.ppm +hpatches_sequences/hpatches-sequences-release/i_nuts/5.ppm +hpatches_sequences/hpatches-sequences-release/i_nuts/2.ppm +hpatches_sequences/hpatches-sequences-release/i_nuts/4.ppm +hpatches_sequences/hpatches-sequences-release/i_nuts/1.ppm 
+hpatches_sequences/hpatches-sequences-release/i_nuts/3.ppm +hpatches_sequences/hpatches-sequences-release/i_nuts/6.ppm +hpatches_sequences/hpatches-sequences-release/v_feast/5.ppm +hpatches_sequences/hpatches-sequences-release/v_feast/2.ppm +hpatches_sequences/hpatches-sequences-release/v_feast/4.ppm +hpatches_sequences/hpatches-sequences-release/v_feast/1.ppm +hpatches_sequences/hpatches-sequences-release/v_feast/3.ppm +hpatches_sequences/hpatches-sequences-release/v_feast/6.ppm +hpatches_sequences/hpatches-sequences-release/v_courses/5.ppm +hpatches_sequences/hpatches-sequences-release/v_courses/2.ppm +hpatches_sequences/hpatches-sequences-release/v_courses/4.ppm +hpatches_sequences/hpatches-sequences-release/v_courses/1.ppm +hpatches_sequences/hpatches-sequences-release/v_courses/3.ppm +hpatches_sequences/hpatches-sequences-release/v_courses/6.ppm +hpatches_sequences/hpatches-sequences-release/v_yard/5.ppm +hpatches_sequences/hpatches-sequences-release/v_yard/2.ppm +hpatches_sequences/hpatches-sequences-release/v_yard/4.ppm +hpatches_sequences/hpatches-sequences-release/v_yard/1.ppm +hpatches_sequences/hpatches-sequences-release/v_yard/3.ppm +hpatches_sequences/hpatches-sequences-release/v_yard/6.ppm +hpatches_sequences/hpatches-sequences-release/v_azzola/5.ppm +hpatches_sequences/hpatches-sequences-release/v_azzola/2.ppm +hpatches_sequences/hpatches-sequences-release/v_azzola/4.ppm +hpatches_sequences/hpatches-sequences-release/v_azzola/1.ppm +hpatches_sequences/hpatches-sequences-release/v_azzola/3.ppm +hpatches_sequences/hpatches-sequences-release/v_azzola/6.ppm +hpatches_sequences/hpatches-sequences-release/i_books/5.ppm +hpatches_sequences/hpatches-sequences-release/i_books/2.ppm +hpatches_sequences/hpatches-sequences-release/i_books/4.ppm +hpatches_sequences/hpatches-sequences-release/i_books/1.ppm +hpatches_sequences/hpatches-sequences-release/i_books/3.ppm +hpatches_sequences/hpatches-sequences-release/i_books/6.ppm 
+hpatches_sequences/hpatches-sequences-release/i_yellowtent/5.ppm +hpatches_sequences/hpatches-sequences-release/i_yellowtent/2.ppm +hpatches_sequences/hpatches-sequences-release/i_yellowtent/4.ppm +hpatches_sequences/hpatches-sequences-release/i_yellowtent/1.ppm +hpatches_sequences/hpatches-sequences-release/i_yellowtent/3.ppm +hpatches_sequences/hpatches-sequences-release/i_yellowtent/6.ppm +hpatches_sequences/hpatches-sequences-release/v_bark/5.ppm +hpatches_sequences/hpatches-sequences-release/v_bark/2.ppm +hpatches_sequences/hpatches-sequences-release/v_bark/4.ppm +hpatches_sequences/hpatches-sequences-release/v_bark/1.ppm +hpatches_sequences/hpatches-sequences-release/v_bark/3.ppm +hpatches_sequences/hpatches-sequences-release/v_bark/6.ppm +hpatches_sequences/hpatches-sequences-release/v_laptop/5.ppm +hpatches_sequences/hpatches-sequences-release/v_laptop/2.ppm +hpatches_sequences/hpatches-sequences-release/v_laptop/4.ppm +hpatches_sequences/hpatches-sequences-release/v_laptop/1.ppm +hpatches_sequences/hpatches-sequences-release/v_laptop/3.ppm +hpatches_sequences/hpatches-sequences-release/v_laptop/6.ppm +hpatches_sequences/hpatches-sequences-release/i_fruits/5.ppm +hpatches_sequences/hpatches-sequences-release/i_fruits/2.ppm +hpatches_sequences/hpatches-sequences-release/i_fruits/4.ppm +hpatches_sequences/hpatches-sequences-release/i_fruits/1.ppm +hpatches_sequences/hpatches-sequences-release/i_fruits/3.ppm +hpatches_sequences/hpatches-sequences-release/i_fruits/6.ppm +hpatches_sequences/hpatches-sequences-release/v_dogman/5.ppm +hpatches_sequences/hpatches-sequences-release/v_dogman/2.ppm +hpatches_sequences/hpatches-sequences-release/v_dogman/4.ppm +hpatches_sequences/hpatches-sequences-release/v_dogman/1.ppm +hpatches_sequences/hpatches-sequences-release/v_dogman/3.ppm +hpatches_sequences/hpatches-sequences-release/v_dogman/6.ppm +hpatches_sequences/hpatches-sequences-release/i_greentea/5.ppm +hpatches_sequences/hpatches-sequences-release/i_greentea/2.ppm 
+hpatches_sequences/hpatches-sequences-release/i_greentea/4.ppm +hpatches_sequences/hpatches-sequences-release/i_greentea/1.ppm +hpatches_sequences/hpatches-sequences-release/i_greentea/3.ppm +hpatches_sequences/hpatches-sequences-release/i_greentea/6.ppm +hpatches_sequences/hpatches-sequences-release/i_londonbridge/5.ppm +hpatches_sequences/hpatches-sequences-release/i_londonbridge/2.ppm +hpatches_sequences/hpatches-sequences-release/i_londonbridge/4.ppm +hpatches_sequences/hpatches-sequences-release/i_londonbridge/1.ppm +hpatches_sequences/hpatches-sequences-release/i_londonbridge/3.ppm +hpatches_sequences/hpatches-sequences-release/i_londonbridge/6.ppm +hpatches_sequences/hpatches-sequences-release/v_there/5.ppm +hpatches_sequences/hpatches-sequences-release/v_there/2.ppm +hpatches_sequences/hpatches-sequences-release/v_there/4.ppm +hpatches_sequences/hpatches-sequences-release/v_there/1.ppm +hpatches_sequences/hpatches-sequences-release/v_there/3.ppm +hpatches_sequences/hpatches-sequences-release/v_there/6.ppm diff --git a/third_party/d2net/image_list_qualitative.txt b/third_party/d2net/image_list_qualitative.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8e4916b50cf13aae6ad847403127752bf062025 --- /dev/null +++ b/third_party/d2net/image_list_qualitative.txt @@ -0,0 +1,6 @@ +qualitative/images/pair_1/1.jpg +qualitative/images/pair_1/2.jpg +qualitative/images/pair_2/1.jpg +qualitative/images/pair_2/2.jpg +qualitative/images/pair_3/1.jpg +qualitative/images/pair_3/2.jpg diff --git a/third_party/d2net/inloc/README.md b/third_party/d2net/inloc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..598368ba5c361770c8bc571d1793a613854babfe --- /dev/null +++ b/third_party/d2net/inloc/README.md @@ -0,0 +1,15 @@ +# InLoc evaluation instructions + +Start by downloading the [InLoc_demo](https://github.com/HajimeTaira/InLoc_demo) code. 
Once it is up and running according to the official instructions, you can copy and paste all the files available here, overwriting the `Features_WUSTL` and `parfor_sparseGV` functions. `generate_list.m` will generate `image_list.txt` containing the queries and top 100 database matches (run `sort -u image_list.txt > image_list_unique.txt` to remove the duplicates). After extracting features for all the images in `image_list_unique.txt`, you can run `custom_demo` directly. + +The feature extraction part for D2-Net can be done using the following command: `python extract_features.py --image_list_file /path/to/image_list_unique.txt --multiscale --output_format .mat`. + +In case you plan on using your own features, don't forget to change the extension in `Features_WUSTL.m`. The local features are supposed to be stored in the `mat` format with two fields: + +- `keypoints` - `N x 3` matrix with `x, y, scale` coordinates of each keypoint in COLMAP format (the `X` axis points to the right, the `Y` axis to the bottom), + +- `descriptors` - `N x D` matrix with the descriptors. + +The evaluation pipeline is live at [visuallocalization.net](https://www.visuallocalization.net/). In order to generate a submission file, please use the provided [ImgList2text](https://github.com/HajimeTaira/InLoc_demo/blob/master/functions/utils/ImgList2text.m) function. + +We have also provided the `merge_files` MATLAB script that was used to merge the solutions of D2-Net Multiscale and Dense InLoc based on the view synthesis score. It can be used as follows: `merge_files('output/densePV_top10_shortlist_method1.mat', 'outputs/densePV_top10_shortlist_method2.mat')`. 
function [f, d] = features_custom(I_path, ext)
% FEATURES_CUSTOM Load precomputed local features stored next to an image.
%
%   [f, d] = features_custom(I_path) loads the MAT-format file
%   [I_path '.d2-net'].
%   [f, d] = features_custom(I_path, ext) loads [I_path ext] instead, so
%   features with another extension can be used without editing this file.
%
%   Inputs:
%     I_path - path of the image; the feature file name is I_path with the
%              extension appended.
%     ext    - (optional) feature file extension, default '.d2-net'.
%
%   Outputs:
%     f - double, transpose of the first three columns of the stored
%         `keypoints` field (one column per keypoint).
%     d - double, transpose of the stored `descriptors` field
%         (one column per descriptor).
%
% NOTE(review): the declared name differs from the file name
% (Features_WUSTL.m), and parfor_sparseGV calls `features_WUSTL`. MATLAB
% dispatches on the file name, so confirm the letter case matches on
% case-sensitive file systems.
    if nargin < 2 || isempty(ext)
        ext = '.d2-net';
    end

    % '-mat' forces MAT-file parsing regardless of the file extension.
    data = load([I_path ext], '-mat');
    f = double(data.keypoints(:, 1 : 3).');
    d = double(data.descriptors.');
end
features_WUSTL(Iqname); + [qfdir, ~, ~] = fileparts(qfmatname); + if exist(qfdir, 'dir') ~= 7 + mkdir(qfdir); + end + save('-v6', qfmatname, 'f', 'd'); + end + features_q = load(qfmatname); + + dbfmatname = fullfile(params.input.feature.dir, params.data.db.cutout.dir, [dbname, params.input.feature.db_sps_matformat]); + if exist(dbfmatname, 'file') ~= 2 + Idbname = fullfile(params.data.dir, params.data.db.cutout.dir, dbname); + [f, d] = features_WUSTL(Idbname); + [dbfdir, ~, ~] = fileparts(dbfmatname); + if exist(dbfdir, 'dir') ~= 7 + mkdir(dbfdir); + end + save('-v6', dbfmatname, 'f', 'd'); + end + features_db = load(dbfmatname); + + %geometric verification + if size(features_db.d, 2) < 6 + H = nan(3, 3); + inls_qidx = []; + inls_dbidx = []; + inliernum = 0; + matches = []; + inliers = []; + else + + %geometric verification (homography lo-ransac) + [matches, inliers, H, ~] = at_sparseransac(features_q.f,features_q.d,features_db.f,features_db.d,3,10); + inliernum = length(inliers); + inls_qidx = inliers(1, :); inls_dbidx = inliers(2, :); + end + + %save + if exist(fullfile(params.output.gv_sparse.dir, qname), 'dir') ~= 7 + mkdir(fullfile(params.output.gv_sparse.dir, qname)); + end + save('-v6', this_sparsegv_matname, 'H', 'inliernum', 'inls_qidx', 'inls_dbidx', 'matches', 'inliers'); + +% %debug +% Iq = imread(fullfile(params.data.dir, params.data.q.dir, qname)); +% Idb = imread(fullfile(params.data.dir, params.data.db.cutout.dir, dbname)); +% figure(); +% ultimateSubplot ( 2, 1, 1, 1, 0.01, 0.05 ); +% imshow(rgb2gray(Iq));hold on; +% plot(features_q.f(1, inls_qidx), features_q.f(2, inls_qidx),'g.'); +% ultimateSubplot ( 2, 1, 2, 1, 0.01, 0.05 ); +% imshow(rgb2gray(Idb));hold on; +% plot(features_db.f(1, inls_dbidx), features_db.f(2, inls_dbidx),'g.'); +% +% keyboard; + +end + + + +end + diff --git a/third_party/d2net/inloc/generate_list.m b/third_party/d2net/inloc/generate_list.m new file mode 100644 index 
% Write image_list.txt: for every query, the query image path followed by
% the paths of its top retrieved database images (at most shortlist_topN).
startup;
params = setup_project;

% The loop below reads ImgList_original, which is presumably created by
% this retrieval step - TODO confirm.
ht_retrieval;

shortlist_topN = 100;

query_dir = fullfile(params.data.dir, params.data.q.dir);
db_dir = fullfile(params.data.dir, params.data.db.cutout.dir);

image_list_file = fopen('image_list.txt', 'w');
if image_list_file == -1
    % Fail early with a clear message instead of an invalid-identifier
    % error on the first fprintf.
    error('generate_list:fopenFailed', 'Could not open image_list.txt for writing.');
end

try
    for ii = 1:1:length(ImgList_original)
        query_image_path = [query_dir '/' ImgList_original(ii).queryname];

        fprintf(image_list_file, '%s\n', query_image_path);

        % Never index past the matches that are actually available.
        n_db = min(shortlist_topN, numel(ImgList_original(ii).topNname));
        for jj = 1:1:n_db
            db_image_path = [db_dir '/' ImgList_original(ii).topNname{jj}];

            fprintf(image_list_file, '%s\n', db_image_path);
        end
    end
catch err
    % Do not leak the file handle when writing fails half-way.
    fclose(image_list_file);
    rethrow(err);
end

fclose(image_list_file);
"""Preprocess one undistorted MegaDepth scene into a single ``.npz`` file.

Reads the COLMAP text model (``cameras.txt``, ``points3D.txt``,
``images.txt``) of the scene, keeps the images that have a usable depth map
and stores per-image intrinsics and poses together with pairwise overlap,
scale-ratio and principal-axis-angle matrices.
"""
import argparse

import imagesize

import numpy as np

import os

import sys


def exit_if_missing(path):
    """Skip the scene (exit status 0) when a required directory is absent."""
    if not os.path.exists(path):
        print('[Skip] %s does not exist.' % path, file=sys.stderr)
        # Status 0 keeps the behaviour of the previous bare `exit()`;
        # `sys.exit` does not depend on the `site` module being loaded.
        sys.exit(0)


def pack_optional_entries(entries):
    """Return `entries` in a form `np.savez` accepts on every NumPy version.

    Lists that mix `None` with arrays are ragged; NumPy >= 1.24 refuses to
    convert them implicitly, so they are packed into a 1-D object array.
    Lists without `None` are returned untouched so the saved layout does not
    change in that case.
    """
    if all(entry is not None for entry in entries):
        return entries
    packed = np.empty(len(entries), dtype=object)
    for position, entry in enumerate(entries):
        packed[position] = entry
    return packed


parser = argparse.ArgumentParser(description='MegaDepth preprocessing script')

parser.add_argument(
    '--base_path', type=str, required=True,
    help='path to MegaDepth'
)
parser.add_argument(
    '--scene_id', type=str, required=True,
    help='scene ID'
)

parser.add_argument(
    '--output_path', type=str, required=True,
    help='path to the output directory'
)

args = parser.parse_args()

# Drop every trailing separator: the `len(base_path) + 1` offset used below
# to build dataset-relative paths is only correct without one.
base_path = args.base_path.rstrip('/\\')
scene_id = args.scene_id

base_depth_path = os.path.join(
    base_path, 'phoenix/S6/zl548/MegaDepth_v1'
)
base_undistorted_sfm_path = os.path.join(
    base_path, 'Undistorted_SfM'
)

undistorted_sparse_path = os.path.join(
    base_undistorted_sfm_path, scene_id, 'sparse-txt'
)
exit_if_missing(undistorted_sparse_path)

depths_path = os.path.join(
    base_depth_path, scene_id, 'dense0', 'depths'
)
exit_if_missing(depths_path)

images_path = os.path.join(
    base_undistorted_sfm_path, scene_id, 'images'
)
exit_if_missing(images_path)

# Process cameras.txt
with open(os.path.join(undistorted_sparse_path, 'cameras.txt'), 'r') as f:
    raw = f.readlines()[3:]  # skip the header

# camera id -> every numeric field after the model name
# (presumably width, height, fx, fy, cx, cy - TODO confirm camera model).
camera_intrinsics = {}
for camera_line in raw:
    fields = camera_line.split(' ')
    camera_intrinsics[int(fields[0])] = [float(elem) for elem in fields[2:]]

# Process points3D.txt
with open(os.path.join(undistorted_sparse_path, 'points3D.txt'), 'r') as f:
    raw = f.readlines()[3:]  # skip the header

points3D = {}
for point_line in raw:
    fields = point_line.split(' ')
    points3D[int(fields[0])] = np.array([
        float(fields[1]), float(fields[2]), float(fields[3])
    ])

# Process images.txt
with open(os.path.join(undistorted_sparse_path, 'images.txt'), 'r') as f:
    raw = f.readlines()[4:]  # skip the header

image_id_to_idx = {}
image_names = []
raw_pose = []
camera = []
points3D_id_to_2D = []
n_points3D = []
# Every image takes two lines: its header and its 2D observations.
for idx, (image, points) in enumerate(zip(raw[::2], raw[1::2])):
    image = image.split(' ')
    points = points.split(' ')

    image_id_to_idx[int(image[0])] = idx

    image_name = image[-1].strip('\n')
    image_names.append(image_name)

    # Fields between the image id and the camera id: quaternion, translation.
    raw_pose.append([float(elem) for elem in image[1:-2]])
    camera.append(int(image[-2]))
    current_points3D_id_to_2D = {}
    for x, y, point3D_id in zip(points[::3], points[1::3], points[2::3]):
        # -1 marks an observation without a triangulated 3D point.
        if int(point3D_id) == -1:
            continue
        current_points3D_id_to_2D[int(point3D_id)] = [float(x), float(y)]
    points3D_id_to_2D.append(current_points3D_id_to_2D)
    n_points3D.append(len(current_points3D_id_to_2D))
n_images = len(image_names)

# Image and depthmaps paths
image_paths = []
depth_paths = []
for image_name in image_names:
    image_path = os.path.join(images_path, image_name)

    # Path to the depth file
    depth_path = os.path.join(
        depths_path, '%s.h5' % os.path.splitext(image_name)[0]
    )

    if os.path.exists(depth_path):
        # Check if depth map or background / foreground mask
        file_size = os.stat(depth_path).st_size
        # Rough estimate - 75KB might work as well
        if file_size < 100 * 1024:
            depth_paths.append(None)
            image_paths.append(None)
        else:
            # Store paths relative to base_path.
            depth_paths.append(depth_path[len(base_path) + 1:])
            image_paths.append(image_path[len(base_path) + 1:])
    else:
        depth_paths.append(None)
        image_paths.append(None)

# Camera configuration
intrinsics = []
poses = []
principal_axis = []
points3D_id_to_ndepth = []
for idx, image_name in enumerate(image_names):
    if image_paths[idx] is None:
        # Placeholders keep every list aligned with `image_names`.
        intrinsics.append(None)
        poses.append(None)
        principal_axis.append([0, 0, 0])
        points3D_id_to_ndepth.append({})
        continue
    image_intrinsics = camera_intrinsics[camera[idx]]
    K = np.zeros([3, 3])
    K[0, 0] = image_intrinsics[2]
    K[0, 2] = image_intrinsics[4]
    K[1, 1] = image_intrinsics[3]
    K[1, 2] = image_intrinsics[5]
    K[2, 2] = 1
    intrinsics.append(K)

    image_pose = raw_pose[idx]
    qvec = np.array(image_pose[:4])
    qvec = qvec / np.linalg.norm(qvec)
    w, x, y, z = qvec
    # Rotation matrix of the unit quaternion (w, x, y, z).
    R = np.array([
        [
            1 - 2 * y * y - 2 * z * z,
            2 * x * y - 2 * z * w,
            2 * x * z + 2 * y * w
        ],
        [
            2 * x * y + 2 * z * w,
            1 - 2 * x * x - 2 * z * z,
            2 * y * z - 2 * x * w
        ],
        [
            2 * x * z - 2 * y * w,
            2 * y * z + 2 * x * w,
            1 - 2 * x * x - 2 * y * y
        ]
    ])
    principal_axis.append(R[2, :])
    t = image_pose[4:7]
    # World-to-Camera pose
    current_pose = np.zeros([4, 4])
    current_pose[:3, :3] = R
    current_pose[:3, 3] = t
    current_pose[3, 3] = 1
    # Camera-to-World pose
    # pose = np.zeros([4, 4])
    # pose[: 3, : 3] = np.transpose(R)
    # pose[: 3, 3] = -np.matmul(np.transpose(R), t)
    # pose[3, 3] = 1
    poses.append(current_pose)

    # Depth of every observed 3D point along the camera z axis, divided by
    # the mean focal length.
    current_points3D_id_to_ndepth = {}
    for point3D_id in points3D_id_to_2D[idx].keys():
        p3d = points3D[point3D_id]
        current_points3D_id_to_ndepth[point3D_id] = (
            (np.dot(R[2, :], p3d) + t[2]) / (.5 * (K[0, 0] + K[1, 1]))
        )
    points3D_id_to_ndepth.append(current_points3D_id_to_ndepth)
principal_axis = np.array(principal_axis)
# Pairwise angle (degrees) between the principal axes of all images.
angles = np.rad2deg(np.arccos(
    np.clip(
        np.dot(principal_axis, np.transpose(principal_axis)),
        -1, 1
    )
))

# Compute overlap score
overlap_matrix = np.full([n_images, n_images], -1.)
scale_ratio_matrix = np.full([n_images, n_images], -1.)
for idx1 in range(n_images):
    if image_paths[idx1] is None or depth_paths[idx1] is None:
        continue
    for idx2 in range(idx1 + 1, n_images):
        if image_paths[idx2] is None or depth_paths[idx2] is None:
            continue
        matches = (
            points3D_id_to_2D[idx1].keys() &
            points3D_id_to_2D[idx2].keys()
        )
        if len(matches) == 0:
            # No shared 3D point: the overlap is 0 and the scale ratio keeps
            # its -1 default. Handling this first also avoids a division by
            # zero for images that observe no 3D point at all.
            overlap_matrix[idx1, idx2] = 0.
            overlap_matrix[idx2, idx1] = 0.
            continue
        # Fraction of each image's 3D points that the other image also sees.
        overlap_matrix[idx1, idx2] = (
            len(matches) / len(points3D_id_to_2D[idx1])
        )
        overlap_matrix[idx2, idx1] = (
            len(matches) / len(points3D_id_to_2D[idx2])
        )
        points3D_id_to_ndepth1 = points3D_id_to_ndepth[idx1]
        points3D_id_to_ndepth2 = points3D_id_to_ndepth[idx2]
        nd1 = np.array([points3D_id_to_ndepth1[match] for match in matches])
        nd2 = np.array([points3D_id_to_ndepth2[match] for match in matches])
        min_scale_ratio = np.min(np.maximum(nd1 / nd2, nd2 / nd1))
        scale_ratio_matrix[idx1, idx2] = min_scale_ratio
        scale_ratio_matrix[idx2, idx1] = min_scale_ratio

# Scenes are processed in parallel, so tolerate an already existing directory.
os.makedirs(args.output_path, exist_ok=True)

np.savez(
    os.path.join(args.output_path, '%s.npz' % scene_id),
    image_paths=image_paths,
    depth_paths=depth_paths,
    intrinsics=pack_optional_entries(intrinsics),
    poses=pack_optional_entries(poses),
    overlap_matrix=overlap_matrix,
    scale_ratio_matrix=scale_ratio_matrix,
    angles=angles,
    n_points3D=n_points3D,
    points3D_id_to_2D=points3D_id_to_2D,
    points3D_id_to_ndepth=points3D_id_to_ndepth
)
"""Undistort every MegaDepth scene with COLMAP and export it as text.

For each scene below the MegaDepth depth directory this runs COLMAP's
`image_undistorter` on the scene's SfM model and then `model_converter` to
write the undistorted sparse model in TXT format to `sparse-txt`.
"""
import argparse

import imagesize

import os

import subprocess

import sys


def run_colmap(colmap_path, arguments):
    """Run one COLMAP command and return True when it exits with status 0.

    `colmap_path` is the directory holding the `colmap` executable and
    `arguments` is the list of command-line arguments that follows it.
    """
    command = [os.path.join(colmap_path, 'colmap')] + arguments
    return_code = subprocess.call(command)
    if return_code != 0:
        print(
            '[Warning] `%s` exited with status %d.' % (
                ' '.join(command), return_code
            ),
            file=sys.stderr
        )
    return return_code == 0


parser = argparse.ArgumentParser(description='MegaDepth Undistortion')

parser.add_argument(
    '--colmap_path', type=str, required=True,
    help='path to colmap executable'
)
parser.add_argument(
    '--base_path', type=str, required=True,
    help='path to MegaDepth'
)

args = parser.parse_args()

sfm_path = os.path.join(
    args.base_path, 'MegaDepth_v1_SfM'
)
base_depth_path = os.path.join(
    args.base_path, 'phoenix/S6/zl548/MegaDepth_v1'
)
output_path = os.path.join(
    args.base_path, 'Undistorted_SfM'
)

# `exist_ok` lets an interrupted run be restarted.
os.makedirs(output_path, exist_ok=True)

for scene_name in os.listdir(base_depth_path):
    image_path = os.path.join(
        base_depth_path, scene_name, 'dense0', 'imgs'
    )
    if not os.path.exists(image_path):
        continue

    # Created only for scenes that are processed, so skipped scenes do not
    # leave empty directories behind.
    current_output_path = os.path.join(output_path, scene_name)
    os.makedirs(current_output_path, exist_ok=True)

    # Find the maximum image size in scene.
    max_image_size = max(
        (
            max(imagesize.get(os.path.join(image_path, image_name)))
            for image_name in os.listdir(image_path)
        ),
        default=0
    )

    # Undistort the images and update the reconstruction.
    undistorted = run_colmap(args.colmap_path, [
        'image_undistorter',
        '--image_path', os.path.join(sfm_path, scene_name, 'images'),
        '--input_path', os.path.join(
            sfm_path, scene_name, 'sparse', 'manhattan', '0'
        ),
        '--output_path', current_output_path,
        '--max_image_size', str(max_image_size)
    ])
    if not undistorted:
        # Nothing valid to convert for this scene; move on to the next one.
        continue

    # Transform the reconstruction to raw text format.
    sparse_txt_path = os.path.join(current_output_path, 'sparse-txt')
    os.makedirs(sparse_txt_path, exist_ok=True)
    run_colmap(args.colmap_path, [
        'model_converter',
        '--input_path', os.path.join(current_output_path, 'sparse'),
        '--output_path', sparse_txt_path,
        '--output_type', 'TXT'
    ])
matplotlib.pyplot as plt\n", + "\n", + "import numpy as np\n", + "\n", + "import os\n", + "\n", + "from PIL import Image\n", + "\n", + "from skimage.feature import match_descriptors\n", + "from skimage.measure import ransac\n", + "from skimage.transform import ProjectiveTransform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Don't forget to run feature extraction before running this script\n", + "```python extract_features.py --image_list_file image_list_qualitative.txt```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Change the pair index here (possible values: 1, 2 or 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pair_idx = 2\n", + "assert(pair_idx in [1, 2, 3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading the features" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "pair_path = os.path.join('images', 'pair_%d' % pair_idx)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "image1 = np.array(Image.open(os.path.join(pair_path, '1.jpg')))\n", + "image2 = np.array(Image.open(os.path.join(pair_path, '2.jpg')))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "feat1 = np.load(os.path.join(pair_path, '1.jpg.d2-net'))\n", + "feat2 = np.load(os.path.join(pair_path, '2.jpg.d2-net'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mutual nearest neighbors matching" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "matches = match_descriptors(feat1['descriptors'], feat2['descriptors'], cross_check=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Number of raw matches: 296.\n" + ] + } + ], + "source": [ + "print('Number of raw matches: %d.' % matches.shape[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Homography fitting" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of inliers: 69.\n" + ] + } + ], + "source": [ + "keypoints_left = feat1['keypoints'][matches[:, 0], : 2]\n", + "keypoints_right = feat2['keypoints'][matches[:, 1], : 2]\n", + "np.random.seed(0)\n", + "model, inliers = ransac(\n", + " (keypoints_left, keypoints_right),\n", + " ProjectiveTransform, min_samples=4,\n", + " residual_threshold=4, max_trials=10000\n", + ")\n", + "n_inliers = np.sum(inliers)\n", + "print('Number of inliers: %d.' % n_inliers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plotting" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "inlier_keypoints_left = [cv2.KeyPoint(point[0], point[1], 1) for point in keypoints_left[inliers]]\n", + "inlier_keypoints_right = [cv2.KeyPoint(point[0], point[1], 1) for point in keypoints_right[inliers]]\n", + "placeholder_matches = [cv2.DMatch(idx, idx, 1) for idx in range(n_inliers)]\n", + "image3 = cv2.drawMatches(image1, inlier_keypoints_left, image2, inlier_keypoints_right, placeholder_matches, None)\n", + "\n", + "plt.figure(figsize=(15, 15))\n", + "plt.imshow(image3)\n", + "plt.axis('off')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/third_party/d2net/qualitative/images/pair_1/1.jpg b/third_party/d2net/qualitative/images/pair_1/1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..30e969e4214b17724749421acbde8e25d2378ec1 --- /dev/null +++ b/third_party/d2net/qualitative/images/pair_1/1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3fbf5145372316ed0d7b3e5c23183e05094ee95b60d5f669e2a03d0783bc43 +size 63747 diff --git a/third_party/d2net/qualitative/images/pair_1/2.jpg b/third_party/d2net/qualitative/images/pair_1/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f289909ce7520aa712b4d92c2a16867f6466d1e4 --- /dev/null +++ b/third_party/d2net/qualitative/images/pair_1/2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4cc4ee1bd7b2c342a9e4d3ce5a66850d1b8b77d8113642de55338f02ddaa9e35 +size 40726 diff --git a/third_party/d2net/qualitative/images/pair_2/1.jpg b/third_party/d2net/qualitative/images/pair_2/1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..588806f2ad92391585c289aa1e2c7b96313ea0f9 --- /dev/null +++ b/third_party/d2net/qualitative/images/pair_2/1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb840ffd7e84d42fcb51338c5299ce18b07bbe183f764422616c034a14bf0e25 +size 81310 diff --git a/third_party/d2net/qualitative/images/pair_2/2.jpg b/third_party/d2net/qualitative/images/pair_2/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f2737214e4c8ad776262006d556e1ddd1922b6be --- /dev/null +++ b/third_party/d2net/qualitative/images/pair_2/2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dff3a9db9e38ac796fa96144c6f7fbe212852559cba864e3319f826fa1c4ff0 +size 77962 diff --git a/third_party/d2net/qualitative/images/pair_3/1.jpg b/third_party/d2net/qualitative/images/pair_3/1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a08411d75a88034d4b48ab47813bbb9821aaab6f --- /dev/null +++ b/third_party/d2net/qualitative/images/pair_3/1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4393bb1531361b180dc1def1213bfae22aabafe8696a956094d4ae9cfe3328d1 +size 565714 diff --git a/third_party/d2net/qualitative/images/pair_3/2.jpg b/third_party/d2net/qualitative/images/pair_3/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bfa7a264d640c74c1620bfb293d6182891e0f4bb --- /dev/null +++ b/third_party/d2net/qualitative/images/pair_3/2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae9c4b91e00446bf45a30c0ecb65abc17328aae10eb21286b4205e959898cec3 +size 199241 diff --git a/third_party/d2net/train.py b/third_party/d2net/train.py new file mode 100644 index 
0000000000000000000000000000000000000000..5817f1712bda0779175fb18437d1f8c263f29f3b --- /dev/null +++ b/third_party/d2net/train.py @@ -0,0 +1,279 @@ +import argparse + +import numpy as np + +import os + +import shutil + +import torch +import torch.optim as optim + +from torch.utils.data import DataLoader + +from tqdm import tqdm + +import warnings + +from lib.dataset import MegaDepthDataset +from lib.exceptions import NoGradientError +from lib.loss import loss_function +from lib.model import D2Net + + +# CUDA +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if use_cuda else "cpu") + +# Seed +torch.manual_seed(1) +if use_cuda: + torch.cuda.manual_seed(1) +np.random.seed(1) + +# Argument parsing +parser = argparse.ArgumentParser(description='Training script') + +parser.add_argument( + '--dataset_path', type=str, required=True, + help='path to the dataset' +) +parser.add_argument( + '--scene_info_path', type=str, required=True, + help='path to the processed scenes' +) + +parser.add_argument( + '--preprocessing', type=str, default='caffe', + help='image preprocessing (caffe or torch)' +) +parser.add_argument( + '--model_file', type=str, default='models/d2_ots.pth', + help='path to the full model' +) + +parser.add_argument( + '--num_epochs', type=int, default=10, + help='number of training epochs' +) +parser.add_argument( + '--lr', type=float, default=1e-3, + help='initial learning rate' +) +parser.add_argument( + '--batch_size', type=int, default=1, + help='batch size' +) +parser.add_argument( + '--num_workers', type=int, default=4, + help='number of workers for data loading' +) + +parser.add_argument( + '--use_validation', dest='use_validation', action='store_true', + help='use the validation split' +) +parser.set_defaults(use_validation=False) + +parser.add_argument( + '--log_interval', type=int, default=250, + help='loss logging interval' +) + +parser.add_argument( + '--log_file', type=str, default='log.txt', + help='loss logging file' +) + 
+parser.add_argument( + '--plot', dest='plot', action='store_true', + help='plot training pairs' +) +parser.set_defaults(plot=False) + +parser.add_argument( + '--checkpoint_directory', type=str, default='checkpoints', + help='directory for training checkpoints' +) +parser.add_argument( + '--checkpoint_prefix', type=str, default='d2', + help='prefix for training checkpoints' +) + +args = parser.parse_args() + +print(args) + +# Create the folders for plotting if need be +if args.plot: + plot_path = 'train_vis' + if os.path.isdir(plot_path): + print('[Warning] Plotting directory already exists.') + else: + os.mkdir(plot_path) + +# Creating CNN model +model = D2Net( + model_file=args.model_file, + use_cuda=use_cuda +) + +# Optimizer +optimizer = optim.Adam( + filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr +) + +# Dataset +if args.use_validation: + validation_dataset = MegaDepthDataset( + scene_list_path='megadepth_utils/valid_scenes.txt', + scene_info_path=args.scene_info_path, + base_path=args.dataset_path, + train=False, + preprocessing=args.preprocessing, + pairs_per_scene=25 + ) + validation_dataloader = DataLoader( + validation_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers + ) + +training_dataset = MegaDepthDataset( + scene_list_path='megadepth_utils/train_scenes.txt', + scene_info_path=args.scene_info_path, + base_path=args.dataset_path, + preprocessing=args.preprocessing +) +training_dataloader = DataLoader( + training_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers +) + + +# Define epoch function +def process_epoch( + epoch_idx, + model, loss_function, optimizer, dataloader, device, + log_file, args, train=True +): + epoch_losses = [] + + torch.set_grad_enabled(train) + + progress_bar = tqdm(enumerate(dataloader), total=len(dataloader)) + for batch_idx, batch in progress_bar: + if train: + optimizer.zero_grad() + + batch['train'] = train + batch['epoch_idx'] = epoch_idx + batch['batch_idx'] = 
batch_idx + batch['batch_size'] = args.batch_size + batch['preprocessing'] = args.preprocessing + batch['log_interval'] = args.log_interval + + try: + loss = loss_function(model, batch, device, plot=args.plot) + except NoGradientError: + continue + + current_loss = loss.data.cpu().numpy()[0] + epoch_losses.append(current_loss) + + progress_bar.set_postfix(loss=('%.4f' % np.mean(epoch_losses))) + + if batch_idx % args.log_interval == 0: + log_file.write('[%s] epoch %d - batch %d / %d - avg_loss: %f\n' % ( + 'train' if train else 'valid', + epoch_idx, batch_idx, len(dataloader), np.mean(epoch_losses) + )) + + if train: + loss.backward() + optimizer.step() + + log_file.write('[%s] epoch %d - avg_loss: %f\n' % ( + 'train' if train else 'valid', + epoch_idx, + np.mean(epoch_losses) + )) + log_file.flush() + + return np.mean(epoch_losses) + + +# Create the checkpoint directory +if os.path.isdir(args.checkpoint_directory): + print('[Warning] Checkpoint directory already exists.') +else: + os.mkdir(args.checkpoint_directory) + + +# Open the log file for writing +if os.path.exists(args.log_file): + print('[Warning] Log file already exists.') +log_file = open(args.log_file, 'a+') + +# Initialize the history +train_loss_history = [] +validation_loss_history = [] +if args.use_validation: + validation_dataset.build_dataset() + min_validation_loss = process_epoch( + 0, + model, loss_function, optimizer, validation_dataloader, device, + log_file, args, + train=False + ) + +# Start the training +for epoch_idx in range(1, args.num_epochs + 1): + # Process epoch + training_dataset.build_dataset() + train_loss_history.append( + process_epoch( + epoch_idx, + model, loss_function, optimizer, training_dataloader, device, + log_file, args + ) + ) + + if args.use_validation: + validation_loss_history.append( + process_epoch( + epoch_idx, + model, loss_function, optimizer, validation_dataloader, device, + log_file, args, + train=False + ) + ) + + # Save the current checkpoint + 
checkpoint_path = os.path.join( + args.checkpoint_directory, + '%s.%02d.pth' % (args.checkpoint_prefix, epoch_idx) + ) + checkpoint = { + 'args': args, + 'epoch_idx': epoch_idx, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'train_loss_history': train_loss_history, + 'validation_loss_history': validation_loss_history + } + torch.save(checkpoint, checkpoint_path) + if ( + args.use_validation and + validation_loss_history[-1] < min_validation_loss + ): + min_validation_loss = validation_loss_history[-1] + best_checkpoint_path = os.path.join( + args.checkpoint_directory, + '%s.best.pth' % args.checkpoint_prefix + ) + shutil.copy(checkpoint_path, best_checkpoint_path) + +# Close the log file +log_file.close() diff --git a/third_party/lanet/.gitattributes b/third_party/lanet/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..ec4a626fbb7799f6a25b45fb86344b2bf7b37e64 --- /dev/null +++ b/third_party/lanet/.gitattributes @@ -0,0 +1 @@ +*.pth filter=lfs diff=lfs merge=lfs -text diff --git a/third_party/lanet/LICENSE b/third_party/lanet/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..df725685f32f70fdf841379ed1ae5273600c7248 --- /dev/null +++ b/third_party/lanet/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Changhao Wang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/lanet/README.md b/third_party/lanet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0bdac20ad300970ff3949800f3dd14e5efbd4001 --- /dev/null +++ b/third_party/lanet/README.md @@ -0,0 +1,72 @@ +# Rethinking Low-level Features for Interest Point Detection and Description + +## Dependency + - pytorch + - torchvision + - cv2 + - tqdm + + We use cuda 11.4/python 3.8.13/torch 1.10.0/torchvision 0.11.0/opencv 3.4.8 for training and testing. + + +## Pre-trained models +We provide two versions of LANet with different structures in [network_v0](network_v0) and [network_v1](network_v1); the corresponding pre-trained models are in [checkpoints](checkpoints). + - v0: The original version used in our paper. + - v1: An improved version that has better overall performance. 
+ + +## Training +Download the COCO dataset: +``` +cd datasets/COCO/ +wget http://images.cocodataset.org/zips/train2017.zip +unzip train2017.zip +``` +Prepare the training file: +``` +python datasets/prepare_coco.py --raw_dir datasets/COCO/train2017/ --saved_dir datasets/COCO/ +``` + +To train the model (v0) on COCO dataset, run: +``` +python main.py --train_root datasets/COCO/train2017/ --train_txt datasets/COCO/train2017.txt +``` + + +## Evaluation +### Evaluation on HPatches dataset +Download the HPatches dataset: +``` +cd datasets/HPatches/ +wget http://icvl.ee.ic.ac.uk/vbalnt/hpatches/hpatches-sequences-release.tar.gz +tar -xvf hpatches-sequences-release.tar.gz +``` + +To evaluate the pre-trained model, run: +``` +python test.py --test_dir ./datasets/HPatches/hpatches-sequences-release +``` + + +## License +The code is released under the [MIT license](LICENSE). + + +## Citation +Please use the following citation when referencing our work: +``` +@InProceedings{Wang_2022_ACCV, + author = {Changhao Wang and Guanwen Zhang and Zhengyun Cheng and Wei Zhou}, + title = {Rethinking Low-level Features for Interest Point Detection and Description}, + booktitle = {Computer Vision - {ACCV} 2022 - 16th Asian Conference on Computer + Vision, Macao, China, December 4-8, 2022, Proceedings, Part {II}}, + series = {Lecture Notes in Computer Science}, + volume = {13842}, + pages = {108--123}, + year = {2022} +} +``` + + +## Related Projects +https://github.com/TRI-ML/KP2D diff --git a/third_party/lanet/augmentations.py b/third_party/lanet/augmentations.py new file mode 100644 index 0000000000000000000000000000000000000000..f4e4496c77ce8fc8cdadb230dd0d0750166152a9 --- /dev/null +++ b/third_party/lanet/augmentations.py @@ -0,0 +1,342 @@ +# From https://github.com/TRI-ML/KP2D. + +# Copyright 2020 Toyota Research Institute. All rights reserved. 
+ +import random +from math import pi + +import cv2 +import numpy as np +import torch +import torchvision +import torchvision.transforms as transforms +from PIL import Image + +from utils import image_grid + + +def filter_dict(dict, keywords): + """ + Returns only the keywords that are part of a dictionary + + Parameters + ---------- + dictionary : dict + Dictionary for filtering + keywords : list of str + Keywords that will be filtered + + Returns + ------- + keywords : list of str + List containing the keywords that are keys in dictionary + """ + return [key for key in keywords if key in dict] + + +def resize_sample(sample, image_shape, image_interpolation=Image.ANTIALIAS): + """ + Resizes a sample, which contains an input image. + + Parameters + ---------- + sample : dict + Dictionary with sample values (output from a dataset's __getitem__ method) + shape : tuple (H,W) + Output shape + image_interpolation : int + Interpolation mode + + Returns + ------- + sample : dict + Resized sample + """ + # image + image_transform = transforms.Resize(image_shape, interpolation=image_interpolation) + sample['image'] = image_transform(sample['image']) + return sample + +def spatial_augment_sample(sample): + """ Apply spatial augmentation to an image (flipping and random affine transformation).""" + augment_image = transforms.Compose([ + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)) + + ]) + sample['image'] = augment_image(sample['image']) + + return sample + +def unnormalize_image(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)): + """ Counterpart method of torchvision.transforms.Normalize.""" + for t, m, s in zip(tensor, mean, std): + t.div_(1 / s).sub_(-m) + return tensor + + +def sample_homography( + shape, perspective=True, scaling=True, rotation=True, translation=True, + n_scales=100, n_angles=100, scaling_amplitude=0.1, perspective_amplitude=0.4, + patch_ratio=0.8, 
max_angle=pi/4): + """ Sample a random homography that includes perspective, scale, translation and rotation operations.""" + + width = float(shape[1]) + hw_ratio = float(shape[0]) / float(shape[1]) + + pts1 = np.stack([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]], axis=0) + pts2 = pts1.copy() * patch_ratio + pts2[:,1] *= hw_ratio + + if perspective: + + perspective_amplitude_x = np.random.normal(0., perspective_amplitude/2, (2)) + perspective_amplitude_y = np.random.normal(0., hw_ratio * perspective_amplitude/2, (2)) + + perspective_amplitude_x = np.clip(perspective_amplitude_x, -perspective_amplitude/2, perspective_amplitude/2) + perspective_amplitude_y = np.clip(perspective_amplitude_y, hw_ratio * -perspective_amplitude/2, hw_ratio * perspective_amplitude/2) + + pts2[0,0] -= perspective_amplitude_x[1] + pts2[0,1] -= perspective_amplitude_y[1] + + pts2[1,0] -= perspective_amplitude_x[0] + pts2[1,1] += perspective_amplitude_y[1] + + pts2[2,0] += perspective_amplitude_x[1] + pts2[2,1] -= perspective_amplitude_y[0] + + pts2[3,0] += perspective_amplitude_x[0] + pts2[3,1] += perspective_amplitude_y[0] + + if scaling: + + random_scales = np.random.normal(1, scaling_amplitude/2, (n_scales)) + random_scales = np.clip(random_scales, 1-scaling_amplitude/2, 1+scaling_amplitude/2) + + scales = np.concatenate([[1.], random_scales], 0) + center = np.mean(pts2, axis=0, keepdims=True) + scaled = np.expand_dims(pts2 - center, axis=0) * np.expand_dims( + np.expand_dims(scales, 1), 1) + center + valid = np.arange(n_scales) # all scales are valid except scale=1 + idx = valid[np.random.randint(valid.shape[0])] + pts2 = scaled[idx] + + if translation: + t_min, t_max = np.min(pts2 - [-1., -hw_ratio], axis=0), np.min([1., hw_ratio] - pts2, axis=0) + pts2 += np.expand_dims(np.stack([np.random.uniform(-t_min[0], t_max[0]), + np.random.uniform(-t_min[1], t_max[1])]), + axis=0) + + if rotation: + angles = np.linspace(-max_angle, max_angle, n_angles) + angles = np.concatenate([[0.], angles], 
axis=0) + + center = np.mean(pts2, axis=0, keepdims=True) + rot_mat = np.reshape(np.stack([np.cos(angles), -np.sin(angles), np.sin(angles), + np.cos(angles)], axis=1), [-1, 2, 2]) + rotated = np.matmul( + np.tile(np.expand_dims(pts2 - center, axis=0), [n_angles+1, 1, 1]), + rot_mat) + center + + valid = np.where(np.all((rotated >= [-1.,-hw_ratio]) & (rotated < [1.,hw_ratio]), + axis=(1, 2)))[0] + + idx = valid[np.random.randint(valid.shape[0])] + pts2 = rotated[idx] + + pts2[:,1] /= hw_ratio + + def ax(p, q): return [p[0], p[1], 1, 0, 0, 0, -p[0] * q[0], -p[1] * q[0]] + def ay(p, q): return [0, 0, 0, p[0], p[1], 1, -p[0] * q[1], -p[1] * q[1]] + + a_mat = np.stack([f(pts1[i], pts2[i]) for i in range(4) for f in (ax, ay)], axis=0) + p_mat = np.transpose(np.stack( + [[pts2[i][j] for i in range(4) for j in range(2)]], axis=0)) + + homography = np.matmul(np.linalg.pinv(a_mat), p_mat).squeeze() + homography = np.concatenate([homography, [1.]]).reshape(3,3) + return homography + +def warp_homography(sources, homography): + """Warp features given a homography + + Parameters + ---------- + sources: torch.tensor (1,H,W,2) + Keypoint vector. + homography: torch.Tensor (3,3) + Homography. + + Returns + ------- + warped_sources: torch.tensor (1,H,W,2) + Warped feature vector. + """ + _, H, W, _ = sources.shape + warped_sources = sources.clone().squeeze() + warped_sources = warped_sources.view(-1,2) + warped_sources = torch.addmm(homography[:,2], warped_sources, homography[:,:2].t()) + warped_sources.mul_(1/warped_sources[:,2].unsqueeze(1)) + warped_sources = warped_sources[:,:2].contiguous().view(1,H,W,2) + return warped_sources + +def add_noise(img, mode="gaussian", percent=0.02): + """Add image noise + + Parameters + ---------- + image : np.array + Input image + mode: str + Type of noise, from ['gaussian','salt','pepper','s&p'] + percent: float + Percentage image points to add noise to. + Returns + ------- + image : np.array + Image plus noise. 
+ """ + original_dtype = img.dtype + if mode == "gaussian": + mean = 0 + var = 0.1 + sigma = var * 0.5 + + if img.ndim == 2: + h, w = img.shape + gauss = np.random.normal(mean, sigma, (h, w)) + else: + h, w, c = img.shape + gauss = np.random.normal(mean, sigma, (h, w, c)) + + if img.dtype not in [np.float32, np.float64]: + gauss = gauss * np.iinfo(img.dtype).max + img = np.clip(img.astype(np.float) + gauss, 0, np.iinfo(img.dtype).max) + else: + img = np.clip(img.astype(np.float) + gauss, 0, 1) + + elif mode == "salt": + print(img.dtype) + s_vs_p = 1 + num_salt = np.ceil(percent * img.size * s_vs_p) + coords = tuple([np.random.randint(0, i - 1, int(num_salt)) for i in img.shape]) + + if img.dtype in [np.float32, np.float64]: + img[coords] = 1 + else: + img[coords] = np.iinfo(img.dtype).max + print(img.dtype) + elif mode == "pepper": + s_vs_p = 0 + num_pepper = np.ceil(percent * img.size * (1.0 - s_vs_p)) + coords = tuple( + [np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape] + ) + img[coords] = 0 + + elif mode == "s&p": + s_vs_p = 0.5 + + # Salt mode + num_salt = np.ceil(percent * img.size * s_vs_p) + coords = tuple([np.random.randint(0, i - 1, int(num_salt)) for i in img.shape]) + if img.dtype in [np.float32, np.float64]: + img[coords] = 1 + else: + img[coords] = np.iinfo(img.dtype).max + + # Pepper mode + num_pepper = np.ceil(percent * img.size * (1.0 - s_vs_p)) + coords = tuple( + [np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape] + ) + img[coords] = 0 + else: + raise ValueError("not support mode for {}".format(mode)) + + noisy = img.astype(original_dtype) + return noisy + + +def non_spatial_augmentation(img_warp_ori, jitter_paramters, color_order=[0,1,2], to_gray=False): + """ Apply non-spatial augmentation to an image (jittering, color swap, convert to gray scale, Gaussian blur).""" + + brightness, contrast, saturation, hue = jitter_paramters + color_augmentation = transforms.ColorJitter(brightness, contrast, saturation, hue) + ''' 
+ augment_image = color_augmentation.get_params(brightness=[max(0, 1 - brightness), 1 + brightness], + contrast=[max(0, 1 - contrast), 1 + contrast], + saturation=[max(0, 1 - saturation), 1 + saturation], + hue=[-hue, hue]) + ''' + + B = img_warp_ori.shape[0] + img_warp = [] + kernel_sizes = [0,1,3,5] + for b in range(B): + img_warp_sub = img_warp_ori[b].cpu() + img_warp_sub = torchvision.transforms.functional.to_pil_image(img_warp_sub) + + img_warp_sub_np = np.array(img_warp_sub) + img_warp_sub_np = img_warp_sub_np[:,:,color_order] + + if np.random.rand() > 0.5: + img_warp_sub_np = add_noise(img_warp_sub_np) + + rand_index = np.random.randint(4) + kernel_size = kernel_sizes[rand_index] + if kernel_size >0: + img_warp_sub_np = cv2.GaussianBlur(img_warp_sub_np, (kernel_size, kernel_size), sigmaX=0) + + if to_gray: + img_warp_sub_np = cv2.cvtColor(img_warp_sub_np, cv2.COLOR_RGB2GRAY) + img_warp_sub_np = cv2.cvtColor(img_warp_sub_np, cv2.COLOR_GRAY2RGB) + + img_warp_sub = Image.fromarray(img_warp_sub_np) + img_warp_sub = color_augmentation(img_warp_sub) + + img_warp_sub = torchvision.transforms.functional.to_tensor(img_warp_sub).to(img_warp_ori.device) + + img_warp.append(img_warp_sub) + + img_warp = torch.stack(img_warp, dim=0) + return img_warp + +def ha_augment_sample(data, jitter_paramters=[0.5, 0.5, 0.2, 0.05], patch_ratio=0.7, scaling_amplitude=0.2, max_angle=pi/4): + """Apply Homography Adaptation image augmentation.""" + input_img = data['image'].unsqueeze(0) + _, _, H, W = input_img.shape + device = input_img.device + + homography = torch.from_numpy( + sample_homography([H, W], + patch_ratio=patch_ratio, + scaling_amplitude=scaling_amplitude, + max_angle=max_angle)).float().to(device) + homography_inv = torch.inverse(homography) + + source = image_grid(1, H, W, + dtype=input_img.dtype, + device=device, + ones=False, normalized=True).clone().permute(0, 2, 3, 1) + + target_warped = warp_homography(source, homography) + img_warp = 
torch.nn.functional.grid_sample(input_img, target_warped) + + color_order = [0,1,2] + if np.random.rand() > 0.5: + random.shuffle(color_order) + + to_gray = False + if np.random.rand() > 0.5: + to_gray = True + + input_img = non_spatial_augmentation(input_img, jitter_paramters=jitter_paramters, color_order=color_order, to_gray=to_gray) + img_warp = non_spatial_augmentation(img_warp, jitter_paramters=jitter_paramters, color_order=color_order, to_gray=to_gray) + + data['image'] = input_img.squeeze() + data['image_aug'] = img_warp.squeeze() + data['homography'] = homography + data['homography_inv'] = homography_inv + return data diff --git a/third_party/lanet/checkpoints/PointModel_v0.pth b/third_party/lanet/checkpoints/PointModel_v0.pth new file mode 100644 index 0000000000000000000000000000000000000000..50d18dfc0d62c0a8c8f8a4b89f050accb22226d5 --- /dev/null +++ b/third_party/lanet/checkpoints/PointModel_v0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c1adfc8c22b044a9538019101bae0740a9d054c1b4fecd80b52d642272b9ff +size 33802301 diff --git a/third_party/lanet/config.py b/third_party/lanet/config.py new file mode 100644 index 0000000000000000000000000000000000000000..baa3aedc95410b231c29ab64b31ea5a2bd3266d7 --- /dev/null +++ b/third_party/lanet/config.py @@ -0,0 +1,79 @@ +import argparse + +arg_lists = [] +parser = argparse.ArgumentParser(description='LANet') + +def str2bool(v): + return v.lower() in ('true', '1') + +def add_argument_group(name): + arg = parser.add_argument_group(name) + arg_lists.append(arg) + return arg + +# train data params +traindata_arg = add_argument_group('Traindata Params') +traindata_arg.add_argument('--train_txt', type=str, default='', + help='Train set.') +traindata_arg.add_argument('--train_root', type=str, default='', + help='Where the train images are.') +traindata_arg.add_argument('--batch_size', type=int, default=8, + help='# of images in each batch of data') +traindata_arg.add_argument('--num_workers', 
type=int, default=4, + help='# of subprocesses to use for data loading') +traindata_arg.add_argument('--pin_memory', type=str2bool, default=True, + help='# of subprocesses to use for data loading') +traindata_arg.add_argument('--shuffle', type=str2bool, default=True, + help='Whether to shuffle the train and valid indices') +traindata_arg.add_argument('--image_shape', type=tuple, default=(240, 320), + help='') +traindata_arg.add_argument('--jittering', type=tuple, default=(0.5, 0.5, 0.2, 0.05), + help='') + +# data storage +storage_arg = add_argument_group('Storage') +storage_arg.add_argument('--ckpt_name', type=str, default='PointModel', + help='') + +# training params +train_arg = add_argument_group('Training Params') +train_arg.add_argument('--start_epoch', type=int, default=0, + help='') +train_arg.add_argument('--max_epoch', type=int, default=12, + help='') +train_arg.add_argument('--init_lr', type=float, default=3e-4, + help='Initial learning rate value.') +train_arg.add_argument('--lr_factor', type=float, default=0.5, + help='Reduce learning rate value.') +train_arg.add_argument('--momentum', type=float, default=0.9, + help='Nesterov momentum value.') +train_arg.add_argument('--display', type=int, default=50, + help='') + +# loss function params +loss_arg = add_argument_group('Loss function Params') +loss_arg.add_argument('--score_weight', type=float, default=1., + help='') +loss_arg.add_argument('--loc_weight', type=float, default=1., + help='') +loss_arg.add_argument('--desc_weight', type=float, default=4., + help='') +loss_arg.add_argument('--corres_weight', type=float, default=.5, + help='') +loss_arg.add_argument('--corres_threshold', type=int, default=4., + help='') + +# other params +misc_arg = add_argument_group('Misc.') +misc_arg.add_argument('--use_gpu', type=str2bool, default=True, + help="Whether to run on the GPU.") +misc_arg.add_argument('--gpu', type=int, default=0, + help="Which GPU to run on.") +misc_arg.add_argument('--seed', type=int, 
default=1001, + help='Seed to ensure reproducibility.') +misc_arg.add_argument('--ckpt_dir', type=str, default='./checkpoints', + help='Directory in which to save model checkpoints.') + +def get_config(): + config, unparsed = parser.parse_known_args() + return config, unparsed diff --git a/third_party/lanet/data_loader.py b/third_party/lanet/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..e694e39bb5f3e7ad6763a5cfcce3ca4804071262 --- /dev/null +++ b/third_party/lanet/data_loader.py @@ -0,0 +1,86 @@ +from PIL import Image +from torch.utils.data import Dataset, DataLoader + +from augmentations import ha_augment_sample, resize_sample, spatial_augment_sample +from utils import to_tensor_sample + +def image_transforms(shape, jittering): + def train_transforms(sample): + sample = resize_sample(sample, image_shape=shape) + sample = spatial_augment_sample(sample) + sample = to_tensor_sample(sample) + sample = ha_augment_sample(sample, jitter_paramters=jittering) + return sample + + return {'train': train_transforms} + +class GetData(Dataset): + def __init__(self, config, transforms=None): + """ + Get the list containing all images and labels. + """ + datafile = open(config.train_txt, 'r') + lines = datafile.readlines() + + dataset = [] + for line in lines: + line = line.rstrip() + data = line.split() + dataset.append(data[0]) + + self.config = config + self.dataset = dataset + self.root = config.train_root + + self.transforms = transforms + + def __getitem__(self, index): + """ + Return image'data and its label. 
+ """ + img_path = self.dataset[index] + img_file = self.root + img_path + img = Image.open(img_file) + + # image.mode == 'L' means the image is in gray scale + if img.mode == 'L': + img_new = Image.new("RGB", img.size) + img_new.paste(img) + sample = {'image': img_new, 'idx': index} + else: + sample = {'image': img, 'idx': index} + + if self.transforms: + sample = self.transforms(sample) + + return sample + + def __len__(self): + """ + Return the number of all data. + """ + return len(self.dataset) + +def get_data_loader( + config, + transforms=None, + sampler=None, + drop_last=True, + ): + """ + Return batch data for training. + """ + transforms = image_transforms(shape=config.image_shape, jittering=config.jittering) + dataset = GetData(config, transforms=transforms['train']) + + train_loader = DataLoader( + dataset, + batch_size=config.batch_size, + shuffle=config.shuffle, + sampler=sampler, + num_workers=config.num_workers, + pin_memory=config.pin_memory, + drop_last=drop_last + ) + + return train_loader diff --git a/third_party/lanet/datasets/hp_loader.py b/third_party/lanet/datasets/hp_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c1d8f3c33fd51bfa928c529544a77c06ed73f0 --- /dev/null +++ b/third_party/lanet/datasets/hp_loader.py @@ -0,0 +1,106 @@ +import torch +import cv2 +import numpy as np + +from torchvision import transforms +from torch.utils.data import Dataset +from pathlib import Path + + +class PatchesDataset(Dataset): + """ + HPatches dataset class. + # Note: output_shape = (output_width, output_height) + # Note: this returns Pytorch tensors, resized to output_shape (if specified) + # Note: the homography will be adjusted according to output_shape. + + Parameters + ---------- + root_dir : str + Path to the dataset + use_color : bool + Return color images or convert to grayscale. 
+ data_transform : Function + Transformations applied to the sample + output_shape: tuple + If specified, the images and homographies will be resized to the desired shape. + type: str + Dataset subset to return from ['i', 'v', 'all']: + i - illumination sequences + v - viewpoint sequences + all - all sequences + """ + def __init__(self, root_dir, use_color=True, data_transform=None, output_shape=None, type='all'): + super().__init__() + self.type = type + self.root_dir = root_dir + self.data_transform = data_transform + self.output_shape = output_shape + self.use_color = use_color + base_path = Path(root_dir) + folder_paths = [x for x in base_path.iterdir() if x.is_dir()] + image_paths = [] + warped_image_paths = [] + homographies = [] + for path in folder_paths: + if self.type == 'i' and path.stem[0] != 'i': + continue + if self.type == 'v' and path.stem[0] != 'v': + continue + num_images = 5 + file_ext = '.ppm' + for i in range(2, 2 + num_images): + image_paths.append(str(Path(path, "1" + file_ext))) + warped_image_paths.append(str(Path(path, str(i) + file_ext))) + homographies.append(np.loadtxt(str(Path(path, "H_1_" + str(i))))) + self.files = {'image_paths': image_paths, 'warped_image_paths': warped_image_paths, 'homography': homographies} + + def scale_homography(self, homography, original_scale, new_scale, pre): + scales = np.divide(new_scale, original_scale) + if pre: + s = np.diag(np.append(scales, 1.)) + homography = np.matmul(s, homography) + else: + sinv = np.diag(np.append(1. 
/ scales, 1.)) + homography = np.matmul(homography, sinv) + return homography + + def __len__(self): + return len(self.files['image_paths']) + + def __getitem__(self, idx): + + def _read_image(path): + img = cv2.imread(path, cv2.IMREAD_COLOR) + if self.use_color: + return img + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + return gray + + image = _read_image(self.files['image_paths'][idx]) + + warped_image = _read_image(self.files['warped_image_paths'][idx]) + homography = np.array(self.files['homography'][idx]) + sample = {'image': image, 'warped_image': warped_image, 'homography': homography, 'index' : idx} + + # Apply transformations + if self.output_shape is not None: + sample['homography'] = self.scale_homography(sample['homography'], + sample['image'].shape[:2][::-1], + self.output_shape, + pre=False) + sample['homography'] = self.scale_homography(sample['homography'], + sample['warped_image'].shape[:2][::-1], + self.output_shape, + pre=True) + + for key in ['image', 'warped_image']: + sample[key] = cv2.resize(sample[key], self.output_shape) + if self.use_color is False: + sample[key] = np.expand_dims(sample[key], axis=2) + + transform = transforms.ToTensor() + + for key in ['image', 'warped_image']: + sample[key] = transform(sample[key]).type('torch.FloatTensor') + return sample diff --git a/third_party/lanet/datasets/prepare_coco.py b/third_party/lanet/datasets/prepare_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0468aba19c6c2c76bda1a1af2b86dc7f20176fdb --- /dev/null +++ b/third_party/lanet/datasets/prepare_coco.py @@ -0,0 +1,26 @@ +import os +import argparse + +def prepare_coco(args): + train_file = open(os.path.join(args.saved_dir, args.saved_txt), 'w') + dirs = os.listdir(args.raw_dir) + + for file in dirs: + # Write training files + train_file.write('%s\n' % (file)) + + print('Data Preparation Finished.') + +if __name__ == '__main__': + arg_parser = argparse.ArgumentParser(description="coco prepareing.") + 
arg_parser.add_argument('--dataset', type=str, default='coco', + help='') + arg_parser.add_argument('--raw_dir', type=str, default='', + help='') + arg_parser.add_argument('--saved_dir', type=str, default='', + help='') + arg_parser.add_argument('--saved_txt', type=str, default='train2017.txt', + help='') + args = arg_parser.parse_args() + + prepare_coco(args) \ No newline at end of file diff --git a/third_party/lanet/evaluation/descriptor_evaluation.py b/third_party/lanet/evaluation/descriptor_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..c0e1f84199d353ac5858641c8f68bc298f9d6413 --- /dev/null +++ b/third_party/lanet/evaluation/descriptor_evaluation.py @@ -0,0 +1,254 @@ +# Copyright 2020 Toyota Research Institute. All rights reserved. +# Adapted from: https://github.com/rpautrat/SuperPoint/blob/master/superpoint/evaluations/descriptor_evaluation.py + +import random +from glob import glob +from os import path as osp + +import cv2 +import numpy as np + +from utils import warp_keypoints + + +def select_k_best(points, descriptors, k): + """ Select the k most probable points (and strip their probability). + points has shape (num_points, 3) where the last coordinate is the probability. + + Parameters + ---------- + points: numpy.ndarray (N,3) + Keypoint vector, consisting of (x,y,probability). + descriptors: numpy.ndarray (N,256) + Keypoint descriptors. + k: int + Number of keypoints to select, based on probability. + Returns + ------- + + selected_points: numpy.ndarray (k,2) + k most probable keypoints. + selected_descriptors: numpy.ndarray (k,256) + Descriptors corresponding to the k most probable keypoints. 
+ """ + sorted_prob = points[points[:, 2].argsort(), :2] + sorted_desc = descriptors[points[:, 2].argsort(), :] + start = min(k, points.shape[0]) + selected_points = sorted_prob[-start:, :] + selected_descriptors = sorted_desc[-start:, :] + return selected_points, selected_descriptors + + +def keep_shared_points(keypoints, descriptors, H, shape, keep_k_points=1000): + """ + Compute a list of keypoints from the map, filter the list of points by keeping + only the points that once mapped by H are still inside the shape of the map + and keep at most 'keep_k_points' keypoints in the image. + + Parameters + ---------- + keypoints: numpy.ndarray (N,3) + Keypoint vector, consisting of (x,y,probability). + descriptors: numpy.ndarray (N,256) + Keypoint descriptors. + H: numpy.ndarray (3,3) + Homography. + shape: tuple + Image shape. + keep_k_points: int + Number of keypoints to select, based on probability. + + Returns + ------- + selected_points: numpy.ndarray (k,2) + k most probable keypoints. + selected_descriptors: numpy.ndarray (k,256) + Descriptors corresponding to the k most probable keypoints. + """ + + def keep_true_keypoints(points, descriptors, H, shape): + """ Keep only the points whose warped coordinates by H are still inside shape. """ + warped_points = warp_keypoints(points[:, [1, 0]], H) + warped_points[:, [0, 1]] = warped_points[:, [1, 0]] + mask = (warped_points[:, 0] >= 0) & (warped_points[:, 0] < shape[0]) &\ + (warped_points[:, 1] >= 0) & (warped_points[:, 1] < shape[1]) + return points[mask, :], descriptors[mask, :] + + selected_keypoints, selected_descriptors = keep_true_keypoints(keypoints, descriptors, H, shape) + selected_keypoints, selected_descriptors = select_k_best(selected_keypoints, selected_descriptors, keep_k_points) + return selected_keypoints, selected_descriptors + + +def compute_matching_score(data, keep_k_points=1000): + """ + Compute the matching score between two sets of keypoints with associated descriptors. 
+ + Parameters + ---------- + data: dict + Input dictionary containing: + image_shape: tuple (H,W) + Original image shape. + homography: numpy.ndarray (3,3) + Ground truth homography. + prob: numpy.ndarray (N,3) + Keypoint vector, consisting of (x,y,probability). + warped_prob: numpy.ndarray (N,3) + Warped keypoint vector, consisting of (x,y,probability). + desc: numpy.ndarray (N,256) + Keypoint descriptors. + warped_desc: numpy.ndarray (N,256) + Warped keypoint descriptors. + keep_k_points: int + Number of keypoints to select, based on probability. + + Returns + ------- + ms: float + Matching score. + """ + shape = data['image_shape'] + real_H = data['homography'] + + # Filter out predictions + keypoints = data['prob'][:, :2].T + keypoints = keypoints[::-1] + prob = data['prob'][:, 2] + keypoints = np.stack([keypoints[0], keypoints[1], prob], axis=-1) + + warped_keypoints = data['warped_prob'][:, :2].T + warped_keypoints = warped_keypoints[::-1] + warped_prob = data['warped_prob'][:, 2] + warped_keypoints = np.stack([warped_keypoints[0], warped_keypoints[1], warped_prob], axis=-1) + + desc = data['desc'] + warped_desc = data['warped_desc'] + + # Keeps all points for the next frame. The matching for caculating M.Score shouldnt use only in view points. + keypoints, desc = select_k_best(keypoints, desc, keep_k_points) + warped_keypoints, warped_desc = select_k_best(warped_keypoints, warped_desc, keep_k_points) + + # Match the keypoints with the warped_keypoints with nearest neighbor search + # This part needs to be done with crossCheck=False. + # All the matched pairs need to be evaluated without any selection. 
+ bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False) + + matches = bf.match(desc, warped_desc) + matches_idx = np.array([m.queryIdx for m in matches]) + m_keypoints = keypoints[matches_idx, :] + matches_idx = np.array([m.trainIdx for m in matches]) + m_warped_keypoints = warped_keypoints[matches_idx, :] + + true_warped_keypoints = warp_keypoints(m_warped_keypoints[:, [1, 0]], np.linalg.inv(real_H))[:,::-1] + vis_warped = np.all((true_warped_keypoints >= 0) & (true_warped_keypoints <= (np.array(shape)-1)), axis=-1) + norm1 = np.linalg.norm(true_warped_keypoints - m_keypoints, axis=-1) + + correct1 = (norm1 < 3) + count1 = np.sum(correct1 * vis_warped) + score1 = count1 / np.maximum(np.sum(vis_warped), 1.0) + + matches = bf.match(warped_desc, desc) + matches_idx = np.array([m.queryIdx for m in matches]) + m_warped_keypoints = warped_keypoints[matches_idx, :] + matches_idx = np.array([m.trainIdx for m in matches]) + m_keypoints = keypoints[matches_idx, :] + + true_keypoints = warp_keypoints(m_keypoints[:, [1, 0]], real_H)[:,::-1] + vis = np.all((true_keypoints >= 0) & (true_keypoints <= (np.array(shape)-1)), axis=-1) + norm2 = np.linalg.norm(true_keypoints - m_warped_keypoints, axis=-1) + + correct2 = (norm2 < 3) + count2 = np.sum(correct2 * vis) + score2 = count2 / np.maximum(np.sum(vis), 1.0) + + ms = (score1 + score2) / 2 + + return ms + +def compute_homography(data, keep_k_points=1000): + """ + Compute the homography between 2 sets of Keypoints and descriptors inside data. + Use the homography to compute the correctness metrics (1,3,5). + + Parameters + ---------- + data: dict + Input dictionary containing: + image_shape: tuple (H,W) + Original image shape. + homography: numpy.ndarray (3,3) + Ground truth homography. + prob: numpy.ndarray (N,3) + Keypoint vector, consisting of (x,y,probability). + warped_prob: numpy.ndarray (N,3) + Warped keypoint vector, consisting of (x,y,probability). + desc: numpy.ndarray (N,256) + Keypoint descriptors. 
+ warped_desc: numpy.ndarray (N,256) + Warped keypoint descriptors. + keep_k_points: int + Number of keypoints to select, based on probability. + + Returns + ------- + correctness1: float + correctness1 metric. + correctness3: float + correctness3 metric. + correctness5: float + correctness5 metric. + """ + shape = data['image_shape'] + real_H = data['homography'] + + # Filter out predictions + keypoints = data['prob'][:, :2].T + keypoints = keypoints[::-1] + prob = data['prob'][:, 2] + keypoints = np.stack([keypoints[0], keypoints[1], prob], axis=-1) + + warped_keypoints = data['warped_prob'][:, :2].T + warped_keypoints = warped_keypoints[::-1] + warped_prob = data['warped_prob'][:, 2] + warped_keypoints = np.stack([warped_keypoints[0], warped_keypoints[1], warped_prob], axis=-1) + + desc = data['desc'] + warped_desc = data['warped_desc'] + + # Keeps only the points shared between the two views + keypoints, desc = keep_shared_points(keypoints, desc, real_H, shape, keep_k_points) + warped_keypoints, warped_desc = keep_shared_points(warped_keypoints, warped_desc, np.linalg.inv(real_H), shape, + keep_k_points) + + bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + matches = bf.match(desc, warped_desc) + matches_idx = np.array([m.queryIdx for m in matches]) + m_keypoints = keypoints[matches_idx, :] + matches_idx = np.array([m.trainIdx for m in matches]) + m_warped_keypoints = warped_keypoints[matches_idx, :] + + # Estimate the homography between the matches using RANSAC + H, _ = cv2.findHomography(m_keypoints[:, [1, 0]], + m_warped_keypoints[:, [1, 0]], cv2.RANSAC, 3, maxIters=5000) + + if H is None: + return 0, 0, 0 + + shape = shape[::-1] + + # Compute correctness + corners = np.array([[0, 0, 1], + [0, shape[1] - 1, 1], + [shape[0] - 1, 0, 1], + [shape[0] - 1, shape[1] - 1, 1]]) + real_warped_corners = np.dot(corners, np.transpose(real_H)) + real_warped_corners = real_warped_corners[:, :2] / real_warped_corners[:, 2:] + warped_corners = np.dot(corners, 
def compute_repeatability(data, keep_k_points=300, distance_thresh=3):
    """
    Compute the repeatability metric between 2 sets of keypoints inside data.

    Parameters
    ----------
    data: dict
        Input dictionary containing:
        image_shape: tuple (H,W)
            Original image shape.
        homography: numpy.ndarray (3,3)
            Ground truth homography.
        prob: numpy.ndarray (N,3)
            Keypoint vector, consisting of (x,y,probability).
        warped_prob: numpy.ndarray (N,3)
            Warped keypoint vector, consisting of (x,y,probability).
    keep_k_points: int
        Number of keypoints to select, based on probability.
    distance_thresh: int
        Distance threshold in pixels for a corresponding keypoint to be considered a correct match.

    Returns
    -------
    N1: int
        Number of first-image keypoints that, once warped by the homography,
        fall inside the image (after top-k selection).
    N2: int
        Number of second-image keypoints whose inverse warp falls inside the
        image (after top-k selection).
    repeatability: float
        Keypoint repeatability metric, or -1 when both sets are empty.
    loc_err: float
        Mean distance of the matches that are within ``distance_thresh``, or
        -1 when there is no such match (the metric is undefined in that case).
    """
    def filter_keypoints(points, shape):
        """ Keep only the points whose coordinates are inside the dimensions of shape. """
        mask = (points[:, 0] >= 0) & (points[:, 0] < shape[0]) &\
               (points[:, 1] >= 0) & (points[:, 1] < shape[1])
        return points[mask, :]

    def keep_true_keypoints(points, H, shape):
        """ Keep only the points whose warped coordinates by H are still inside shape. """
        # The first two columns are swapped before and after warping.
        # NOTE(review): presumably warp_keypoints expects (x, y) order - confirm in utils.
        warped_points = warp_keypoints(points[:, [1, 0]], H)
        warped_points[:, [0, 1]] = warped_points[:, [1, 0]]
        mask = (warped_points[:, 0] >= 0) & (warped_points[:, 0] < shape[0]) &\
               (warped_points[:, 1] >= 0) & (warped_points[:, 1] < shape[1])
        return points[mask, :]

    def select_k_best(points, k):
        """ Select the k most probable points (and strip their probability).
        points has shape (num_points, 3) where the last coordinate is the probability. """
        num_points = points.shape[0]
        keep = max(0, min(k, num_points))
        order = points[:, 2].argsort()
        # Explicit start index: ``[-keep:]`` would return every row when keep == 0.
        return points[order[num_points - keep:], :2]

    H = data['homography']
    shape = data['image_shape']

    # Split coordinates and probabilities. Reversing the two coordinate rows
    # swaps the column order so that column 0 is compared against shape[0]
    # and column 1 against shape[1] in the filters above.
    keypoints = data['prob'][:, :2].T
    keypoints = keypoints[::-1]
    prob = data['prob'][:, 2]

    warped_keypoints = data['warped_prob'][:, :2].T
    warped_keypoints = warped_keypoints[::-1]
    warped_prob = data['warped_prob'][:, 2]

    keypoints = np.stack([keypoints[0], keypoints[1]], axis=-1)
    warped_keypoints = np.stack([warped_keypoints[0], warped_keypoints[1], warped_prob], axis=-1)
    warped_keypoints = keep_true_keypoints(warped_keypoints, np.linalg.inv(H), shape)

    # Warp the original keypoints with the true homography
    true_warped_keypoints = warp_keypoints(keypoints[:, [1, 0]], H)
    true_warped_keypoints = np.stack([true_warped_keypoints[:, 1], true_warped_keypoints[:, 0], prob], axis=-1)
    true_warped_keypoints = filter_keypoints(true_warped_keypoints, shape)

    # Keep only the keep_k_points best predictions
    warped_keypoints = select_k_best(warped_keypoints, keep_k_points)
    true_warped_keypoints = select_k_best(true_warped_keypoints, keep_k_points)

    # Compute the repeatability
    N1 = true_warped_keypoints.shape[0]
    N2 = warped_keypoints.shape[0]
    true_warped_keypoints = np.expand_dims(true_warped_keypoints, 1)
    warped_keypoints = np.expand_dims(warped_keypoints, 0)
    # shapes are broadcasted to N1 x N2 x 2:
    norm = np.linalg.norm(true_warped_keypoints - warped_keypoints, ord=None, axis=2)
    count1 = 0
    count2 = 0
    le1 = 0
    le2 = 0
    if N2 != 0:
        # Nearest second-image keypoint for every warped first-image keypoint.
        min1 = np.min(norm, axis=1)
        correct1 = (min1 <= distance_thresh)
        count1 = np.sum(correct1)
        le1 = min1[correct1].sum()
    if N1 != 0:
        # Nearest warped first-image keypoint for every second-image keypoint.
        min2 = np.min(norm, axis=0)
        correct2 = (min2 <= distance_thresh)
        count2 = np.sum(correct2)
        le2 = min2[correct2].sum()

    if N1 + N2 > 0:
        repeatability = (count1 + count2) / (N1 + N2)
    else:
        repeatability = -1

    # The localization error is only defined when at least one match is within
    # the threshold. Dividing by a zero count would yield NaN (and a numpy
    # RuntimeWarning), so the -1 sentinel used for the empty case is returned.
    num_correct = count1 + count2
    if num_correct > 0:
        loc_err = (le1 + le2) / num_correct
    else:
        loc_err = -1

    return N1, N2, repeatability, loc_err
+ use_color: bool + Use color or grayscale images. + """ + keypoint_net.eval() + keypoint_net.training = False + + conf_threshold = 0.0 + localization_err, repeatability = [], [] + correctness1, correctness3, correctness5, MScore = [], [], [], [] + + with torch.no_grad(): + for i, sample in tqdm(enumerate(data_loader), desc="Evaluate point model"): + + image = sample['image'].cuda() + warped_image = sample['warped_image'].cuda() + + score_1, coord_1, desc1 = keypoint_net(image) + score_2, coord_2, desc2 = keypoint_net(warped_image) + B, _, Hc, Wc = desc1.shape + + # Scores & Descriptors + score_1 = torch.cat([coord_1, score_1], dim=1).view(3, -1).t().cpu().numpy() + score_2 = torch.cat([coord_2, score_2], dim=1).view(3, -1).t().cpu().numpy() + desc1 = desc1.view(256, Hc, Wc).view(256, -1).t().cpu().numpy() + desc2 = desc2.view(256, Hc, Wc).view(256, -1).t().cpu().numpy() + + # Filter based on confidence threshold + desc1 = desc1[score_1[:, 2] > conf_threshold, :] + desc2 = desc2[score_2[:, 2] > conf_threshold, :] + score_1 = score_1[score_1[:, 2] > conf_threshold, :] + score_2 = score_2[score_2[:, 2] > conf_threshold, :] + + # Prepare data for eval + data = {'image': sample['image'].numpy().squeeze(), + 'image_shape' : output_shape[::-1], + 'warped_image': sample['warped_image'].numpy().squeeze(), + 'homography': sample['homography'].squeeze().numpy(), + 'prob': score_1, + 'warped_prob': score_2, + 'desc': desc1, + 'warped_desc': desc2} + + # Compute repeatabilty and localization error + _, _, rep, loc_err = compute_repeatability(data, keep_k_points=top_k, distance_thresh=3) + repeatability.append(rep) + localization_err.append(loc_err) + + # Compute correctness + c1, c2, c3 = compute_homography(data, keep_k_points=top_k) + correctness1.append(c1) + correctness3.append(c2) + correctness5.append(c3) + + # Compute matching score + mscore = compute_matching_score(data, keep_k_points=top_k) + MScore.append(mscore) + + return np.mean(repeatability), 
def build_descriptor_loss(source_des, target_des, tar_points_un, top_kk=None, relax_field=4, eval_only=False):
    """
    Desc Head Loss, per-pixel level triplet loss from https://arxiv.org/pdf/1902.11046.pdf.

    Descriptor ``i`` of the target is treated as the ground-truth match of
    descriptor ``i`` of the source.

    Parameters
    ----------
    source_des: torch.Tensor (B,C,Hc,Wc)
        Source image descriptors.
    target_des: torch.Tensor (B,C,Hc,Wc)
        Target image descriptors.
    tar_points_un: torch.Tensor (B,2,Hc,Wc)
        Target image keypoints unnormalized
    top_kk: torch.Tensor (B,Hc,Wc) or None
        Boolean mask of the cells to use for each sample. When None every cell
        is used. Samples with fewer than 20 selected cells are skipped.
    relax_field: int
        Maximum per-axis distance between a candidate and the true location
        for the candidate to still count as a positive when mining the
        negative.
    eval_only: bool
        Computes only recall without the loss.

    Returns
    -------
    loss: torch.Tensor
        Descriptor loss. This is the Python int 0 when no sample contributed
        (``eval_only`` set, or every sample skipped).
    recall: float
        Descriptor match recall: fraction of source descriptors whose nearest
        target descriptor sits exactly at the true location, averaged over
        the batch size.
    """
    device = source_des.device
    # Read the descriptor width from the input instead of assuming 256.
    batch_size, n_channels = source_des.shape[:2]
    spatial_shape = source_des.shape[2:]
    loss = 0
    recall = 0.

    relax_field_size = [relax_field]
    margins = [1.0]
    weights = [1.0]

    is_dense = top_kk is None

    for b_id in range(batch_size):

        if is_dense:
            ref_desc = source_des[b_id].reshape(n_channels, -1)
            tar_desc = target_des[b_id].reshape(n_channels, -1)
            tar_points_raw = tar_points_un[b_id].reshape(2, -1)
        else:
            # Reshape the mask to the spatial grid rather than squeezing the
            # tensors, so a size-1 height, width or channel axis is never
            # dropped by accident.
            top_k = top_kk[b_id].reshape(spatial_shape)

            n_feat = top_k.sum().item()
            if n_feat < 20:
                continue

            ref_desc = source_des[b_id][:, top_k]
            tar_desc = target_des[b_id][:, top_k]
            tar_points_raw = tar_points_un[b_id][:, top_k]

        # Compute dense descriptor distance matrix and find nearest neighbor
        ref_desc = ref_desc.div(torch.norm(ref_desc, p=2, dim=0))
        tar_desc = tar_desc.div(torch.norm(tar_desc, p=2, dim=0))
        dmat = torch.mm(ref_desc.t(), tar_desc)

        # Euclidean distance between unit vectors from their cosine similarity.
        dmat = torch.sqrt(2 - 2 * torch.clamp(dmat, min=-1, max=1))
        _, idx = torch.sort(dmat, dim=1)

        # candidates[r, i] is the index of the r-th closest target for source i.
        candidates = idx.t()

        match_k_x = tar_points_raw[0, candidates]
        match_k_y = tar_points_raw[1, candidates]

        tru_x = tar_points_raw[0]
        tru_y = tar_points_raw[1]

        # Compute triplet loss and recall
        for pyramid, (relax, margin, weight) in enumerate(zip(relax_field_size, margins, weights)):

            if pyramid == 0:
                correct2 = (abs(match_k_x[0] - tru_x) == 0) & (abs(match_k_y[0] - tru_y) == 0)
                correct2_cnt = correct2.float().sum()
                recall += float(1.0 / batch_size) * (float(correct2_cnt) / float(ref_desc.size(1)))

            if eval_only:
                continue

            correct_k = (abs(match_k_x - tru_x) <= relax) & (abs(match_k_y - tru_y) <= relax)

            # Weight every rank with a value that decreases with the rank and
            # zero out the positives: the argmax is then the best-ranked
            # candidate that is NOT within the relax field (hardest negative).
            rank_weight = torch.arange(start=correct_k.shape[0] - 1, end=-1, step=-1, device=device).unsqueeze(1)
            incorrect_first = torch.argmax(rank_weight * (1 - correct_k.long()), dim=0)

            incorrect_first_index = candidates.gather(0, incorrect_first.unsqueeze(0)).squeeze(0)

            neg_var = tar_desc[:, incorrect_first_index]

            loss += float(1.0 / batch_size) * torch.nn.functional.triplet_margin_loss(
                ref_desc.t(), tar_desc.t(), neg_var.t(), margin=margin).mul(weight)

    return loss, recall
+ desc_loss, _ = build_descriptor_loss(data['source_desc'], data['target_desc_warped'], data['target_coord_warped'].detach(), top_kk=data['border_mask'], relax_field=8) + + # score loss + target_score_associated = data['target_score'].view(B, hc * wc).gather(1, l2_dist_loc_min_index).view(B, hc, wc).unsqueeze(1) + dist_norm_valid_mask = dist_norm_valid_mask.view(B, hc, wc).unsqueeze(1) & data['border_mask'].unsqueeze(1) + l2_dist_loc_min = l2_dist_loc_min.view(B, hc, wc).unsqueeze(1) + loc_err = l2_dist_loc_min[dist_norm_valid_mask] + + # repeatable_constrain in score loss + repeatable_constrain = ((target_score_associated[dist_norm_valid_mask] + data['source_score'][dist_norm_valid_mask]) * (loc_err - loc_err.mean())).mean() + + # consistent_constrain in score_loss + consistent_constrain = torch.nn.functional.mse_loss(data['target_score_warped'][data['border_mask'].unsqueeze(1)], data['source_score'][data['border_mask'].unsqueeze(1)]).mean() * 2 + aware_consistent_loss = torch.nn.functional.mse_loss(data['target_aware_warped'][data['border_mask'].unsqueeze(1).repeat(1, 2, 1, 1)], data['source_aware'][data['border_mask'].unsqueeze(1).repeat(1, 2, 1, 1)]).mean() * 2 + + score_loss = repeatable_constrain + consistent_constrain + aware_consistent_loss + + loss = self.loc_weight * loc_loss + self.desc_weight * desc_loss + self.score_weight * score_loss + self.corres_weight * corres_loss + + return loss, self.loc_weight * loc_loss, self.desc_weight * desc_loss, self.score_weight * score_loss, self.corres_weight * corres_loss + + + + diff --git a/third_party/lanet/main.py b/third_party/lanet/main.py new file mode 100644 index 0000000000000000000000000000000000000000..2aa81d8104c19ea1d8c4ce7d1dd547f8b35a4a72 --- /dev/null +++ b/third_party/lanet/main.py @@ -0,0 +1,25 @@ +import torch + +from train import Trainer +from config import get_config +from utils import prepare_dirs +from data_loader import get_data_loader + +def main(config): + # ensure directories are setup + 
def warp_homography_batch(sources, homographies):
    """
    Batch warp keypoints given homographies. From https://github.com/TRI-ML/KP2D.

    Every 2D point ``[x, y]`` is lifted to ``[x, y, 1]``, multiplied by the
    sample's homography and divided by the resulting third coordinate.

    Parameters
    ----------
    sources: torch.Tensor (B,H,W,2)
        Keypoints vector. Any memory layout is accepted.
    homographies: torch.Tensor (B,3,3)
        Homographies.

    Returns
    -------
    warped_sources: torch.Tensor (B,H,W,2)
        Warped keypoints vector.
    """
    B, H, W, _ = sources.shape

    # reshape (not view) so that permuted / non-contiguous inputs are accepted.
    points = sources.reshape(B, -1, 2)

    # [x, y, 1] @ M^T == [x, y] @ M[:, :2]^T + M[:, 2], done for the whole
    # batch at once instead of one addmm per sample.
    homogeneous = torch.matmul(points, homographies[:, :, :2].transpose(1, 2))
    homogeneous = homogeneous + homographies[:, :, 2].unsqueeze(1)

    # Perspective divide, out of place so the autograd graph is not mutated.
    warped = homogeneous[..., :2] / homogeneous[..., 2:]
    return warped.reshape(B, H, W, 2)
+ target_coord_norm = target_coord_norm.permute(0, 2, 3, 1) + + target_coord_warped_norm = warp_homography_batch(source_coord_norm, args[2]) + target_coord_warped = target_coord_warped_norm.clone() + + # de-normlize the coordinates + target_coord_warped[:, :, :, 0] = (target_coord_warped[:, :, :, 0] + 1) * (float(W - 1) / 2.) + target_coord_warped[:, :, :, 1] = (target_coord_warped[:, :, :, 1] + 1) * (float(H - 1) / 2.) + target_coord_warped = target_coord_warped.permute(0, 3, 1, 2) + + # Border mask + border_mask_ori = torch.ones(B, hc, wc) + border_mask_ori[:, 0] = 0 + border_mask_ori[:, hc - 1] = 0 + border_mask_ori[:, :, 0] = 0 + border_mask_ori[:, :, wc - 1] = 0 + border_mask_ori = border_mask_ori.gt(1e-3).to(device) + + oob_mask2 = target_coord_warped_norm[:, :, :, 0].lt(1) & target_coord_warped_norm[:, :, :, 0].gt(-1) & target_coord_warped_norm[:, :, :, 1].lt(1) & target_coord_warped_norm[:, :, :, 1].gt(-1) + border_mask = border_mask_ori & oob_mask2 + + # score + target_score_warped = torch.nn.functional.grid_sample(target_score, target_coord_warped_norm.detach(), align_corners=False) + + # descriptor + source_desc2 = torch.nn.functional.grid_sample(source_desc_block[0], source_coord_norm.detach()) + source_desc3 = torch.nn.functional.grid_sample(source_desc_block[1], source_coord_norm.detach()) + source_aware = source_desc_block[2] + source_desc = torch.mul(source_desc2, source_aware[:, 0, :, :].unsqueeze(1).contiguous()) + torch.mul(source_desc3, source_aware[:, 1, :, :].unsqueeze(1).contiguous()) + + target_desc2 = torch.nn.functional.grid_sample(target_desc_block[0], target_coord_norm.detach()) + target_desc3 = torch.nn.functional.grid_sample(target_desc_block[1], target_coord_norm.detach()) + target_aware = target_desc_block[2] + target_desc = torch.mul(target_desc2, target_aware[:, 0, :, :].unsqueeze(1).contiguous()) + torch.mul(target_desc3, target_aware[:, 1, :, :].unsqueeze(1).contiguous()) + + target_desc2_warped = 
torch.nn.functional.grid_sample(target_desc_block[0], target_coord_warped_norm.detach()) + target_desc3_warped = torch.nn.functional.grid_sample(target_desc_block[1], target_coord_warped_norm.detach()) + target_aware_warped = torch.nn.functional.grid_sample(target_desc_block[2], target_coord_warped_norm.detach()) + target_desc_warped = torch.mul(target_desc2_warped, target_aware_warped[:, 0, :, :].unsqueeze(1).contiguous()) + torch.mul(target_desc3_warped, target_aware_warped[:, 1, :, :].unsqueeze(1).contiguous()) + + confidence_matrix = self.correspondence_module(source_desc, target_desc) + confidence_matrix = torch.clamp(confidence_matrix, 1e-12, 1 - 1e-12) + + output = { + 'source_score': source_score, + 'source_coord': source_coord, + 'source_desc': source_desc, + 'source_aware': source_aware, + 'target_score': target_score, + 'target_coord': target_coord, + 'target_score_warped': target_score_warped, + 'target_coord_warped': target_coord_warped, + 'target_desc_warped': target_desc_warped, + 'target_aware_warped': target_aware_warped, + 'border_mask': border_mask, + 'confidence_matrix': confidence_matrix + } + + return output diff --git a/third_party/lanet/network_v0/modules.py b/third_party/lanet/network_v0/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..a38c53133aff8769f267cc054174361296cb3e7d --- /dev/null +++ b/third_party/lanet/network_v0/modules.py @@ -0,0 +1,158 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils import image_grid + +class ConvBlock(nn.Module): + def __init__(self, in_channels, out_channels): + super(ConvBlock, self).__init__() + + self.conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True) + ) + + def forward(self, x): 
class InterestPointModule(nn.Module):
    """
    Convolutional keypoint network with score, location and descriptor heads.

    The encoder is four ConvBlocks separated by three 2x2 max-poolings. The
    heads run on the output of the last block; the descriptor maps are taken
    from the second and third blocks.

    Parameters
    ----------
    is_test: bool
        When True, ``forward`` returns one fused, L2-normalized descriptor map
        sampled at the predicted keypoint locations. When False it returns the
        raw descriptor block for the caller to sample.
    """
    def __init__(self, is_test=False):
        super(InterestPointModule, self).__init__()
        self.is_test = is_test

        self.conv1 = ConvBlock(3, 32)
        self.conv2 = ConvBlock(32, 64)
        self.conv3 = ConvBlock(64, 128)
        self.conv4 = ConvBlock(128, 256)

        self.maxpool2x2 = nn.MaxPool2d(2, 2)

        # score head: channels 0-1 are the descriptor weights ("aware"),
        # channel 2 is the keypoint score
        self.score_conv = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False)
        self.score_norm = nn.BatchNorm2d(256)
        self.score_out = nn.Conv2d(256, 3, kernel_size=3, stride=1, padding=1)
        self.softmax = nn.Softmax(dim=1)

        # location head
        self.loc_conv = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False)
        self.loc_norm = nn.BatchNorm2d(256)
        self.loc_out = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1)

        # descriptor out
        self.des_conv2 = DilationConv3x3(64, 256)
        self.des_conv3 = DilationConv3x3(128, 256)

        # cross_head:
        self.shift_out = nn.Conv2d(256, 1, kernel_size=3, stride=1, padding=1)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """
        Parameters
        ----------
        x: torch.Tensor (B,3,H,W)
            Input image batch.

        Returns
        -------
        score: torch.Tensor (B,1,Hc,Wc)
            Sigmoid keypoint score, zeroed on the outermost ring of cells.
        coord: torch.Tensor (B,2,Hc,Wc)
            Keypoint coordinates, channel 0 clamped to [0, W-1] and channel 1
            clamped to [0, H-1].
        desc: torch.Tensor (B,256,Hc,Wc) when ``is_test`` is True; otherwise the
            list ``[des_conv2(x2), des_conv3(x3), aware]``.
        """
        B, _, H, W = x.shape

        x = self.conv1(x)
        x = self.maxpool2x2(x)
        x2 = self.conv2(x)
        x = self.maxpool2x2(x2)
        x3 = self.conv3(x)
        x = self.maxpool2x2(x3)
        x = self.conv4(x)

        B, _, Hc, Wc = x.shape

        # score head
        score_x = self.score_out(self.relu(self.score_norm(self.score_conv(x))))
        aware = self.softmax(score_x[:, 0:2, :, :])
        score = score_x[:, 2, :, :].unsqueeze(1).sigmoid()

        # Suppress the scores of the cells on the image border. The mask is
        # allocated directly on the score's device to avoid a host-to-device
        # copy on every forward pass.
        border_mask = torch.ones(B, Hc, Wc, device=score.device)
        border_mask[:, 0] = 0
        border_mask[:, Hc - 1] = 0
        border_mask[:, :, 0] = 0
        border_mask[:, :, Wc - 1] = 0
        border_mask = border_mask.unsqueeze(1)
        score = score * border_mask

        # location head
        coord_x = self.relu(self.loc_norm(self.loc_conv(x)))
        coord_cell = self.loc_out(coord_x).tanh()

        shift_ratio = self.shift_out(coord_x).sigmoid() * 2.0

        # The height ratio is used as the cell size for both axes, i.e. the
        # same downsampling factor is assumed horizontally and vertically.
        step = ((H/Hc)-1) / 2.
        # NOTE(review): image_grid comes from utils; presumably it returns the
        # (B,2,Hc,Wc) cell index grid - confirm there.
        center_base = image_grid(B, Hc, Wc,
                                 dtype=coord_cell.dtype,
                                 device=coord_cell.device,
                                 ones=False, normalized=False).mul(H/Hc) + step

        # Offset from the cell center: tanh output in [-1, 1] scaled by
        # shift_ratio in [0, 2] times half a cell.
        coord_un = center_base.add(coord_cell.mul(shift_ratio * step))
        coord = coord_un.clone()
        coord[:, 0] = torch.clamp(coord_un[:, 0], min=0, max=W-1)
        coord[:, 1] = torch.clamp(coord_un[:, 1], min=0, max=H-1)

        # descriptor block
        desc_block = []
        desc_block.append(self.des_conv2(x2))
        desc_block.append(self.des_conv3(x3))
        desc_block.append(aware)

        if self.is_test:
            # Map pixel coordinates to the [-1, 1] range expected by grid_sample.
            coord_norm = coord[:, :2].clone()
            coord_norm[:, 0] = (coord_norm[:, 0] / (float(W-1)/2.)) - 1.
            coord_norm[:, 1] = (coord_norm[:, 1] / (float(H-1)/2.)) - 1.
            coord_norm = coord_norm.permute(0, 2, 3, 1)

            desc2 = torch.nn.functional.grid_sample(desc_block[0], coord_norm)
            desc3 = torch.nn.functional.grid_sample(desc_block[1], coord_norm)
            aware = desc_block[2]

            # Keep the channel axis on the weights (B,1,Hc,Wc) so they
            # broadcast over the 256 descriptor channels for any batch size;
            # a (B,Hc,Wc) weight only lines up when B == 1.
            desc = torch.mul(desc2, aware[:, 0:1, :, :]) + torch.mul(desc3, aware[:, 1:2, :, :])
            desc = desc.div(torch.unsqueeze(torch.norm(desc, p=2, dim=1), 1))  # Divide by norm to normalize.

            return score, coord, desc

        return score, coord, desc_block
+ """ + B, H, W, _ = sources.shape + warped_sources = [] + for b in range(B): + source = sources[b].clone() + source = source.view(-1,2) + ''' + [X, [M11, M12, M13 [x, M11*x + M12*y + M13 [M11, M12 [M13, + Y, = M21, M22, M23 * y, = M21*x + M22*y + M23 = [x, y] * M21, M22 + M23, + Z] M31, M32, M33] 1] M31*x + M32*y + M33 M31, M32].T M33] + ''' + source = torch.addmm(homographies[b,:,2], source, homographies[b,:,:2].t()) + source.mul_(1/source[:,2].unsqueeze(1)) + source = source[:,:2].contiguous().view(H,W,2) + warped_sources.append(source) + return torch.stack(warped_sources, dim=0) + + +class PointModel(nn.Module): + def __init__(self, is_test=False): + super(PointModel, self).__init__() + self.is_test = is_test + self.interestpoint_module = InterestPointModule(is_test=self.is_test) + self.correspondence_module = CorrespondenceModule() + self.norm_rgb = tvf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + def forward(self, *args): + img = args[0] + img = self.norm_rgb(img) + score, coord, desc = self.interestpoint_module(img) + return score, coord, desc diff --git a/third_party/lanet/network_v1/modules.py b/third_party/lanet/network_v1/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..4daed5f12c40e40f6fc8347f701235e141839ada --- /dev/null +++ b/third_party/lanet/network_v1/modules.py @@ -0,0 +1,174 @@ +from curses import is_term_resized +import torch +import torch.nn as nn +import torch.nn.functional as F + +from torchvision import models +from utils import image_grid + +class ConvBlock(nn.Module): + def __init__(self, in_channels, out_channels): + super(ConvBlock, self).__init__() + + self.conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True) + ) + + def 
forward(self, x): + return self.conv(x) + +class DilationConv3x3(nn.Module): + def __init__(self, in_channels, out_channels): + super(DilationConv3x3, self).__init__() + + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=2, dilation=2, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class InterestPointModule(nn.Module): + def __init__(self, is_test=False): + super(InterestPointModule, self).__init__() + self.is_test = is_test + + model = models.vgg16_bn(pretrained=True) + + # use the first 23 layers as encoder + self.encoder = nn.Sequential( + *list(model.features.children())[: 33] + ) + + # score head + self.score_head = nn.Sequential( + nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(256), + nn.ReLU(inplace=True), + nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + ) + self.softmax = nn.Softmax(dim=1) + + # location head + self.loc_head = nn.Sequential( + nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(256), + nn.ReLU(inplace=True), + ) + # location out + self.loc_out = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1) + self.shift_out = nn.Conv2d(256, 1, kernel_size=3, stride=1, padding=1) + + # descriptor out + self.des_out2 = DilationConv3x3(128, 256) + self.des_out3 = DilationConv3x3(256, 256) + self.des_out4 = DilationConv3x3(512, 256) + + def forward(self, x): + B, _, H, W = x.shape + + x = self.encoder[2](self.encoder[1](self.encoder[0](x))) + x = self.encoder[5](self.encoder[4](self.encoder[3](x))) + + x = self.encoder[6](x) + x = self.encoder[9](self.encoder[8](self.encoder[7](x))) + x2 = self.encoder[12](self.encoder[11](self.encoder[10](x))) + + x = self.encoder[13](x2) + x = self.encoder[16](self.encoder[15](self.encoder[14](x))) + x = self.encoder[19](self.encoder[18](self.encoder[17](x))) + x3 = self.encoder[22](self.encoder[21](self.encoder[20](x))) 
+ + x = self.encoder[23](x3) + x = self.encoder[26](self.encoder[25](self.encoder[24](x))) + x = self.encoder[29](self.encoder[28](self.encoder[27](x))) + x = self.encoder[32](self.encoder[31](self.encoder[30](x))) + + + B, _, Hc, Wc = x.shape + + # score head + score_x = self.score_head(x) + aware = self.softmax(score_x[:, 0:3, :, :]) + score = score_x[:, 3, :, :].unsqueeze(1).sigmoid() + + border_mask = torch.ones(B, Hc, Wc) + border_mask[:, 0] = 0 + border_mask[:, Hc - 1] = 0 + border_mask[:, :, 0] = 0 + border_mask[:, :, Wc - 1] = 0 + border_mask = border_mask.unsqueeze(1) + score = score * border_mask.to(score.device) + + # location head + coord_x = self.loc_head(x) + coord_cell = self.loc_out(coord_x).tanh() + + shift_ratio = self.shift_out(coord_x).sigmoid() * 2.0 + + step = ((H/Hc)-1) / 2. + center_base = image_grid(B, Hc, Wc, + dtype=coord_cell.dtype, + device=coord_cell.device, + ones=False, normalized=False).mul(H/Hc) + step + + coord_un = center_base.add(coord_cell.mul(shift_ratio * step)) + coord = coord_un.clone() + coord[:, 0] = torch.clamp(coord_un[:, 0], min=0, max=W-1) + coord[:, 1] = torch.clamp(coord_un[:, 1], min=0, max=H-1) + + # descriptor block + desc_block = [] + desc_block.append(self.des_out2(x2)) + desc_block.append(self.des_out3(x3)) + desc_block.append(self.des_out4(x)) + desc_block.append(aware) + + if self.is_test: + coord_norm = coord[:, :2].clone() + coord_norm[:, 0] = (coord_norm[:, 0] / (float(W-1)/2.)) - 1. + coord_norm[:, 1] = (coord_norm[:, 1] / (float(H-1)/2.)) - 1. 
+ coord_norm = coord_norm.permute(0, 2, 3, 1) + + desc2 = torch.nn.functional.grid_sample(desc_block[0], coord_norm) + desc3 = torch.nn.functional.grid_sample(desc_block[1], coord_norm) + desc4 = torch.nn.functional.grid_sample(desc_block[2], coord_norm) + aware = desc_block[3] + + desc = torch.mul(desc2, aware[:, 0, :, :]) + torch.mul(desc3, aware[:, 1, :, :]) + torch.mul(desc4, aware[:, 2, :, :]) + desc = desc.div(torch.unsqueeze(torch.norm(desc, p=2, dim=1), 1)) # Divide by norm to normalize. + + return score, coord, desc + + return score, coord, desc_block + +class CorrespondenceModule(nn.Module): + def __init__(self, match_type='dual_softmax'): + super(CorrespondenceModule, self).__init__() + self.match_type = match_type + + if self.match_type == 'dual_softmax': + self.temperature = 0.1 + else: + raise NotImplementedError() + + def forward(self, source_desc, target_desc): + b, c, h, w = source_desc.size() + + source_desc = source_desc.div(torch.unsqueeze(torch.norm(source_desc, p=2, dim=1), 1)).view(b, -1, h*w) + target_desc = target_desc.div(torch.unsqueeze(torch.norm(target_desc, p=2, dim=1), 1)).view(b, -1, h*w) + + if self.match_type == 'dual_softmax': + sim_mat = torch.einsum("bcm, bcn -> bmn", source_desc, target_desc) / self.temperature + confidence_matrix = F.softmax(sim_mat, 1) * F.softmax(sim_mat, 2) + else: + raise NotImplementedError() + + return confidence_matrix diff --git a/third_party/lanet/test.py b/third_party/lanet/test.py new file mode 100644 index 0000000000000000000000000000000000000000..cc9365f5c92cbd69c3ee9250ff66b07bd1eed1c6 --- /dev/null +++ b/third_party/lanet/test.py @@ -0,0 +1,87 @@ +import os +import cv2 +import argparse +import numpy as np +import torch +import torchvision + +from torchvision import datasets, transforms +from torch.autograd import Variable +from network_v0.model import PointModel +from datasets.hp_loader import PatchesDataset +from torch.utils.data import DataLoader +from evaluation.evaluate import 
evaluate_keypoint_net + + +def main(): + parser = argparse.ArgumentParser(description='Testing') + parser.add_argument('--device', default=0, type=int, help='which gpu to run on.') + parser.add_argument('--test_dir', required=True, type=str, help='Test data path.') + opt = parser.parse_args() + + torch.manual_seed(0) + use_gpu = torch.cuda.is_available() + if use_gpu: + torch.cuda.set_device(opt.device) + + # Load data in 320x240 + hp_dataset_320x240 = PatchesDataset(root_dir=opt.test_dir, use_color=True, output_shape=(320, 240), type='all') + data_loader_320x240 = DataLoader(hp_dataset_320x240, + batch_size=1, + pin_memory=False, + shuffle=False, + num_workers=4, + worker_init_fn=None, + sampler=None) + + # Load data in 640x480 + hp_dataset_640x480 = PatchesDataset(root_dir=opt.test_dir, use_color=True, output_shape=(640, 480), type='all') + data_loader_640x480 = DataLoader(hp_dataset_640x480, + batch_size=1, + pin_memory=False, + shuffle=False, + num_workers=4, + worker_init_fn=None, + sampler=None) + + # Load model + model = PointModel(is_test=True) + ckpt = torch.load('./checkpoints/PointModel_v0.pth') + model.load_state_dict(ckpt['model_state']) + model = model.eval() + if use_gpu: + model = model.cuda() + + + print('Evaluating in 320x240, 300 points') + rep, loc, c1, c3, c5, mscore = evaluate_keypoint_net( + data_loader_320x240, + model, + output_shape=(320, 240), + top_k=300) + + print('Repeatability: {0:.3f}'.format(rep)) + print('Localization Error: {0:.3f}'.format(loc)) + print('H-1 Accuracy: {:.3f}'.format(c1)) + print('H-3 Accuracy: {:.3f}'.format(c3)) + print('H-5 Accuracy: {:.3f}'.format(c5)) + print('Matching Score: {:.3f}'.format(mscore)) + print('\n') + + print('Evaluating in 640x480, 1000 points') + rep, loc, c1, c3, c5, mscore = evaluate_keypoint_net( + data_loader_640x480, + model, + output_shape=(640, 480), + top_k=1000) + + print('Repeatability: {0:.3f}'.format(rep)) + print('Localization Error: {0:.3f}'.format(loc)) + print('H-1 Accuracy: 
{:.3f}'.format(c1)) + print('H-3 Accuracy: {:.3f}'.format(c3)) + print('H-5 Accuracy: {:.3f}'.format(c5)) + print('Matching Score: {:.3f}'.format(mscore)) + print('\n') + +if __name__ == '__main__': + main() diff --git a/third_party/lanet/train.py b/third_party/lanet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..3076a0fdb78a59bfd64367399c0f2b0de1297653 --- /dev/null +++ b/third_party/lanet/train.py @@ -0,0 +1,129 @@ +import os +import torch +import torch.optim as optim +from tqdm import tqdm + +from torch.autograd import Variable + +from network_v0.model import PointModel +from loss_function import KeypointLoss + +class Trainer(object): + def __init__(self, config, train_loader=None): + self.config = config + # data parameters + self.train_loader = train_loader + self.num_train = len(self.train_loader) + + # training parameters + self.max_epoch = config.max_epoch + self.start_epoch = config.start_epoch + self.momentum = config.momentum + self.lr = config.init_lr + self.lr_factor = config.lr_factor + self.display = config.display + + # misc params + self.use_gpu = config.use_gpu + self.random_seed = config.seed + self.gpu = config.gpu + self.ckpt_dir = config.ckpt_dir + self.ckpt_name = '{}-{}'.format(config.ckpt_name, config.seed) + + # build model + self.model = PointModel(is_test=False) + + # training on GPU + if self.use_gpu: + torch.cuda.set_device(self.gpu) + self.model.cuda() + + print('Number of model parameters: {:,}'.format(sum([p.data.nelement() for p in self.model.parameters()]))) + + # build loss functional + self.loss_func = KeypointLoss(config) + + # build optimizer and scheduler + self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr) + self.lr_scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[4, 8], gamma=self.lr_factor) + + # resume + if int(self.config.start_epoch) > 0: + self.config.start_epoch, self.model, self.optimizer, self.lr_scheduler = 
self.load_checkpoint(int(self.config.start_epoch), self.model, self.optimizer, self.lr_scheduler) + + def train(self): + print("\nTrain on {} samples".format(self.num_train)) + self.save_checkpoint(0, self.model, self.optimizer, self.lr_scheduler) + for epoch in range(self.start_epoch, self.max_epoch): + print("\nEpoch: {}/{} --lr: {:.6f}".format(epoch+1, self.max_epoch, self.lr)) + # train for one epoch + self.train_one_epoch(epoch) + if self.lr_scheduler: + self.lr_scheduler.step() + self.save_checkpoint(epoch+1, self.model, self.optimizer, self.lr_scheduler) + + def train_one_epoch(self, epoch): + self.model.train() + for (i, data) in enumerate(tqdm(self.train_loader)): + + if self.use_gpu: + source_img = data['image_aug'].cuda() + target_img = data['image'].cuda() + homography = data['homography'].cuda() + + source_img = Variable(source_img) + target_img = Variable(target_img) + homography = Variable(homography) + + # forward propogation + output = self.model(source_img, target_img, homography) + + # compute loss + loss, loc_loss, desc_loss, score_loss, corres_loss = self.loss_func(output) + + # compute gradients and update + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + # print training info + msg_batch = "Epoch:{} Iter:{} lr:{:.4f} "\ + "loc_loss={:.4f} desc_loss={:.4f} score_loss={:.4f} corres_loss={:.4f} "\ + "loss={:.4f} "\ + .format((epoch + 1), i, self.lr, loc_loss.data, desc_loss.data, score_loss.data, corres_loss.data, loss.data) + + if((i % self.display) == 0): + print(msg_batch) + return + + def save_checkpoint(self, epoch, model, optimizer, lr_scheduler): + filename = self.ckpt_name + '_' + str(epoch) + '.pth' + torch.save( + {'epoch': epoch, + 'model_state': model.state_dict(), + 'optimizer_state': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict()}, + os.path.join(self.ckpt_dir, filename)) + + def load_checkpoint(self, epoch, model, optimizer, lr_scheduler): + filename = self.ckpt_name + '_' + str(epoch) 
+ '.pth' + ckpt = torch.load(os.path.join(self.ckpt_dir, filename)) + epoch = ckpt['epoch'] + model.load_state_dict(ckpt['model_state']) + optimizer.load_state_dict(ckpt['optimizer_state']) + lr_scheduler.load_state_dict(ckpt['lr_scheduler']) + + print("[*] Loaded {} checkpoint @ epoch {}".format(filename, ckpt['epoch'])) + + return epoch, model, optimizer, lr_scheduler + + + + + + + + + + + \ No newline at end of file diff --git a/third_party/lanet/utils.py b/third_party/lanet/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d5422ebcfc2847be047391791d891a09388ca7d1 --- /dev/null +++ b/third_party/lanet/utils.py @@ -0,0 +1,102 @@ +import os +import torch + +import torchvision.transforms as transforms +from functools import lru_cache + +@lru_cache(maxsize=None) +def meshgrid(B, H, W, dtype, device, normalized=False): + """ + Create mesh-grid given batch size, height and width dimensions. From https://github.com/TRI-ML/KP2D. + + Parameters + ---------- + B: int + Batch size + H: int + Grid Height + W: int + Batch size + dtype: torch.dtype + Tensor dtype + device: str + Tensor device + normalized: bool + Normalized image coordinates or integer-grid. + + Returns + ------- + xs: torch.Tensor + Batched mesh-grid x-coordinates (BHW). + ys: torch.Tensor + Batched mesh-grid y-coordinates (BHW). + """ + if normalized: + xs = torch.linspace(-1, 1, W, device=device, dtype=dtype) + ys = torch.linspace(-1, 1, H, device=device, dtype=dtype) + else: + xs = torch.linspace(0, W-1, W, device=device, dtype=dtype) + ys = torch.linspace(0, H-1, H, device=device, dtype=dtype) + ys, xs = torch.meshgrid([ys, xs]) + return xs.repeat([B, 1, 1]), ys.repeat([B, 1, 1]) + + +@lru_cache(maxsize=None) +def image_grid(B, H, W, dtype, device, ones=True, normalized=False): + """ + Create an image mesh grid with shape B3HW given image shape BHW. From https://github.com/TRI-ML/KP2D. 
+ + Parameters + ---------- + B: int + Batch size + H: int + Grid Height + W: int + Batch size + dtype: str + Tensor dtype + device: str + Tensor device + ones : bool + Use (x, y, 1) coordinates + normalized: bool + Normalized image coordinates or integer-grid. + + Returns + ------- + grid: torch.Tensor + Mesh-grid for the corresponding image shape (B3HW) + """ + xs, ys = meshgrid(B, H, W, dtype, device, normalized=normalized) + coords = [xs, ys] + if ones: + coords.append(torch.ones_like(xs)) # BHW + grid = torch.stack(coords, dim=1) # B3HW + return grid + +def to_tensor_sample(sample, tensor_type='torch.FloatTensor'): + """ + Casts the keys of sample to tensors. From https://github.com/TRI-ML/KP2D. + + Parameters + ---------- + sample : dict + Input sample + tensor_type : str + Type of tensor we are casting to + + Returns + ------- + sample : dict + Sample with keys cast as tensors + """ + transform = transforms.ToTensor() + sample['image'] = transform(sample['image']).type(tensor_type) + return sample + +def prepare_dirs(config): + for path in [config.ckpt_dir]: + if not os.path.exists(path): + os.makedirs(path) + diff --git a/third_party/r2d2/LICENSE b/third_party/r2d2/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9144e3e43fe3d62cd66971ab021466949fc4ee14 --- /dev/null +++ b/third_party/r2d2/LICENSE @@ -0,0 +1,69 @@ +Creative Commons + +Attribution-NonCommercial-ShareAlike 3.0 Unported + +CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM ITS USE. +License +THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. 
ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. + +BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. + +1. Definitions + +"Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. +"Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(g) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. 
+"Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. +"License Elements" means the following high-level license attributes as selected by Licensor and indicated in the title of this License: Attribution, Noncommercial, ShareAlike. +"Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. +"Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. 
+"Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. +"You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. 
+"Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. +"Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. + +2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. + +3. License Grant. Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: + +to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; +to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. 
For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; +to Distribute and Publicly Perform the Work including as incorporated in Collections; and, +to Distribute and Publicly Perform Adaptations. +The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved, including but not limited to the rights described in Section 4(e). + +4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: + +You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. 
If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(d), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(d), as requested. +You may Distribute or Publicly Perform an Adaptation only under: (i) the terms of this License; (ii) a later version of this License with the same License Elements as this License; (iii) a Creative Commons jurisdiction license (either this or a later license version) that contains the same License Elements as this License (e.g., Attribution-NonCommercial-ShareAlike 3.0 US) ("Applicable License"). You must include a copy of, or the URI, for Applicable License with every copy of each Adaptation You Distribute or Publicly Perform. You may not offer or impose any terms on the Adaptation that restrict the terms of the Applicable License or the ability of the recipient of the Adaptation to exercise the rights granted to that recipient under the terms of the Applicable License. You must keep intact all notices that refer to the Applicable License and to the disclaimer of warranties with every copy of the Work as included in the Adaptation You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Adaptation, You may not impose any effective technological measures on the Adaptation that restrict the ability of a recipient of the Adaptation from You to exercise the rights granted to that recipient under the terms of the Applicable License. This Section 4(b) applies to the Adaptation as incorporated in a Collection, but this does not require the Collection apart from the Adaptation itself to be made subject to the terms of the Applicable License. 
+You may not exercise any of the rights granted to You in Section 3 above in any manner that is primarily intended for or directed toward commercial advantage or private monetary compensation. The exchange of the Work for other copyrighted works by means of digital file-sharing or otherwise shall not be considered to be intended for or directed toward commercial advantage or private monetary compensation, provided there is no payment of any monetary compensation in con-nection with the exchange of copyrighted works. +If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and, (iv) consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). 
The credit required by this Section 4(d) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. +For the avoidance of doubt: + +Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; +Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License if Your exercise of such rights is for a purpose or use which is otherwise than noncommercial as permitted under Section 4(c) and otherwise waives the right to collect royalties through any statutory or compulsory licensing scheme; and, +Voluntary License Schemes. 
The Licensor reserves the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License that is for a purpose or use which is otherwise than noncommercial as permitted under Section 4(c). +Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. +5. Representations, Warranties and Disclaimer + +UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING AND TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. 
SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO THIS EXCLUSION MAY NOT APPLY TO YOU. + +6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. Termination + +This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. +Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. +8. Miscellaneous + +Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. +Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. 
+If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. +No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. +This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You. +The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law. 
+Creative Commons Notice +Creative Commons is not a party to this License, and makes no warranty whatsoever in connection with the Work. Creative Commons will not be liable to You or any party on any legal theory for any damages whatsoever, including without limitation any general, special, incidental or consequential damages arising in connection to this license. Notwithstanding the foregoing two (2) sentences, if Creative Commons has expressly identified itself as the Licensor hereunder, it shall have all rights and obligations of Licensor. + +Except for the limited purpose of indicating to the public that the Work is licensed under the CCPL, Creative Commons does not authorize the use by either party of the trademark "Creative Commons" or any related trademark or logo of Creative Commons without the prior written consent of Creative Commons. Any permitted use will be in compliance with Creative Commons' then-current trademark usage guidelines, as may be published on its website or otherwise made available upon request from time to time. For the avoidance of doubt, this trademark restriction does not form part of this License. + +Creative Commons may be contacted at https://creativecommons.org/. \ No newline at end of file diff --git a/third_party/r2d2/NOTICE b/third_party/r2d2/NOTICE new file mode 100644 index 0000000000000000000000000000000000000000..3658c4ddefd692e904a5c3664b4bbdcafa7d57fd --- /dev/null +++ b/third_party/r2d2/NOTICE @@ -0,0 +1,140 @@ +r2d2 +Copyright 2019-present NAVER Corp. + +This project contains subcomponents with separate copyright notices and license terms. +Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses. 
+ +===== + +pytorch/pytorch +https://github.com/pytorch/pytorch + + +From PyTorch: + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) + +From Caffe2: + +Copyright (c) 2016-present, Facebook Inc. All rights reserved. + +All contributions by Facebook: +Copyright (c) 2016 Facebook Inc. + +All contributions by Google: +Copyright (c) 2015 Google Inc. +All rights reserved. + +All contributions by Yangqing Jia: +Copyright (c) 2015 Yangqing Jia +All rights reserved. + +All contributions from Caffe: +Copyright(c) 2013, 2014, 2015, the respective contributors +All rights reserved. + +All other contributions: +Copyright(c) 2015, 2016 the respective contributors +All rights reserved. + +Caffe2 uses a copyright model similar to Caffe: each contributor holds +copyright over their contributions to Caffe2. The project versioning records +all such contribution and copyright details. If a contributor wants to further +mark their specific copyright on a particular contribution, they should +indicate their copyright solely in the commit message of the change when it is +committed. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America + and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +===== + +pytorch/vision +https://github.com/pytorch/vision + + +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +===== + +tomrunia/OpticalFlow_Visualization +https://github.com/tomrunia/OpticalFlow_Visualization + + +# MIT License +# +# Copyright (c) 2018 Tom Runia +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to conditions. 
+# +# Author: Tom Runia +# Date Created: 2018-08-03 + +===== diff --git a/third_party/r2d2/README.md b/third_party/r2d2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..185b8c61863ae0c42ba864321b24c48dfbe85e30 --- /dev/null +++ b/third_party/r2d2/README.md @@ -0,0 +1,194 @@ +# R2D2: Reliable and Repeatable Detector and Descriptor # +This repository contains the implementation of the following [paper](https://europe.naverlabs.com/research/publications/r2d2-reliable-and-repeatable-detectors-and-descriptors-for-joint-sparse-local-keypoint-detection-and-feature-extraction/): + +```text +@inproceedings{r2d2, + author = {Jerome Revaud and Philippe Weinzaepfel and C{\'{e}}sar Roberto de Souza and + Martin Humenberger}, + title = {{R2D2:} Repeatable and Reliable Detector and Descriptor}, + booktitle = {NeurIPS}, + year = {2019}, +} +``` + +Fast-R2D2 +----------------- + +This repository also contains the code needed to train and extract Fast-R2D2 keypoints. +Fast-R2D2 is a revised version of R2D2 that is significantly faster, uses less memory, yet achieves the same order of precision as the original network. + + +License +------- + +Our code is released under the Creative Commons BY-NC-SA 3.0 (see [LICENSE](LICENSE) for more details), available only for non-commercial use. + + +Getting started +--------------- +You just need Python 3.6+ equipped with standard scientific packages and PyTorch 1.1+. +Typically, conda is one of the easiest ways to get started: +```bash +conda install python tqdm pillow numpy matplotlib scipy +conda install pytorch torchvision cudatoolkit=10.1 -c pytorch +``` + + +Pretrained models +----------------- +For your convenience, we provide five pre-trained models in the `models/` folder: + - `r2d2_WAF_N16.pt`: this is the model used in most experiments of the paper (on HPatches `MMA@3=0.686`).
It was trained with Web images (`W`), Aachen day-time images (`A`) and Aachen optical flow pairs (`F`). + - `r2d2_WASF_N16.pt`: this is the model used in the visual localization experiments (on HPatches `MMA@3=0.721`). It was trained with Web images (`W`), Aachen day-time images (`A`), Aachen day-night synthetic pairs (`S`), and Aachen optical flow pairs (`F`). + - `r2d2_WASF_N8_big.pt`: Same as the previous model, but trained with `N=8` instead of `N=16` in the repeatability loss. In other words, it outputs a higher density of keypoints. This can be interesting for certain applications like visual localization, but it implies a drop in MMA since keypoints get slightly less reliable. + - `faster2d2_WASF_N16.pt`: The Fast-R2D2 equivalent of r2d2_WASF_N16.pt + - `faster2d2_WASF_N8_big.pt`: The Fast-R2D2 equivalent of r2d2_WASF_N8_big.pt + +For more details about the training data, see the dedicated section below. +Here is a table that summarizes the performance of each model: + +| model name | model size
(#weights)| number of
keypoints |MMA@3 on
HPatches| +|------------------|:-----------------------:|:----------------------:|:------------------:| +|`r2d2_WAF_N16.pt` | 0.5M | 5K | 0.686 | +|`r2d2_WASF_N16.pt` | 0.5M | 5K | 0.721 | +|`r2d2_WASF_N8_big.pt`| 1.0M | 10K | 0.692 | +|`faster2d2_WASF_N8_big.pt`| 1.0M | 5K | 0.650 | + + + +Feature extraction +------------------ +To extract keypoints for a given image, simply execute: +```bash +python extract.py --model models/r2d2_WASF_N16.pt --images imgs/brooklyn.png --top-k 5000 +``` +This also works for multiple images (separated by spaces) or a `.txt` image list. +For each image, this will save the `top-k` keypoints in a file with the same path as the image and a `.r2d2` extension. +For example, they will be saved in `imgs/brooklyn.png.r2d2` for the sample command above. + +The keypoint file is in the `npz` numpy format and contains 3 fields: + - `keypoints` (`N x 3`): keypoint position (x, y and scale). Here, scale denotes the patch diameter in pixels. + - `descriptors` (`N x 128`): l2-normalized descriptors. + - `scores` (`N`): keypoint scores (the higher the better). + +*Note*: You can modify the extraction parameters (scale factor, scale range...). Run `python extract.py --help` for more information. +By default, they correspond to what is used in the paper, i.e., a scale factor equal to `2^0.25` (`--scale-f 1.189207`) and image size in the range `[256, 1024]` (`--min-size 256 --max-size 1024`). + +*Note2*: You can significantly improve the `MMA@3` score (by ~4 pts) if you can afford more computations. To do so, you just need to increase the upper-limit on the scale range by replacing `--min-size 256 --max-size 1024` with `--min-size 0 --max-size 9999 --min-scale 0.3 --max-scale 1.0`. + +Feature extraction with kapture datasets +------------------ +Kapture is a pivot file format, based on text and binary files, used to describe SFM (Structure From Motion) and more generally sensor-acquired data. + +It is available at https://github.com/naver/kapture.
+It contains conversion tools for popular formats and several popular datasets are directly available in kapture. + +It can be installed with: +```bash +pip install kapture +``` + +Datasets can be downloaded with: +```bash +kapture_download_dataset.py update +kapture_download_dataset.py list +# e.g.: install mapping and query of Extended-CMU-Seasons_slice22 +kapture_download_dataset.py install "Extended-CMU-Seasons_slice22_*" +``` +If you want to convert your own dataset into kapture, please find some examples [here](https://github.com/naver/kapture/blob/master/doc/datasets.adoc). + +Once installed, you can extract keypoints for your kapture dataset with: +```bash +python extract_kapture.py --model models/r2d2_WASF_N16.pt --kapture-root pathto/yourkapturedataset --top-k 5000 +``` + +Run `python extract_kapture.py --help` for more information on the extraction parameters. + +Evaluation on HPatches +---------------------- +The evaluation is based on the [code](https://github.com/mihaidusmanu/d2-net) from [D2-Net](https://dsmn.ml/publications/d2-net.html). +```bash +git clone https://github.com/mihaidusmanu/d2-net.git +cd d2-net/hpatches_sequences/ +bash download.sh +bash download_cache.sh +cd ../.. +ln -s d2-net/hpatches_sequences # finally create a soft-link +``` + +Once this is done, extract all the features: +```bash +python extract.py --model models/r2d2_WAF_N16.pt --images d2-net/image_list_hpatches_sequences.txt +``` + +Finally, evaluate using the iPython notebook `d2-net/hpatches_sequences/HPatches-Sequences-Matching-Benchmark.ipynb`. +You should normally get the following `MMA` plot: +![image](https://user-images.githubusercontent.com/56719813/67966238-d3cc6500-fc03-11e9-969b-5f086da26e34.png). + + +**New**: we have uploaded in the `results/` folder some pre-computed plots that you can visualize using the aforementioned ipython notebook from `d2-net` (you need to place them in the `d2-net/hpatches_sequences/cache/` folder). 
+ - `r2d2_*_N16.size-256-1024.npy`: keypoints were extracted using a limited image resolution (i.e. with `python extract.py --min-size 256 --max-size 1024 ...`) + - `r2d2_*_N16.scale-0.3-1.npy`: keypoints were extracted using a full image resolution (i.e. with `python extract.py --min-size 0 --max-size 9999 --min-scale 0.3 --max-scale 1.0`). + +Here is a summary of the results: + +| result file | training set | resolution | MMA@3 on
HPatches| note | +|--------------|:------------:|:----------:|:-------------------:|------| +|[r2d2_W_N16.scale-0.3-1.npy](results/r2d2_W_N16.scale-0.3-1.npy) | `W` only | full | 0.699 | no annotation whatsoever | +|[r2d2_WAF_N16.size-256-1024.npy](results/r2d2_WAF_N16.size-256-1024.npy) | `W`+`A`+`F` | 1024 px | 0.686 | as in NeurIPS paper | +|[r2d2_WAF_N16.scale-0.3-1.npy](results/r2d2_WAF_N16.scale-0.3-1.npy) | `W`+`A`+`F` | full | 0.718 | +3.2% just from resolution | +|[r2d2_WASF_N16.size-256-1024.npy](results/r2d2_WASF_N16.size-256-1024.npy) | `W`+`A`+`S`+`F` | 1024 px | 0.721 | with style transfer | +|[r2d2_WASF_N16.scale-0.3-1.npy](results/r2d2_WASF_N16.scale-0.3-1.npy) | `W`+`A`+`S`+`F` | full | 0.758 | +3.7% just from resolution | + +Evaluation on visuallocalization.net +---------------------- +In our paper, we report visual localization results on the Aachen Day-Night dataset (nighttime images) available at visuallocalization.net. We used the provided local feature evaluation pipeline provided here: https://github.com/tsattler/visuallocalizationbenchmark/tree/master/local_feature_evaluation +In the meantime, the ground truth poses as well as the error thresholds of the Aachen nighttime images (which are used for the local feature evaluation) have been improved and changed on the website, thus, the original results reported in the paper cannot be reproduced. + +Training the model +------------------ +We provide all the code and data to retrain the model as described in the paper. + +### Downloading training data ### +The first step is to download the training data. +First, create a folder that will host all data in a place where you have sufficient disk space (15 GB required). 
+```bash +DATA_ROOT=/path/to/data +mkdir -p $DATA_ROOT +ln -fs $DATA_ROOT data +mkdir $DATA_ROOT/aachen +``` +Then, manually download the [Aachen dataset here](https://drive.google.com/drive/folders/1fvb5gwqHCV4cr4QPVIEMTWkIhCpwei7n) and save it as `$DATA_ROOT/aachen/database_and_query_images.zip`. +Finally, execute the download script to complete the installation. It will download the remaining training data and will extract all files properly. +```bash +./download_training_data.sh +``` +The following datasets are now installed: + +| full name |tag|Disk |# imgs|# pairs| python instance | +|---------------------------------|---|-----|------|-------|--------------------------------| +| Random Web images | W |2.7GB| 3125 | 3125 | `auto_pairs(web_images)` | +| Aachen DB images | A |2.5GB| 4479 | 4479 | `auto_pairs(aachen_db_images)` | +| Aachen style transfer pairs | S |0.3GB| 8115 | 3636 | `aachen_style_transfer_pairs` | +| Aachen optical flow pairs | F |2.9GB| 4479 | 4770 | `aachen_flow_pairs` | + +Note that you can visualize the content of each dataset using the following command: +```bash +python -m tools.dataloader "PairLoader(aachen_flow_pairs)" +``` +![image](https://user-images.githubusercontent.com/56719813/68311498-eafecd00-00b1-11ea-8d37-6693f3f90c9f.png) + + +### Training details ### +To train the model, simply run this command: +```bash +python train.py --save-path /path/to/model.pt +``` +On a recent GPU, it takes 30 min per epoch, so ~12h for 25 epochs. +You should get a model that scores `0.71 +/- 0.01` in `MMA@3` on HPatches (this standard-deviation is similar to what is reported in Table 1 of the paper). + +If you want to retrain fast-r2d2 architectures, run: +```bash +python train.py --save-path /path/to/fast-model.pt --net 'Fast_Quad_L2Net_ConfCFS()' +``` + +Note that you can fully configure the training (i.e. select the data sources, change the batch size, learning rate, number of epochs etc.). 
One easy way to improve the model is to train for more epochs, e.g. `--epochs 50`. For more details about all parameters, run `python train.py --help`. diff --git a/third_party/r2d2/datasets/__init__.py b/third_party/r2d2/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8f11df21be72856ea365f6efd7a389aba267562b --- /dev/null +++ b/third_party/r2d2/datasets/__init__.py @@ -0,0 +1,33 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +from .pair_dataset import CatPairDataset, SyntheticPairDataset, TransformedPairs +from .imgfolder import ImgFolder + +from .web_images import RandomWebImages +from .aachen import * + +# try to instanciate datasets +import sys +try: + web_images = RandomWebImages(0, 52) +except AssertionError as e: + print(f"Dataset web_images not available, reason: {e}", file=sys.stderr) + +try: + aachen_db_images = AachenImages_DB() +except AssertionError as e: + print(f"Dataset aachen_db_images not available, reason: {e}", file=sys.stderr) + +try: + aachen_style_transfer_pairs = AachenPairs_StyleTransferDayNight() +except AssertionError as e: + print(f"Dataset aachen_style_transfer_pairs not available, reason: {e}", file=sys.stderr) + +try: + aachen_flow_pairs = AachenPairs_OpticalFlow() +except AssertionError as e: + print(f"Dataset aachen_flow_pairs not available, reason: {e}", file=sys.stderr) + + diff --git a/third_party/r2d2/datasets/aachen.py b/third_party/r2d2/datasets/aachen.py new file mode 100644 index 0000000000000000000000000000000000000000..4ddb324cea01da2430ee89b32c7627b34c01a41f --- /dev/null +++ b/third_party/r2d2/datasets/aachen.py @@ -0,0 +1,146 @@ +# Copyright 2019-present NAVER Corp. 
+# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import os, pdb +import numpy as np +from PIL import Image + +from .dataset import Dataset +from .pair_dataset import PairDataset, StillPairDataset + + +class AachenImages (Dataset): + """ Loads all images from the Aachen Day-Night dataset + """ + def __init__(self, select='db day night', root='data/aachen'): + Dataset.__init__(self) + self.root = root + self.img_dir = 'images_upright' + self.select = set(select.split()) + assert self.select, 'Nothing was selected' + + self.imgs = [] + root = os.path.join(root, self.img_dir) + for dirpath, _, filenames in os.walk(root): + r = dirpath[len(root)+1:] + if not(self.select & set(r.split('/'))): continue + self.imgs += [os.path.join(r,f) for f in filenames if f.endswith('.jpg')] + + self.nimg = len(self.imgs) + assert self.nimg, 'Empty Aachen dataset' + + def get_key(self, idx): + return self.imgs[idx] + + + +class AachenImages_DB (AachenImages): + """ Only database (db) images. + """ + def __init__(self, **kw): + AachenImages.__init__(self, select='db', **kw) + self.db_image_idxs = {self.get_tag(i) : i for i,f in enumerate(self.imgs)} + + def get_tag(self, idx): + # returns image tag == img number (name) + return os.path.split( self.imgs[idx][:-4] )[1] + + + +class AachenPairs_StyleTransferDayNight (AachenImages_DB, StillPairDataset): + """ synthetic day-night pairs of images + (night images obtained using autoamtic style transfer from web night images) + """ + def __init__(self, root='data/aachen/style_transfer', **kw): + StillPairDataset.__init__(self) + AachenImages_DB.__init__(self, **kw) + old_root = os.path.join(self.root, self.img_dir) + self.root = os.path.commonprefix((old_root, root)) + self.img_dir = '' + + newpath = lambda folder, f: os.path.join(folder, f)[len(self.root):] + self.imgs = [newpath(old_root, f) for f in self.imgs] + + self.image_pairs = [] + for fname in os.listdir(root): + tag = fname.split('.jpg.st_')[0] + 
self.image_pairs.append((self.db_image_idxs[tag], len(self.imgs))) + self.imgs.append(newpath(root, fname)) + + self.nimg = len(self.imgs) + self.npairs = len(self.image_pairs) + assert self.nimg and self.npairs + + + +class AachenPairs_OpticalFlow (AachenImages_DB, PairDataset): + """ Image pairs from Aachen db with optical flow. + """ + def __init__(self, root='data/aachen/optical_flow', **kw): + PairDataset.__init__(self) + AachenImages_DB.__init__(self, **kw) + self.root_flow = root + + # find out the subsest of valid pairs from the list of flow files + flows = {f for f in os.listdir(os.path.join(root, 'flow')) if f.endswith('.png')} + masks = {f for f in os.listdir(os.path.join(root, 'mask')) if f.endswith('.png')} + assert flows == masks, 'Missing flow or mask pairs' + + make_pair = lambda f: tuple(self.db_image_idxs[v] for v in f[:-4].split('_')) + self.image_pairs = [make_pair(f) for f in flows] + self.npairs = len(self.image_pairs) + assert self.nimg and self.npairs + + def get_mask_filename(self, pair_idx): + tag_a, tag_b = map(self.get_tag, self.image_pairs[pair_idx]) + return os.path.join(self.root_flow, 'mask', f'{tag_a}_{tag_b}.png') + + def get_mask(self, pair_idx): + return np.asarray(Image.open(self.get_mask_filename(pair_idx))) + + def get_flow_filename(self, pair_idx): + tag_a, tag_b = map(self.get_tag, self.image_pairs[pair_idx]) + return os.path.join(self.root_flow, 'flow', f'{tag_a}_{tag_b}.png') + + def get_flow(self, pair_idx): + fname = self.get_flow_filename(pair_idx) + try: + return self._png2flow(fname) + except IOError: + flow = open(fname[:-4], 'rb') + help = np.fromfile(flow, np.float32, 1) + assert help == 202021.25 + W, H = np.fromfile(flow, np.int32, 2) + flow = np.fromfile(flow, np.float32).reshape((H, W, 2)) + return self._flow2png(flow, fname) + + def get_pair(self, idx, output=()): + if isinstance(output, str): + output = output.split() + + img1, img2 = map(self.get_image, self.image_pairs[idx]) + meta = {} + + if 'flow' in 
output or 'aflow' in output: + flow = self.get_flow(idx) + assert flow.shape[:2] == img1.size[::-1] + meta['flow'] = flow + H, W = flow.shape[:2] + meta['aflow'] = flow + np.mgrid[:H,:W][::-1].transpose(1,2,0) + + if 'mask' in output: + mask = self.get_mask(idx) + assert mask.shape[:2] == img1.size[::-1] + meta['mask'] = mask + + return img1, img2, meta + + + + +if __name__ == '__main__': + print(aachen_db_images) + print(aachen_style_transfer_pairs) + print(aachen_flow_pairs) + pdb.set_trace() diff --git a/third_party/r2d2/datasets/dataset.py b/third_party/r2d2/datasets/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..80d893b8ea4ead7845f35c4fe82c9f5a9b849de3 --- /dev/null +++ b/third_party/r2d2/datasets/dataset.py @@ -0,0 +1,77 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import os +import json +import pdb +import numpy as np + + +class Dataset(object): + ''' Base class for a dataset. To be overloaded. + ''' + root = '' + img_dir = '' + nimg = 0 + + def __len__(self): + return self.nimg + + def get_key(self, img_idx): + raise NotImplementedError() + + def get_filename(self, img_idx, root=None): + return os.path.join(root or self.root, self.img_dir, self.get_key(img_idx)) + + def get_image(self, img_idx): + from PIL import Image + fname = self.get_filename(img_idx) + try: + return Image.open(fname).convert('RGB') + except Exception as e: + raise IOError("Could not load image %s (reason: %s)" % (fname, str(e))) + + def __repr__(self): + res = 'Dataset: %s\n' % self.__class__.__name__ + res += ' %d images' % self.nimg + res += '\n root: %s...\n' % self.root + return res + + + +class CatDataset (Dataset): + ''' Concatenation of several datasets. 
+ ''' + def __init__(self, *datasets): + assert len(datasets) >= 1 + self.datasets = datasets + offsets = [0] + for db in datasets: + offsets.append(db.nimg) + self.offsets = np.cumsum(offsets) + self.nimg = self.offsets[-1] + self.root = None + + def which(self, i): + pos = np.searchsorted(self.offsets, i, side='right')-1 + assert pos < self.nimg, 'Bad image index %d >= %d' % (i, self.nimg) + return pos, i - self.offsets[pos] + + def get_key(self, i): + b, i = self.which(i) + return self.datasets[b].get_key(i) + + def get_filename(self, i): + b, i = self.which(i) + return self.datasets[b].get_filename(i) + + def __repr__(self): + fmt_str = "CatDataset(" + for db in self.datasets: + fmt_str += str(db).replace("\n"," ") + ', ' + return fmt_str[:-2] + ')' + + + + diff --git a/third_party/r2d2/datasets/imgfolder.py b/third_party/r2d2/datasets/imgfolder.py new file mode 100644 index 0000000000000000000000000000000000000000..45f7bc9ee4c3ba5f04380dbc02ad17b6463cf32f --- /dev/null +++ b/third_party/r2d2/datasets/imgfolder.py @@ -0,0 +1,23 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import os, pdb + +from .dataset import Dataset +from .pair_dataset import SyntheticPairDataset + + +class ImgFolder (Dataset): + """ load all images in a folder (no recursion). + """ + def __init__(self, root, imgs=None, exts=('.jpg','.png','.ppm')): + Dataset.__init__(self) + self.root = root + self.imgs = imgs or [f for f in os.listdir(root) if f.endswith(exts)] + self.nimg = len(self.imgs) + + def get_key(self, idx): + return self.imgs[idx] + + diff --git a/third_party/r2d2/datasets/pair_dataset.py b/third_party/r2d2/datasets/pair_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..aeed98b6700e0ba108bb44abccc20351d16f3295 --- /dev/null +++ b/third_party/r2d2/datasets/pair_dataset.py @@ -0,0 +1,287 @@ +# Copyright 2019-present NAVER Corp. 
class PairDataset (Dataset):
    """ A dataset that serves image pairs with ground-truth pixel correspondences.
    """
    def __init__(self):
        Dataset.__init__(self)
        self.npairs = 0

    def get_filename(self, img_idx, root=None):
        """ Return the filename of one image, or a tuple of two filenames
            when `img_idx` is a pair of indices (see `is_pair`).
        """
        if is_pair(img_idx): # if img_idx is a pair of indices, we return a pair of filenames
            return tuple(Dataset.get_filename(self, i, root) for i in img_idx)
        return Dataset.get_filename(self, img_idx, root)

    def get_image(self, img_idx):
        """ Return one image, or a tuple of two images when `img_idx` is a
            pair of indices (see `is_pair`).
        """
        if is_pair(img_idx): # if img_idx is a pair of indices, we return a pair of images
            return tuple(Dataset.get_image(self, i) for i in img_idx)
        return Dataset.get_image(self, img_idx)

    # The four accessors below are hooks: this base class only raises.
    def get_corres_filename(self, pair_idx):
        raise NotImplementedError()

    def get_homography_filename(self, pair_idx):
        raise NotImplementedError()

    def get_flow_filename(self, pair_idx):
        raise NotImplementedError()

    def get_mask_filename(self, pair_idx):
        raise NotImplementedError()

    def get_pair(self, idx, output=()):
        """ returns (img1, img2, `metadata`)

        `metadata` is a dict() that can contain:
            flow: optical flow
            aflow: absolute flow
            corres: list of 2d-2d correspondences
            mask: boolean image of flow validity (in the first image)
            ...
        """
        raise NotImplementedError()

    def get_paired_images(self):
        """ Return the set of filenames of all images that appear in a pair.
            Reads `self.image_pairs`, which this class does not define itself.
        """
        fns = set()
        for i in range(self.npairs):
            a,b = self.image_pairs[i]
            fns.add(self.get_filename(a))
            fns.add(self.get_filename(b))
        return fns

    def __len__(self):
        return self.npairs # size should correspond to the number of pairs, not images

    def __repr__(self):
        res = 'Dataset: %s\n' % self.__class__.__name__
        res += '  %d images,' % self.nimg
        res += ' %d image pairs' % self.npairs
        res += '\n  root: %s...\n' % self.root
        return res

    @staticmethod
    def _flow2png(flow, path):
        """ Quantize `flow` to 1/16 units, clip it to the int16 range and save
            it as an image at `path`. Returns the quantized flow (as stored).
        """
        flow = np.clip(np.around(16*flow), -2**15, 2**15-1)
        # reinterpret the int16 values as raw uint8 bytes for the image writer
        raw = np.int16(flow).view(np.uint8)
        Image.fromarray(raw).save(path)
        return flow / 16

    @staticmethod
    def _png2flow(path):
        """ Inverse of `_flow2png`: load the image at `path` and return a float32 flow.
            Raises IOError (chained to the original error) when loading fails.
        """
        try:
            flow = np.asarray(Image.open(path)).view(np.int16)
            return np.float32(flow) / 16
        except Exception as err:
            # keep the underlying cause, and let KeyboardInterrupt/SystemExit through
            raise IOError("Error loading flow for %s" % path) from err



class StillPairDataset (PairDataset):
    """ A dataset of 'still' image pairs.
        By overloading a normal image dataset, it appends the get_pair(i) function
        that serves trivial image pairs (img1, img2) where img1 == img2 == get_image(i).
    """
    def get_pair(self, pair_idx, output=()):
        """ Return (img1, img2, meta) for pair `pair_idx`.
            `output` (sequence or space-separated string) selects the entries
            of `meta`: 'aflow'/'flow', 'mask', 'homography'.
        """
        if isinstance(output, str): output = output.split()
        img1, img2 = map(self.get_image, self.image_pairs[pair_idx])

        # the two images only differ by a per-axis scale factor
        W,H = img1.size
        sx = img2.size[0] / float(W)
        sy = img2.size[1] / float(H)

        meta = {}
        if 'aflow' in output or 'flow' in output:
            # (H, W, 2) grid of (x, y) pixel coordinates
            mgrid = np.mgrid[0:H, 0:W][::-1].transpose(1,2,0).astype(np.float32)
            meta['aflow'] = mgrid * (sx,sy)
            meta['flow'] = meta['aflow'] - mgrid

        if 'mask' in output:
            meta['mask'] = np.ones((H,W), np.uint8)

        if 'homography' in output:
            meta['homography'] = np.diag(np.float32([sx, sy, 1]))

        return img1, img2, meta



class SyntheticPairDataset (PairDataset):
    """ A synthetic generator of image pairs.
        Given a normal image dataset, it constructs pairs using random homographies & noise.
    """
    def __init__(self, dataset, scale='', distort=''):
        self.attach_dataset(dataset)
        self.distort = instanciate_transformation(distort)
        self.scale = instanciate_transformation(scale)

    def attach_dataset(self, dataset):
        """ Wrap a plain (non-pair) image dataset: one synthetic pair per image.
        """
        assert isinstance(dataset, Dataset) and not isinstance(dataset, PairDataset)
        self.dataset = dataset
        # same bookkeeping as TransformedPairs.attach_dataset
        self.nimg = dataset.nimg
        self.npairs = dataset.nimg
        self.get_image = dataset.get_image
        self.get_key = dataset.get_key
        self.get_filename = dataset.get_filename
        self.root = None

    def make_pair(self, img):
        """ Hook returning the two images to start from (here: twice the same image).
        """
        return img, img

    def get_pair(self, i, output='aflow'):
        """ Procedure:
        This function applies a series of random transformations to one original image
        to form a synthetic image pairs with perfect ground-truth.
        """
        if isinstance(output, str):
            output = output.split()

        original_img = self.dataset.get_image(i)

        scaled_image = self.scale(original_img)
        scaled_image, scaled_image2 = self.make_pair(scaled_image)
        # 'persp' starts as the identity (8 homography coefficients); the result
        # is read back from the dict returned by the distortion.
        scaled_and_distorted_image = self.distort(
            dict(img=scaled_image2, persp=(1,0,0,0,1,0,0,0)))
        W, H = scaled_image.size
        trf = scaled_and_distorted_image['persp']

        meta = dict()
        if 'aflow' in output or 'flow' in output:
            # compute optical flow
            xy = np.mgrid[0:H,0:W][::-1].reshape(2,H*W).T
            aflow = np.float32(persp_apply(trf, xy).reshape(H,W,2))
            meta['flow'] = aflow - xy.reshape(H,W,2)
            meta['aflow'] = aflow

        if 'homography' in output:
            meta['homography'] = np.float32(trf+(1,)).reshape(3,3)

        return scaled_image, scaled_and_distorted_image['img'], meta

    def __repr__(self):
        res = 'Dataset: %s\n' % self.__class__.__name__
        res += '  %d images and pairs' % self.npairs
        res += '\n  root: %s...' % self.dataset.root
        res += '\n  Scale: %s' % (repr(self.scale).replace('\n',''))
        res += '\n  Distort: %s' % (repr(self.distort).replace('\n',''))
        return res + '\n'



class TransformedPairs (PairDataset):
    """ Automatic data augmentation for pre-existing image pairs.
        Given an image pair dataset, it generates synthetically jittered pairs
        using random transformations (e.g. homographies & noise).
    """
    def __init__(self, dataset, trf=''):
        self.attach_dataset(dataset)
        self.trf = instanciate_transformation(trf)

    def attach_dataset(self, dataset):
        """ Wrap an existing pair dataset and forward its image accessors.
        """
        assert isinstance(dataset, PairDataset)
        self.dataset = dataset
        self.nimg = dataset.nimg
        self.npairs = dataset.npairs
        self.get_image = dataset.get_image
        self.get_key = dataset.get_key
        self.get_filename = dataset.get_filename
        self.root = None

    def get_pair(self, i, output=''):
        """ Procedure:
        This function fetches pair `i` from the wrapped dataset, applies the
        transformation to its second image and updates the ground-truth
        entries present in the metadata ('aflow', 'flow', 'corres',
        'homography') accordingly.
        """
        img_a, img_b_, metadata = self.dataset.get_pair(i, output)

        img_b = self.trf({'img': img_b_, 'persp':(1,0,0,0,1,0,0,0)})
        trf = img_b['persp']

        if 'aflow' in metadata or 'flow' in metadata:
            W, H = img_a.size
            mgrid = np.mgrid[0:H, 0:W][::-1].transpose(1,2,0).astype(np.float32)
            if 'aflow' in metadata:
                aflow = metadata['aflow']
                aflow[:] = persp_apply(trf, aflow.reshape(-1,2)).reshape(aflow.shape)
            else:
                # only the relative flow is available: rebuild absolute
                # coordinates first (flow == aflow - mgrid, as written below)
                aflow = metadata['flow'] + mgrid
                aflow = persp_apply(trf, aflow.reshape(-1,2)).reshape(aflow.shape)
            if 'flow' in metadata:
                flow = metadata['flow']
                flow[:] = aflow - mgrid

        if 'corres' in metadata:
            corres = metadata['corres']
            corres[:,1] = persp_apply(trf, corres[:,1])

        if 'homography' in metadata:
            # p_b = homography * p_a
            trf_ = np.float32(trf+(1,)).reshape(3,3)
            metadata['homography'] = np.float32(trf_ @ metadata['homography'])

        return img_a, img_b['img'], metadata

    def __repr__(self):
        res = 'Transformed Pairs from %s\n' % type(self.dataset).__name__
        res += '  %d images and pairs' % self.npairs
        res += '\n  root: %s...' % self.dataset.root
        res += '\n  transform: %s' % (repr(self.trf).replace('\n',''))
        return res + '\n'



class CatPairDataset (CatDataset):
    ''' Concatenation of several pair datasets.
    '''
    def __init__(self, *datasets):
        CatDataset.__init__(self, *datasets)
        # pair_offsets[k] = index of the first pair of dataset k in the concatenation
        pair_offsets = [0]
        for db in datasets:
            pair_offsets.append(db.npairs)
        self.pair_offsets = np.cumsum(pair_offsets)
        self.npairs = self.pair_offsets[-1]

    def __len__(self):
        return self.npairs

    def __repr__(self):
        fmt_str = "CatPairDataset("
        for db in self.datasets:
            fmt_str += str(db).replace("\n"," ") + ', '
        return fmt_str[:-2] + ')'

    def pair_which(self, i):
        ''' Map a global pair index to (dataset index, pair index inside that dataset).
        '''
        # Validate the pair index itself: comparing the *dataset* position with
        # the number of pairs (as done previously) let bad indices through.
        assert 0 <= i < self.npairs, 'Bad pair index %d, expected 0 <= index < %d' % (i, self.npairs)
        pos = np.searchsorted(self.pair_offsets, i, side='right')-1
        return pos, i - self.pair_offsets[pos]

    def pair_call(self, func, i, *args, **kwargs):
        ''' Call method `func` of the dataset that owns pair `i`, with the local pair index.
        '''
        b, j = self.pair_which(i)
        return getattr(self.datasets[b], func)(j, *args, **kwargs)

    def get_pair(self, i, output=()):
        b, i = self.pair_which(i)
        return self.datasets[b].get_pair(i, output)

    def get_flow_filename(self, pair_idx, *args, **kwargs):
        return self.pair_call('get_flow_filename', pair_idx, *args, **kwargs)

    def get_mask_filename(self, pair_idx, *args, **kwargs):
        return self.pair_call('get_mask_filename', pair_idx, *args, **kwargs)

    def get_corres_filename(self, pair_idx, *args, **kwargs):
        return self.pair_call('get_corres_filename', pair_idx, *args, **kwargs)



def is_pair(x):
    """ True when `x` is a tuple/list of length 2, or a 1-D numpy array with 2 elements.
    """
    if isinstance(x, (tuple,list)) and len(x) == 2:
        return True
    if isinstance(x, np.ndarray) and x.ndim == 1 and x.shape[0] == 2:
        return True
    return False
class RandomWebImages (Dataset):
    """ 1 million distractors from Oxford and Paris Revisited
        see http://ptak.felk.cvut.cz/revisitop/revisitop1m/

    The image list is read from per-chunk cache files `image_list_<i>.txt`
    located in `root`; a missing cache file is rebuilt by scanning 4 image
    folders and is then written back to disk.
    """
    def __init__(self, start=0, end=1024, root="data/revisitop1m"):
        Dataset.__init__(self)
        self.root = root

        bar = None
        self.imgs = []
        for i in range(start, end):
            img_list_path = os.path.join(self.root, "image_list_%d.txt"%i)
            try:
                # read cached list
                with open(img_list_path) as cache:
                    cached_imgs = [e.strip() for e in cache]
                assert cached_imgs, f"Cache '{img_list_path}' is empty!"
                self.imgs += cached_imgs

            except IOError:
                # progress bar is only created once a cache file is missing
                if bar is None:
                    bar = trange(start, 4*end, desc='Caching')
                    bar.update(4*i)

                # create it
                imgs = []
                for d in range(i*4,(i+1)*4): # 4096 folders in total, on average 256 each
                    key = hex(d)[2:].zfill(3)
                    folder = os.path.join(self.root, key)
                    if not os.path.isdir(folder): continue
                    imgs += [f for f in os.listdir(folder) if verify_img(folder,f)]
                    bar.update(1)
                assert imgs, f"No images found in {folder}/"
                with open(img_list_path,'w') as cache:
                    cache.write('\n'.join(imgs))
                self.imgs += imgs

        if bar: bar.update(bar.total - bar.n)
        self.nimg = len(self.imgs)

    def get_key(self, i):
        """ Relative path of image `i`: <first 3 characters of the name>/<name>.
        """
        key = self.imgs[i]
        return os.path.join(key[:3], key)


def verify_img(folder, f):
    """ Return True when `folder/f` is a '.jpg' file that PIL can open and
        convert to RGB, False otherwise.
    """
    path = os.path.join(folder, f)
    if not f.endswith('.jpg'): return False
    try:
        from PIL import Image
        with Image.open(path) as img:
            img.convert('RGB') # try to open it
        return True
    except Exception:
        # any decoding problem means "not a usable image"; unlike a bare
        # `except:`, Ctrl-C during the long caching loop is not swallowed
        return False
# Downloads and unpacks the training data below ./data:
#   data/revisitop1m : web images (5 archives)
#   data/aachen      : Aachen images (archive must already be present),
#                      style-transfer images and optical-flow pairs
# Must be run from the code root; ./data must already exist.

CODE_ROOT=$(pwd)
if [ ! -e data ]; then
    echo "Error: missing data/ folder"
    echo "First, create a folder that can host (at least) 15 GB of data."
    echo "Then, create a soft-link named 'data' that points to it."
    exit 1
fi

# download web images from the revisitop1m dataset
WEB_ROOT=data/revisitop1m
mkdir -p "$WEB_ROOT"
# abort rather than unpack archives in the wrong directory
cd "$WEB_ROOT" || exit 1
if [ ! -e 0d3 ]; then
    for i in {1..5}; do
        echo "Installing the web images dataset ($i/5)..."
        if [ ! -f "revisitop1m.$i.tar.gz" ]; then
            wget "http://ptak.felk.cvut.cz/revisitop/revisitop1m/jpg/revisitop1m.$i.tar.gz"
        fi
        tar -xzvf "revisitop1m.$i.tar.gz"
        rm -f "revisitop1m.$i.tar.gz"
    done
fi
cd "$CODE_ROOT" || exit 1

# aachen images: not downloaded here, the archive must already be in place
AACHEN_ROOT=data/aachen
mkdir -p "$AACHEN_ROOT"
cd "$AACHEN_ROOT" || exit 1
if [ ! -e "images_upright" ]; then
    echo "Installing the Aachen dataset..."
    fname=database_and_query_images.zip
    if [ ! -f "$fname" ]; then
        echo "File not found: $fname"
        exit 1
    else
        unzip "$fname"
        rm -f "$fname"
    fi
fi

# download style transfer images
if [ ! -e "style_transfer" ]; then
    echo "Installing the Aachen style-transfer dataset..."
    fname=aachen_style_transfer.zip
    if [ ! -f "$fname" ]; then
        # -O names the output file; a bare trailing "$fname" would be
        # interpreted by wget as a second URL to fetch
        wget -O "$fname" http://download.europe.naverlabs.com/3DVision/aachen_style_transfer.zip
    fi
    unzip "$fname"
    rm -f "$fname"
fi

# download optical flow pairs
if [ ! -e "optical_flow" ]; then
    echo "Installing the Aachen optical flow dataset..."
    fname=aachen_optical_flow.zip
    if [ ! -f "$fname" ]; then
        wget -O "$fname" http://download.europe.naverlabs.com/3DVision/aachen_optical_flow.zip
    fi
    unzip "$fname"
    rm -f "$fname"
fi
cd "$CODE_ROOT" || exit 1

echo "Done!"
def load_network(model_fn):
    """ Build the network described in checkpoint file `model_fn`, load its
        weights and return it in eval mode.

    The checkpoint is a dict with a 'net' entry (python expression that
    constructs the network) and a 'state_dict' entry (the weights).
    """
    # Deserialize on CPU so that the checkpoint can be read on a machine
    # without the device it was saved from; callers move the net afterwards.
    checkpoint = torch.load(model_fn, map_location='cpu')
    print("\n>> Creating net = " + checkpoint['net'])
    # SECURITY NOTE: the constructor expression stored in the checkpoint is
    # eval()'ed -> only load checkpoints coming from a trusted source.
    net = eval(checkpoint['net'])
    nb_of_weights = common.model_size(net)
    print(f" ( Model size: {nb_of_weights/1000:.0f}K parameters )")

    # initialization
    weights = checkpoint['state_dict']
    # drop the 'module.' key prefix, if any
    net.load_state_dict({k.replace('module.',''):v for k,v in weights.items()})
    return net.eval()


class NonMaxSuppression (torch.nn.Module):
    """ Keypoint detector: keeps the pixels that are local maxima of the
        repeatability map (3x3 neighborhood) and whose repeatability and
        reliability are both above their thresholds.
    """
    def __init__(self, rel_thr=0.7, rep_thr=0.7):
        super().__init__()
        self.max_filter = torch.nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.rel_thr = rel_thr
        self.rep_thr = rep_thr

    def forward(self, reliability, repeatability, **kw):
        """ `reliability` and `repeatability` are lists holding a single map each.
            Returns the last two index rows (y, x) of the selected pixels.
        """
        assert len(reliability) == len(repeatability) == 1
        reliability, repeatability = reliability[0], repeatability[0]

        # local maxima
        maxima = (repeatability == self.max_filter(repeatability))

        # remove low peaks
        maxima *= (repeatability >= self.rep_thr)
        maxima *= (reliability >= self.rel_thr)

        return maxima.nonzero().t()[2:4]


def extract_multiscale( net, img, detector, scale_f=2**0.25,
                        min_scale=0.0, max_scale=1,
                        min_size=256, max_size=1024,
                        verbose=False):
    """ Run `net` + `detector` on `img` at decreasing scales.

    img: batch holding a single RGB image, shape (1, 3, H, W).
    The image is repeatedly down-scaled by `scale_f`; features are extracted
    at every scale allowed by min/max_scale and min/max_size (the size
    constraints apply to the largest image dimension).

    Returns (XYS, D, scores):
        XYS: one row (x, y, 32/scale) per keypoint, x/y in original image coordinates
        D: the corresponding descriptors
        scores: reliability * repeatability of each keypoint
    """
    old_bm = torch.backends.cudnn.benchmark
    # the input size changes at every scale, so the benchmark mode is
    # switched off while extracting ("speedup" in the original comment)
    torch.backends.cudnn.benchmark = False
    try:
        # extract keypoints at multiple scales
        B, three, H, W = img.shape
        assert B == 1 and three == 3, "should be a batch with a single RGB image"

        assert max_scale <= 1
        s = 1.0 # current scale factor

        X,Y,S,C,Q,D = [],[],[],[],[],[]
        while s+0.001 >= max(min_scale, min_size / max(H,W)):
            if s-0.001 <= min(max_scale, max_size / max(H,W)):
                nh, nw = img.shape[2:]
                if verbose: print(f"extracting at scale x{s:.02f} = {nw:4d}x{nh:3d}")
                # extract descriptors
                with torch.no_grad():
                    res = net(imgs=[img])

                # get output and reliability map
                descriptors = res['descriptors'][0]
                reliability = res['reliability'][0]
                repeatability = res['repeatability'][0]

                # extract maxima and descs
                y,x = detector(**res) # nms
                c = reliability[0,0,y,x]
                q = repeatability[0,0,y,x]
                d = descriptors[0,:,y,x].t()
                n = d.shape[0]

                # accumulate multiple scales
                X.append(x.float() * W/nw)
                Y.append(y.float() * H/nh)
                S.append((32/s) * torch.ones(n, dtype=torch.float32, device=d.device))
                C.append(c)
                Q.append(q)
                D.append(d)
            s /= scale_f

            # down-scale the image for next iteration
            nh, nw = round(H*s), round(W*s)
            img = F.interpolate(img, (nh,nw), mode='bilinear', align_corners=False)
    finally:
        # restore value, even when the extraction fails
        torch.backends.cudnn.benchmark = old_bm

    Y = torch.cat(Y)
    X = torch.cat(X)
    S = torch.cat(S) # scale
    scores = torch.cat(C) * torch.cat(Q) # scores = reliability * repeatability
    XYS = torch.stack([X,Y,S], dim=-1)
    D = torch.cat(D)
    return XYS, D, scores


def extract_keypoints(args):
    """ Extract keypoints for every image listed in `args.images` and save
        them next to each image as `<image path>.<args.tag>` (npz archive).
        A '.txt' entry is expanded in place into the paths it lists.
        Note: `args.images` is consumed (emptied) by this function.
    """
    iscuda = common.torch_set_gpu(args.gpu)

    # load the network...
    net = load_network(args.model)
    if iscuda: net = net.cuda()

    # create the non-maxima detector
    detector = NonMaxSuppression(
        rel_thr = args.reliability_thr,
        rep_thr = args.repeatability_thr)

    while args.images:
        img_path = args.images.pop(0)

        if img_path.endswith('.txt'):
            with open(img_path) as listing:
                args.images = listing.read().splitlines() + args.images
            continue

        print(f"\nExtracting features for {img_path}")
        img = Image.open(img_path).convert('RGB')
        W, H = img.size
        img = norm_RGB(img)[None]
        if iscuda: img = img.cuda()

        # extract keypoints/descriptors for a single image
        xys, desc, scores = extract_multiscale(net, img, detector,
            scale_f   = args.scale_f,
            min_scale = args.min_scale,
            max_scale = args.max_scale,
            min_size  = args.min_size,
            max_size  = args.max_size,
            verbose = True)

        xys = xys.cpu().numpy()
        desc = desc.cpu().numpy()
        scores = scores.cpu().numpy()
        # keep the top_k best scores (top_k == 0 keeps everything)
        idxs = scores.argsort()[-args.top_k or None:]

        outpath = img_path + '.' + args.tag
        print(f"Saving {len(idxs)} keypoints to {outpath}")
        with open(outpath,'wb') as out:
            np.savez(out,
                imsize = (W,H),
                keypoints = xys[idxs],
                descriptors = desc[idxs],
                scores = scores[idxs])



if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser("Extract keypoints for a given image")
    parser.add_argument("--model", type=str, required=True, help='model path')

    parser.add_argument("--images", type=str, required=True, nargs='+', help='images / list')
    parser.add_argument("--tag", type=str, default='r2d2', help='output file tag')

    parser.add_argument("--top-k", type=int, default=5000, help='number of keypoints')

    parser.add_argument("--scale-f", type=float, default=2**0.25)
    parser.add_argument("--min-size", type=int, default=256)
    parser.add_argument("--max-size", type=int, default=1024)
    parser.add_argument("--min-scale", type=float, default=0)
    parser.add_argument("--max-scale", type=float, default=1)

    parser.add_argument("--reliability-thr", type=float, default=0.7)
    parser.add_argument("--repeatability-thr", type=float, default=0.7)

    parser.add_argument("--gpu", type=int, nargs='+', default=[0], help='use -1 for CPU')
    args = parser.parse_args()

    extract_keypoints(args)


def extract_kapture_keypoints(args):
    """
    Extract r2d2 keypoints and descriptors to the kapture format directly.

    Images that already have both keypoints and descriptors of the requested
    types are skipped, so an interrupted extraction can be resumed.
    """
    print('extract_kapture_keypoints...')
    with get_all_tar_handlers(args.kapture_root,
                              mode={kapture.Keypoints: 'a',
                                    kapture.Descriptors: 'a',
                                    kapture.GlobalFeatures: 'r',
                                    kapture.Matches: 'r'}) as tar_handlers:
        kdata = kapture_from_dir(args.kapture_root, None,
                                 skip_list=[kapture.GlobalFeatures,
                                            kapture.Matches,
                                            kapture.Points3d,
                                            kapture.Observations],
                                 tar_handlers=tar_handlers)

        assert kdata.records_camera is not None
        image_list = [filename for _, _, filename in kapture.flatten(kdata.records_camera)]
        # both type names default to the model file name (without extension)
        if args.keypoints_type is None:
            args.keypoints_type = path.splitext(path.basename(args.model))[0]
            print(f'keypoints_type set to {args.keypoints_type}')
        if args.descriptors_type is None:
            args.descriptors_type = path.splitext(path.basename(args.model))[0]
            print(f'descriptors_type set to {args.descriptors_type}')

        if kdata.keypoints is not None and args.keypoints_type in kdata.keypoints \
                and kdata.descriptors is not None and args.descriptors_type in kdata.descriptors:
            print('detected already computed features of same keypoints_type/descriptors_type, resuming extraction...')
            image_list = [name
                          for name in image_list
                          if name not in kdata.keypoints[args.keypoints_type] or
                          name not in kdata.descriptors[args.descriptors_type]]

        if len(image_list) == 0:
            print('All features were already extracted')
            return
        else:
            print(f'Extracting r2d2 features for {len(image_list)} images')

        iscuda = common.torch_set_gpu(args.gpu)

        # load the network...
        net = load_network(args.model)
        if iscuda:
            net = net.cuda()

        # create the non-maxima detector
        detector = NonMaxSuppression(
            rel_thr=args.reliability_thr,
            rep_thr=args.repeatability_thr)

        if kdata.keypoints is None:
            kdata.keypoints = {}
        if kdata.descriptors is None:
            kdata.descriptors = {}

        # dtype/dsize stay None until known from existing data or from the first image
        if args.keypoints_type not in kdata.keypoints:
            keypoints_dtype = None
            keypoints_dsize = None
        else:
            keypoints_dtype = kdata.keypoints[args.keypoints_type].dtype
            keypoints_dsize = kdata.keypoints[args.keypoints_type].dsize
        if args.descriptors_type not in kdata.descriptors:
            descriptors_dtype = None
            descriptors_dsize = None
        else:
            descriptors_dtype = kdata.descriptors[args.descriptors_type].dtype
            descriptors_dsize = kdata.descriptors[args.descriptors_type].dsize

        for image_name in image_list:
            img_path = get_image_fullpath(args.kapture_root, image_name)
            print(f"\nExtracting features for {img_path}")
            img = Image.open(img_path).convert('RGB')
            W, H = img.size
            img = norm_RGB(img)[None]
            if iscuda:
                img = img.cuda()

            # extract keypoints/descriptors for a single image
            xys, desc, scores = extract_multiscale(net, img, detector,
                                                   scale_f=args.scale_f,
                                                   min_scale=args.min_scale,
                                                   max_scale=args.max_scale,
                                                   min_size=args.min_size,
                                                   max_size=args.max_size,
                                                   verbose=True)

            xys = xys.cpu().numpy()
            desc = desc.cpu().numpy()
            scores = scores.cpu().numpy()
            # keep the top_k best scores (top_k == 0 keeps everything)
            idxs = scores.argsort()[-args.top_k or None:]

            xys = xys[idxs]
            desc = desc[idxs]
            if keypoints_dtype is None or descriptors_dtype is None:
                # first extracted image: register the feature types and write their config files
                keypoints_dtype = xys.dtype
                descriptors_dtype = desc.dtype

                keypoints_dsize = xys.shape[1]
                descriptors_dsize = desc.shape[1]

                kdata.keypoints[args.keypoints_type] = kapture.Keypoints('r2d2', keypoints_dtype, keypoints_dsize)
                kdata.descriptors[args.descriptors_type] = kapture.Descriptors('r2d2', descriptors_dtype,
                                                                                descriptors_dsize,
                                                                                args.keypoints_type, 'L2')
                keypoints_config_absolute_path = get_feature_csv_fullpath(kapture.Keypoints,
                                                                          args.keypoints_type,
                                                                          args.kapture_root)
                descriptors_config_absolute_path = get_feature_csv_fullpath(kapture.Descriptors,
                                                                            args.descriptors_type,
                                                                            args.kapture_root)
                keypoints_to_file(keypoints_config_absolute_path, kdata.keypoints[args.keypoints_type])
                descriptors_to_file(descriptors_config_absolute_path, kdata.descriptors[args.descriptors_type])
            else:
                # new features must be compatible with the registered types
                assert kdata.keypoints[args.keypoints_type].dtype == xys.dtype
                assert kdata.descriptors[args.descriptors_type].dtype == desc.dtype
                assert kdata.keypoints[args.keypoints_type].dsize == xys.shape[1]
                assert kdata.descriptors[args.descriptors_type].dsize == desc.shape[1]
                assert kdata.descriptors[args.descriptors_type].keypoints_type == args.keypoints_type
                assert kdata.descriptors[args.descriptors_type].metric_type == 'L2'

            keypoints_fullpath = get_keypoints_fullpath(args.keypoints_type, args.kapture_root,
                                                        image_name, tar_handlers)
            print(f"Saving {xys.shape[0]} keypoints to {keypoints_fullpath}")
            image_keypoints_to_file(keypoints_fullpath, xys)
            kdata.keypoints[args.keypoints_type].add(image_name)

            descriptors_fullpath = get_descriptors_fullpath(args.descriptors_type, args.kapture_root,
                                                            image_name, tar_handlers)
            print(f"Saving {desc.shape[0]} descriptors to {descriptors_fullpath}")
            image_descriptors_to_file(descriptors_fullpath, desc)
            kdata.descriptors[args.descriptors_type].add(image_name)

        if not keypoints_check_dir(kdata.keypoints[args.keypoints_type], args.keypoints_type,
                                   args.kapture_root, tar_handlers) or \
                not descriptors_check_dir(kdata.descriptors[args.descriptors_type], args.descriptors_type,
                                          args.kapture_root, tar_handlers):
            print('local feature extraction ended successfully but not all files were saved')


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        "Extract r2d2 local features for all images in a dataset stored in the kapture format")
    parser.add_argument("--model", type=str, required=True, help='model path')
    parser.add_argument('--keypoints-type', default=None, help='keypoint type_name, default is filename of model')
    parser.add_argument('--descriptors-type', default=None, help='descriptors type_name, default is filename of model')

    parser.add_argument("--kapture-root", type=str, required=True, help='path to kapture root directory')

    parser.add_argument("--top-k", type=int, default=5000, help='number of keypoints')

    parser.add_argument("--scale-f", type=float, default=2**0.25)
    parser.add_argument("--min-size", type=int, default=256)
    parser.add_argument("--max-size", type=int, default=1024)
    parser.add_argument("--min-scale", type=float, default=0)
    parser.add_argument("--max-scale", type=float, default=1)

    parser.add_argument("--reliability-thr", type=float, default=0.7)
    parser.add_argument("--repeatability-thr", type=float, default=0.7)

    parser.add_argument("--gpu", type=int, nargs='+', default=[0], help='use -1 for CPU')
    args = parser.parse_args()

    extract_kapture_keypoints(args)
0000000000000000000000000000000000000000..ca7b9c6be465320a650d38a58ab9d293d0e37db4 --- /dev/null +++ b/third_party/r2d2/imgs/peppers.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d363d6bd5406bf6f68d16a5c6c803f5efb72802e505130444ead02533f0d5b +size 538749 diff --git a/third_party/r2d2/imgs/test.png b/third_party/r2d2/imgs/test.png new file mode 100644 index 0000000000000000000000000000000000000000..6568a167d9e0fe1e69ac7fd57a790f123310e677 --- /dev/null +++ b/third_party/r2d2/imgs/test.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ea0cb0da0310f8549565a834c8c383ca58c357415c895d5bb06cd371277c77 +size 34427 diff --git a/third_party/r2d2/models/faster2d2_WASF_N16.pt b/third_party/r2d2/models/faster2d2_WASF_N16.pt new file mode 100644 index 0000000000000000000000000000000000000000..c448459efd5c557caa66e081cc65862117523297 --- /dev/null +++ b/third_party/r2d2/models/faster2d2_WASF_N16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217daa3a166bfe9bf2b68c05c1607a09dd4d552ae1bbeda885479d504eefc14b +size 3251102 diff --git a/third_party/r2d2/models/faster2d2_WASF_N8_big.pt b/third_party/r2d2/models/faster2d2_WASF_N8_big.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0a2c8432933ad33e852506990d6c3b85e33e856 --- /dev/null +++ b/third_party/r2d2/models/faster2d2_WASF_N8_big.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26dc10077ad9ab721454787693198b140f823ca0448254ae1c69474b8d59151 +size 5616403 diff --git a/third_party/r2d2/models/r2d2_WAF_N16.pt b/third_party/r2d2/models/r2d2_WAF_N16.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3ce0e26a753d5d0608b99d13832e82710e66687 --- /dev/null +++ b/third_party/r2d2/models/r2d2_WAF_N16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27cebd6608317b35198a76f60f87492110dee3e88ca382586a729dadb1a16b90 +size 1950677 diff --git 
class APLoss (nn.Module):
    """ differentiable AP loss, through quantization.

        Input: (N, M)   values in [min, max]
        label: (N, M)   values in {0, 1}

        Returns: list of query AP (for each n in {1..N})
                 Note: typically, you want to minimize 1 - mean(AP)

        nq:  number of quantization bins (2 <= nq <= 100)
        euc: if True, inputs are first mapped with 1 - sqrt(2.001 - 2*x)
    """
    def __init__(self, nq=25, min=0, max=1, euc=False):
        nn.Module.__init__(self)
        assert isinstance(nq, int) and 2 <= nq <= 100
        self.nq = nq
        self.min = min
        self.max = max
        self.euc = euc
        gap = max - min
        assert gap > 0

        # init quantizer = non-learnable (fixed) convolution
        self.quantizer = q = nn.Conv1d(1, 2*nq, kernel_size=1, bias=True)
        # the bins are fixed by construction: freeze them so that an optimizer
        # that receives this module's parameters cannot move them
        # (gradients still flow through the quantizer to its input)
        q.weight.requires_grad_(False)
        q.bias.requires_grad_(False)
        a = (nq-1) / gap
        #1st half = lines passing to (min+x,1) and (min+x+1/a,0) with x = {nq-1..0}*gap/(nq-1)
        q.weight.data[:nq] = -a
        q.bias.data[:nq] = torch.from_numpy(a*min + np.arange(nq, 0, -1)) # b = 1 + a*(min+x)
        #2nd half = lines passing to (min+x,1) and (min+x-1/a,0) with x = {nq-1..0}*gap/(nq-1)
        q.weight.data[nq:] = a
        q.bias.data[nq:] = torch.from_numpy(np.arange(2-nq, 2, 1) - a*min) # b = 1 - a*(min+x)
        # first and last one are special: just horizontal straight line
        q.weight.data[0] = q.weight.data[-1] = 0
        q.bias.data[0] = q.bias.data[-1] = 1

    def compute_AP(self, x, label):
        """ Return the (N,) tensor of per-row quantized average precision.
        """
        N, M = x.shape
        if self.euc:  # euclidean distance in same range than similarities
            x = 1 - torch.sqrt(2.001 - 2*x)

        # quantize all predictions: soft assignment of each value to the nq bins
        q = self.quantizer(x.unsqueeze(1))
        q = torch.min(q[:,:self.nq], q[:,self.nq:]).clamp(min=0) # N x Q x M

        nbs = q.sum(dim=-1) # number of samples  N x Q = c
        rec = (q * label.view(N,1,M).float()).sum(dim=-1) # nb of correct samples = c+ N x Q
        prec = rec.cumsum(dim=-1) / (1e-16 + nbs.cumsum(dim=-1)) # precision
        # norm in [0,1] (a row without any positive label yields NaN here)
        rec = rec / rec.sum(dim=-1).unsqueeze(1)

        ap = (prec * rec).sum(dim=-1) # per-image AP
        return ap

    def forward(self, x, label):
        assert x.shape == label.shape # N x M
        return self.compute_AP(x, label)
class MultiLoss (nn.Module):
    """ Weighted sum of several loss modules.
        *args: alternating sequence [weight (float), loss module, ...]

    Example:
        loss = MultiLoss( 1, MyFirstLoss(), 0.5, MySecondLoss() )

    Each loss is called with the keyword arguments given to forward() and
    returns either a value (reported under its `name` attribute) or a
    (value, dict of details) tuple.
    """
    def __init__(self, *args, dbg=()):
        nn.Module.__init__(self)
        assert len(args) % 2 == 0, 'args must be a list of (float, loss)'
        self.weights = []
        self.losses = nn.ModuleList()
        # walk the arguments two by two: (weight, loss)
        for raw_weight, criterion in zip(args[0::2], args[1::2]):
            coef = float(raw_weight)
            assert isinstance(criterion, nn.Module), "%s is not a loss!" % criterion
            self.weights.append(coef)
            self.losses.append(criterion)

    def forward(self, select=None, **variables):
        """ Returns (weighted total, dict of float details).
            select: optional collection of 1-based loss numbers to evaluate.
        """
        assert not select or all(1<=n<=len(self.losses) for n in select)
        report = {}
        total = 0
        for num, (coef, criterion) in enumerate(zip(self.weights, self.losses), 1):
            skipped = select is not None and num not in select
            if skipped:
                continue
            out = criterion(**dict(variables))
            if isinstance(out, tuple):
                assert len(out) == 2 and isinstance(out[1], dict)
                value, details = out
            else:
                # a bare value is reported under the name of the loss
                value, details = out, {criterion.name: out}
            total = total + coef * value
            for key, val in details.items():
                report['loss_'+key] = float(val)
        report['loss'] = float(total)
        return total, report
+# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BaseNet (nn.Module): + """ Takes a list of images as input, and returns for each image: + - a pixelwise descriptor + - a pixelwise confidence + """ + def softmax(self, ux): + if ux.shape[1] == 1: + x = F.softplus(ux) + return x / (1 + x) # for sure in [0,1], much less plateaus than softmax + elif ux.shape[1] == 2: + return F.softmax(ux, dim=1)[:,1:2] + + def normalize(self, x, ureliability, urepeatability): + return dict(descriptors = F.normalize(x, p=2, dim=1), + repeatability = self.softmax( urepeatability ), + reliability = self.softmax( ureliability )) + + def forward_one(self, x): + raise NotImplementedError() + + def forward(self, imgs, **kw): + res = [self.forward_one(img) for img in imgs] + # merge all dictionaries into one + res = {k:[r[k] for r in res if k in r] for k in {k for r in res for k in r}} + return dict(res, imgs=imgs, **kw) + + + +class PatchNet (BaseNet): + """ Helper class to construct a fully-convolutional network that + extract a l2-normalized patch descriptor. 
+ """ + def __init__(self, inchan=3, dilated=True, dilation=1, bn=True, bn_affine=False): + BaseNet.__init__(self) + self.inchan = inchan + self.curchan = inchan + self.dilated = dilated + self.dilation = dilation + self.bn = bn + self.bn_affine = bn_affine + self.ops = nn.ModuleList([]) + + def _make_bn(self, outd): + return nn.BatchNorm2d(outd, affine=self.bn_affine) + + def _add_conv(self, outd, k=3, stride=1, dilation=1, bn=True, relu=True, k_pool = 1, pool_type='max'): + # as in the original implementation, dilation is applied at the end of layer, so it will have impact only from next layer + d = self.dilation * dilation + if self.dilated: + conv_params = dict(padding=((k-1)*d)//2, dilation=d, stride=1) + self.dilation *= stride + else: + conv_params = dict(padding=((k-1)*d)//2, dilation=d, stride=stride) + self.ops.append( nn.Conv2d(self.curchan, outd, kernel_size=k, **conv_params) ) + if bn and self.bn: self.ops.append( self._make_bn(outd) ) + if relu: self.ops.append( nn.ReLU(inplace=True) ) + self.curchan = outd + + if k_pool > 1: + if pool_type == 'avg': + self.ops.append(torch.nn.AvgPool2d(kernel_size=k_pool)) + elif pool_type == 'max': + self.ops.append(torch.nn.MaxPool2d(kernel_size=k_pool)) + else: + print(f"Error, unknown pooling type {pool_type}...") + + def forward_one(self, x): + assert self.ops, "You need to add convolutions first" + for n,op in enumerate(self.ops): + x = op(x) + return self.normalize(x) + + +class L2_Net (PatchNet): + """ Compute a 128D descriptor for all overlapping 32x32 patches. + From the L2Net paper (CVPR'17). 
+ """ + def __init__(self, dim=128, **kw ): + PatchNet.__init__(self, **kw) + add_conv = lambda n,**kw: self._add_conv((n*dim)//128,**kw) + add_conv(32) + add_conv(32) + add_conv(64, stride=2) + add_conv(64) + add_conv(128, stride=2) + add_conv(128) + add_conv(128, k=7, stride=8, bn=False, relu=False) + self.out_dim = dim + + +class Quad_L2Net (PatchNet): + """ Same than L2_Net, but replace the final 8x8 conv by 3 successive 2x2 convs. + """ + def __init__(self, dim=128, mchan=4, relu22=False, **kw ): + PatchNet.__init__(self, **kw) + self._add_conv( 8*mchan) + self._add_conv( 8*mchan) + self._add_conv( 16*mchan, stride=2) + self._add_conv( 16*mchan) + self._add_conv( 32*mchan, stride=2) + self._add_conv( 32*mchan) + # replace last 8x8 convolution with 3 2x2 convolutions + self._add_conv( 32*mchan, k=2, stride=2, relu=relu22) + self._add_conv( 32*mchan, k=2, stride=2, relu=relu22) + self._add_conv(dim, k=2, stride=2, bn=False, relu=False) + self.out_dim = dim + + + +class Quad_L2Net_ConfCFS (Quad_L2Net): + """ Same than Quad_L2Net, with 2 confidence maps for repeatability and reliability. + """ + def __init__(self, **kw ): + Quad_L2Net.__init__(self, **kw) + # reliability classifier + self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1) + # repeatability classifier: for some reasons it's a softplus, not a softmax! + # Why? I guess it's a mistake that was left unnoticed in the code for a long time... 
+ self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1) + + def forward_one(self, x): + assert self.ops, "You need to add convolutions first" + for op in self.ops: + x = op(x) + # compute the confidence maps + ureliability = self.clf(x**2) + urepeatability = self.sal(x**2) + return self.normalize(x, ureliability, urepeatability) + + +class Fast_Quad_L2Net (PatchNet): + """ Faster version of Quad l2 net, replacing one dilated conv with one pooling to diminish image resolution thus increase inference time + Dilation factors and pooling: + 1,1,1, pool2, 1,1, 2,2, 4, 8, upsample2 + """ + def __init__(self, dim=128, mchan=4, relu22=False, downsample_factor=2, **kw ): + + PatchNet.__init__(self, **kw) + self._add_conv( 8*mchan) + self._add_conv( 8*mchan) + self._add_conv( 16*mchan, k_pool = downsample_factor) # added avg pooling to decrease img resolution + self._add_conv( 16*mchan) + self._add_conv( 32*mchan, stride=2) + self._add_conv( 32*mchan) + + # replace last 8x8 convolution with 3 2x2 convolutions + self._add_conv( 32*mchan, k=2, stride=2, relu=relu22) + self._add_conv( 32*mchan, k=2, stride=2, relu=relu22) + self._add_conv(dim, k=2, stride=2, bn=False, relu=False) + + # Go back to initial image resolution with upsampling + self.ops.append(torch.nn.Upsample(scale_factor=downsample_factor, mode='bilinear', align_corners=False)) + + self.out_dim = dim + + +class Fast_Quad_L2Net_ConfCFS (Fast_Quad_L2Net): + """ Fast r2d2 architecture + """ + def __init__(self, **kw ): + Fast_Quad_L2Net.__init__(self, **kw) + # reliability classifier + self.clf = nn.Conv2d(self.out_dim, 2, kernel_size=1) + + # repeatability classifier: for some reasons it's a softplus, not a softmax! + # Why? I guess it's a mistake that was left unnoticed in the code for a long time... 
+ self.sal = nn.Conv2d(self.out_dim, 1, kernel_size=1) + + def forward_one(self, x): + assert self.ops, "You need to add convolutions first" + for op in self.ops: + x = op(x) + # compute the confidence maps + ureliability = self.clf(x**2) + urepeatability = self.sal(x**2) + return self.normalize(x, ureliability, urepeatability) \ No newline at end of file diff --git a/third_party/r2d2/nets/reliability_loss.py b/third_party/r2d2/nets/reliability_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..52d5383b0eaa52bcf2111eabb4b45e39b63b976f --- /dev/null +++ b/third_party/r2d2/nets/reliability_loss.py @@ -0,0 +1,59 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb +import torch.nn as nn +import torch.nn.functional as F + +from nets.ap_loss import APLoss + + +class PixelAPLoss (nn.Module): + """ Computes the pixel-wise AP loss: + Given two images and ground-truth optical flow, computes the AP per pixel. + + feat1: (B, C, H, W) pixel-wise features extracted from img1 + feat2: (B, C, H, W) pixel-wise features extracted from img2 + aflow: (B, 2, H, W) absolute flow: aflow[...,y1,x1] = x2,y2 + """ + def __init__(self, sampler, nq=20): + nn.Module.__init__(self) + self.aploss = APLoss(nq, min=0, max=1, euc=False) + self.name = 'pixAP' + self.sampler = sampler + + def loss_from_ap(self, ap, rel): + return 1 - ap + + def forward(self, descriptors, aflow, **kw): + # subsample things + scores, gt, msk, qconf = self.sampler(descriptors, kw.get('reliability'), aflow) + + # compute pixel-wise AP + n = qconf.numel() + if n == 0: return 0 + scores, gt = scores.view(n,-1), gt.view(n,-1) + ap = self.aploss(scores, gt).view(msk.shape) + + pixel_loss = self.loss_from_ap(ap, qconf) + + loss = pixel_loss[msk].mean() + return loss + + +class ReliabilityLoss (PixelAPLoss): + """ same than PixelAPLoss, but also train a pixel-wise confidence + that this pixel is going to have a good AP. 
+ """ + def __init__(self, sampler, base=0.5, **kw): + PixelAPLoss.__init__(self, sampler, **kw) + assert 0 <= base < 1 + self.base = base + self.name = 'reliability' + + def loss_from_ap(self, ap, rel): + return 1 - ap*rel - (1-rel)*self.base + + + diff --git a/third_party/r2d2/nets/repeatability_loss.py b/third_party/r2d2/nets/repeatability_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..5cda0b6d036f98af88a88780fe39da0c5c0b610e --- /dev/null +++ b/third_party/r2d2/nets/repeatability_loss.py @@ -0,0 +1,66 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nets.sampler import FullSampler + +class CosimLoss (nn.Module): + """ Try to make the repeatability repeatable from one image to the other. + """ + def __init__(self, N=16): + nn.Module.__init__(self) + self.name = f'cosim{N}' + self.patches = nn.Unfold(N, padding=0, stride=N//2) + + def extract_patches(self, sal): + patches = self.patches(sal).transpose(1,2) # flatten + patches = F.normalize(patches, p=2, dim=2) # norm + return patches + + def forward(self, repeatability, aflow, **kw): + B,two,H,W = aflow.shape + assert two == 2 + + # normalize + sali1, sali2 = repeatability + grid = FullSampler._aflow_to_grid(aflow) + sali2 = F.grid_sample(sali2, grid, mode='bilinear', padding_mode='border') + + patches1 = self.extract_patches(sali1) + patches2 = self.extract_patches(sali2) + cosim = (patches1 * patches2).sum(dim=2) + return 1 - cosim.mean() + + +class PeakyLoss (nn.Module): + """ Try to make the repeatability locally peaky. + + Mechanism: we maximize, for each pixel, the difference between the local mean + and the local max. 
+ """ + def __init__(self, N=16): + nn.Module.__init__(self) + self.name = f'peaky{N}' + assert N % 2 == 0, 'N must be pair' + self.preproc = nn.AvgPool2d(3, stride=1, padding=1) + self.maxpool = nn.MaxPool2d(N+1, stride=1, padding=N//2) + self.avgpool = nn.AvgPool2d(N+1, stride=1, padding=N//2) + + def forward_one(self, sali): + sali = self.preproc(sali) # remove super high frequency + return 1 - (self.maxpool(sali) - self.avgpool(sali)).mean() + + def forward(self, repeatability, **kw): + sali1, sali2 = repeatability + return (self.forward_one(sali1) + self.forward_one(sali2)) /2 + + + + + diff --git a/third_party/r2d2/nets/sampler.py b/third_party/r2d2/nets/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..9fede70d3a04d7f31a1d414eace0aaf3729e8235 --- /dev/null +++ b/third_party/r2d2/nets/sampler.py @@ -0,0 +1,390 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + + +""" Different samplers, each specifying how to sample pixels for the AP loss. 
+""" + + +class FullSampler(nn.Module): + """ all pixels are selected + - feats: keypoint descriptors + - confs: reliability values + """ + def __init__(self): + nn.Module.__init__(self) + self.mode = 'bilinear' + self.padding = 'zeros' + + @staticmethod + def _aflow_to_grid(aflow): + H, W = aflow.shape[2:] + grid = aflow.permute(0,2,3,1).clone() + grid[:,:,:,0] *= 2/(W-1) + grid[:,:,:,1] *= 2/(H-1) + grid -= 1 + grid[torch.isnan(grid)] = 9e9 # invalids + return grid + + def _warp(self, feats, confs, aflow): + if isinstance(aflow, tuple): return aflow # result was precomputed + feat1, feat2 = feats + conf1, conf2 = confs if confs else (None,None) + + B, two, H, W = aflow.shape + D = feat1.shape[1] + assert feat1.shape == feat2.shape == (B, D, H, W) # D = 128, B = batch + assert conf1.shape == conf2.shape == (B, 1, H, W) if confs else True + + # warp img2 to img1 + grid = self._aflow_to_grid(aflow) + ones2 = feat2.new_ones(feat2[:,0:1].shape) + feat2to1 = F.grid_sample(feat2, grid, mode=self.mode, padding_mode=self.padding) + mask2to1 = F.grid_sample(ones2, grid, mode='nearest', padding_mode='zeros') + conf2to1 = F.grid_sample(conf2, grid, mode=self.mode, padding_mode=self.padding) \ + if confs else None + return feat2to1, mask2to1.byte(), conf2to1 + + def _warp_positions(self, aflow): + B, two, H, W = aflow.shape + assert two == 2 + + Y = torch.arange(H, device=aflow.device) + X = torch.arange(W, device=aflow.device) + XY = torch.stack(torch.meshgrid(Y,X)[::-1], dim=0) + XY = XY[None].expand(B, 2, H, W).float() + + grid = self._aflow_to_grid(aflow) + XY2 = F.grid_sample(XY, grid, mode='bilinear', padding_mode='zeros') + return XY, XY2 + + + +class SubSampler (FullSampler): + """ pixels are selected in an uniformly spaced grid + """ + def __init__(self, border, subq, subd, perimage=False): + FullSampler.__init__(self) + assert subq % subd == 0, 'subq must be multiple of subd' + self.sub_q = subq + self.sub_d = subd + self.border = border + self.perimage = perimage + 
+ def __repr__(self): + return "SubSampler(border=%d, subq=%d, subd=%d, perimage=%d)" % ( + self.border, self.sub_q, self.sub_d, self.perimage) + + def __call__(self, feats, confs, aflow): + feat1, conf1 = feats[0], (confs[0] if confs else None) + # warp with optical flow in img1 coords + feat2, mask2, conf2 = self._warp(feats, confs, aflow) + + # subsample img1 + slq = slice(self.border, -self.border or None, self.sub_q) + feat1 = feat1[:, :, slq, slq] + conf1 = conf1[:, :, slq, slq] if confs else None + # subsample img2 + sld = slice(self.border, -self.border or None, self.sub_d) + feat2 = feat2[:, :, sld, sld] + mask2 = mask2[:, :, sld, sld] + conf2 = conf2[:, :, sld, sld] if confs else None + + B, D, Hq, Wq = feat1.shape + B, D, Hd, Wd = feat2.shape + + # compute gt + if self.perimage or self.sub_q != self.sub_d: + # compute ground-truth by comparing pixel indices + f = feats[0][0:1,0] if self.perimage else feats[0][:,0] + idxs = torch.arange(f.numel(), dtype=torch.int64, device=feat1.device).view(f.shape) + idxs1 = idxs[:, slq, slq].reshape(-1,Hq*Wq) + idxs2 = idxs[:, sld, sld].reshape(-1,Hd*Wd) + if self.perimage: + gt = (idxs1[0].view(-1,1) == idxs2[0].view(1,-1)) + gt = gt[None,:,:].expand(B, Hq*Wq, Hd*Wd) + else : + gt = (idxs1.view(-1,1) == idxs2.view(1,-1)) + else: + gt = torch.eye(feat1[:,0].numel(), dtype=torch.uint8, device=feat1.device) # always binary for AP loss + + # compute all images together + queries = feat1.reshape(B,D,-1) # B x D x (Hq x Wq) + database = feat2.reshape(B,D,-1) # B x D x (Hd x Wd) + if self.perimage: + queries = queries.transpose(1,2) # B x (Hd x Wd) x D + scores = torch.bmm(queries, database) # B x (Hq x Wq) x (Hd x Wd) + else: + queries = queries .transpose(1,2).reshape(-1,D) # (B x Hq x Wq) x D + database = database.transpose(1,0).reshape(D,-1) # D x (B x Hd x Wd) + scores = torch.matmul(queries, database) # (B x Hq x Wq) x (B x Hd x Wd) + + # compute reliability + qconf = (conf1 + conf2)/2 if confs else None + + assert 
gt.shape == scores.shape + return scores, gt, mask2, qconf + + + +class NghSampler (FullSampler): + """ all pixels in a small neighborhood + """ + def __init__(self, ngh, subq=1, subd=1, ignore=1, border=None): + FullSampler.__init__(self) + assert 0 <= ignore < ngh + self.ngh = ngh + self.ignore = ignore + assert subd <= ngh + self.sub_q = subq + self.sub_d = subd + if border is None: border = ngh + assert border >= ngh, 'border has to be larger than ngh' + self.border = border + + def __repr__(self): + return "NghSampler(ngh=%d, subq=%d, subd=%d, ignore=%d, border=%d)" % ( + self.ngh, self.sub_q, self.sub_d, self.ignore, self.border) + + def trans(self, arr, i, j): + s = lambda i: slice(self.border+i, i-self.border or None, self.sub_q) + return arr[:,:,s(j),s(i)] + + def __call__(self, feats, confs, aflow): + feat1, conf1 = feats[0], (confs[0] if confs else None) + # warp with optical flow in img1 coords + feat2, mask2, conf2 = self._warp(feats, confs, aflow) + + qfeat = self.trans(feat1,0,0) + qconf = (self.trans(conf1,0,0) + self.trans(conf2,0,0)) / 2 if confs else None + mask2 = self.trans(mask2,0,0) + scores_at = lambda i,j: (qfeat * self.trans(feat2,i,j)).sum(dim=1) + + # compute scores for all neighbors + B, D = feat1.shape[:2] + min_d = self.ignore**2 + max_d = self.ngh**2 + rad = (self.ngh//self.sub_d) * self.ngh # make an integer multiple + negs = [] + offsets = [] + for j in range(-rad, rad+1, self.sub_d): + for i in range(-rad, rad+1, self.sub_d): + if not(min_d < i*i + j*j <= max_d): + continue # out of scope + offsets.append((i,j)) # Note: this list is just for debug + negs.append( scores_at(i,j) ) + + scores = torch.stack([scores_at(0,0)] + negs, dim=-1) + gt = scores.new_zeros(scores.shape, dtype=torch.uint8) + gt[..., 0] = 1 # only the center point is positive + + return scores, gt, mask2, qconf + + + +class FarNearSampler (FullSampler): + """ Sample pixels from *both* a small neighborhood *and* far-away pixels. + + How it works? 
+ 1) Queries are sampled from img1, + - at least `border` pixels from borders and + - on a grid with step = `subq` + + 2) Close database pixels + - from the corresponding image (img2), + - within a `ngh` distance radius + - on a grid with step = `subd_ngh` + - ignored if distance to query is >0 and <=`ignore` + + 3) Far-away database pixels from , + - from all batch images in `img2` + - at least `border` pixels from borders + - on a grid with step = `subd_far` + """ + def __init__(self, subq, ngh, subd_ngh, subd_far, border=None, ignore=1, + maxpool_ngh=False ): + FullSampler.__init__(self) + border = border or ngh + assert ignore < ngh < subd_far, 'neighborhood needs to be smaller than far step' + self.close_sampler = NghSampler(ngh=ngh, subq=subq, subd=subd_ngh, + ignore=not(maxpool_ngh), border=border) + self.faraway_sampler = SubSampler(border=border, subq=subq, subd=subd_far) + self.maxpool_ngh = maxpool_ngh + + def __repr__(self): + c,f = self.close_sampler, self.faraway_sampler + res = "FarNearSampler(subq=%d, ngh=%d" % (c.sub_q, c.ngh) + res += ", subd_ngh=%d, subd_far=%d" % (c.sub_d, f.sub_d) + res += ", border=%d, ign=%d" % (f.border, c.ignore) + res += ", maxpool_ngh=%d" % self.maxpool_ngh + return res+')' + + def __call__(self, feats, confs, aflow): + # warp with optical flow in img1 coords + aflow = self._warp(feats, confs, aflow) + + # sample ngh pixels + scores1, gt1, msk1, conf1 = self.close_sampler(feats, confs, aflow) + scores1, gt1 = scores1.view(-1,scores1.shape[-1]), gt1.view(-1,gt1.shape[-1]) + if self.maxpool_ngh: + # we consider all scores from ngh as potential positives + scores1, self._cached_maxpool_ngh = scores1.max(dim=1,keepdim=True) + gt1 = gt1[:, 0:1] + + # sample far pixels + scores2, gt2, msk2, conf2 = self.faraway_sampler(feats, confs, aflow) + # assert (msk1 == msk2).all() + # assert (conf1 == conf2).all() + + return (torch.cat((scores1,scores2),dim=1), + torch.cat((gt1, gt2), dim=1), + msk1, conf1 if confs else None) + + +class 
NghSampler2 (nn.Module): + """ Similar to NghSampler, but doesnt warp the 2nd image. + Distance to GT => 0 ... pos_d ... neg_d ... ngh + Pixel label => + + + + + + 0 0 - - - - - - - + + Subsample on query side: if > 0, regular grid + < 0, random points + In both cases, the number of query points is = W*H/subq**2 + """ + def __init__(self, ngh, subq=1, subd=1, pos_d=0, neg_d=2, border=None, + maxpool_pos=True, subd_neg=0): + nn.Module.__init__(self) + assert 0 <= pos_d < neg_d <= (ngh if ngh else 99) + self.ngh = ngh + self.pos_d = pos_d + self.neg_d = neg_d + assert subd <= ngh or ngh == 0 + assert subq != 0 + self.sub_q = subq + self.sub_d = subd + self.sub_d_neg = subd_neg + if border is None: border = ngh + assert border >= ngh, 'border has to be larger than ngh' + self.border = border + self.maxpool_pos = maxpool_pos + self.precompute_offsets() + + def precompute_offsets(self): + pos_d2 = self.pos_d**2 + neg_d2 = self.neg_d**2 + rad2 = self.ngh**2 + rad = (self.ngh//self.sub_d) * self.ngh # make an integer multiple + pos = [] + neg = [] + for j in range(-rad, rad+1, self.sub_d): + for i in range(-rad, rad+1, self.sub_d): + d2 = i*i + j*j + if d2 <= pos_d2: + pos.append( (i,j) ) + elif neg_d2 <= d2 <= rad2: + neg.append( (i,j) ) + + self.register_buffer('pos_offsets', torch.LongTensor(pos).view(-1,2).t()) + self.register_buffer('neg_offsets', torch.LongTensor(neg).view(-1,2).t()) + + def gen_grid(self, step, aflow): + B, two, H, W = aflow.shape + dev = aflow.device + b1 = torch.arange(B, device=dev) + if step > 0: + # regular grid + x1 = torch.arange(self.border, W-self.border, step, device=dev) + y1 = torch.arange(self.border, H-self.border, step, device=dev) + H1, W1 = len(y1), len(x1) + x1 = x1[None,None,:].expand(B,H1,W1).reshape(-1) + y1 = y1[None,:,None].expand(B,H1,W1).reshape(-1) + b1 = b1[:,None,None].expand(B,H1,W1).reshape(-1) + shape = (B, H1, W1) + else: + # randomly spread + n = (H - 2*self.border) * (W - 2*self.border) // step**2 + x1 = 
torch.randint(self.border, W-self.border, (n,), device=dev) + y1 = torch.randint(self.border, H-self.border, (n,), device=dev) + x1 = x1[None,:].expand(B,n).reshape(-1) + y1 = y1[None,:].expand(B,n).reshape(-1) + b1 = b1[:,None].expand(B,n).reshape(-1) + shape = (B, n) + return b1, y1, x1, shape + + def forward(self, feats, confs, aflow, **kw): + B, two, H, W = aflow.shape + assert two == 2 + feat1, conf1 = feats[0], (confs[0] if confs else None) + feat2, conf2 = feats[1], (confs[1] if confs else None) + + # positions in the first image + b1, y1, x1, shape = self.gen_grid(self.sub_q, aflow) + + # sample features from first image + feat1 = feat1[b1, :, y1, x1] + qconf = conf1[b1, :, y1, x1].view(shape) if confs else None + + #sample GT from second image + b2 = b1 + xy2 = (aflow[b1, :, y1, x1] + 0.5).long().t() + mask = (0 <= xy2[0]) * (0 <= xy2[1]) * (xy2[0] < W) * (xy2[1] < H) + mask = mask.view(shape) + + def clamp(xy): + torch.clamp(xy[0], 0, W-1, out=xy[0]) + torch.clamp(xy[1], 0, H-1, out=xy[1]) + return xy + + # compute positive scores + xy2p = clamp(xy2[:,None,:] + self.pos_offsets[:,:,None]) + pscores = (feat1[None,:,:] * feat2[b2, :, xy2p[1], xy2p[0]]).sum(dim=-1).t() +# xy1p = clamp(torch.stack((x1,y1))[:,None,:] + self.pos_offsets[:,:,None]) +# grid = FullSampler._aflow_to_grid(aflow) +# feat2p = F.grid_sample(feat2, grid, mode='bilinear', padding_mode='border') +# pscores = (feat1[None,:,:] * feat2p[b1,:,xy1p[1], xy1p[0]]).sum(dim=-1).t() + if self.maxpool_pos: + pscores, pos = pscores.max(dim=1, keepdim=True) + if confs: + sel = clamp(xy2 + self.pos_offsets[:,pos.view(-1)]) + qconf = (qconf + conf2[b2, :, sel[1], sel[0]].view(shape))/2 + + # compute negative scores + xy2n = clamp(xy2[:,None,:] + self.neg_offsets[:,:,None]) + nscores = (feat1[None,:,:] * feat2[b2, :, xy2n[1], xy2n[0]]).sum(dim=-1).t() + + if self.sub_d_neg: + # add distractors from a grid + b3, y3, x3, _ = self.gen_grid(self.sub_d_neg, aflow) + distractors = feat2[b3, :, y3, x3] + 
dscores = torch.matmul(feat1, distractors.t()) + del distractors + + # remove scores that corresponds to positives or nulls + dis2 = (x3 - xy2[0][:,None])**2 + (y3 - xy2[1][:,None])**2 + dis2 += (b3 != b2[:,None]).long() * self.neg_d**2 + dscores[dis2 < self.neg_d**2] = 0 + + scores = torch.cat((pscores, nscores, dscores), dim=1) + else: + # concat everything + scores = torch.cat((pscores, nscores), dim=1) + + gt = scores.new_zeros(scores.shape, dtype=torch.uint8) + gt[:, :pscores.shape[1]] = 1 + + return scores, gt, mask, qconf + + + + + + + + diff --git a/third_party/r2d2/results/r2d2_WAF_N16.scale-0.3-1.npy b/third_party/r2d2/results/r2d2_WAF_N16.scale-0.3-1.npy new file mode 100644 index 0000000000000000000000000000000000000000..8d731b481ac647bbe9fba4ebbc6552bdc1fd1f77 --- /dev/null +++ b/third_party/r2d2/results/r2d2_WAF_N16.scale-0.3-1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b6c42e579c824adc0e6e623202ae1845617cb81e6d1cd6606673fc2c9eb83d1 +size 15728 diff --git a/third_party/r2d2/results/r2d2_WAF_N16.size-256-1024.npy b/third_party/r2d2/results/r2d2_WAF_N16.size-256-1024.npy new file mode 100644 index 0000000000000000000000000000000000000000..54c4f4eae62ec18d440a57e6aab60ca000201717 --- /dev/null +++ b/third_party/r2d2/results/r2d2_WAF_N16.size-256-1024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01c195853636831cd8560591fe2c21d42f58fe7e1b5767acf398e583ad66d4e +size 15710 diff --git a/third_party/r2d2/results/r2d2_WASF_N16.scale-0.3-1.npy b/third_party/r2d2/results/r2d2_WASF_N16.scale-0.3-1.npy new file mode 100644 index 0000000000000000000000000000000000000000..8cdcdaba1bc992ad33120ea4de62fe79ec116100 --- /dev/null +++ b/third_party/r2d2/results/r2d2_WASF_N16.scale-0.3-1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36f0d172ddacce4d34d7d3729ba0d63aa3e783d8d9c2157ca6c32002b2fa5cd +size 15684 diff --git a/third_party/r2d2/results/r2d2_WASF_N16.size-256-1024.npy 
b/third_party/r2d2/results/r2d2_WASF_N16.size-256-1024.npy new file mode 100644 index 0000000000000000000000000000000000000000..75a00ce5276e058e869204ef255b788b98fccf3b --- /dev/null +++ b/third_party/r2d2/results/r2d2_WASF_N16.size-256-1024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e17baff59af4591de27b9c67649644f9709da291ab94791233228c6b28f29d +size 15709 diff --git a/third_party/r2d2/results/r2d2_W_N16.scale-0.3-1.npy b/third_party/r2d2/results/r2d2_W_N16.scale-0.3-1.npy new file mode 100644 index 0000000000000000000000000000000000000000..c091ab7db8d3e34075b047a5ccebad070ec14369 --- /dev/null +++ b/third_party/r2d2/results/r2d2_W_N16.scale-0.3-1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26d13b2272baed4acab517e1e85ac4832f28eeede4177f53749160e8aa67285 +size 15748 diff --git a/third_party/r2d2/tools/common.py b/third_party/r2d2/tools/common.py new file mode 100644 index 0000000000000000000000000000000000000000..a7875ddd714b1d08efb0d1369c3a856490796288 --- /dev/null +++ b/third_party/r2d2/tools/common.py @@ -0,0 +1,41 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import os, pdb#, shutil +import numpy as np +import torch + + +def mkdir_for(file_path): + os.makedirs(os.path.split(file_path)[0], exist_ok=True) + + +def model_size(model): + ''' Computes the number of parameters of the model + ''' + size = 0 + for weights in model.state_dict().values(): + size += np.prod(weights.shape) + return size + + +def torch_set_gpu(gpus): + if type(gpus) is int: + gpus = [gpus] + + cuda = all(gpu>=0 for gpu in gpus) + + if cuda: + os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(gpu) for gpu in gpus]) + assert cuda and torch.cuda.is_available(), "%s has GPUs %s unavailable" % ( + os.environ['HOSTNAME'],os.environ['CUDA_VISIBLE_DEVICES']) + torch.backends.cudnn.benchmark = True # speed-up cudnn + torch.backends.cudnn.fastest = True # even more speed-up? 
+ print( 'Launching on GPUs ' + os.environ['CUDA_VISIBLE_DEVICES'] ) + + else: + print( 'Launching on CPU' ) + + return cuda + diff --git a/third_party/r2d2/tools/dataloader.py b/third_party/r2d2/tools/dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..f6d9fff5f8dfb8d9d3b243a57555779de33d0818 --- /dev/null +++ b/third_party/r2d2/tools/dataloader.py @@ -0,0 +1,367 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb +from PIL import Image +import numpy as np + +import torch +import torchvision.transforms as tvf + +from tools.transforms import instanciate_transformation +from tools.transforms_tools import persp_apply + + +RGB_mean = [0.485, 0.456, 0.406] +RGB_std = [0.229, 0.224, 0.225] + +norm_RGB = tvf.Compose([tvf.ToTensor(), tvf.Normalize(mean=RGB_mean, std=RGB_std)]) + + +class PairLoader: + """ On-the-fly jittering of pairs of image with dense pixel ground-truth correspondences. + + crop: random crop applied to both images + scale: random scaling applied to img2 + distort: random ditorsion applied to img2 + + self[idx] returns a dictionary with keys: img1, img2, aflow, mask + - img1: cropped original + - img2: distorted cropped original + - aflow: 'absolute' optical flow = (x,y) position of each pixel from img1 in img2 + - mask: (binary image) valid pixels of img1 + """ + def __init__(self, dataset, crop='', scale='', distort='', norm = norm_RGB, + what = 'aflow mask', idx_as_rng_seed = False): + assert hasattr(dataset, 'npairs') + assert hasattr(dataset, 'get_pair') + self.dataset = dataset + self.distort = instanciate_transformation(distort) + self.crop = instanciate_transformation(crop) + self.norm = instanciate_transformation(norm) + self.scale = instanciate_transformation(scale) + self.idx_as_rng_seed = idx_as_rng_seed # to remove randomness + self.what = what.split() if isinstance(what, str) else what + self.n_samples = 5 # number of random trials per image + + 
def __len__(self): + assert len(self.dataset) == self.dataset.npairs, pdb.set_trace() # and not nimg + return len(self.dataset) + + def __repr__(self): + fmt_str = 'PairLoader\n' + fmt_str += repr(self.dataset) + fmt_str += ' npairs: %d\n' % self.dataset.npairs + short_repr = lambda s: repr(s).strip().replace('\n',', ')[14:-1].replace(' ',' ') + fmt_str += ' Distort: %s\n' % short_repr(self.distort) + fmt_str += ' Crop: %s\n' % short_repr(self.crop) + fmt_str += ' Norm: %s\n' % short_repr(self.norm) + return fmt_str + + def __getitem__(self, i): + #from time import time as now; t0 = now() + if self.idx_as_rng_seed: + import random + random.seed(i) + np.random.seed(i) + + # Retrieve an image pair and their absolute flow + img_a, img_b, metadata = self.dataset.get_pair(i, self.what) + + # aflow contains pixel coordinates indicating where each + # pixel from the left image ended up in the right image + # as (x,y) pairs, but its shape is (H,W,2) + aflow = np.float32(metadata['aflow']) + mask = metadata.get('mask', np.ones(aflow.shape[:2],np.uint8)) + + # apply transformations to the second image + img_b = {'img': img_b, 'persp':(1,0,0,0,1,0,0,0)} + if self.scale: + img_b = self.scale(img_b) + if self.distort: + img_b = self.distort(img_b) + + # apply the same transformation to the flow + aflow[:] = persp_apply(img_b['persp'], aflow.reshape(-1,2)).reshape(aflow.shape) + corres = None + if 'corres' in metadata: + corres = np.float32(metadata['corres']) + corres[:,1] = persp_apply(img_b['persp'], corres[:,1]) + + # apply the same transformation to the homography + homography = None + if 'homography' in metadata: + homography = np.float32(metadata['homography']) + # p_b = homography * p_a + persp = np.float32(img_b['persp']+(1,)).reshape(3,3) + homography = persp @ homography + + # determine crop size + img_b = img_b['img'] + crop_size = self.crop({'imsize':(10000,10000)})['imsize'] + output_size_a = min(img_a.size, crop_size) + output_size_b = min(img_b.size, crop_size) + 
img_a = np.array(img_a) + img_b = np.array(img_b) + + ah,aw,p1 = img_a.shape + bh,bw,p2 = img_b.shape + assert p1 == 3 + assert p2 == 3 + assert aflow.shape == (ah, aw, 2) + assert mask.shape == (ah, aw) + + # Let's start by computing the scale of the + # optical flow and applying a median filter: + dx = np.gradient(aflow[:,:,0]) + dy = np.gradient(aflow[:,:,1]) + scale = np.sqrt(np.clip(np.abs(dx[1]*dy[0] - dx[0]*dy[1]), 1e-16, 1e16)) + + accu2 = np.zeros((16,16), bool) + Q = lambda x, w: np.int32(16 * (x - w.start) / (w.stop - w.start)) + + def window1(x, size, w): + l = x - int(0.5 + size / 2) + r = l + int(0.5 + size) + if l < 0: l,r = (0, r - l) + if r > w: l,r = (l + w - r, w) + if l < 0: l,r = 0,w # larger than width + return slice(l,r) + def window(cx, cy, win_size, scale, img_shape): + return (window1(cy, win_size[1]*scale, img_shape[0]), + window1(cx, win_size[0]*scale, img_shape[1])) + + n_valid_pixel = mask.sum() + sample_w = mask / (1e-16 + n_valid_pixel) + def sample_valid_pixel(): + n = np.random.choice(sample_w.size, p=sample_w.ravel()) + y, x = np.unravel_index(n, sample_w.shape) + return x, y + + # Find suitable left and right windows + trials = 0 # take the best out of few trials + best = -np.inf, None + for _ in range(50*self.n_samples): + if trials >= self.n_samples: break # finished! 
+ + # pick a random valid point from the first image + if n_valid_pixel == 0: break + c1x, c1y = sample_valid_pixel() + + # Find in which position the center of the left + # window ended up being placed in the right image + c2x, c2y = (aflow[c1y, c1x] + 0.5).astype(np.int32) + if not(0 <= c2x < bw and 0 <= c2y < bh): continue + + # Get the flow scale + sigma = scale[c1y, c1x] + + # Determine sampling windows + if 0.2 < sigma < 1: + win1 = window(c1x, c1y, output_size_a, 1/sigma, img_a.shape) + win2 = window(c2x, c2y, output_size_b, 1, img_b.shape) + elif 1 <= sigma < 5: + win1 = window(c1x, c1y, output_size_a, 1, img_a.shape) + win2 = window(c2x, c2y, output_size_b, sigma, img_b.shape) + else: + continue # bad scale + + # compute a score based on the flow + x2,y2 = aflow[win1].reshape(-1, 2).T.astype(np.int32) + # Check the proportion of valid flow vectors + valid = (win2[1].start <= x2) & (x2 < win2[1].stop) \ + & (win2[0].start <= y2) & (y2 < win2[0].stop) + score1 = (valid * mask[win1].ravel()).mean() + # check the coverage of the second window + accu2[:] = False + accu2[Q(y2[valid],win2[0]), Q(x2[valid],win2[1])] = True + score2 = accu2.mean() + # Check how many hits we got + score = min(score1, score2) + + trials += 1 + if score > best[0]: + best = score, win1, win2 + + if None in best: # counldn't find a good window + img_a = np.zeros(output_size_a[::-1]+(3,), dtype=np.uint8) + img_b = np.zeros(output_size_b[::-1]+(3,), dtype=np.uint8) + aflow = np.nan * np.ones((2,)+output_size_a[::-1], dtype=np.float32) + homography = np.nan * np.ones((3,3), dtype=np.float32) + + else: + win1, win2 = best[1:] + img_a = img_a[win1] + img_b = img_b[win2] + aflow = aflow[win1] - np.float32([[[win2[1].start, win2[0].start]]]) + mask = mask[win1] + aflow[~mask.view(bool)] = np.nan # mask bad pixels! 
+ aflow = aflow.transpose(2,0,1) # --> (2,H,W) + + if corres is not None: + corres[:,0] -= (win1[1].start, win1[0].start) + corres[:,1] -= (win2[1].start, win2[0].start) + + if homography is not None: + trans1 = np.eye(3, dtype=np.float32) + trans1[:2,2] = (win1[1].start, win1[0].start) + trans2 = np.eye(3, dtype=np.float32) + trans2[:2,2] = (-win2[1].start, -win2[0].start) + homography = trans2 @ homography @ trans1 + homography /= homography[2,2] + + # rescale if necessary + if img_a.shape[:2][::-1] != output_size_a: + sx, sy = (np.float32(output_size_a)-1)/(np.float32(img_a.shape[:2][::-1])-1) + img_a = np.asarray(Image.fromarray(img_a).resize(output_size_a, Image.ANTIALIAS)) + mask = np.asarray(Image.fromarray(mask).resize(output_size_a, Image.NEAREST)) + afx = Image.fromarray(aflow[0]).resize(output_size_a, Image.NEAREST) + afy = Image.fromarray(aflow[1]).resize(output_size_a, Image.NEAREST) + aflow = np.stack((np.float32(afx), np.float32(afy))) + + if corres is not None: + corres[:,0] *= (sx, sy) + + if homography is not None: + homography = homography @ np.diag(np.float32([1/sx,1/sy,1])) + homography /= homography[2,2] + + if img_b.shape[:2][::-1] != output_size_b: + sx, sy = (np.float32(output_size_b)-1)/(np.float32(img_b.shape[:2][::-1])-1) + img_b = np.asarray(Image.fromarray(img_b).resize(output_size_b, Image.ANTIALIAS)) + aflow *= [[[sx]], [[sy]]] + + if corres is not None: + corres[:,1] *= (sx, sy) + + if homography is not None: + homography = np.diag(np.float32([sx,sy,1])) @ homography + homography /= homography[2,2] + + assert aflow.dtype == np.float32, pdb.set_trace() + assert homography is None or homography.dtype == np.float32, pdb.set_trace() + if 'flow' in self.what: + H, W = img_a.shape[:2] + mgrid = np.mgrid[0:H, 0:W][::-1].astype(np.float32) + flow = aflow - mgrid + + result = dict(img1=self.norm(img_a), img2=self.norm(img_b)) + for what in self.what: + try: result[what] = eval(what) + except NameError: pass + return result + + + +def 
threaded_loader( loader, iscuda, threads, batch_size=1, shuffle=True): + """ Get a data loader, given the dataset and some parameters. + + Parameters + ---------- + loader : object[i] returns the i-th training example. + + iscuda : bool + + batch_size : int + + threads : int + + shuffle : int + + Returns + ------- + a multi-threaded pytorch loader. + """ + return torch.utils.data.DataLoader( + loader, + batch_size = batch_size, + shuffle = shuffle, + sampler = None, + num_workers = threads, + pin_memory = iscuda, + collate_fn=collate) + + + +def collate(batch, _use_shared_memory=True): + """Puts each data field into a tensor with outer dimension batch size. + Copied from https://github.com/pytorch in torch/utils/data/_utils/collate.py + """ + import re + error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" + elem_type = type(batch[0]) + if isinstance(batch[0], torch.Tensor): + out = None + if _use_shared_memory: + # If we're in a background process, concatenate directly into a + # shared memory tensor to avoid an extra copy + numel = sum([x.numel() for x in batch]) + storage = batch[0].storage()._new_shared(numel) + out = batch[0].new(storage) + return torch.stack(batch, 0, out=out) + elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ + and elem_type.__name__ != 'string_': + elem = batch[0] + assert elem_type.__name__ == 'ndarray' + # array of string classes and object + if re.search('[SaUO]', elem.dtype.str) is not None: + raise TypeError(error_msg.format(elem.dtype)) + batch = [torch.from_numpy(b) for b in batch] + try: + return torch.stack(batch, 0) + except RuntimeError: + return batch + elif batch[0] is None: + return list(batch) + elif isinstance(batch[0], int): + return torch.LongTensor(batch) + elif isinstance(batch[0], float): + return torch.DoubleTensor(batch) + elif isinstance(batch[0], str): + return batch + elif isinstance(batch[0], dict): + return {key: collate([d[key] for d in batch]) for key in batch[0]} + 
elif isinstance(batch[0], (tuple,list)): + transposed = zip(*batch) + return [collate(samples) for samples in transposed] + + raise TypeError((error_msg.format(type(batch[0])))) + + + +def tensor2img(tensor, model=None): + """ convert back a torch/numpy tensor to a PIL Image + by undoing the ToTensor() and Normalize() transforms. + """ + mean = norm_RGB.transforms[1].mean + std = norm_RGB.transforms[1].std + if isinstance(tensor, torch.Tensor): + tensor = tensor.detach().cpu().numpy() + + res = np.uint8(np.clip(255*((tensor.transpose(1,2,0) * std) + mean), 0, 255)) + from PIL import Image + return Image.fromarray(res) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser("Tool to debug/visualize the data loader") + parser.add_argument("dataloader", type=str, help="command to create the data loader") + args = parser.parse_args() + + from datasets import * + auto_pairs = lambda db: SyntheticPairDataset(db, + 'RandomScale(256,1024,can_upscale=True)', + 'RandomTilting(0.5), PixelNoise(25)') + + loader = eval(args.dataloader) + print("Data loader =", loader) + + from tools.viz import show_flow + for data in loader: + aflow = data['aflow'] + H, W = aflow.shape[-2:] + flow = (aflow - np.mgrid[:H, :W][::-1]).transpose(1,2,0) + show_flow(tensor2img(data['img1']), tensor2img(data['img2']), flow) + diff --git a/third_party/r2d2/tools/trainer.py b/third_party/r2d2/tools/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..9f893395efdeb8e13cc00539325572553168c5ce --- /dev/null +++ b/third_party/r2d2/tools/trainer.py @@ -0,0 +1,76 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb +from tqdm import tqdm +from collections import defaultdict + +import torch +import torch.nn as nn + + +class Trainer (nn.Module): + """ Helper class to train a deep network. + Overload this class `forward_backward` for your actual needs. 
+ + Usage: + train = Trainer(net, loader, loss, optimizer) + for epoch in range(n_epochs): + train() + """ + def __init__(self, net, loader, loss, optimizer): + nn.Module.__init__(self) + self.net = net + self.loader = loader + self.loss_func = loss + self.optimizer = optimizer + + def iscuda(self): + return next(self.net.parameters()).device != torch.device('cpu') + + def todevice(self, x): + if isinstance(x, dict): + return {k:self.todevice(v) for k,v in x.items()} + if isinstance(x, (tuple,list)): + return [self.todevice(v) for v in x] + + if self.iscuda(): + return x.contiguous().cuda(non_blocking=True) + else: + return x.cpu() + + def __call__(self): + self.net.train() + + stats = defaultdict(list) + + for iter,inputs in enumerate(tqdm(self.loader)): + inputs = self.todevice(inputs) + + # compute gradient and do model update + self.optimizer.zero_grad() + + loss, details = self.forward_backward(inputs) + if torch.isnan(loss): + raise RuntimeError('Loss is NaN') + + self.optimizer.step() + + for key, val in details.items(): + stats[key].append( val ) + + print(" Summary of losses during this epoch:") + mean = lambda lis: sum(lis) / len(lis) + for loss_name, vals in stats.items(): + N = 1 + len(vals)//10 + print(f" - {loss_name:20}:", end='') + print(f" {mean(vals[:N]):.3f} --> {mean(vals[-N:]):.3f} (avg: {mean(vals):.3f})") + return mean(stats['loss']) # return average loss + + def forward_backward(self, inputs): + raise NotImplementedError() + + + + diff --git a/third_party/r2d2/tools/transforms.py b/third_party/r2d2/tools/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..87275276310191a7da3fc14f606345d9616208e0 --- /dev/null +++ b/third_party/r2d2/tools/transforms.py @@ -0,0 +1,513 @@ +# Copyright 2019-present NAVER Corp. 
+# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb +import numpy as np +from PIL import Image, ImageOps +import torchvision.transforms as tvf +import random +from math import ceil + +from . import transforms_tools as F + +''' +Example command to try out some transformation chain: + +python -m tools.transforms --trfs "Scale(384), ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), RandomRotation(10), RandomTilting(0.5, 'all'), RandomScale(240,320), RandomCrop(224)" +''' + + +def instanciate_transformation(cmd_line): + ''' Create a sequence of transformations. + + cmd_line: (str) + Comma-separated list of transformations. + Ex: "Rotate(10), Scale(256)" + ''' + if not isinstance(cmd_line, str): + return cmd_line # already instanciated + + cmd_line = "tvf.Compose([%s])" % cmd_line + try: + return eval(cmd_line) + except Exception as e: + print("Cannot interpret this transform list: %s\nReason: %s" % (cmd_line, e)) + + +class Scale (object): + """ Rescale the input PIL.Image to a given size. + Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + The smallest dimension of the resulting image will be = size. + + if largest == True: same behaviour for the largest dimension. 
+ + if not can_upscale: don't upscale + if not can_downscale: don't downscale + """ + def __init__(self, size, interpolation=Image.BILINEAR, largest=False, + can_upscale=True, can_downscale=True): + assert isinstance(size, int) or (len(size) == 2) + self.size = size + self.interpolation = interpolation + self.largest = largest + self.can_upscale = can_upscale + self.can_downscale = can_downscale + + def __repr__(self): + fmt_str = "RandomScale(%s" % str(self.size) + if self.largest: fmt_str += ', largest=True' + if not self.can_upscale: fmt_str += ', can_upscale=False' + if not self.can_downscale: fmt_str += ', can_downscale=False' + return fmt_str+')' + + def get_params(self, imsize): + w,h = imsize + if isinstance(self.size, int): + cmp = lambda a,b: (a>=b) if self.largest else (a<=b) + if (cmp(w, h) and w == self.size) or (cmp(h, w) and h == self.size): + ow, oh = w, h + elif cmp(w, h): + ow = self.size + oh = int(self.size * h / w) + else: + oh = self.size + ow = int(self.size * w / h) + else: + ow, oh = self.size + return ow, oh + + def __call__(self, inp): + img = F.grab_img(inp) + w, h = img.size + + size2 = ow, oh = self.get_params(img.size) + + if size2 != img.size: + a1, a2 = img.size, size2 + if (self.can_upscale and min(a1) < min(a2)) or (self.can_downscale and min(a1) > min(a2)): + img = img.resize(size2, self.interpolation) + + return F.update_img_and_labels(inp, img, persp=(ow/w,0,0,0,oh/h,0,0,0)) + + + +class RandomScale (Scale): + """Rescale the input PIL.Image to a random size. + Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + Args: + min_size (int): min size of the smaller edge of the picture. + max_size (int): max size of the smaller edge of the picture. + + ar (float or tuple): + max change of aspect ratio (width/height). + + interpolation (int, optional): Desired interpolation. 
Default is + ``PIL.Image.BILINEAR`` + """ + + def __init__(self, min_size, max_size, ar=1, + can_upscale=False, can_downscale=True, interpolation=Image.BILINEAR): + Scale.__init__(self, 0, can_upscale=can_upscale, can_downscale=can_downscale, interpolation=interpolation) + assert type(min_size) == type(max_size), 'min_size and max_size can only be 2 ints or 2 floats' + assert isinstance(min_size, int) and min_size >= 1 or isinstance(min_size, float) and min_size>0 + assert isinstance(max_size, (int,float)) and min_size <= max_size + self.min_size = min_size + self.max_size = max_size + if type(ar) in (float,int): ar = (min(1/ar,ar),max(1/ar,ar)) + assert 0.2 < ar[0] <= ar[1] < 5 + self.ar = ar + + def get_params(self, imsize): + w,h = imsize + if isinstance(self.min_size, float): + min_size = int(self.min_size*min(w,h) + 0.5) + if isinstance(self.max_size, float): + max_size = int(self.max_size*min(w,h) + 0.5) + if isinstance(self.min_size, int): + min_size = self.min_size + if isinstance(self.max_size, int): + max_size = self.max_size + + if not self.can_upscale: + max_size = min(max_size,min(w,h)) + + size = int(0.5 + F.rand_log_uniform(min_size,max_size)) + ar = F.rand_log_uniform(*self.ar) # change of aspect ratio + + if w < h: # image is taller + ow = size + oh = int(0.5 + size * h / w / ar) + if oh < min_size: + ow,oh = int(0.5 + ow*float(min_size)/oh),min_size + else: # image is wider + oh = size + ow = int(0.5 + size * w / h * ar) + if ow < min_size: + ow,oh = min_size,int(0.5 + oh*float(min_size)/ow) + + assert ow >= min_size, 'image too small (width=%d < min_size=%d)' % (ow, min_size) + assert oh >= min_size, 'image too small (height=%d < min_size=%d)' % (oh, min_size) + return ow, oh + + + +class RandomCrop (object): + """Crop the given PIL Image at a random location. + Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + Args: + size (sequence or int): Desired output size of the crop. 
If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + padding (int or sequence, optional): Optional padding on each border + of the image. Default is 0, i.e no padding. If a sequence of length + 4 is provided, it is used to pad left, top, right, bottom borders + respectively. + """ + + def __init__(self, size, padding=0): + if isinstance(size, int): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + def __repr__(self): + return "RandomCrop(%s)" % str(self.size) + + @staticmethod + def get_params(img, output_size): + w, h = img.size + th, tw = output_size + assert h >= th and w >= tw, "Image of %dx%d is too small for crop %dx%d" % (w,h,tw,th) + + y = np.random.randint(0, h - th) if h > th else 0 + x = np.random.randint(0, w - tw) if w > tw else 0 + return x, y, tw, th + + def __call__(self, inp): + img = F.grab_img(inp) + + padl = padt = 0 + if self.padding: + if F.is_pil_image(img): + img = ImageOps.expand(img, border=self.padding, fill=0) + else: + assert isinstance(img, F.DummyImg) + img = img.expand(border=self.padding) + if isinstance(self.padding, int): + padl = padt = self.padding + else: + padl, padt = self.padding[0:2] + + i, j, tw, th = self.get_params(img, self.size) + img = img.crop((i, j, i+tw, j+th)) + + return F.update_img_and_labels(inp, img, persp=(1,0,padl-i,0,1,padt-j,0,0)) + + +class CenterCrop (RandomCrop): + """Crops the given PIL Image at the center. + Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + """ + @staticmethod + def get_params(img, output_size): + w, h = img.size + th, tw = output_size + y = int(0.5 +((h - th) / 2.)) + x = int(0.5 +((w - tw) / 2.)) + return x, y, tw, th + + + +class RandomRotation(object): + """Rescale the input PIL.Image to a random size. 
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + Args: + degrees (float): + rotation angle. + + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + """ + + def __init__(self, degrees, interpolation=Image.BILINEAR): + self.degrees = degrees + self.interpolation = interpolation + + def __call__(self, inp): + img = F.grab_img(inp) + w, h = img.size + + angle = np.random.uniform(-self.degrees, self.degrees) + + img = img.rotate(angle, resample=self.interpolation) + w2, h2 = img.size + + trf = F.translate(-w/2,-h/2) + trf = F.persp_mul(trf, F.rotate(-angle * np.pi/180)) + trf = F.persp_mul(trf, F.translate(w2/2,h2/2)) + return F.update_img_and_labels(inp, img, persp=trf) + + + +class RandomTilting(object): + """Apply a random tilting (left, right, up, down) to the input PIL.Image + Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + Args: + maginitude (float): + maximum magnitude of the random skew (value between 0 and 1) + directions (string): + tilting directions allowed (all, left, right, up, down) + examples: "all", "left,right", "up-down-right" + """ + + def __init__(self, magnitude, directions='all'): + self.magnitude = magnitude + self.directions = directions.lower().replace(',',' ').replace('-',' ') + + def __repr__(self): + return "RandomTilt(%g, '%s')" % (self.magnitude,self.directions) + + def __call__(self, inp): + img = F.grab_img(inp) + w, h = img.size + + x1,y1,x2,y2 = 0,0,h,w + original_plane = [(y1, x1), (y2, x1), (y2, x2), (y1, x2)] + + max_skew_amount = max(w, h) + max_skew_amount = int(ceil(max_skew_amount * self.magnitude)) + skew_amount = random.randint(1, max_skew_amount) + + if self.directions == 'all': + choices = [0,1,2,3] + else: + dirs = ['left', 'right', 'up', 'down'] + choices = [] + for d in self.directions.split(): + try: + choices.append(dirs.index(d)) + except: + raise ValueError('Tilting direction %s not recognized' % d) + + 
skew_direction = random.choice(choices) + + # print('randomtitlting: ', skew_amount, skew_direction) # to debug random + + if skew_direction == 0: + # Left Tilt + new_plane = [(y1, x1 - skew_amount), # Top Left + (y2, x1), # Top Right + (y2, x2), # Bottom Right + (y1, x2 + skew_amount)] # Bottom Left + elif skew_direction == 1: + # Right Tilt + new_plane = [(y1, x1), # Top Left + (y2, x1 - skew_amount), # Top Right + (y2, x2 + skew_amount), # Bottom Right + (y1, x2)] # Bottom Left + elif skew_direction == 2: + # Forward Tilt + new_plane = [(y1 - skew_amount, x1), # Top Left + (y2 + skew_amount, x1), # Top Right + (y2, x2), # Bottom Right + (y1, x2)] # Bottom Left + elif skew_direction == 3: + # Backward Tilt + new_plane = [(y1, x1), # Top Left + (y2, x1), # Top Right + (y2 + skew_amount, x2), # Bottom Right + (y1 - skew_amount, x2)] # Bottom Left + + # To calculate the coefficients required by PIL for the perspective skew, + # see the following Stack Overflow discussion: https://goo.gl/sSgJdj + matrix = [] + + for p1, p2 in zip(new_plane, original_plane): + matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) + matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) + + A = np.matrix(matrix, dtype=np.float) + B = np.array(original_plane).reshape(8) + + homography = np.dot(np.linalg.pinv(A), B) + homography = tuple(np.array(homography).reshape(8)) + #print(homography) + + img = img.transform(img.size, Image.PERSPECTIVE, homography, resample=Image.BICUBIC) + + homography = np.linalg.pinv(np.float32(homography+(1,)).reshape(3,3)).ravel()[:8] + return F.update_img_and_labels(inp, img, persp=tuple(homography)) + + +RandomTilt = RandomTilting # redefinition + + +class Tilt(object): + """Apply a known tilting to an image + """ + def __init__(self, *homography): + assert len(homography) == 8 + self.homography = homography + + def __call__(self, inp): + img = F.grab_img(inp) + homography = self.homography + #print(homography) + + img = 
img.transform(img.size, Image.PERSPECTIVE, homography, resample=Image.BICUBIC) + + homography = np.linalg.pinv(np.float32(homography+(1,)).reshape(3,3)).ravel()[:8] + return F.update_img_and_labels(inp, img, persp=tuple(homography)) + + + +class StillTransform (object): + """ Takes and return an image, without changing its shape or geometry. + """ + def _transform(self, img): + raise NotImplementedError() + + def __call__(self, inp): + img = F.grab_img(inp) + + # transform the image (size should not change) + try: + img = self._transform(img) + except TypeError: + pass + + return F.update_img_and_labels(inp, img, persp=(1,0,0,0,1,0,0,0)) + + + +class PixelNoise (StillTransform): + """ Takes an image, and add random white noise. + """ + def __init__(self, ampl=20): + StillTransform.__init__(self) + assert 0 <= ampl < 255 + self.ampl = ampl + + def __repr__(self): + return "PixelNoise(%g)" % self.ampl + + def _transform(self, img): + img = np.float32(img) + img += np.random.uniform(0.5-self.ampl/2, 0.5+self.ampl/2, size=img.shape) + return Image.fromarray(np.uint8(img.clip(0,255))) + + + +class ColorJitter (StillTransform): + """Randomly change the brightness, contrast and saturation of an image. + Copied from https://github.com/pytorch in torchvision/transforms/transforms.py + + Args: + brightness (float): How much to jitter brightness. brightness_factor + is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. + contrast (float): How much to jitter contrast. contrast_factor + is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. + saturation (float): How much to jitter saturation. saturation_factor + is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. + hue(float): How much to jitter hue. hue_factor is chosen uniformly from + [-hue, hue]. Should be >=0 and <= 0.5. 
+ """ + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + self.brightness = brightness + self.contrast = contrast + self.saturation = saturation + self.hue = hue + + def __repr__(self): + return "ColorJitter(%g,%g,%g,%g)" % ( + self.brightness, self.contrast, self.saturation, self.hue) + + @staticmethod + def get_params(brightness, contrast, saturation, hue): + """Get a randomized transform to be applied on image. + Arguments are same as that of __init__. + Returns: + Transform which randomly adjusts brightness, contrast and + saturation in a random order. + """ + transforms = [] + if brightness > 0: + brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness) + transforms.append(tvf.Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) + + if contrast > 0: + contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast) + transforms.append(tvf.Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) + + if saturation > 0: + saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation) + transforms.append(tvf.Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) + + if hue > 0: + hue_factor = np.random.uniform(-hue, hue) + transforms.append(tvf.Lambda(lambda img: F.adjust_hue(img, hue_factor))) + + # print('colorjitter: ', brightness_factor, contrast_factor, saturation_factor, hue_factor) # to debug random seed + + np.random.shuffle(transforms) + transform = tvf.Compose(transforms) + + return transform + + def _transform(self, img): + transform = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) + return transform(img) + + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser("Script to try out and visualize transformations") + parser.add_argument('--img', type=str, default='imgs/test.png', help='input image') + parser.add_argument('--trfs', type=str, required=True, help='list of transformations') + 
parser.add_argument('--layout', type=int, nargs=2, default=(3,3), help='nb of rows,cols') + args = parser.parse_args() + + import os + args.img = args.img.replace('$HERE',os.path.dirname(__file__)) + img = Image.open(args.img) + img = dict(img=img) + + trfs = instanciate_transformation(args.trfs) + + from matplotlib import pyplot as pl + pl.ion() + pl.subplots_adjust(0,0,1,1) + + nr,nc = args.layout + + while True: + for j in range(nr): + for i in range(nc): + pl.subplot(nr,nc,i+j*nc+1) + if i==j==0: + img2 = img + else: + img2 = trfs(img.copy()) + if isinstance(img2, dict): + img2 = img2['img'] + pl.imshow(img2) + pl.xlabel("%d x %d" % img2.size) + pl.xticks(()) + pl.yticks(()) + pdb.set_trace() + + + diff --git a/third_party/r2d2/tools/transforms_tools.py b/third_party/r2d2/tools/transforms_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..294c22228a88f70480af52f79a77d73f9e5b3e1a --- /dev/null +++ b/third_party/r2d2/tools/transforms_tools.py @@ -0,0 +1,230 @@ +# Copyright 2019-present NAVER Corp. +# CC BY-NC-SA 3.0 +# Available only for non-commercial use + +import pdb +import numpy as np +from PIL import Image, ImageOps, ImageEnhance + + +class DummyImg: + ''' This class is a dummy image only defined by its size. + ''' + def __init__(self, size): + self.size = size + + def resize(self, size, *args, **kwargs): + return DummyImg(size) + + def expand(self, border): + w, h = self.size + if isinstance(border, int): + size = (w+2*border, h+2*border) + else: + l,t,r,b = border + size = (w+l+r, h+t+b) + return DummyImg(size) + + def crop(self, border): + w, h = self.size + l,t,r,b = border + assert 0 <= l <= r <= w + assert 0 <= t <= b <= h + size = (r-l, b-t) + return DummyImg(size) + + def rotate(self, angle): + raise NotImplementedError + + def transform(self, size, *args, **kwargs): + return DummyImg(size) + + +def grab_img( img_and_label ): + ''' Called to extract the image from an img_and_label input + (a dictionary). 
Also compatible with old-style PIL images. + ''' + if isinstance(img_and_label, dict): + # if input is a dictionary, then + # it must contains the img or its size. + try: + return img_and_label['img'] + except KeyError: + return DummyImg(img_and_label['imsize']) + + else: + # or it must be the img directly + return img_and_label + + +def update_img_and_labels(img_and_label, img, persp=None): + ''' Called to update the img_and_label + ''' + if isinstance(img_and_label, dict): + img_and_label['img'] = img + img_and_label['imsize'] = img.size + + if persp: + if 'persp' not in img_and_label: + img_and_label['persp'] = (1,0,0,0,1,0,0,0) + img_and_label['persp'] = persp_mul(persp, img_and_label['persp']) + + return img_and_label + + else: + # or it must be the img directly + return img + + +def rand_log_uniform(a, b): + return np.exp(np.random.uniform(np.log(a),np.log(b))) + + +def translate(tx, ty): + return (1,0,tx, + 0,1,ty, + 0,0) + +def rotate(angle): + return (np.cos(angle),-np.sin(angle), 0, + np.sin(angle), np.cos(angle), 0, + 0, 0) + + +def persp_mul(mat, mat2): + ''' homography (perspective) multiplication. + mat: 8-tuple (homography transform) + mat2: 8-tuple (homography transform) or 2-tuple (point) + ''' + assert isinstance(mat, tuple) + assert isinstance(mat2, tuple) + + mat = np.float32(mat+(1,)).reshape(3,3) + mat2 = np.array(mat2+(1,)).reshape(3,3) + res = np.dot(mat, mat2) + return tuple((res/res[2,2]).ravel()[:8]) + + +def persp_apply(mat, pts): + ''' homography (perspective) transformation. 
+ mat: 8-tuple (homography transform) + pts: numpy array + ''' + assert isinstance(mat, tuple) + assert isinstance(pts, np.ndarray) + assert pts.shape[-1] == 2 + mat = np.float32(mat+(1,)).reshape(3,3) + + if pts.ndim == 1: + pt = np.dot(pts, mat[:,:2].T).ravel() + mat[:,2] + pt /= pt[2] # homogeneous coordinates + return tuple(pt[:2]) + else: + pt = np.dot(pts, mat[:,:2].T) + mat[:,2] + pt[:,:2] /= pt[:,2:3] # homogeneous coordinates + return pt[:,:2] + + +def is_pil_image(img): + return isinstance(img, Image.Image) + + +def adjust_brightness(img, brightness_factor): + """Adjust brightness of an Image. + Args: + img (PIL Image): PIL Image to be adjusted. + brightness_factor (float): How much to adjust the brightness. Can be + any non negative number. 0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + Returns: + PIL Image: Brightness adjusted image. + Copied from https://github.com/pytorch in torchvision/transforms/functional.py + """ + if not is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + enhancer = ImageEnhance.Brightness(img) + img = enhancer.enhance(brightness_factor) + return img + + +def adjust_contrast(img, contrast_factor): + """Adjust contrast of an Image. + Args: + img (PIL Image): PIL Image to be adjusted. + contrast_factor (float): How much to adjust the contrast. Can be any + non negative number. 0 gives a solid gray image, 1 gives the + original image while 2 increases the contrast by a factor of 2. + Returns: + PIL Image: Contrast adjusted image. + Copied from https://github.com/pytorch in torchvision/transforms/functional.py + """ + if not is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(contrast_factor) + return img + + +def adjust_saturation(img, saturation_factor): + """Adjust color saturation of an image. 
def adjust_saturation(img, saturation_factor):
    """Adjust color saturation of an image.
    Args:
        img (PIL Image): PIL Image to be adjusted.
        saturation_factor (float):  How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.
    Returns:
        PIL Image: Saturation adjusted image.
    Raises:
        TypeError: if `img` is not a PIL Image.
    Copied from https://github.com/pytorch in torchvision/transforms/functional.py
    """
    if not is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return ImageEnhance.Color(img).enhance(saturation_factor)


def adjust_hue(img, hue_factor):
    """Adjust hue of an image.
    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.
    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.
    See https://en.wikipedia.org/wiki/Hue for more details on Hue.
    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float):  How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.
    Returns:
        PIL Image: Hue adjusted image. Images in mode 'L', '1', 'I' or 'F'
        are returned unchanged.
    Raises:
        ValueError: if `hue_factor` is outside [-0.5, 0.5].
        TypeError: if `img` is not a PIL Image.
    Copied from https://github.com/pytorch in torchvision/transforms/functional.py
    """
    if not(-0.5 <= hue_factor <= 0.5):
        # report the offending value (the message used to drop it)
        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))

    if not is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        # modes handled here carry no hue information to shift
        return img

    h, s, v = img.convert('HSV').split()

    np_h = np.array(h, dtype=np.uint8)
    # Wrap the (possibly negative) shift into [0, 255] with integer arithmetic
    # first: casting a negative float straight to uint8 is platform dependent.
    shift = np.uint8(int(hue_factor * 255) % 256)
    # uint8 addition take cares of rotation across boundaries
    with np.errstate(over='ignore'):
        np_h += shift
    h = Image.fromarray(np_h, 'L')

    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
    return img
def make_colorwheel():
    '''
    Generates a color wheel for optical flow visualization as presented in:
        Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
        URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
    According to the C++ source code of Daniel Scharstein
    According to the Matlab source code of Deqing Sun

    :return: np.ndarray of shape [55,3], one RGB color (values in 0..255) per row

    Copied from https://github.com/tomrunia/OpticalFlow_Visualization/blob/master/flow_vis.py
    Copyright (c) 2018 Tom Runia
    '''

    # number of steps in each of the six hue transitions
    RY = 15
    YG = 6
    GC = 4
    CB = 11
    BM = 13
    MR = 6

    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros((ncols, 3))
    col = 0

    # RY
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY)
    col = col+RY
    # YG
    colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG)
    colorwheel[col:col+YG, 1] = 255
    col = col+YG
    # GC
    colorwheel[col:col+GC, 1] = 255
    colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC)
    col = col+GC
    # CB
    colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB)
    colorwheel[col:col+CB, 2] = 255
    col = col+CB
    # BM
    colorwheel[col:col+BM, 2] = 255
    colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM)
    col = col+BM
    # MR
    colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR)
    colorwheel[col:col+MR, 0] = 255
    return colorwheel


def flow_compute_color(u, v, convert_to_bgr=False):
    '''
    Applies the flow color wheel to (possibly clipped) flow components u and v.
    According to the C++ source code of Daniel Scharstein
    According to the Matlab source code of Deqing Sun
    :param u: np.ndarray, input horizontal flow
    :param v: np.ndarray, input vertical flow
    :param convert_to_bgr: bool, whether to change ordering and output BGR instead of RGB
    :return: np.ndarray of shape [H,W,3] and dtype uint8

    Copied from https://github.com/tomrunia/OpticalFlow_Visualization/blob/master/flow_vis.py
    Copyright (c) 2018 Tom Runia
    '''

    flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)

    colorwheel = make_colorwheel()  # shape [55x3]
    ncols = colorwheel.shape[0]

    rad = np.sqrt(np.square(u) + np.square(v))
    a = np.arctan2(-v, -u)/np.pi

    # map the angle (a in [-1,1]) to a fractional position on the wheel
    fk = (a+1) / 2*(ncols-1)
    k0 = np.floor(fk).astype(np.int32)
    k1 = k0 + 1
    k1[k1 == ncols] = 0  # the wheel is cyclic
    f = fk - k0

    inside = (rad <= 1)

    for i in range(colorwheel.shape[1]):

        tmp = colorwheel[:,i]
        col0 = tmp[k0] / 255.0
        col1 = tmp[k1] / 255.0
        # linear interpolation between the two neighbouring wheel colors
        col = (1-f)*col0 + f*col1

        # small motions are faded towards white
        col[inside] = 1 - rad[inside] * (1-col[inside])
        col[~inside] = col[~inside] * 0.75   # out of range?

        # Note the 2-i => BGR instead of RGB
        ch_idx = 2-i if convert_to_bgr else i
        flow_image[:,:,ch_idx] = np.floor(255 * col)

    return flow_image


def flow_to_color(flow_uv, clip_flow=None, convert_to_bgr=False):
    '''
    Expects a two dimensional flow image of shape [H,W,2]
    According to the C++ source code of Daniel Scharstein
    According to the Matlab source code of Deqing Sun
    :param flow_uv: np.ndarray of shape [H,W,2]
    :param clip_flow: float, maximum absolute value allowed for each flow
        component; larger components are clipped to +/- clip_flow
    :param convert_to_bgr: bool, whether to change ordering and output BGR instead of RGB
    :return: np.ndarray of shape [H,W,3] and dtype uint8

    Copied from https://github.com/tomrunia/OpticalFlow_Visualization/blob/master/flow_vis.py
    Copyright (c) 2018 Tom Runia
    '''

    assert flow_uv.ndim == 3, 'input flow must have three dimensions'
    assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'

    if clip_flow is not None:
        # flow components are signed: clip symmetrically, a lower bound of 0
        # would erase every motion towards the left / top
        bound = abs(clip_flow)
        flow_uv = np.clip(flow_uv, -bound, bound)

    u = flow_uv[:,:,0]
    v = flow_uv[:,:,1]

    rad = np.sqrt(np.square(u) + np.square(v))
    rad_max = np.max(rad)

    # normalize so that the largest motion has (almost) unit length
    epsilon = 1e-5
    u = u / (rad_max + epsilon)
    v = v / (rad_max + epsilon)

    return flow_compute_color(u, v, convert_to_bgr)



def show_flow( img0, img1, flow, mask=None ):
    ''' Interactive display of a flow field between two images.

    img0, img1: the two images (anything np.asarray accepts)
    flow: np.ndarray of shape [H,W,2] with H,W the size of img0; NaN entries
          are displayed as zero motion
    mask: optional array multiplied with img0 and with the flow colors

    Hovering the mouse over the figure marks the pixel under the cursor in
    img0 (blue) and its position displaced by the flow in img1 (red).
    This call blocks until the figure is closed.
    '''
    img0 = np.asarray(img0)
    img1 = np.asarray(img1)
    if mask is None: mask = 1
    mask = np.asarray(mask)
    if mask.ndim == 2: mask = mask[:,:,None]
    assert flow.ndim == 3
    assert flow.shape[:2] == img0.shape[:2] and flow.shape[2] == 2

    def noticks():
        pl.xticks([])
        pl.yticks([])

    def clear_lines(ax):
        # Axes.lines is read-only in recent matplotlib: remove artists one by one
        for line in list(ax.lines):
            line.remove()

    fig = pl.figure("showing correspondences")
    ax1 = pl.subplot(221)
    ax1.numaxis = 0
    pl.imshow(img0*mask)
    noticks()
    ax2 = pl.subplot(222)
    ax2.numaxis = 1
    pl.imshow(img1)
    noticks()

    ax = pl.subplot(212)
    ax.numaxis = 0
    flow_img = flow_to_color(np.where(np.isnan(flow), 0, flow))
    pl.imshow(flow_img * mask)
    noticks()

    pl.subplots_adjust(0.01, 0.01, 0.99, 0.99, wspace=0.02, hspace=0.02)

    def motion_notify_callback(event):
        if event.inaxes is None: return
        x,y = event.xdata, event.ydata
        clear_lines(ax1)
        clear_lines(ax2)
        try:
            x,y = int(x+0.5), int(y+0.5)
            ax1.plot(x,y,'+',ms=10,mew=2,color='blue',scalex=False,scaley=False)
            x,y = flow[y,x] + (x,y)
            ax2.plot(x,y,'+',ms=10,mew=2,color='red',scalex=False,scaley=False)
            # we redraw only the concerned axes
            renderer = fig.canvas.get_renderer()
            ax1.draw(renderer)
            ax2.draw(renderer)
            fig.canvas.blit(ax1.bbox)
            fig.canvas.blit(ax2.bbox)
        except IndexError:
            # cursor outside of the flow field
            return

    cid_move = fig.canvas.mpl_connect('motion_notify_event',motion_notify_callback)
    print("Move your mouse over the images to show matches (ctrl-C to quit)")
    pl.show()
# Python expression (evaluated with eval) that builds the default network
default_net = "Quad_L2Net_ConfCFS()"

# Dataset expressions. They are kept as strings: the selected ones are joined
# and spliced into the data loader expression before being evaluated.
toy_db_debug = """SyntheticPairDataset(
    ImgFolder('imgs'),
            'RandomScale(256,1024,can_upscale=True)',
            'RandomTilting(0.5), PixelNoise(25)')"""

db_web_images = """SyntheticPairDataset(
    web_images,
        'RandomScale(256,1024,can_upscale=True)',
        'RandomTilting(0.5), PixelNoise(25)')"""

db_aachen_images = """SyntheticPairDataset(
    aachen_db_images,
        'RandomScale(256,1024,can_upscale=True)',
        'RandomTilting(0.5), PixelNoise(25)')"""

db_aachen_style_transfer = """TransformedPairs(
    aachen_style_transfer_pairs,
            'RandomScale(256,1024,can_upscale=True), RandomTilting(0.5), PixelNoise(25)')"""

db_aachen_flow = "aachen_flow_pairs"

# one-letter keys accepted by --train-data
data_sources = dict(
    D = toy_db_debug,
    W = db_web_images,
    A = db_aachen_images,
    F = db_aachen_flow,
    S = db_aachen_style_transfer,
    )

# `data` is replaced by the comma-separated dataset expressions
default_dataloader = """PairLoader(CatPairDataset(`data`),
    scale   = 'RandomScale(256,1024,can_upscale=True)',
    distort = 'ColorJitter(0.2,0.2,0.2,0.1)',
    crop    = 'RandomCrop(192)')"""

default_sampler = """NghSampler2(ngh=7, subq=-8, subd=1, pos_d=3, neg_d=5, border=16,
                            subd_neg=-8,maxpool_pos=True)"""

# `sampler` and `N` are replaced by the --sampler and --N arguments
default_loss = """MultiLoss(
        1, ReliabilityLoss(`sampler`, base=0.5, nq=20),
        1, CosimLoss(N=`N`),
        1, PeakyLoss(N=`N`))"""


class MyTrainer(trainer.Trainer):
    """ This class implements the network training.
        Below is the function I need to overload to explain how to do the backprop.
    """
    def forward_backward(self, inputs):
        """ Run the network on one batch and back-propagate the loss.

        inputs: dict holding the image pair under 'img1' and 'img2' (both are
                popped from it); the remaining entries are forwarded to the
                loss function together with the network outputs.

        Returns (loss, details) as produced by the loss function. The backward
        pass is skipped when gradients are disabled.
        """
        output = self.net(imgs=[inputs.pop('img1'),inputs.pop('img2')])
        allvars = dict(inputs, **output)
        loss, details = self.loss_func(**allvars)
        if torch.is_grad_enabled(): loss.backward()
        return loss, details



if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser("Train R2D2")

    parser.add_argument("--data-loader", type=str, default=default_dataloader)
    parser.add_argument("--train-data", type=str, default=list('WASF'), nargs='+',
        choices = set(data_sources.keys()))
    parser.add_argument("--net", type=str, default=default_net, help='network architecture')

    parser.add_argument("--pretrained", type=str, default="", help='pretrained model path')
    parser.add_argument("--save-path", type=str, required=True, help='model save_path path')

    parser.add_argument("--loss", type=str, default=default_loss, help="loss function")
    parser.add_argument("--sampler", type=str, default=default_sampler, help="AP sampler")
    parser.add_argument("--N", type=int, default=16, help="patch size for repeatability")

    parser.add_argument("--epochs", type=int, default=25, help='number of training epochs')
    parser.add_argument("--batch-size", "--bs", type=int, default=8, help="batch size")
    # must be parsed as a float: a string learning rate is rejected by the optimizer
    parser.add_argument("--learning-rate", "--lr", type=float, default=1e-4)
    parser.add_argument("--weight-decay", "--wd", type=float, default=5e-4)

    parser.add_argument("--threads", type=int, default=8, help='number of worker threads')
    parser.add_argument("--gpu", type=int, nargs='+', default=[0], help='-1 for CPU')

    args = parser.parse_args()

    iscuda = common.torch_set_gpu(args.gpu)
    common.mkdir_for(args.save_path)

    # Create data loader
    from datasets import *
    db = [data_sources[key] for key in args.train_data]
    # NOTE(review): --data-loader, --net and --loss are evaluated with eval();
    # only run this script with trusted command lines.
    db = eval(args.data_loader.replace('`data`',','.join(db)).replace('\n',''))
    print("Training image database =", db)
    loader = threaded_loader(db, iscuda, args.threads, args.batch_size, shuffle=True)

    # create network
    print("\n>> Creating net = " + args.net)
    net = eval(args.net)
    print(f" ( Model size: {common.model_size(net)/1000:.0f}K parameters )")

    # initialization
    if args.pretrained:
        # the lambda keeps every tensor on the device it is deserialized on (CPU)
        checkpoint = torch.load(args.pretrained, lambda a,b:a)
        net.load_pretrained(checkpoint['state_dict'])

    # create losses
    loss = args.loss.replace('`sampler`',args.sampler).replace('`N`',str(args.N))
    print("\n>> Creating loss = " + loss)
    loss = eval(loss.replace('\n',''))

    # create optimizer
    optimizer = optim.Adam( [p for p in net.parameters() if p.requires_grad],
                            lr=args.learning_rate, weight_decay=args.weight_decay)

    train = MyTrainer(net, loader, loss, optimizer)
    if iscuda: train = train.cuda()

    # Training loop #
    for epoch in range(args.epochs):
        print(f"\n>> Starting epoch {epoch}...")
        train()

    print(f"\n>> Saving model to {args.save_path}")
    torch.save({'net': args.net, 'state_dict': net.state_dict()}, args.save_path)
def smooth(arr):
    ''' Smooth `arr` with a uniform (box) filter of size 3.
    '''
    return uniform_filter(arr, 3)


def transparent(img, alpha, cmap, **kw):
    ''' Color-map a scalar image and give it a constant transparency.

    img:   2D array of scalar values
    alpha: value written into the last (alpha) channel of every pixel
    cmap:  matplotlib colormap applied to the normalized values
    kw:    forwarded to matplotlib.colors.Normalize (e.g. vmin, vmax);
           values outside the range are clipped

    Returns the colored image (last axis = color channels, alpha last).
    '''
    from matplotlib.colors import Normalize
    colored_img = cmap(Normalize(clip=True,**kw)(img))
    colored_img[:,:,-1] = alpha
    return colored_img


def border_crop(border):
    ''' Pair of slices removing `border` pixels on each side of a 2D image.

    A border of 0 keeps the whole image (the stop of the slice must then be
    None: `-0` would otherwise produce an empty or truncated slice).
    '''
    return (slice(border, -border or None),)*2


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser("Visualize the patch detector and descriptor")

    parser.add_argument("--img", type=str, default="imgs/brooklyn.png")
    parser.add_argument("--resize", type=int, default=512)
    parser.add_argument("--out", type=str, default="viz.png")

    parser.add_argument("--checkpoint", type=str, required=True, help='network path')
    parser.add_argument("--net", type=str, default="", help='network command')

    parser.add_argument("--max-kpts", type=int, default=200)
    parser.add_argument("--reliability-thr", type=float, default=0.8)
    parser.add_argument("--repeatability-thr", type=float, default=0.7)
    parser.add_argument("--border", type=int, default=20,help='rm keypoints close to border')

    parser.add_argument("--gpu", type=int, nargs='+', required=True, help='-1 for CPU')
    parser.add_argument("--dbg", type=str, nargs='+', default=(), help='debug options')

    args = parser.parse_args()
    args.dbg = set(args.dbg)

    iscuda = common.torch_set_gpu(args.gpu)
    device = torch.device('cuda' if iscuda else 'cpu')

    # create network
    checkpoint = torch.load(args.checkpoint, lambda a,b:a)
    args.net = args.net or checkpoint['net']
    print("\n>> Creating net = " + args.net)
    # NOTE(review): the network expression (from --net or from the checkpoint)
    # is evaluated with eval(); only use trusted checkpoints / command lines.
    net = eval(args.net)
    # strip the 'module.' prefix found in the keys of some checkpoints
    net.load_state_dict({k.replace('module.',''):v for k,v in checkpoint['state_dict'].items()})
    if iscuda: net = net.cuda()
    print(f" ( Model size: {common.model_size(net)/1000:.0f}K parameters )")

    img = Image.open(args.img).convert('RGB')
    if args.resize: img.thumbnail((args.resize,args.resize))
    img = np.asarray(img)

    detector = NonMaxSuppression(
        rel_thr = args.reliability_thr,
        rep_thr = args.repeatability_thr)

    with torch.no_grad():
        print(">> computing features...")
        res = net(imgs=[norm_RGB(img).unsqueeze(0).to(device)])
        rela = res.get('reliability')
        repe = res.get('repeatability')
        # swap the two detector coordinates so that column 0 is x and column 1 is y
        kpts = detector(**res).T[:,[1,0]]
        # keep the max_kpts keypoints with the highest repeatability
        kpts = kpts[repe[0][0,0][kpts[:,1],kpts[:,0]].argsort()[-args.max_kpts:]]

    pl.figure("viz")
    kw = dict(cmap=pl.cm.RdYlGn, vmax=1)
    crop = border_crop(args.border)

    if 'reliability' in args.dbg:

        pl.subplot(131)
        pl.imshow(img[crop], cmap=pl.cm.gray)
        pl.xticks(()); pl.yticks(())

        pl.subplot(132)
        pl.imshow(img[crop], cmap=pl.cm.gray, alpha=0)
        pl.xticks(()); pl.yticks(())

        # keypoints are expressed in full-image coordinates: shift them to the crop
        x,y = kpts[:,0:2].cpu().numpy().T - args.border
        pl.plot(x,y,'+',c=(0,1,0),ms=10, scalex=0, scaley=0)

        pl.subplot(133)
        rela = rela[0][0,0].cpu().numpy()
        pl.imshow(rela[crop], cmap=pl.cm.RdYlGn, vmax=1, vmin=0.9)
        pl.xticks(()); pl.yticks(())

    else:
        pl.subplot(131)
        pl.imshow(img[crop], cmap=pl.cm.gray)
        pl.xticks(()); pl.yticks(())

        # keypoints are expressed in full-image coordinates: shift them to the crop
        x,y = kpts[:,0:2].cpu().numpy().T - args.border
        pl.plot(x,y,'+',c=(0,1,0),ms=10, scalex=0, scaley=0)

        pl.subplot(132)
        pl.imshow(img[crop], cmap=pl.cm.gray)
        pl.xticks(()); pl.yticks(())
        c = repe[0][0,0].cpu().numpy()
        pl.imshow(transparent(smooth(c)[crop], 0.5, vmin=0, **kw))

        pl.subplot(133)
        pl.imshow(img[crop], cmap=pl.cm.gray)
        pl.xticks(()); pl.yticks(())
        rela = rela[0][0,0].cpu().numpy()
        pl.imshow(transparent(rela[crop], 0.5, vmin=0.9, **kw))

    pl.gcf().set_size_inches(9, 2.73)
    pl.subplots_adjust(0.01,0.01,0.99,0.99,hspace=0.1)
    pl.savefig(args.out)
    # NOTE(review): drops into the debugger once the figure is saved, presumably
    # to keep the interactive window open - confirm before removing.
    pdb.set_trace()