diff --git a/hloc/match_dense.py b/hloc/match_dense.py
index 9bf8b3640f0d79be1a75a7544f0fe8a99b742334..5681324c5340d7b184aff737f7b7aa31d261ce56 100644
--- a/hloc/match_dense.py
+++ b/hloc/match_dense.py
@@ -44,6 +44,25 @@ confs = {
         "max_error": 1,  # max error for assigned keypoints (in px)
         "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
     },
+    "eloftr": {
+        "output": "matches-eloftr",
+        "model": {
+            "name": "eloftr",
+            "weights": "weights/eloftr_outdoor.ckpt",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "resize_max": 1024,
+            "dfactor": 32,
+            "width": 640,
+            "height": 480,
+            "force_resize": True,
+        },
+        "max_error": 1,  # max error for assigned keypoints (in px)
+        "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
+    },
     # "loftr_quadtree": {
     #     "output": "matches-loftr-quadtree",
     #     "model": {
diff --git a/hloc/matchers/eloftr.py b/hloc/matchers/eloftr.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c1e6245eb720c5b3545f9e2f5d2a6a5a93cb95b
--- /dev/null
+++ b/hloc/matchers/eloftr.py
@@ -0,0 +1,126 @@
+import sys
+import warnings
+from copy import deepcopy
+from pathlib import Path
+
+import torch
+
+eloftr_path = Path(__file__).parent / "../../third_party/EfficientLoFTR"
+sys.path.append(str(eloftr_path))
+
+from src.loftr import LoFTR as ELoFTR_
+from src.loftr import full_default_cfg, opt_default_cfg, reparameter
+
+from hloc import logger
+
+from ..utils.base_model import BaseModel
+
+
+class LoFTR(BaseModel):
+    """Efficient LoFTR semi-dense matcher exposed through hloc's ``BaseModel``.
+
+    The two images are swapped before inference and the results swapped back,
+    so that the keypoints refined by the network end up in ``image0``.
+    """
+
+    default_conf = {
+        "weights": "weights/eloftr_outdoor.ckpt",
+        "match_threshold": 0.2,
+        # "sinkhorn_iterations": 20,
+        # Maximum number of matches to keep; a negative value or None keeps all.
+        "max_keypoints": -1,
+        # You can choose model type in ['full', 'opt']
+        "model_type": "full",  # 'full' for best quality, 'opt' for best efficiency
+        # You can choose numerical precision in ['fp32', 'mp', 'fp16']. 'fp16' for best efficiency
+        "precision": "fp32",
+    }
+    required_inputs = ["image0", "image1"]
+
+    def _init(self, conf):
+        """Build the network from ``conf`` and load the checkpoint weights.
+
+        Raises:
+            ValueError: if ``model_type`` is neither 'full' nor 'opt'.
+        """
+        model_type = self.conf["model_type"]
+        if model_type == "full":
+            cfg = deepcopy(full_default_cfg)
+        elif model_type == "opt":
+            cfg = deepcopy(opt_default_cfg)
+        else:
+            # Previously fell through and failed later on an unbound variable.
+            raise ValueError(
+                f"Unknown model_type {model_type!r}, expected 'full' or 'opt'"
+            )
+
+        precision = self.conf["precision"]
+        if precision == "mp":
+            cfg["mp"] = True
+        elif precision == "fp16":
+            cfg["half"] = True
+        elif precision != "fp32":
+            logger.warning(
+                f"Unknown precision {precision!r}, falling back to 'fp32'"
+            )
+
+        cfg["match_coarse"]["thr"] = conf["match_threshold"]
+        # cfg["match_coarse"]["skh_iters"] = conf["sinkhorn_iterations"]
+        model_path = eloftr_path / self.conf["weights"]
+        state_dict = torch.load(model_path, map_location="cpu")["state_dict"]
+        matcher = ELoFTR_(config=cfg)
+        matcher.load_state_dict(state_dict)
+        self.net = reparameter(matcher)
+
+        if precision == "fp16":
+            # NOTE(review): the input images presumably have to be cast to
+            # half precision as well - confirm against the caller.
+            self.net = self.net.half()
+        logger.info(f"Loaded Efficient LoFTR with weights {conf['weights']}")
+
+    def _forward(self, data):
+        """Match an image pair.
+
+        Args:
+            data: dict holding at least ``image0`` and ``image1``.
+
+        Returns:
+            dict with ``keypoints0``, ``keypoints1`` and ``scores``, limited to
+            the ``max_keypoints`` highest-scoring matches when that limit is
+            non-negative.
+        """
+        # For consistency with hloc pairs, we refine kpts in image0!
+        rename = {
+            "keypoints0": "keypoints1",
+            "keypoints1": "keypoints0",
+            "image0": "image1",
+            "image1": "image0",
+            "mask0": "mask1",
+            "mask1": "mask0",
+        }
+        # Keys without a counterpart are passed through unchanged.
+        data_ = {rename.get(k, k): v for k, v in data.items()}
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            # The outputs are read back from data_ below, so the return
+            # value is not needed.
+            self.net(data_)
+        pred = {
+            "keypoints0": data_["mkpts0_f"],
+            "keypoints1": data_["mkpts1_f"],
+        }
+        scores = data_["mconf"]
+
+        # Only a non-negative limit truncates; with the default of -1 the old
+        # `len(scores) > top_k` test always fired and `[:-1]` silently dropped
+        # the lowest-scoring match.
+        top_k = self.conf["max_keypoints"]
+        if top_k is not None and 0 <= top_k < len(scores):
+            keep = torch.argsort(scores, descending=True)[:top_k]
+            pred["keypoints0"] = pred["keypoints0"][keep]
+            pred["keypoints1"] = pred["keypoints1"][keep]
+            scores = scores[keep]
+
+        # Switch back indices
+        pred = {rename.get(k, k): v for k, v in pred.items()}
+        pred["scores"] = scores
+        return pred
diff --git a/third_party/EfficientLoFTR/.gitignore b/third_party/EfficientLoFTR/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..580c0f0ca88656e5cdef86b4b62b7118bc621051
--- /dev/null
+++ b/third_party/EfficientLoFTR/.gitignore
@@ -0,0 +1,12 @@
+.vscode/
+__pycache__/
+*.pyc
+*.DS_Store
+*.swp
+*.pth
+tmp.*
+*/.ipynb_checkpoints/*
+
+logs/
+weights/
+dump/
\ No newline at end of file
diff --git a/third_party/EfficientLoFTR/README.md b/third_party/EfficientLoFTR/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5542a10866ba51492583faf7e90e50d75bb40a71
--- /dev/null
+++ b/third_party/EfficientLoFTR/README.md
@@ -0,0 +1,69 @@
+# Efficient LoFTR: Semi-Dense Local Feature Matching with Sparse-Like Speed
+
+### [Project Page](https://zju3dv.github.io/efficientloftr) | [Paper](https://zju3dv.github.io/efficientloftr/files/EfficientLoFTR.pdf)
+
+ +> Efficient LoFTR: Semi-Dense Local Feature Matching with Sparse-Like Speed +> [Yifan Wang](https://github.com/wyf2020)\*, [Xingyi He](https://github.com/hxy-123)\*, [Sida Peng](https://pengsida.net), [Dongli Tan](https://github.com/Cuistiano), [Xiaowei Zhou](http://xzhou.me) +> CVPR 2024 + +https://github.com/zju3dv/EfficientLoFTR/assets/69951260/40890d21-180e-4e70-aeba-219178b0d824 + +## TODO List +- [x] Inference code and pretrained models +- [x] Code for reproducing the test-set results +- [ ] Add options of flash-attention and torch.compiler for better performance +- [x] jupyter notebook demo for matching a pair of images +- [ ] Training code + +## Installation +```shell +conda env create -f environment.yaml +conda activate eloftr +pip install torch==2.0.0+cu118 --index-url https://download.pytorch.org/whl/cu118 +pip install -r requirements.txt +``` +The test and training data can be downloaded via the [download link](https://drive.google.com/drive/folders/1DOcOPZb3-5cWxLqn256AhwUVjBPifhuf?usp=sharing) provided by LoFTR + +We provide our pretrained model in [download link](https://drive.google.com/drive/folders/1GOw6iVqsB-f1vmG6rNmdCcgwfB4VZ7_Q?usp=sharing) + + +## Reproduce the testing results with pytorch-lightning +You need to set up the testing subsets of ScanNet and MegaDepth first. We create symlinks from the previously downloaded datasets to `data/{{dataset}}/test`. 
+ +```shell +# set up symlinks +ln -s /path/to/scannet-1500-testset/* /path/to/EfficientLoFTR/data/scannet/test +ln -s /path/to/megadepth-1500-testset/* /path/to/EfficientLoFTR/data/megadepth/test +``` +### Inference time +```shell +conda activate eloftr +bash scripts/reproduce_test/indoor_full_time.sh +bash scripts/reproduce_test/indoor_opt_time.sh +``` + +### Accuracy +```shell +conda activate eloftr +bash scripts/reproduce_test/outdoor_full_auc.sh +bash scripts/reproduce_test/outdoor_opt_auc.sh +bash scripts/reproduce_test/indoor_full_auc.sh +bash scripts/reproduce_test/indoor_opt_auc.sh +``` + +## Training +The Training code is coming soon, please stay tuned! + +## Citation + +If you find this code useful for your research, please use the following BibTeX entry. + +```bibtex +@inproceedings{wang2024eloftr, + title={{Efficient LoFTR}: Semi-Dense Local Feature Matching with Sparse-Like Speed}, + author={Wang, Yifan and He, Xingyi and Peng, Sida and Tan, Dongli and Zhou, Xiaowei}, + booktitle={CVPR}, + year={2024} +} +``` diff --git a/third_party/EfficientLoFTR/assets/main_figure.jpg b/third_party/EfficientLoFTR/assets/main_figure.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cbde5fac4f7657af2b9240ca434c3f0c89c07a82 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/main_figure.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18ca33ce57f1752a9ee9bd3b59211bc968f83b8ab2534209648aea8e1bfec64 +size 2019735 diff --git a/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz new file mode 100644 index 0000000000000000000000000000000000000000..f4b1b79acff510aab203a8b604955dd89edffc45 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0015_0.1_0.3.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d441df1d380b2ed34449b944d9f13127e695542fa275098d38a6298835672f22 +size 231253 diff --git a/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz new file mode 100644 index 0000000000000000000000000000000000000000..2b2de7bda22dc6e78e01e3f56ba1dafd46c1c581 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0015_0.3_0.5.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f34b5231d04a84d84378c671dd26854869663b5eafeae2ebaf624a279325139 +size 231253 diff --git a/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz new file mode 100644 index 0000000000000000000000000000000000000000..5680f3747296a4d565dc9a95c719dce0472c7e63 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.1_0.3.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba46e6b9ec291fc7271eb9741d5c75ca04b83d3d7281e049815de9cb9024f4d9 +size 272610 diff --git a/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz new file mode 100644 index 0000000000000000000000000000000000000000..79f5a30dd0a8cd8b60263fa721a4e5ef8394801c --- /dev/null +++ b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.3_0.5.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4465da174b96deba61e5328886e4f2e687d34b890efca69e0c838736f8ae12 +size 272610 diff --git a/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz new file mode 100644 index 0000000000000000000000000000000000000000..0c1315698e217f3be3dbcc85be72fcd16477b9dd 
--- /dev/null +++ b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/0022_0.5_0.7.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684ae10f03001917c3ca0d12d441f372ce3c7e6637bd1277a3cda60df4207fe9 +size 272610 diff --git a/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt new file mode 100644 index 0000000000000000000000000000000000000000..85a2e16722183d3fe209a9ceb60c43d8315c32cf --- /dev/null +++ b/third_party/EfficientLoFTR/assets/megadepth_test_1500_scene_info/megadepth_test_1500.txt @@ -0,0 +1,5 @@ +0022_0.1_0.3 +0015_0.1_0.3 +0015_0.3_0.5 +0022_0.3_0.5 +0022_0.5_0.7 \ No newline at end of file diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca687eeca4471e7bb9806059586fb23863a808a2 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45167ac6ca1ca2e4f5b4f3b88cea886cbcedf75cdddc6cd3214b93fe5cce93ab +size 295643 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_49190386_5209386933.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_49190386_5209386933.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca220b680bb89610b0ed28b4cd45ec65ecacc5f0 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_49190386_5209386933.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999d61b530e23ab7da3605de46676d0e89a7947b239ee77e74f6acd2a427ab5c +size 381816 diff --git 
a/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg new file mode 100644 index 0000000000000000000000000000000000000000..30b481f19532e3939ebaa85fd9e14d6571f72c41 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b95c1f0c56ead99a87530f7862ca80996b6039267f44c37f7c260cab8757c26 +size 293798 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_94185272_3874562886.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_94185272_3874562886.jpg new file mode 100644 index 0000000000000000000000000000000000000000..eb928ab921ad5f9d558a1c8976e55ea826e8bbe7 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/london_bridge_94185272_3874562886.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b78b9b7e909ccf2f297265c9922ad34fa35ed580e0fc9edf376bb4e89d3f03 +size 368048 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c417181146161214a70ae2a0be0d5f40fa8c1d5d --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_06795901_3725050516.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a07bc272b315ff3eaa12ade6aa9a6a9b99cae34a896517695a159bfada3398 +size 469610 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg new 
file mode 100644 index 0000000000000000000000000000000000000000..80cc9d56ec68d59ec7870ef5f538cfc98cf9c817 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_15148634_5228701572.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e95beadf2601a89edc69d66bb565300ed32d44498146ce02fc32f14a47f7c70 +size 457136 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_18627786_5929294590.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_18627786_5929294590.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8250dacf14805c073177e4a10c8ae96e92c2e126 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_18627786_5929294590.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421ea0ef24a6f6480afdf13e1d5483c6f40d4dc6928fd59af6943d26bafad790 +size 145430 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_43351518_2659980686.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_43351518_2659980686.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ad666990d8cc65f6e0d76825e000b88409e43ed5 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_43351518_2659980686.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a1247908eacbb0dc9d383edc03ee83b50ea5f4779c7c006df32959770ba28a +size 506435 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_58751010_4849458397.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_58751010_4849458397.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f0fd5f68f21e54b4b4033e1d9c3b29193bab7f91 --- /dev/null +++ 
b/third_party/EfficientLoFTR/assets/phototourism_sample_images/piazza_san_marco_58751010_4849458397.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd9e43d253516b23756339f0e82979a69f2f01fef9484c8ca1da5a8c9b3ba98 +size 601365 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/st_pauls_cathedral_30776973_2635313996.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/st_pauls_cathedral_30776973_2635313996.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c9ee7aca8caeb5bc6a22ecf0c4f789d467741079 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/st_pauls_cathedral_30776973_2635313996.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68de07942d852f81915367de73adfb5ff612646f33d5a4d523d83df5d6bbdab7 +size 531254 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/st_pauls_cathedral_37347628_10902811376.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/st_pauls_cathedral_37347628_10902811376.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1828d6e5831c63925e60cfc4e2334beb73a601b2 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/st_pauls_cathedral_37347628_10902811376.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1e6f984286998887ccbd1c6c99632d6e97936eea185b9ee93476badacbde11 +size 646814 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b61efcbf0dc78652eae119d6e8ada4c087f9d70d --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:05ad1e66d7fee2f9e11766160522ad823f1fcc0ab8a5740a6c89b1765228ea32 +size 334048 diff --git a/third_party/EfficientLoFTR/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg b/third_party/EfficientLoFTR/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg new file mode 100644 index 0000000000000000000000000000000000000000..11f51edc25202ed31722422798c87f88dcb296c9 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed3a68939b922bc2362b1d8051c24d2ca03be6a431fcc7c423e157012debd5a +size 424584 diff --git a/third_party/EfficientLoFTR/assets/scannet_test_1500/intrinsics.npz b/third_party/EfficientLoFTR/assets/scannet_test_1500/intrinsics.npz new file mode 100644 index 0000000000000000000000000000000000000000..bcba553dab19a57fcea336e69abd77ca9e87bce1 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/scannet_test_1500/intrinsics.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ac102c69e2e4e2f0ab9c0d64f4da2b815e0901630768bdfde30080ced3605c +size 23922 diff --git a/third_party/EfficientLoFTR/assets/scannet_test_1500/scannet_test.txt b/third_party/EfficientLoFTR/assets/scannet_test_1500/scannet_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..45cc7ffd9ca2fb5750ce3e545f58410674d7ab9d --- /dev/null +++ b/third_party/EfficientLoFTR/assets/scannet_test_1500/scannet_test.txt @@ -0,0 +1 @@ +test.npz \ No newline at end of file diff --git a/third_party/EfficientLoFTR/assets/scannet_test_1500/statistics.json b/third_party/EfficientLoFTR/assets/scannet_test_1500/statistics.json new file mode 100644 index 0000000000000000000000000000000000000000..0e3ff582943ac12711da7a392a55f0a42d3b4449 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/scannet_test_1500/statistics.json @@ -0,0 +1,102 @@ +{ + "scene0707_00": 15, + 
"scene0708_00": 15, + "scene0709_00": 15, + "scene0710_00": 15, + "scene0711_00": 15, + "scene0712_00": 15, + "scene0713_00": 15, + "scene0714_00": 15, + "scene0715_00": 15, + "scene0716_00": 15, + "scene0717_00": 15, + "scene0718_00": 15, + "scene0719_00": 15, + "scene0720_00": 15, + "scene0721_00": 15, + "scene0722_00": 15, + "scene0723_00": 15, + "scene0724_00": 15, + "scene0725_00": 15, + "scene0726_00": 15, + "scene0727_00": 15, + "scene0728_00": 15, + "scene0729_00": 15, + "scene0730_00": 15, + "scene0731_00": 15, + "scene0732_00": 15, + "scene0733_00": 15, + "scene0734_00": 15, + "scene0735_00": 15, + "scene0736_00": 15, + "scene0737_00": 15, + "scene0738_00": 15, + "scene0739_00": 15, + "scene0740_00": 15, + "scene0741_00": 15, + "scene0742_00": 15, + "scene0743_00": 15, + "scene0744_00": 15, + "scene0745_00": 15, + "scene0746_00": 15, + "scene0747_00": 15, + "scene0748_00": 15, + "scene0749_00": 15, + "scene0750_00": 15, + "scene0751_00": 15, + "scene0752_00": 15, + "scene0753_00": 15, + "scene0754_00": 15, + "scene0755_00": 15, + "scene0756_00": 15, + "scene0757_00": 15, + "scene0758_00": 15, + "scene0759_00": 15, + "scene0760_00": 15, + "scene0761_00": 15, + "scene0762_00": 15, + "scene0763_00": 15, + "scene0764_00": 15, + "scene0765_00": 15, + "scene0766_00": 15, + "scene0767_00": 15, + "scene0768_00": 15, + "scene0769_00": 15, + "scene0770_00": 15, + "scene0771_00": 15, + "scene0772_00": 15, + "scene0773_00": 15, + "scene0774_00": 15, + "scene0775_00": 15, + "scene0776_00": 15, + "scene0777_00": 15, + "scene0778_00": 15, + "scene0779_00": 15, + "scene0780_00": 15, + "scene0781_00": 15, + "scene0782_00": 15, + "scene0783_00": 15, + "scene0784_00": 15, + "scene0785_00": 15, + "scene0786_00": 15, + "scene0787_00": 15, + "scene0788_00": 15, + "scene0789_00": 15, + "scene0790_00": 15, + "scene0791_00": 15, + "scene0792_00": 15, + "scene0793_00": 15, + "scene0794_00": 15, + "scene0795_00": 15, + "scene0796_00": 15, + "scene0797_00": 15, + "scene0798_00": 15, 
+ "scene0799_00": 15, + "scene0800_00": 15, + "scene0801_00": 15, + "scene0802_00": 15, + "scene0803_00": 15, + "scene0804_00": 15, + "scene0805_00": 15, + "scene0806_00": 15 +} \ No newline at end of file diff --git a/third_party/EfficientLoFTR/assets/scannet_test_1500/test.npz b/third_party/EfficientLoFTR/assets/scannet_test_1500/test.npz new file mode 100644 index 0000000000000000000000000000000000000000..d2011c2913a9ae1311d18b08c089bd999ba3ad30 --- /dev/null +++ b/third_party/EfficientLoFTR/assets/scannet_test_1500/test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b982b9c1f762e7d31af552ecc1ccf1a6add013197f74ec69c84a6deaa6f580ad +size 71687 diff --git a/third_party/EfficientLoFTR/configs/data/__init__.py b/third_party/EfficientLoFTR/configs/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/EfficientLoFTR/configs/data/base.py b/third_party/EfficientLoFTR/configs/data/base.py new file mode 100644 index 0000000000000000000000000000000000000000..03aab160fa4137ccc04380f94854a56fbb549074 --- /dev/null +++ b/third_party/EfficientLoFTR/configs/data/base.py @@ -0,0 +1,35 @@ +""" +The data config will be the last one merged into the main config. +Setups in data configs will override all existed setups! 
+""" + +from yacs.config import CfgNode as CN +_CN = CN() +_CN.DATASET = CN() +_CN.TRAINER = CN() + +# training data config +_CN.DATASET.TRAIN_DATA_ROOT = None +_CN.DATASET.TRAIN_POSE_ROOT = None +_CN.DATASET.TRAIN_NPZ_ROOT = None +_CN.DATASET.TRAIN_LIST_PATH = None +_CN.DATASET.TRAIN_INTRINSIC_PATH = None +# validation set config +_CN.DATASET.VAL_DATA_ROOT = None +_CN.DATASET.VAL_POSE_ROOT = None +_CN.DATASET.VAL_NPZ_ROOT = None +_CN.DATASET.VAL_LIST_PATH = None +_CN.DATASET.VAL_INTRINSIC_PATH = None + +# testing data config +_CN.DATASET.TEST_DATA_ROOT = None +_CN.DATASET.TEST_POSE_ROOT = None +_CN.DATASET.TEST_NPZ_ROOT = None +_CN.DATASET.TEST_LIST_PATH = None +_CN.DATASET.TEST_INTRINSIC_PATH = None + +# dataset config +_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4 +_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val + +cfg = _CN diff --git a/third_party/EfficientLoFTR/configs/data/debug/.gitignore b/third_party/EfficientLoFTR/configs/data/debug/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/EfficientLoFTR/configs/data/debug/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/EfficientLoFTR/configs/data/megadepth_test_1500.py b/third_party/EfficientLoFTR/configs/data/megadepth_test_1500.py new file mode 100644 index 0000000000000000000000000000000000000000..876bd4cad7772922d81c83ad3107ab6b8af599a3 --- /dev/null +++ b/third_party/EfficientLoFTR/configs/data/megadepth_test_1500.py @@ -0,0 +1,13 @@ +from configs.data.base import cfg + +TEST_BASE_PATH = "assets/megadepth_test_1500_scene_info" + +cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" +cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" +cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}" +cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/megadepth_test_1500.txt" + +cfg.DATASET.MGDPT_IMG_RESIZE = 832 +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 + +cfg.DATASET.NPE_NAME = 'megadepth' \ No 
newline at end of file diff --git a/third_party/EfficientLoFTR/configs/data/megadepth_trainval_832.py b/third_party/EfficientLoFTR/configs/data/megadepth_trainval_832.py new file mode 100644 index 0000000000000000000000000000000000000000..b4ce0dd463cf09d031464176a5f28a6fe5ba2ad3 --- /dev/null +++ b/third_party/EfficientLoFTR/configs/data/megadepth_trainval_832.py @@ -0,0 +1,24 @@ +from configs.data.base import cfg + + +TRAIN_BASE_PATH = "data/megadepth/index" +cfg.DATASET.TRAINVAL_DATA_SOURCE = "MegaDepth" +cfg.DATASET.TRAIN_DATA_ROOT = "data/megadepth/train" +cfg.DATASET.TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7" +cfg.DATASET.TRAIN_LIST_PATH = f"{TRAIN_BASE_PATH}/trainvaltest_list/train_list.txt" +cfg.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.0 + +TEST_BASE_PATH = "data/megadepth/index" +cfg.DATASET.TEST_DATA_SOURCE = "MegaDepth" +cfg.DATASET.VAL_DATA_ROOT = cfg.DATASET.TEST_DATA_ROOT = "data/megadepth/test" +cfg.DATASET.VAL_NPZ_ROOT = cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}/scene_info_val_1500" +cfg.DATASET.VAL_LIST_PATH = cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/trainvaltest_list/val_list.txt" +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 # for both test and val + +# 368 scenes in total for MegaDepth +# (with difficulty balanced (further split each scene to 3 sub-scenes)) +cfg.TRAINER.N_SAMPLES_PER_SUBSET = 100 + +cfg.DATASET.MGDPT_IMG_RESIZE = 832 # for training on 32GB memory GPUs + +cfg.DATASET.NPE_NAME = 'megadepth' \ No newline at end of file diff --git a/third_party/EfficientLoFTR/configs/data/scannet_test_1500.py b/third_party/EfficientLoFTR/configs/data/scannet_test_1500.py new file mode 100644 index 0000000000000000000000000000000000000000..ca98ed4b120d699f8de00016f169a83c0c8ddac8 --- /dev/null +++ b/third_party/EfficientLoFTR/configs/data/scannet_test_1500.py @@ -0,0 +1,16 @@ +from configs.data.base import cfg + +TEST_BASE_PATH = "assets/scannet_test_1500" + +cfg.DATASET.TEST_DATA_SOURCE = "ScanNet" +cfg.DATASET.TEST_DATA_ROOT = 
"data/scannet/test" +cfg.DATASET.TEST_NPZ_ROOT = f"{TEST_BASE_PATH}" +cfg.DATASET.TEST_LIST_PATH = f"{TEST_BASE_PATH}/scannet_test.txt" +cfg.DATASET.TEST_INTRINSIC_PATH = f"{TEST_BASE_PATH}/intrinsics.npz" + +cfg.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 + +cfg.DATASET.SCAN_IMG_RESIZEX = 640 +cfg.DATASET.SCAN_IMG_RESIZEY = 480 + +cfg.DATASET.NPE_NAME = 'scannet' \ No newline at end of file diff --git a/third_party/EfficientLoFTR/configs/loftr/eloftr_full.py b/third_party/EfficientLoFTR/configs/loftr/eloftr_full.py new file mode 100644 index 0000000000000000000000000000000000000000..24ff5f33b6cf6ee11c4b564050fbe736126b8bc5 --- /dev/null +++ b/third_party/EfficientLoFTR/configs/loftr/eloftr_full.py @@ -0,0 +1,36 @@ +from src.config.default import _CN as cfg + +# training config +cfg.TRAINER.CANONICAL_LR = 8e-3 +cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs +cfg.TRAINER.WARMUP_RATIO = 0.1 +cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] +cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 +cfg.TRAINER.OPTIMIZER = "adamw" +cfg.TRAINER.ADAMW_DECAY = 0.1 +cfg.TRAINER.EPI_ERR_THR = 5e-4 # recommendation: 5e-4 for ScanNet, 1e-4 for MegaDepth (from SuperGlue) +cfg.TRAINER.GRADIENT_CLIPPING = 0.0 +cfg.LOFTR.LOSS.FINE_TYPE = 'l2' # ['l2_with_std', 'l2'] +cfg.LOFTR.LOSS.COARSE_OVERLAP_WEIGHT = True +cfg.LOFTR.LOSS.FINE_OVERLAP_WEIGHT = True +cfg.LOFTR.LOSS.LOCAL_WEIGHT = 0.25 +cfg.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 +cfg.LOFTR.MATCH_COARSE.SPARSE_SPVS = True + +# model config +cfg.LOFTR.RESOLUTION = (8, 1) +cfg.LOFTR.FINE_WINDOW_SIZE = 8 # window_size in fine_level, must be even +cfg.LOFTR.ALIGN_CORNER = False +cfg.LOFTR.MP = True # just for reproducing paper, FP16 is much faster on modern GPUs +cfg.LOFTR.REPLACE_NAN = True +cfg.LOFTR.EVAL_TIMES = 5 +cfg.LOFTR.COARSE.NO_FLASH = True # Not use Flash-Attention just for reproducing paper timing +cfg.LOFTR.MATCH_COARSE.THR = 0.2 # recommend 0.2 for full model and 25 for optimized model +cfg.LOFTR.MATCH_FINE.LOCAL_REGRESS_TEMPERATURE = 10.0 
+cfg.LOFTR.MATCH_FINE.LOCAL_REGRESS_SLICEDIM = 8 + +# dataset config +cfg.DATASET.FP16 = False + +# full model config +cfg.LOFTR.MATCH_COARSE.FP16MATMUL = False \ No newline at end of file diff --git a/third_party/EfficientLoFTR/configs/loftr/eloftr_optimized.py b/third_party/EfficientLoFTR/configs/loftr/eloftr_optimized.py new file mode 100644 index 0000000000000000000000000000000000000000..5c044e49db7ecb31e22570d8295d8ac617dcf64c --- /dev/null +++ b/third_party/EfficientLoFTR/configs/loftr/eloftr_optimized.py @@ -0,0 +1,37 @@ +from src.config.default import _CN as cfg + +# training config +cfg.TRAINER.CANONICAL_LR = 8e-3 +cfg.TRAINER.WARMUP_STEP = 1875 # 3 epochs +cfg.TRAINER.WARMUP_RATIO = 0.1 +cfg.TRAINER.MSLR_MILESTONES = [8, 12, 16, 20, 24] +cfg.TRAINER.RANSAC_PIXEL_THR = 0.5 +cfg.TRAINER.OPTIMIZER = "adamw" +cfg.TRAINER.ADAMW_DECAY = 0.1 +cfg.TRAINER.EPI_ERR_THR = 5e-4 # recommendation: 5e-4 for ScanNet, 1e-4 for MegaDepth (from SuperGlue) +cfg.TRAINER.GRADIENT_CLIPPING = 0.0 +cfg.LOFTR.LOSS.FINE_TYPE = 'l2' # ['l2_with_std', 'l2'] +cfg.LOFTR.LOSS.COARSE_OVERLAP_WEIGHT = True +cfg.LOFTR.LOSS.FINE_OVERLAP_WEIGHT = True +cfg.LOFTR.LOSS.LOCAL_WEIGHT = 0.25 +cfg.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.3 +cfg.LOFTR.MATCH_COARSE.SPARSE_SPVS = True + +# model config +cfg.LOFTR.RESOLUTION = (8, 1) +cfg.LOFTR.FINE_WINDOW_SIZE = 8 # window_size in fine_level, must be even +cfg.LOFTR.ALIGN_CORNER = False +cfg.LOFTR.MP = True # just for reproducing paper, FP16 is much faster on modern GPUs +cfg.LOFTR.REPLACE_NAN = True +cfg.LOFTR.EVAL_TIMES = 5 +cfg.LOFTR.COARSE.NO_FLASH = True # Not use Flash-Attention just for reproducing paper timing +cfg.LOFTR.MATCH_FINE.LOCAL_REGRESS_TEMPERATURE = 10.0 +cfg.LOFTR.MATCH_FINE.LOCAL_REGRESS_SLICEDIM = 8 + +# dataset config +cfg.DATASET.FP16 = False + +# optimized model config +cfg.LOFTR.MATCH_COARSE.FP16MATMUL = True +cfg.LOFTR.MATCH_COARSE.SKIP_SOFTMAX = True +cfg.LOFTR.MATCH_COARSE.THR = 25.0 # recommend 0.2 for full model and 
25 for optimized model \ No newline at end of file diff --git a/third_party/EfficientLoFTR/data/megadepth/index/.gitignore b/third_party/EfficientLoFTR/data/megadepth/index/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/EfficientLoFTR/data/megadepth/index/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/EfficientLoFTR/data/megadepth/test/.gitignore b/third_party/EfficientLoFTR/data/megadepth/test/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/EfficientLoFTR/data/megadepth/test/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/EfficientLoFTR/data/megadepth/train/.gitignore b/third_party/EfficientLoFTR/data/megadepth/train/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5e7d2734cfc60289debf74293817c0a8f572ff32 --- /dev/null +++ b/third_party/EfficientLoFTR/data/megadepth/train/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/third_party/EfficientLoFTR/data/scannet/index/.gitignore b/third_party/EfficientLoFTR/data/scannet/index/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/EfficientLoFTR/data/scannet/index/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/third_party/EfficientLoFTR/data/scannet/test/.gitignore b/third_party/EfficientLoFTR/data/scannet/test/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..94548af5beba7825284af746324c8dc5b2f1ea31 --- /dev/null +++ b/third_party/EfficientLoFTR/data/scannet/test/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git 
a/third_party/EfficientLoFTR/environment.yaml b/third_party/EfficientLoFTR/environment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52bc6f68c1b0d7c0f020453427873370753234bc --- /dev/null +++ b/third_party/EfficientLoFTR/environment.yaml @@ -0,0 +1,7 @@ +name: eloftr +channels: + - pytorch + - nvidia +dependencies: + - python=3.8 + - pip \ No newline at end of file diff --git a/third_party/EfficientLoFTR/notebooks/demo_single_pair.ipynb b/third_party/EfficientLoFTR/notebooks/demo_single_pair.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..21036882f5243a1adab978bbe509b3ae6f5877f7 --- /dev/null +++ b/third_party/EfficientLoFTR/notebooks/demo_single_pair.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Demo EfficientLoFTR on a single pair of images\n", + "\n", + "This notebook shows how to use the eloftr matcher with different model type and numerical precision on the pretrained weights." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.chdir(\"..\")\n", + "from copy import deepcopy\n", + "\n", + "import torch\n", + "import cv2\n", + "import numpy as np\n", + "import matplotlib.cm as cm\n", + "from src.utils.plotting import make_matching_figure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outdoor Example\n", + "\n", + "We recommend using our pre-trained model for input in outdoor environments because our model has only been trained on MegaDepth, and there exists a domain gap between indoor and outdoor data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'backbone_type': 'RepVGG', 'align_corner': False, 'resolution': (8, 1), 'fine_window_size': 8, 'mp': False, 'replace_nan': True, 'half': False, 'backbone': {'block_dims': [64, 128, 256]}, 'coarse': {'d_model': 256, 'd_ffn': 256, 'nhead': 8, 'layer_names': ['self', 'cross', 'self', 'cross', 'self', 'cross', 'self', 'cross'], 'agg_size0': 4, 'agg_size1': 4, 'no_flash': False, 'rope': True, 'npe': [832, 832, 832, 832]}, 'match_coarse': {'thr': 0.2, 'border_rm': 2, 'dsmax_temperature': 0.1, 'skip_softmax': False, 'fp16matmul': False, 'train_coarse_percent': 0.2, 'train_pad_num_gt_min': 200}, 'match_fine': {'local_regress_temperature': 10.0, 'local_regress_slicedim': 8}}\n" + ] + } + ], + "source": [ + "from src.loftr import LoFTR, full_default_cfg, opt_default_cfg, reparameter\n", + "\n", + "# You can choose model type in ['full', 'opt']\n", + "model_type = 'full' # 'full' for best quality, 'opt' for best efficiency\n", + "\n", + "# You can choose numerical precision in ['fp32', 'mp', 'fp16']. 'fp16' for best efficiency\n", + "precision = 'fp32' # Enjoy near-lossless precision with Mixed Precision (MP) / FP16 computation if you have a modern GPU (recommended NVIDIA architecture >= SM_70).\n", + "\n", + "# You can also change the default values like thr. 
and npe (based on input image size)\n", + "\n", + "if model_type == 'full':\n", + " _default_cfg = deepcopy(full_default_cfg)\n", + "elif model_type == 'opt':\n", + " _default_cfg = deepcopy(opt_default_cfg)\n", + " \n", + "if precision == 'mp':\n", + " _default_cfg['mp'] = True\n", + "elif precision == 'fp16':\n", + " _default_cfg['half'] = True\n", + " \n", + "print(_default_cfg)\n", + "matcher = LoFTR(config=_default_cfg)\n", + "\n", + "matcher.load_state_dict(torch.load(\"weights/eloftr_outdoor.ckpt\")['state_dict'])\n", + "matcher = reparameter(matcher) # no reparameterization will lead to low performance\n", + "\n", + "if precision == 'fp16':\n", + " matcher = matcher.half()\n", + "\n", + "matcher = matcher.eval().cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Load example images\n", + "img0_pth = \"assets/phototourism_sample_images/united_states_capitol_26757027_6717084061.jpg\"\n", + "img1_pth = \"assets/phototourism_sample_images/united_states_capitol_98169888_3347710852.jpg\"\n", + "img0_raw = cv2.imread(img0_pth, cv2.IMREAD_GRAYSCALE)\n", + "img1_raw = cv2.imread(img1_pth, cv2.IMREAD_GRAYSCALE)\n", + "img0_raw = cv2.resize(img0_raw, (img0_raw.shape[1]//32*32, img0_raw.shape[0]//32*32)) # input size shuold be divisible by 32\n", + "img1_raw = cv2.resize(img1_raw, (img1_raw.shape[1]//32*32, img1_raw.shape[0]//32*32))\n", + "\n", + "if precision == 'fp16':\n", + " img0 = torch.from_numpy(img0_raw)[None][None].half().cuda() / 255.\n", + " img1 = torch.from_numpy(img1_raw)[None][None].half().cuda() / 255.\n", + "else:\n", + " img0 = torch.from_numpy(img0_raw)[None][None].cuda() / 255.\n", + " img1 = torch.from_numpy(img1_raw)[None][None].cuda() / 255.\n", + "batch = {'image0': img0, 'image1': img1}\n", + "\n", + "# Inference with EfficientLoFTR and get prediction\n", + "with torch.no_grad():\n", + " if precision == 'mp':\n", + " with torch.autocast(enabled=True, 
device_type='cuda'):\n", + " matcher(batch)\n", + " else:\n", + " matcher(batch)\n", + " mkpts0 = batch['mkpts0_f'].cpu().numpy()\n", + " mkpts1 = batch['mkpts1_f'].cpu().numpy()\n", + " mconf = batch['mconf'].cpu().numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Draw\n", + "if model_type == 'opt':\n", + " print(mconf.max())\n", + " mconf = (mconf - min(20.0, mconf.min())) / (max(30.0, mconf.max()) - min(20.0, mconf.min()))\n", + "\n", + "color = cm.jet(mconf)\n", + "text = [\n", + " 'LoFTR',\n", + " 'Matches: {}'.format(len(mkpts0)),\n", + "]\n", + "fig = make_matching_figure(img0_raw, img1_raw, mkpts0, mkpts1, color, text=text)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "5b8911f875a754a9ad2a8804064d078bf6a1985972bb0389b9d67771213c8e20" + }, + "kernelspec": { + "display_name": "Python 3.8.8 64-bit ('svcnn': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + }, + "orig_nbformat": 2 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/third_party/EfficientLoFTR/requirements.txt b/third_party/EfficientLoFTR/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab86d90f1cf68a8d50060cba9b4582d6b4883a1d --- /dev/null +++ b/third_party/EfficientLoFTR/requirements.txt @@ -0,0 +1,17 @@ +opencv_python==4.4.0.46 +albumentations==0.5.1 --no-binary=imgaug,albumentations +ray>=1.0.1 +einops==0.3.0 +kornia==0.4.1 +loguru==0.5.3 +yacs>=0.1.8 +tqdm +autopep8 +pylint +ipython +jupyterlab +matplotlib +h5py==3.1.0 +pytorch-lightning==1.3.5 +torchmetrics==0.6.0 # version problem: https://github.com/NVIDIA/DeepLearningExamples/issues/1113#issuecomment-1102969461 +joblib>=1.0.1 \ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_full_auc.sh b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_full_auc.sh new file mode 100644 index 0000000000000000000000000000000000000000..b2f2e9bf327b99f11bb7a85e0b9d0474edc3d532 --- 
/dev/null +++ b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_full_auc.sh @@ -0,0 +1,35 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_full.py" + +profiler_name="inference" +n_nodes=1 # manually keep this the same as --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" +python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX '640' \ + --scannetY '480' \ + --rmbd 0 \ + --thr 0.1 \ + --deter \ + --ransac_times 5 +# Following the RoMa protocol, we repeat RANSAC 5 times to enhance robustness; however, this increases script runtime.
\ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_full_time.sh b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_full_time.sh new file mode 100644 index 0000000000000000000000000000000000000000..d1dfe2cfb131c778b622946aadc1904da637124e --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_full_time.sh @@ -0,0 +1,33 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_full.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" +python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX '640' \ + --scannetY '480' \ + --rmbd 0 \ + --thr 0.1 \ + --ransac_times 1 diff --git a/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_opt_auc.sh b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_opt_auc.sh new file mode 100644 index 0000000000000000000000000000000000000000..f9b6ae8afb928a6e8f4eb7dfa585d9b7806353e0 --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_opt_auc.sh @@ -0,0 +1,35 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_optimized.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 
+batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" +python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX '640' \ + --scannetY '480' \ + --rmbd 1 \ + --thr 20 \ + --deter \ + --ransac_times 5 +# Following the RoMa protocol, we repeat RANSAC 5 times to enhance robustness; however, this increases script runtime. \ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_opt_time.sh b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_opt_time.sh new file mode 100644 index 0000000000000000000000000000000000000000..dec08661bcee163dcb250c6896b2719cedcea940 --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/reproduce_test/indoor_opt_time.sh @@ -0,0 +1,33 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_optimized.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" +python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX '640' \ + --scannetY '480' \ + --rmbd 1 \ + --thr 20 \ + --ransac_times 1 \ No newline at end of file 
diff --git a/third_party/EfficientLoFTR/scripts/reproduce_test/outdoor_full_auc.sh b/third_party/EfficientLoFTR/scripts/reproduce_test/outdoor_full_auc.sh new file mode 100644 index 0000000000000000000000000000000000000000..08ed0fc7eccd33441d700d56f27bb573d4baca0e --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/reproduce_test/outdoor_full_auc.sh @@ -0,0 +1,35 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_full.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_megadepth" +data_cfg_path="configs/data/megadepth_test_1500.py" +size="1152" +python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --megasize $size \ + --npe \ + --thr 0.1 \ + --deter \ + --ransac_times 5 +# Following the RoMa protocol, we repeat RANSAC 5 times to enhance robustness; however, this increases script runtime. 
\ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/reproduce_test/outdoor_opt_auc.sh b/third_party/EfficientLoFTR/scripts/reproduce_test/outdoor_opt_auc.sh new file mode 100644 index 0000000000000000000000000000000000000000..4e1af5987b16785effc390424eb37f38a55f842f --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/reproduce_test/outdoor_opt_auc.sh @@ -0,0 +1,35 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_optimized.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_megadepth" +data_cfg_path="configs/data/megadepth_test_1500.py" +size="1152" +python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --megasize $size \ + --npe \ + --thr 20 \ + --deter \ + --ransac_times 5 +# Following the RoMa protocol, we repeat RANSAC 5 times to enhance robustness; however, this increases script runtime. 
\ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/varied_size/indoor_full_auc_varied_size.sh b/third_party/EfficientLoFTR/scripts/varied_size/indoor_full_auc_varied_size.sh new file mode 100644 index 0000000000000000000000000000000000000000..f6daa38224c5319549fe4ffe074f0c466d499eef --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/varied_size/indoor_full_auc_varied_size.sh @@ -0,0 +1,44 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_full.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" + +declare -a scannetXY_arr=("640,480" "512,384" "384,288") + +for scannetXY in "${scannetXY_arr[@]}"; do + SCANNETX="${scannetXY%,*}" + SCANNETY="${scannetXY#*,}" + + python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX $SCANNETX \ + --scannetY $SCANNETY \ + --npe \ + --rmbd 0 \ + --deter \ + --ransac_times 5 \ + --fp32 # fp32 just for fair comparison + # Following the RoMa protocol, we repeat RANSAC 5 times to enhance robustness; however, this increases script runtime. 
+done \ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/varied_size/indoor_full_time_varied_size.sh b/third_party/EfficientLoFTR/scripts/varied_size/indoor_full_time_varied_size.sh new file mode 100644 index 0000000000000000000000000000000000000000..3a37e0cde504ac4643437870dfa58107e9454ed9 --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/varied_size/indoor_full_time_varied_size.sh @@ -0,0 +1,42 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_full.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" + +declare -a scannetXY_arr=("640,480" "512,384" "384,288") + +for scannetXY in "${scannetXY_arr[@]}"; do + SCANNETX="${scannetXY%,*}" + SCANNETY="${scannetXY#*,}" + + python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX $SCANNETX \ + --scannetY $SCANNETY \ + --npe \ + --rmbd 0 \ + --ransac_times 1 \ + --fp32 # fp32 just for fair comparison +done \ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/varied_size/indoor_opt_auc_varied_size.sh b/third_party/EfficientLoFTR/scripts/varied_size/indoor_opt_auc_varied_size.sh new file mode 100644 index 0000000000000000000000000000000000000000..fb1dfeb98a7bd22a6ec61baddc04833c77b76e22 --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/varied_size/indoor_opt_auc_varied_size.sh @@ -0,0 +1,47 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname 
$(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_optimized.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu +comment='reproduce_eloft_full_scannet' +METHOD='loftr' + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" + +declare -a scannetXY_arr=("640,480" "512,384" "384,288") + +for scannetXY in "${scannetXY_arr[@]}"; do + SCANNETX="${scannetXY%,*}" + SCANNETY="${scannetXY#*,}" + + python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX $SCANNETX \ + --scannetY $SCANNETY \ + --npe \ + --rmbd 1 \ + --deter \ + --ransac_times 5 \ + --half \ + --flash + # Following the RoMa protocol, we repeat RANSAC 5 times to enhance robustness; however, this increases script runtime. 
+done \ No newline at end of file diff --git a/third_party/EfficientLoFTR/scripts/varied_size/indoor_opt_time_varied_size.sh b/third_party/EfficientLoFTR/scripts/varied_size/indoor_opt_time_varied_size.sh new file mode 100644 index 0000000000000000000000000000000000000000..8807e65f30e83cca7a46a6c960221fe82d8384f0 --- /dev/null +++ b/third_party/EfficientLoFTR/scripts/varied_size/indoor_opt_time_varied_size.sh @@ -0,0 +1,45 @@ +#!/bin/bash -l +SCRIPTPATH=$(dirname $(readlink -f "$0")) +PROJECT_DIR="${SCRIPTPATH}/../../" + +export PYTHONPATH=$PROJECT_DIR:$PYTHONPATH +cd $PROJECT_DIR + +main_cfg_path="configs/loftr/eloftr_optimized.py" + +profiler_name="inference" +n_nodes=1 # mannually keep this the same with --nodes +n_gpus_per_node=-1 +torch_num_workers=4 +batch_size=1 # per gpu +comment='reproduce_eloft_full_scannet' +METHOD='loftr' + +ckpt_path="weights/eloftr_outdoor.ckpt" + +dump_dir="dump/eloftr_full_scannet" +data_cfg_path="configs/data/scannet_test_1500.py" + +declare -a scannetXY_arr=("640,480" "512,384" "384,288") + +for scannetXY in "${scannetXY_arr[@]}"; do + SCANNETX="${scannetXY%,*}" + SCANNETY="${scannetXY#*,}" + python ./test.py \ + ${data_cfg_path} \ + ${main_cfg_path} \ + --ckpt_path=${ckpt_path} \ + --dump_dir=${dump_dir} \ + --gpus=${n_gpus_per_node} --num_nodes=${n_nodes} --accelerator="ddp" \ + --batch_size=${batch_size} --num_workers=${torch_num_workers}\ + --profiler_name=${profiler_name} \ + --benchmark \ + --scannetX $SCANNETX \ + --scannetY $SCANNETY \ + --npe \ + --rmbd 1 \ + --ransac_times 1 \ + --half \ + --flash + +done \ No newline at end of file diff --git a/third_party/EfficientLoFTR/src/__init__.py b/third_party/EfficientLoFTR/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/EfficientLoFTR/src/config/default.py b/third_party/EfficientLoFTR/src/config/default.py new file mode 100644 index 
0000000000000000000000000000000000000000..03d98095be47b6870cf1475bcfe239de44ee98f9 --- /dev/null +++ b/third_party/EfficientLoFTR/src/config/default.py @@ -0,0 +1,182 @@ +from yacs.config import CfgNode as CN +_CN = CN() + +############## ↓ LoFTR Pipeline ↓ ############## +_CN.LOFTR = CN() +_CN.LOFTR.BACKBONE_TYPE = 'RepVGG' +_CN.LOFTR.ALIGN_CORNER = False +_CN.LOFTR.RESOLUTION = (8, 1) +_CN.LOFTR.FINE_WINDOW_SIZE = 8 # window_size in fine_level, must be even +_CN.LOFTR.MP = False +_CN.LOFTR.REPLACE_NAN = False +_CN.LOFTR.EVAL_TIMES = 1 +_CN.LOFTR.HALF = False + +# 1. LoFTR-backbone (local feature CNN) config +_CN.LOFTR.BACKBONE = CN() +_CN.LOFTR.BACKBONE.BLOCK_DIMS = [64, 128, 256] # s1, s2, s3 + +# 2. LoFTR-coarse module config +_CN.LOFTR.COARSE = CN() +_CN.LOFTR.COARSE.D_MODEL = 256 +_CN.LOFTR.COARSE.D_FFN = 256 +_CN.LOFTR.COARSE.NHEAD = 8 +_CN.LOFTR.COARSE.LAYER_NAMES = ['self', 'cross'] * 4 +_CN.LOFTR.COARSE.AGG_SIZE0 = 4 +_CN.LOFTR.COARSE.AGG_SIZE1 = 4 +_CN.LOFTR.COARSE.NO_FLASH = False +_CN.LOFTR.COARSE.ROPE = True +_CN.LOFTR.COARSE.NPE = None # [832, 832, long_side, long_side] Suggest setting based on the long side of the input image, especially when the long_side > 832 + +# 3. Coarse-Matching config +_CN.LOFTR.MATCH_COARSE = CN() +_CN.LOFTR.MATCH_COARSE.THR = 0.2 # recommend 0.2 for full model and 25 for optimized model +_CN.LOFTR.MATCH_COARSE.BORDER_RM = 2 +_CN.LOFTR.MATCH_COARSE.DSMAX_TEMPERATURE = 0.1 +_CN.LOFTR.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.2 # training tricks: save GPU memory +_CN.LOFTR.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200 # training tricks: avoid DDP deadlock +_CN.LOFTR.MATCH_COARSE.SPARSE_SPVS = True +_CN.LOFTR.MATCH_COARSE.SKIP_SOFTMAX = False +_CN.LOFTR.MATCH_COARSE.FP16MATMUL = False + +# 4. Fine-Matching config +_CN.LOFTR.MATCH_FINE = CN() +_CN.LOFTR.MATCH_FINE.SPARSE_SPVS = True +_CN.LOFTR.MATCH_FINE.LOCAL_REGRESS_TEMPERATURE = 1.0 +_CN.LOFTR.MATCH_FINE.LOCAL_REGRESS_SLICEDIM = 8 + +# 5. 
LoFTR Losses +# -- # coarse-level +_CN.LOFTR.LOSS = CN() +_CN.LOFTR.LOSS.COARSE_TYPE = 'focal' # ['focal', 'cross_entropy'] +_CN.LOFTR.LOSS.COARSE_WEIGHT = 1.0 +_CN.LOFTR.LOSS.COARSE_SIGMOID_WEIGHT = 1.0 +_CN.LOFTR.LOSS.LOCAL_WEIGHT = 0.5 +_CN.LOFTR.LOSS.COARSE_OVERLAP_WEIGHT = False +_CN.LOFTR.LOSS.FINE_OVERLAP_WEIGHT = False +_CN.LOFTR.LOSS.FINE_OVERLAP_WEIGHT2 = False +# -- - -- # focal loss (coarse) +_CN.LOFTR.LOSS.FOCAL_ALPHA = 0.25 +_CN.LOFTR.LOSS.FOCAL_GAMMA = 2.0 +_CN.LOFTR.LOSS.POS_WEIGHT = 1.0 +_CN.LOFTR.LOSS.NEG_WEIGHT = 1.0 + +# -- # fine-level +_CN.LOFTR.LOSS.FINE_TYPE = 'l2_with_std' # ['l2_with_std', 'l2'] +_CN.LOFTR.LOSS.FINE_WEIGHT = 1.0 +_CN.LOFTR.LOSS.FINE_CORRECT_THR = 1.0 # for filtering valid fine-level gts (some gt matches might fall out of the fine-level window) + + +############## Dataset ############## +_CN.DATASET = CN() +# 1. data config +# training and validating +_CN.DATASET.TRAINVAL_DATA_SOURCE = None # options: ['ScanNet', 'MegaDepth'] +_CN.DATASET.TRAIN_DATA_ROOT = None +_CN.DATASET.TRAIN_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.TRAIN_NPZ_ROOT = None +_CN.DATASET.TRAIN_LIST_PATH = None +_CN.DATASET.TRAIN_INTRINSIC_PATH = None +_CN.DATASET.VAL_DATA_ROOT = None +_CN.DATASET.VAL_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.VAL_NPZ_ROOT = None +_CN.DATASET.VAL_LIST_PATH = None # None if val data from all scenes are bundled into a single npz file +_CN.DATASET.VAL_INTRINSIC_PATH = None +_CN.DATASET.FP16 = False +# testing +_CN.DATASET.TEST_DATA_SOURCE = None +_CN.DATASET.TEST_DATA_ROOT = None +_CN.DATASET.TEST_POSE_ROOT = None # (optional directory for poses) +_CN.DATASET.TEST_NPZ_ROOT = None +_CN.DATASET.TEST_LIST_PATH = None # None if test data from all scenes are bundled into a single npz file +_CN.DATASET.TEST_INTRINSIC_PATH = None + +# 2. 
dataset config +# general options +_CN.DATASET.MIN_OVERLAP_SCORE_TRAIN = 0.4 # discard data with overlap_score < min_overlap_score +_CN.DATASET.MIN_OVERLAP_SCORE_TEST = 0.0 +_CN.DATASET.AUGMENTATION_TYPE = None # options: [None, 'dark', 'mobile'] + +# scanNet options +_CN.DATASET.SCAN_IMG_RESIZEX = 640 # resize the longer side, zero-pad bottom-right to square. +_CN.DATASET.SCAN_IMG_RESIZEY = 480 # resize the shorter side, zero-pad bottom-right to square. + +# MegaDepth options +_CN.DATASET.MGDPT_IMG_RESIZE = 640 # resize the longer side, zero-pad bottom-right to square. +_CN.DATASET.MGDPT_IMG_PAD = True # pad img to square with size = MGDPT_IMG_RESIZE +_CN.DATASET.MGDPT_DEPTH_PAD = True # pad depthmap to square with size = 2000 +_CN.DATASET.MGDPT_DF = 8 + +_CN.DATASET.NPE_NAME = None + +############## Trainer ############## +_CN.TRAINER = CN() +_CN.TRAINER.WORLD_SIZE = 1 +_CN.TRAINER.CANONICAL_BS = 64 +_CN.TRAINER.CANONICAL_LR = 6e-3 +_CN.TRAINER.SCALING = None # this will be calculated automatically +_CN.TRAINER.FIND_LR = False # use learning rate finder from pytorch-lightning + +# optimizer +_CN.TRAINER.OPTIMIZER = "adamw" # [adam, adamw] +_CN.TRAINER.TRUE_LR = None # this will be calculated automatically at runtime +_CN.TRAINER.ADAM_DECAY = 0. # ADAM: for adam +_CN.TRAINER.ADAMW_DECAY = 0.1 + +# step-based warm-up +_CN.TRAINER.WARMUP_TYPE = 'linear' # [linear, constant] +_CN.TRAINER.WARMUP_RATIO = 0. 
+_CN.TRAINER.WARMUP_STEP = 4800 + +# learning rate scheduler +_CN.TRAINER.SCHEDULER = 'MultiStepLR' # [MultiStepLR, CosineAnnealing, ExponentialLR] +_CN.TRAINER.SCHEDULER_INTERVAL = 'epoch' # [epoch, step] +_CN.TRAINER.MSLR_MILESTONES = [3, 6, 9, 12] # MSLR: MultiStepLR +_CN.TRAINER.MSLR_GAMMA = 0.5 +_CN.TRAINER.COSA_TMAX = 30 # COSA: CosineAnnealing +_CN.TRAINER.ELR_GAMMA = 0.999992 # ELR: ExponentialLR, this value for 'step' interval + +# plotting related +_CN.TRAINER.ENABLE_PLOTTING = True +_CN.TRAINER.N_VAL_PAIRS_TO_PLOT = 32 # number of val/test pairs for plotting +_CN.TRAINER.PLOT_MODE = 'evaluation' # ['evaluation', 'confidence'] +_CN.TRAINER.PLOT_MATCHES_ALPHA = 'dynamic' + +# geometric metrics and pose solver +_CN.TRAINER.EPI_ERR_THR = 5e-4 # recommendation: 5e-4 for ScanNet, 1e-4 for MegaDepth (from SuperGlue) +_CN.TRAINER.POSE_GEO_MODEL = 'E' # ['E', 'F', 'H'] +_CN.TRAINER.POSE_ESTIMATION_METHOD = 'RANSAC' # [RANSAC, LO-RANSAC] +_CN.TRAINER.RANSAC_PIXEL_THR = 0.5 +_CN.TRAINER.RANSAC_CONF = 0.99999 +_CN.TRAINER.RANSAC_MAX_ITERS = 10000 +_CN.TRAINER.USE_MAGSACPP = False + +# data sampler for train_dataloader +_CN.TRAINER.DATA_SAMPLER = 'scene_balance' # options: ['scene_balance', 'random', 'normal'] +# 'scene_balance' config +_CN.TRAINER.N_SAMPLES_PER_SUBSET = 200 +_CN.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT = True # whether to sample each scene with replacement or not +_CN.TRAINER.SB_SUBSET_SHUFFLE = True # after sampling from scenes, whether to shuffle within the epoch or not +_CN.TRAINER.SB_REPEAT = 1 # repeat N times for training the sampled data +# 'random' config +_CN.TRAINER.RDM_REPLACEMENT = True +_CN.TRAINER.RDM_NUM_SAMPLES = None + +# gradient clipping +_CN.TRAINER.GRADIENT_CLIPPING = 0.5 + +# reproducibility +# This seed affects the data sampling. With the same seed, the data sampling is guaranteed +# to be the same.
When resuming training from a checkpoint, it's better to use a different +# seed, otherwise the sampled data will be exactly the same as before resuming, which will +# cause fewer unique data items to be sampled during the entire training. +# Use of different seed values might affect the final training result, since not all data items +# are used during training on ScanNet. (60M pairs of images sampled during training from 230M pairs in total.) +_CN.TRAINER.SEED = 66 + + +def get_cfg_defaults(): + """Get a yacs CfgNode object with default values for my_project.""" + # Return a clone so that the defaults will not be altered + # This is for the "local variable" use pattern + return _CN.clone() diff --git a/third_party/EfficientLoFTR/src/datasets/megadepth.py b/third_party/EfficientLoFTR/src/datasets/megadepth.py new file mode 100644 index 0000000000000000000000000000000000000000..5f070b2b6da7ef779d41090773d4c45592e95514 --- /dev/null +++ b/third_party/EfficientLoFTR/src/datasets/megadepth.py @@ -0,0 +1,133 @@ +import os.path as osp +import numpy as np +import torch +import torch.nn.functional as F +from torch.utils.data import Dataset +from loguru import logger + +from src.utils.dataset import read_megadepth_gray, read_megadepth_depth + + +class MegaDepthDataset(Dataset): + def __init__(self, + root_dir, + npz_path, + mode='train', + min_overlap_score=0.4, + img_resize=None, + df=None, + img_padding=False, + depth_padding=False, + augment_fn=None, + fp16=False, + **kwargs): + """ + Manage one scene(npz_path) of MegaDepth dataset. + + Args: + root_dir (str): megadepth root directory that has `phoenix`. + npz_path (str): {scene_id}.npz path. This contains image pair information of a scene. + mode (str): options are ['train', 'val', 'test'] + min_overlap_score (float): how much a pair should have in common. In range of [0, 1]. Set to 0 when testing. + img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
+ This is useful during training with batches and testing with memory intensive algorithms. + df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize. + img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training. + depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training. + augment_fn (callable, optional): augments images with pre-defined visual effects. + """ + super().__init__() + self.root_dir = root_dir + self.mode = mode + self.scene_id = npz_path.split('.')[0] + + # prepare scene_info and pair_info + if mode == 'test' and min_overlap_score != 0: + logger.warning("You are using `min_overlap_score`!=0 in test mode. Set to 0.") + min_overlap_score = 0 + self.scene_info = np.load(npz_path, allow_pickle=True) + self.pair_infos = self.scene_info['pair_infos'].copy() + + del self.scene_info['pair_infos'] + self.pair_infos = [pair_info for pair_info in self.pair_infos if pair_info[1] > min_overlap_score] + + # parameters for image resizing, padding and depthmap padding + if mode == 'train': + assert img_resize is not None and img_padding and depth_padding + self.img_resize = img_resize + self.df = df + self.img_padding = img_padding + self.depth_max_size = 2000 if depth_padding else None # the upperbound of depthmaps size in megadepth. + + # for training LoFTR + self.augment_fn = augment_fn if mode == 'train' else None + self.coarse_scale = getattr(kwargs, 'coarse_scale', 0.125) + + self.fp16 = fp16 + + def __len__(self): + return len(self.pair_infos) + + def __getitem__(self, idx): + (idx0, idx1), overlap_score, central_matches = self.pair_infos[idx] + + # read grayscale image and mask. 
(1, h, w) and (h, w) + img_name0 = osp.join(self.root_dir, self.scene_info['image_paths'][idx0]) + img_name1 = osp.join(self.root_dir, self.scene_info['image_paths'][idx1]) + + # TODO: Support augmentation & handle seeds for each worker correctly. + image0, mask0, scale0 = read_megadepth_gray( + img_name0, self.img_resize, self.df, self.img_padding, None) + # np.random.choice([self.augment_fn, None], p=[0.5, 0.5])) + image1, mask1, scale1 = read_megadepth_gray( + img_name1, self.img_resize, self.df, self.img_padding, None) + # np.random.choice([self.augment_fn, None], p=[0.5, 0.5])) + + # read depth. shape: (h, w) + if self.mode in ['train', 'val']: + depth0 = read_megadepth_depth( + osp.join(self.root_dir, self.scene_info['depth_paths'][idx0]), pad_to=self.depth_max_size) + depth1 = read_megadepth_depth( + osp.join(self.root_dir, self.scene_info['depth_paths'][idx1]), pad_to=self.depth_max_size) + else: + depth0 = depth1 = torch.tensor([]) + + # read intrinsics of original size + K_0 = torch.tensor(self.scene_info['intrinsics'][idx0].copy(), dtype=torch.float).reshape(3, 3) + K_1 = torch.tensor(self.scene_info['intrinsics'][idx1].copy(), dtype=torch.float).reshape(3, 3) + + # read and compute relative poses + T0 = self.scene_info['poses'][idx0] + T1 = self.scene_info['poses'][idx1] + T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4] # (4, 4) + T_1to0 = T_0to1.inverse() + + if self.fp16: + image0, image1, depth0, depth1, scale0, scale1 = map(lambda x: x.half(), + [image0, image1, depth0, depth1, scale0, scale1]) + data = { + 'image0': image0, # (1, h, w) + 'depth0': depth0, # (h, w) + 'image1': image1, + 'depth1': depth1, + 'T_0to1': T_0to1, # (4, 4) + 'T_1to0': T_1to0, + 'K0': K_0, # (3, 3) + 'K1': K_1, + 'scale0': scale0, # [scale_w, scale_h] + 'scale1': scale1, + 'dataset_name': 'MegaDepth', + 'scene_id': self.scene_id, + 'pair_id': idx, + 'pair_names': (self.scene_info['image_paths'][idx0], self.scene_info['image_paths'][idx1]), 
+ } + # for LoFTR training + if mask0 is not None: # img_padding is True + if self.coarse_scale: + [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(), + scale_factor=self.coarse_scale, + mode='nearest', + recompute_scale_factor=False)[0].bool() + data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1}) + + return data diff --git a/third_party/EfficientLoFTR/src/datasets/sampler.py b/third_party/EfficientLoFTR/src/datasets/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..81b6f435645632a013476f9a665a0861ab7fcb61 --- /dev/null +++ b/third_party/EfficientLoFTR/src/datasets/sampler.py @@ -0,0 +1,77 @@ +import torch +from torch.utils.data import Sampler, ConcatDataset + + +class RandomConcatSampler(Sampler): + """ Random sampler for ConcatDataset. At each epoch, `n_samples_per_subset` samples will be draw from each subset + in the ConcatDataset. If `subset_replacement` is ``True``, sampling within each subset will be done with replacement. + However, it is impossible to sample data without replacement between epochs, unless bulding a stateful sampler lived along the entire training phase. + + For current implementation, the randomness of sampling is ensured no matter the sampler is recreated across epochs or not and call `torch.manual_seed()` or not. + Args: + shuffle (bool): shuffle the random sampled indices across all sub-datsets. + repeat (int): repeatedly use the sampled indices multiple times for training. + [arXiv:1902.05509, arXiv:1901.09335] + NOTE: Don't re-initialize the sampler between epochs (will lead to repeated samples) + NOTE: This sampler behaves differently with DistributedSampler. + It assume the dataset is splitted across ranks instead of replicated. + TODO: Add a `set_epoch()` method to fullfill sampling without replacement across epochs. 
+ ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373 + """ + def __init__(self, + data_source: ConcatDataset, + n_samples_per_subset: int, + subset_replacement: bool=True, + shuffle: bool=True, + repeat: int=1, + seed: int=None): + if not isinstance(data_source, ConcatDataset): + raise TypeError("data_source should be torch.utils.data.ConcatDataset") + + self.data_source = data_source + self.n_subset = len(self.data_source.datasets) + self.n_samples_per_subset = n_samples_per_subset + self.n_samples = self.n_subset * self.n_samples_per_subset * repeat + self.subset_replacement = subset_replacement + self.repeat = repeat + self.shuffle = shuffle + self.generator = torch.manual_seed(seed) + assert self.repeat >= 1 + + def __len__(self): + return self.n_samples + + def __iter__(self): + indices = [] + # sample from each sub-dataset + for d_idx in range(self.n_subset): + low = 0 if d_idx==0 else self.data_source.cumulative_sizes[d_idx-1] + high = self.data_source.cumulative_sizes[d_idx] + if self.subset_replacement: + rand_tensor = torch.randint(low, high, (self.n_samples_per_subset, ), + generator=self.generator, dtype=torch.int64) + else: # sample without replacement + len_subset = len(self.data_source.datasets[d_idx]) + rand_tensor = torch.randperm(len_subset, generator=self.generator) + low + if len_subset >= self.n_samples_per_subset: + rand_tensor = rand_tensor[:self.n_samples_per_subset] + else: # padding with replacement + rand_tensor_replacement = torch.randint(low, high, (self.n_samples_per_subset - len_subset, ), + generator=self.generator, dtype=torch.int64) + rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement]) + indices.append(rand_tensor) + indices = torch.cat(indices) + if self.shuffle: # shuffle the sampled dataset (from multiple subsets) + rand_tensor = torch.randperm(len(indices), generator=self.generator) + indices = indices[rand_tensor] 
class ScanNetDataset(utils.data.Dataset):
    """Image-pair dataset for the pairs listed in one ScanNet index file."""

    def __init__(self,
                 root_dir,
                 npz_path,
                 intrinsic_path,
                 mode='train',
                 min_overlap_score=0.4,
                 augment_fn=None,
                 pose_dir=None,
                 img_resize=None,
                 fp16=False,
                 **kwargs):
        """Manage one scene of ScanNet Dataset.
        Args:
            root_dir (str): ScanNet root directory that contains scene folders.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            intrinsic_path (str): path to depth-camera intrinsic file.
            mode (str): options are ['train', 'val', 'test'].
            min_overlap_score (float): pairs whose `score` is not above this value are dropped
                (only when the index has a `score` entry and mode is neither 'val' nor 'test').
            augment_fn (callable, optional): augments images with pre-defined visual effects.
            pose_dir (str): ScanNet root directory that contains all poses.
                (we use a separate (optional) pose_dir since we store images and poses separately.)
            img_resize (tuple, optional): (width, height) forwarded to the image reader and used
                to compute the scale factors in `__getitem__`.
            fp16 (bool): If set to 'True', images, depthmaps and scales are cast to half precision.
        """
        super().__init__()
        self.root_dir = root_dir
        self.pose_dir = pose_dir if pose_dir is not None else root_dir
        self.mode = mode

        # prepare data_names, intrinsics and extrinsics(T)
        with np.load(npz_path) as data:
            self.data_names = data['name']
            # Overlap filtering is a training-time feature; 'val' and 'test' keep every pair.
            if 'score' in data.keys() and mode not in ('val', 'test'):
                kept_mask = data['score'] > min_overlap_score
                self.data_names = self.data_names[kept_mask]
        # Materialize the intrinsics and close the archive right away.
        with np.load(intrinsic_path) as intrinsics:
            self.intrinsics = dict(intrinsics)

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None

        self.fp16 = fp16
        self.img_resize = img_resize

    def __len__(self):
        """Return the number of image pairs kept for this scene."""
        return len(self.data_names)

    def _read_abs_pose(self, scene_name, name):
        """Read the pose stored at `<pose_dir>/<scene_name>/pose/<name>.txt`."""
        pth = osp.join(self.pose_dir,
                       scene_name,
                       'pose', f'{name}.txt')
        return read_scannet_pose(pth)

    def _compute_rel_pose(self, scene_name, name0, name1):
        """Return `pose1 @ inv(pose0)` for the two named frames of `scene_name`."""
        pose0 = self._read_abs_pose(scene_name, name0)
        pose1 = self._read_abs_pose(scene_name, name1)

        return np.matmul(pose1, inv(pose0))  # (4, 4)

    def __getitem__(self, idx):
        """Load pair `idx` and return a dict with images, depths, poses, intrinsics and metadata."""
        data_name = self.data_names[idx]
        scene_name, scene_sub_name, stem_name_0, stem_name_1 = data_name
        scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'

        # read the grayscale image which will be resized to (1, 480, 640)
        img_name0 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_0}.jpg')
        img_name1 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_1}.jpg')

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0 = read_scannet_gray(img_name0, resize=self.img_resize, augment_fn=None)
        image1 = read_scannet_gray(img_name1, resize=self.img_resize, augment_fn=None)

        # read the depthmap which is stored as (480, 640)
        if self.mode in ['train', 'val']:
            depth0 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_0}.png'))
            depth1 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_1}.png'))
        else:
            # depth is not loaded in test mode; empty placeholders keep the dict layout stable
            depth0 = depth1 = torch.tensor([])

        # read the intrinsic of depthmap
        K_0 = K_1 = torch.tensor(self.intrinsics[scene_name].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1),
                              dtype=torch.float32)
        T_1to0 = T_0to1.inverse()

        # NOTE(review): this indexes `img_resize`, so the `None` default fails here -- callers
        # are expected to pass a (width, height) pair; confirm before relying on the default.
        h_new, w_new = self.img_resize[1], self.img_resize[0]
        scale0 = torch.tensor([640/w_new, 480/h_new], dtype=torch.float)
        scale1 = torch.tensor([640/w_new, 480/h_new], dtype=torch.float)

        if self.fp16:
            image0, image1, depth0, depth1, scale0, scale1 = map(
                lambda x: x.half(), [image0, image1, depth0, depth1, scale0, scale1])

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'ScanNet',
            'scene_id': scene_name,
            'pair_id': idx,
            'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'),
                           osp.join(scene_name, 'color', f'{stem_name_1}.jpg'))
        }

        return data
class MultiSceneDataModule(pl.LightningDataModule):
    """
    For distributed training, each training process is assigned
    only a part of the training scenes to reduce memory overhead.
    """

    def __init__(self, args, config):
        """Copy dataset, loader and sampler settings from `args` / `config` onto the module.

        Args:
            args: object providing `batch_size` and `num_workers`; `pin_memory` and
                `parallel_load_data` are optional attributes.
            config: config node providing the `DATASET`, `LOFTR` and `TRAINER` entries read below.
        """
        super().__init__()

        # 1. data config
        # Train and Val should come from the same data source
        self.trainval_data_source = config.DATASET.TRAINVAL_DATA_SOURCE
        self.test_data_source = config.DATASET.TEST_DATA_SOURCE
        # training and validating
        self.train_data_root = config.DATASET.TRAIN_DATA_ROOT
        self.train_pose_root = config.DATASET.TRAIN_POSE_ROOT  # (optional)
        self.train_npz_root = config.DATASET.TRAIN_NPZ_ROOT
        self.train_list_path = config.DATASET.TRAIN_LIST_PATH
        self.train_intrinsic_path = config.DATASET.TRAIN_INTRINSIC_PATH
        self.val_data_root = config.DATASET.VAL_DATA_ROOT
        self.val_pose_root = config.DATASET.VAL_POSE_ROOT  # (optional)
        self.val_npz_root = config.DATASET.VAL_NPZ_ROOT
        self.val_list_path = config.DATASET.VAL_LIST_PATH
        self.val_intrinsic_path = config.DATASET.VAL_INTRINSIC_PATH
        # testing
        self.test_data_root = config.DATASET.TEST_DATA_ROOT
        self.test_pose_root = config.DATASET.TEST_POSE_ROOT  # (optional)
        self.test_npz_root = config.DATASET.TEST_NPZ_ROOT
        self.test_list_path = config.DATASET.TEST_LIST_PATH
        self.test_intrinsic_path = config.DATASET.TEST_INTRINSIC_PATH

        # 2. dataset config
        # general options
        self.min_overlap_score_test = config.DATASET.MIN_OVERLAP_SCORE_TEST  # 0.4, omit data with overlap_score < min_overlap_score
        self.min_overlap_score_train = config.DATASET.MIN_OVERLAP_SCORE_TRAIN
        self.augment_fn = build_augmentor(config.DATASET.AUGMENTATION_TYPE)  # None, options: [None, 'dark', 'mobile']

        # ScanNet options
        self.scan_img_resizeX = config.DATASET.SCAN_IMG_RESIZEX  # 640
        self.scan_img_resizeY = config.DATASET.SCAN_IMG_RESIZEY  # 480

        # MegaDepth options
        self.mgdpt_img_resize = config.DATASET.MGDPT_IMG_RESIZE  # 832
        self.mgdpt_img_pad = config.DATASET.MGDPT_IMG_PAD  # True
        self.mgdpt_depth_pad = config.DATASET.MGDPT_DEPTH_PAD  # True
        self.mgdpt_df = config.DATASET.MGDPT_DF  # 8
        self.coarse_scale = 1 / config.LOFTR.RESOLUTION[0]  # 0.125. for training loftr.

        self.fp16 = config.DATASET.FP16

        # 3. loader parameters
        self.train_loader_params = {
            'batch_size': args.batch_size,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.val_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': getattr(args, 'pin_memory', True)
        }
        self.test_loader_params = {
            'batch_size': 1,
            'shuffle': False,
            'num_workers': args.num_workers,
            'pin_memory': True
        }

        # 4. sampler
        self.data_sampler = config.TRAINER.DATA_SAMPLER
        self.n_samples_per_subset = config.TRAINER.N_SAMPLES_PER_SUBSET
        self.subset_replacement = config.TRAINER.SB_SUBSET_SAMPLE_REPLACEMENT
        self.shuffle = config.TRAINER.SB_SUBSET_SHUFFLE
        self.repeat = config.TRAINER.SB_REPEAT

        # (optional) RandomSampler for debugging

        # misc configurations
        self.parallel_load_data = getattr(args, 'parallel_load_data', False)
        self.seed = config.TRAINER.SEED  # 66

    def setup(self, stage=None):
        """
        Setup train / val / test dataset. This method will be called by PL automatically.
        Args:
            stage (str): 'fit' in training phase, 'validate' for validation only,
                and 'test' in testing phase.
        """

        assert stage in ['fit', 'validate', 'test'], "stage must be either fit or test"

        try:
            self.world_size = dist.get_world_size()
            self.rank = dist.get_rank()
            logger.info(f"[rank:{self.rank}] world_size: {self.world_size}")
        except (AssertionError, RuntimeError, ValueError) as err:
            # No default process group: fall back to single-process settings. Which exception
            # is raised appears to depend on the installed torch version, so all are caught.
            self.world_size = 1
            self.rank = 0
            logger.warning(str(err) + " (set wolrd_size=1 and rank=0)")

        if stage == 'fit':
            self.train_dataset = self._setup_dataset(
                self.train_data_root,
                self.train_npz_root,
                self.train_list_path,
                self.train_intrinsic_path,
                mode='train',
                min_overlap_score=self.min_overlap_score_train,
                pose_dir=self.train_pose_root)
            self.val_dataset = self._setup_val_dataset()
            logger.info(f'[rank:{self.rank}] Train & Val Dataset loaded!')
        elif stage == 'validate':
            self.val_dataset = self._setup_val_dataset()
            logger.info(f'[rank:{self.rank}] Val Dataset loaded!')
        else:  # stage == 'test'
            self.test_dataset = self._setup_dataset(
                self.test_data_root,
                self.test_npz_root,
                self.test_list_path,
                self.test_intrinsic_path,
                mode='test',
                min_overlap_score=self.min_overlap_score_test,
                pose_dir=self.test_pose_root)
            logger.info(f'[rank:{self.rank}]: Test Dataset loaded!')

    def _setup_val_dataset(self):
        """Build the validation set: a list of datasets if `val_list_path` is a list/tuple, else one dataset."""
        if not isinstance(self.val_list_path, (list, tuple)):
            return self._setup_dataset(
                self.val_data_root,
                self.val_npz_root,
                self.val_list_path,
                self.val_intrinsic_path,
                mode='val',
                min_overlap_score=self.min_overlap_score_test,
                pose_dir=self.val_pose_root)

        # setup multiple (optional) validation subsets
        if not isinstance(self.val_npz_root, (list, tuple)):
            # a single npz root is shared by every validation list
            self.val_npz_root = [self.val_npz_root for _ in range(len(self.val_list_path))]
        return [
            self._setup_dataset(
                self.val_data_root,
                npz_root,
                npz_list,
                self.val_intrinsic_path,
                mode='val',
                min_overlap_score=self.min_overlap_score_test,
                pose_dir=self.val_pose_root)
            for npz_list, npz_root in zip(self.val_list_path, self.val_npz_root)
        ]

    def _setup_dataset(self,
                       data_root,
                       split_npz_root,
                       scene_list_path,
                       intri_path,
                       mode='train',
                       min_overlap_score=0.,
                       pose_dir=None):
        """ Setup train / val / test set"""
        with open(scene_list_path, 'r') as f:
            npz_names = [name.split()[0] for name in f.readlines()]

        if mode == 'train':
            # only the training scenes are split across ranks
            local_npz_names = get_local_split(npz_names, self.world_size, self.rank, self.seed)
        else:
            local_npz_names = npz_names
        logger.info(f'[rank {self.rank}]: {len(local_npz_names)} scene(s) assigned.')

        dataset_builder = self._build_concat_dataset_parallel \
            if self.parallel_load_data \
            else self._build_concat_dataset
        return dataset_builder(data_root, local_npz_names, split_npz_root, intri_path,
                               mode=mode, min_overlap_score=min_overlap_score, pose_dir=pose_dir)

    def _build_concat_dataset(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None
    ):
        """Build one dataset per index file sequentially and concatenate them."""
        datasets = []
        augment_fn = self.augment_fn if mode == 'train' else None
        data_source = self.trainval_data_source if mode in ['train', 'val'] else self.test_data_source
        if str(data_source).lower() == 'megadepth':
            npz_names = [f'{n}.npz' for n in npz_names]
        for npz_name in tqdm(npz_names,
                             desc=f'[rank:{self.rank}] loading {mode} datasets',
                             disable=int(self.rank) != 0):
            # `ScanNetDataset`/`MegaDepthDataset` load all data from npz_path when initialized, which might take time.
            npz_path = osp.join(npz_dir, npz_name)
            if data_source == 'ScanNet':
                datasets.append(
                    ScanNetDataset(data_root,
                                   npz_path,
                                   intrinsic_path,
                                   mode=mode,
                                   min_overlap_score=min_overlap_score,
                                   augment_fn=augment_fn,
                                   pose_dir=pose_dir,
                                   img_resize=(self.scan_img_resizeX, self.scan_img_resizeY),
                                   fp16=self.fp16,
                                   ))
            elif data_source == 'MegaDepth':
                datasets.append(
                    MegaDepthDataset(data_root,
                                     npz_path,
                                     mode=mode,
                                     min_overlap_score=min_overlap_score,
                                     img_resize=self.mgdpt_img_resize,
                                     df=self.mgdpt_df,
                                     img_padding=self.mgdpt_img_pad,
                                     depth_padding=self.mgdpt_depth_pad,
                                     augment_fn=augment_fn,
                                     coarse_scale=self.coarse_scale,
                                     fp16=self.fp16,
                                     ))
            else:
                raise NotImplementedError()
        return ConcatDataset(datasets)

    def _build_concat_dataset_parallel(
        self,
        data_root,
        npz_names,
        npz_dir,
        intrinsic_path,
        mode,
        min_overlap_score=0.,
        pose_dir=None,
    ):
        """Build one dataset per index file with joblib workers and concatenate them.

        Only ScanNet is supported; MegaDepth raises `NotImplementedError`.
        """
        augment_fn = self.augment_fn if mode == 'train' else None
        data_source = self.trainval_data_source if mode in ['train', 'val'] else self.test_data_source
        if str(data_source).lower() == 'megadepth':
            npz_names = [f'{n}.npz' for n in npz_names]
        with tqdm_joblib(tqdm(desc=f'[rank:{self.rank}] loading {mode} datasets',
                              total=len(npz_names), disable=int(self.rank) != 0)):
            if data_source == 'ScanNet':
                # use ~90% of the CPUs available to this process, shared among local ranks
                n_jobs = math.floor(len(os.sched_getaffinity(0)) * 0.9 / comm.get_local_size())
                datasets = Parallel(n_jobs=n_jobs)(
                    delayed(lambda x: _build_dataset(
                        ScanNetDataset,
                        data_root,
                        osp.join(npz_dir, x),
                        intrinsic_path,
                        mode=mode,
                        min_overlap_score=min_overlap_score,
                        augment_fn=augment_fn,
                        pose_dir=pose_dir,
                        # same resize / precision options as the sequential builder
                        img_resize=(self.scan_img_resizeX, self.scan_img_resizeY),
                        fp16=self.fp16))(name)
                    for name in npz_names)
            elif data_source == 'MegaDepth':
                # TODO: _pickle.PicklingError: Could not pickle the task to send it to the workers.
                raise NotImplementedError()
            else:
                raise ValueError(f'Unknown dataset: {data_source}')
        return ConcatDataset(datasets)

    def train_dataloader(self):
        """ Build training dataloader for ScanNet / MegaDepth. """
        assert self.data_sampler in ['scene_balance']
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Train Sampler and DataLoader re-init (should not re-init between epochs!).')
        if self.data_sampler == 'scene_balance':
            sampler = RandomConcatSampler(self.train_dataset,
                                          self.n_samples_per_subset,
                                          self.subset_replacement,
                                          self.shuffle, self.repeat, self.seed)
        else:
            sampler = None
        dataloader = DataLoader(self.train_dataset, sampler=sampler, **self.train_loader_params)
        return dataloader

    def val_dataloader(self):
        """ Build validation dataloader for ScanNet / MegaDepth. """
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Val Sampler and DataLoader re-init.')
        if not isinstance(self.val_dataset, abc.Sequence):
            sampler = DistributedSampler(self.val_dataset, shuffle=False)
            return DataLoader(self.val_dataset, sampler=sampler, **self.val_loader_params)
        else:
            # one loader per validation subset
            dataloaders = []
            for dataset in self.val_dataset:
                sampler = DistributedSampler(dataset, shuffle=False)
                dataloaders.append(DataLoader(dataset, sampler=sampler, **self.val_loader_params))
            return dataloaders

    def test_dataloader(self, *args, **kwargs):
        """Build the test dataloader (batch size 1, no shuffling)."""
        logger.info(f'[rank:{self.rank}/{self.world_size}]: Test Sampler and DataLoader re-init.')
        sampler = DistributedSampler(self.test_dataset, shuffle=False)
        return DataLoader(self.test_dataset, sampler=sampler, **self.test_loader_params)


def _build_dataset(dataset: Dataset, *args, **kwargs):
    """Instantiate `dataset` with the given arguments (module-level so joblib workers can call it)."""
    return dataset(*args, **kwargs)
def reparameter(matcher):
    """Call `switch_to_deploy()` on every sub-module of `matcher` that provides it.

    Visits, in order: `backbone.layer0` (a single module), each module inside
    `backbone.layer1..layer3`, then each module inside `fine_preprocess.layer2_outconv2`
    and `fine_preprocess.layer1_outconv2`. Modules without `switch_to_deploy` are skipped.

    Args:
        matcher: model exposing the `backbone` and `fine_preprocess` attributes listed above.

    Returns:
        The same `matcher` object (its sub-modules are switched in place).
    """
    def _deploy(module):
        # only some blocks are re-parameterizable; the rest are left untouched
        if hasattr(module, 'switch_to_deploy'):
            module.switch_to_deploy()

    def _deploy_each(*containers):
        for container in containers:
            for module in container:
                _deploy(module)

    _deploy(matcher.backbone.layer0)
    _deploy_each(matcher.backbone.layer1, matcher.backbone.layer2, matcher.backbone.layer3)
    _deploy_each(matcher.fine_preprocess.layer2_outconv2, matcher.fine_preprocess.layer1_outconv2)
    return matcher
optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + # learning rate warm up + warmup_step = self.config.TRAINER.WARMUP_STEP + if self.trainer.global_step < warmup_step: + if self.config.TRAINER.WARMUP_TYPE == 'linear': + base_lr = self.config.TRAINER.WARMUP_RATIO * self.config.TRAINER.TRUE_LR + lr = base_lr + \ + (self.trainer.global_step / self.config.TRAINER.WARMUP_STEP) * \ + abs(self.config.TRAINER.TRUE_LR - base_lr) + for pg in optimizer.param_groups: + pg['lr'] = lr + elif self.config.TRAINER.WARMUP_TYPE == 'constant': + pass + else: + raise ValueError(f'Unknown lr warm-up strategy: {self.config.TRAINER.WARMUP_TYPE}') + + # update params + optimizer.step(closure=optimizer_closure) + optimizer.zero_grad() + + def _trainval_inference(self, batch): + with self.profiler.profile("Compute coarse supervision"): + with torch.autocast(enabled=False, device_type='cuda'): + compute_supervision_coarse(batch, self.config) + + with self.profiler.profile("LoFTR"): + with torch.autocast(enabled=self.config.LOFTR.MP, device_type='cuda'): + self.matcher(batch) + + with self.profiler.profile("Compute fine supervision"): + with torch.autocast(enabled=False, device_type='cuda'): + compute_supervision_fine(batch, self.config, self.logger) + + with self.profiler.profile("Compute losses"): + with torch.autocast(enabled=self.config.LOFTR.MP, device_type='cuda'): + self.loss(batch) + + def _compute_metrics(self, batch): + compute_symmetrical_epipolar_errors(batch) # compute epi_errs for each match + compute_pose_errors(batch, self.config) # compute R_errs, t_errs, pose_errs for each pair + + rel_pair_names = list(zip(*batch['pair_names'])) + bs = batch['image0'].size(0) + metrics = { + # to filter duplicate pairs caused by DistributedSampler + 'identifiers': ['#'.join(rel_pair_names[b]) for b in range(bs)], + 'epi_errs': [(batch['epi_errs'].reshape(-1,1))[batch['m_bids'] == b].reshape(-1).cpu().numpy() for b in range(bs)], + 'R_errs': batch['R_errs'], + 't_errs': 
batch['t_errs'], + 'inliers': batch['inliers'], + 'num_matches': [batch['mconf'].shape[0]], # batch size = 1 only + } + ret_dict = {'metrics': metrics} + return ret_dict, rel_pair_names + + def training_step(self, batch, batch_idx): + self._trainval_inference(batch) + + # logging + if self.trainer.global_rank == 0 and self.global_step % self.trainer.log_every_n_steps == 0: + # scalars + for k, v in batch['loss_scalars'].items(): + self.logger.experiment.add_scalar(f'train/{k}', v, self.global_step) + + # figures + if self.config.TRAINER.ENABLE_PLOTTING: + compute_symmetrical_epipolar_errors(batch) # compute epi_errs for each match + figures = make_matching_figures(batch, self.config, self.config.TRAINER.PLOT_MODE) + for k, v in figures.items(): + self.logger.experiment.add_figure(f'train_match/{k}', v, self.global_step) + return {'loss': batch['loss']} + + def training_epoch_end(self, outputs): + avg_loss = torch.stack([x['loss'] for x in outputs]).mean() + if self.trainer.global_rank == 0: + self.logger.experiment.add_scalar( + 'train/avg_loss_on_epoch', avg_loss, + global_step=self.current_epoch) + + def validation_step(self, batch, batch_idx): + self._trainval_inference(batch) + + ret_dict, _ = self._compute_metrics(batch) + + val_plot_interval = max(self.trainer.num_val_batches[0] // self.n_vals_plot, 1) + figures = {self.config.TRAINER.PLOT_MODE: []} + if batch_idx % val_plot_interval == 0: + figures = make_matching_figures(batch, self.config, mode=self.config.TRAINER.PLOT_MODE) + + return { + **ret_dict, + 'loss_scalars': batch['loss_scalars'], + 'figures': figures, + } + + def validation_epoch_end(self, outputs): + # handle multiple validation sets + multi_outputs = [outputs] if not isinstance(outputs[0], (list, tuple)) else outputs + multi_val_metrics = defaultdict(list) + + for valset_idx, outputs in enumerate(multi_outputs): + # since pl performs sanity_check at the very begining of the training + cur_epoch = self.trainer.current_epoch + if not 
def test_step(self, batch, batch_idx):
    """Run the matcher on one test batch, time the forward pass, and return metrics.

    On first use the matcher is re-parameterised (RepVGG backbones only) and,
    when ``config.LOFTR.HALF`` is set, switched to eval mode and cast to half
    precision. A one-off warm-up of 50 forward passes precedes the first timed
    pass so that start-up costs are not included in ``self.total_ms``.

    Args:
        batch: the data dict passed to ``self.matcher`` and ``_compute_metrics``.
        batch_idx: index of the batch (unused here).

    Returns:
        The first element returned by ``self._compute_metrics(batch)``.
    """
    # Fuse the RepVGG branches exactly once; `self.reparameter` is the "already done" flag.
    if (self.config.LOFTR.BACKBONE_TYPE == 'RepVGG') and not self.reparameter:
        self.matcher = reparameter(self.matcher)
        if self.config.LOFTR.HALF:
            self.matcher = self.matcher.eval().half()
        self.reparameter = True

    # One-off warm-up; `self.warmup` is the "already done" flag.
    if not self.warmup:
        if self.config.LOFTR.HALF:
            # Model is already fp16, so no autocast context is used.
            for i in range(50):
                self.matcher(batch)
        else:
            # autocast is only active when mixed precision (MP) is requested.
            with torch.autocast(enabled=self.config.LOFTR.MP, device_type='cuda'):
                for i in range(50):
                    self.matcher(batch)
        self.warmup = True
        torch.cuda.synchronize()

    # Timed forward pass. NOTE(review): start_event/end_event are presumably
    # torch.cuda.Event(enable_timing=True) objects created elsewhere - confirm.
    if self.config.LOFTR.HALF:
        self.start_event.record()
        self.matcher(batch)
        self.end_event.record()
        # Wait for the GPU so that elapsed_time() reads completed events.
        torch.cuda.synchronize()
        self.total_ms += self.start_event.elapsed_time(self.end_event)
    else:
        with torch.autocast(enabled=self.config.LOFTR.MP, device_type='cuda'):
            self.start_event.record()
            self.matcher(batch)
            self.end_event.record()
            torch.cuda.synchronize()
            self.total_ms += self.start_event.elapsed_time(self.end_event)

    # The second return value (pair names) is not used in the test loop.
    ret_dict, rel_pair_names = self._compute_metrics(batch)
    return ret_dict

def test_epoch_end(self, outputs):
    """Gather per-batch metrics and report timing and aggregated metrics on rank 0.

    Args:
        outputs: list of the dicts returned by ``test_step``; each is read
            through its ``'metrics'`` entry.
    """
    # metrics: dict of list, numpy
    _metrics = [o['metrics'] for o in outputs]
    metrics = {k: flattenList(gather(flattenList([_me[k] for _me in _metrics]))) for k in _metrics[0]}

    # [{key: [{...}, *#bs]}, *#batch]
    if self.trainer.global_rank == 0:
        # NOTE(review): the divisor 1500 is hard-coded; the reported average is
        # only meaningful if exactly 1500 pairs were timed - confirm against the test set.
        print('Averaged Matching time over 1500 pairs: {:.2f} ms'.format(self.total_ms / 1500))
        val_metrics_4tb = aggregate_metrics(metrics, self.config.TRAINER.EPI_ERR_THR, config=self.config)
        logger.info('\n' + pprint.pformat(val_metrics_4tb))
def build_backbone(config):
    """Instantiate the CNN backbone described by ``config``.

    Args:
        config: mapping read through the keys ``'backbone_type'``,
            ``'align_corner'``, ``'resolution'`` and ``'backbone'``.

    Returns:
        A ``RepVGG_8_1_align`` built from ``config['backbone']``.

    Raises:
        ValueError: if the backbone type, the align-corner setting or the
            resolution is not a supported combination.
    """
    if config['backbone_type'] != 'RepVGG':
        raise ValueError(f"LOFTR.BACKBONE_TYPE {config['backbone_type']} not supported.")
    if config['align_corner'] is not False:
        raise ValueError(f"LOFTR.ALIGN_CORNER {config['align_corner']} not supported.")

    # Configs loaded from YAML/JSON carry the resolution as a list; normalise
    # it so that [8, 1] is accepted just like (8, 1).
    resolution = config['resolution']
    if isinstance(resolution, list):
        resolution = tuple(resolution)
    if resolution != (8, 1):
        # Previously this case fell through and returned None, which only
        # surfaced later as "'NoneType' object is not callable".
        raise ValueError(f"LOFTR.RESOLUTION {config['resolution']} not supported.")

    return RepVGG_8_1_align(config['backbone'])
def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1):
    """Return a bias-free convolution followed by batch norm.

    The result is an ``nn.Sequential`` whose children are registered under the
    names ``'conv'`` and ``'bn'``.
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding,
                            groups=groups, bias=False)
    batch_norm = nn.BatchNorm2d(out_channels)

    block = nn.Sequential()
    for child_name, child in (('conv', convolution), ('bn', batch_norm)):
        block.add_module(child_name, child)
    return block
def get_custom_L2(self):
    """Return the custom L2 penalty for this block's 3x3 and 1x1 kernels.

    Optional; this may improve accuracy and facilitate quantization in some
    cases. Intended usage:
      1. Cancel the regular weight decay on ``rbr_dense.conv.weight`` and
         ``rbr_1x1.conv.weight``.
      2. For every RepVGGBlock ``blk`` add
         ``weight_decay_coefficient * 0.5 * blk.get_custom_L2()`` to the loss
         before calling ``backward()``.
    """
    dense, pointwise = self.rbr_dense, self.rbr_1x1
    w3x3 = dense.conv.weight
    w1x1 = pointwise.conv.weight

    def bn_scale(bn):
        # Per-output-channel BN multiplier gamma / sqrt(var + eps), detached
        # so the penalty does not back-propagate into the BN statistics.
        return (bn.weight / ((bn.running_var + bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach()

    scale3 = bn_scale(dense.bn)
    scale1 = bn_scale(pointwise.bn)

    centre = w3x3[:, :, 1:2, 1:2]
    # Regular L2 on the eight outer weights of the 3x3 kernel.
    ring_penalty = (w3x3 ** 2).sum() - (centre ** 2).sum()
    # The centre tap and the 1x1 kernel act on the same pixel: penalise their
    # BN-scaled sum, normalised so the coefficient is comparable to regular L2.
    fused_centre = centre * scale3 + w1x1 * scale1
    centre_penalty = (fused_centre ** 2 / (scale3 ** 2 + scale1 ** 2)).sum()
    return centre_penalty + ring_penalty
def get_equivalent_kernel_bias(self):
    """Return the single 3x3 kernel and bias equivalent to all three branches.

    Derived in a differentiable way, so it can be evaluated at any time, e.g.
    to apply penalties or constraints during training.
    """
    fused = [self._fuse_bn_tensor(branch)
             for branch in (self.rbr_dense, self.rbr_1x1, self.rbr_identity)]
    (k_dense, b_dense), (k_point, b_point), (k_ident, b_ident) = fused
    kernel = k_dense + self._pad_1x1_to_3x3_tensor(k_point) + k_ident
    bias = b_dense + b_point + b_ident
    return kernel, bias

def _pad_1x1_to_3x3_tensor(self, kernel1x1):
    """Zero-pad a 1x1 kernel to 3x3; a missing kernel contributes 0."""
    if kernel1x1 is not None:
        return torch.nn.functional.pad(kernel1x1, [1,1,1,1])
    return 0

def _fuse_bn_tensor(self, branch):
    """Fold a branch's batch norm into an equivalent ``(kernel, bias)`` pair.

    ``branch`` may be ``None`` (contributes ``(0, 0)``), a conv+bn
    ``nn.Sequential``, or a bare ``nn.BatchNorm2d`` (the identity branch).
    """
    if branch is None:
        return 0, 0

    if isinstance(branch, nn.Sequential):
        kernel, bn = branch.conv.weight, branch.bn
    else:
        assert isinstance(branch, nn.BatchNorm2d)
        if not hasattr(self, 'id_tensor'):
            # Build (once) a 3x3 kernel that acts as the identity mapping:
            # a single 1 at the centre tap of each channel's own input slot.
            per_group = self.in_channels // self.groups
            identity = np.zeros((self.in_channels, per_group, 3, 3), dtype=np.float32)
            channel_ids = np.arange(self.in_channels)
            identity[channel_ids, channel_ids % per_group, 1, 1] = 1
            self.id_tensor = torch.from_numpy(identity).to(branch.weight.device)
        kernel, bn = self.id_tensor, branch

    std = (bn.running_var + bn.eps).sqrt()
    scale = (bn.weight / std).reshape(-1, 1, 1, 1)
    return kernel * scale, bn.bias - bn.running_mean * bn.weight / std
class RepVGG(nn.Module):
    """RepVGG trunk reduced to a stem and three stages, for use as a feature backbone.

    Only ``stage0`` to ``stage3`` are built: ``num_blocks[3]``,
    ``width_multiplier[3]`` and ``num_classes`` are accepted for signature
    compatibility with the original RepVGG but are not used here, and no
    pooling / linear classification head is created.

    Args:
        num_blocks: number of blocks per stage (the first three entries are used).
        num_classes: unused here.
        width_multiplier: sequence of four channel multipliers (required).
        override_groups_map: optional ``{layer_index: groups}`` mapping.
        deploy: build blocks directly in their fused (inference) form.
        use_se: forwarded to every ``RepVGGBlock``.
        use_checkpoint: wrap each block in ``torch.utils.checkpoint`` in ``forward``.
    """

    def __init__(self, num_blocks, num_classes=1000, width_multiplier=None, override_groups_map=None, deploy=False, use_se=False, use_checkpoint=False):
        super(RepVGG, self).__init__()
        # The None default used to fail with "object of type 'NoneType' has no len()".
        assert width_multiplier is not None and len(width_multiplier) == 4, \
            "width_multiplier must be a sequence of 4 multipliers"
        self.deploy = deploy
        self.override_groups_map = override_groups_map or dict()
        assert 0 not in self.override_groups_map
        self.use_se = use_se
        self.use_checkpoint = use_checkpoint

        self.in_planes = min(64, int(64 * width_multiplier[0]))
        # The stem takes a single-channel (grayscale) input.
        self.stage0 = RepVGGBlock(in_channels=1, out_channels=self.in_planes, kernel_size=3, stride=2, padding=1, deploy=self.deploy, use_se=self.use_se)
        self.cur_layer_idx = 1
        self.stage1 = self._make_stage(int(64 * width_multiplier[0]), num_blocks[0], stride=1)
        self.stage2 = self._make_stage(int(128 * width_multiplier[1]), num_blocks[1], stride=2)
        self.stage3 = self._make_stage(int(256 * width_multiplier[2]), num_blocks[2], stride=2)

    def _make_stage(self, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
            blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3,
                                      stride=block_stride, padding=1, groups=cur_groups, deploy=self.deploy, use_se=self.use_se))
            self.in_planes = planes
            self.cur_layer_idx += 1
        return nn.ModuleList(blocks)

    def forward(self, x):
        """Run the stem and the three stages.

        Returns the stage-3 feature map. If a subclass or caller has attached
        both ``gap`` and ``linear`` modules, they are applied on top, as in the
        original classification network.
        """
        out = self.stage0(x)
        for stage in (self.stage1, self.stage2, self.stage3):
            for block in stage:
                if self.use_checkpoint:
                    out = checkpoint.checkpoint(block, out)
                else:
                    out = block(out)
        # __init__ does not create a classification head, so using
        # self.gap / self.linear unconditionally always raised AttributeError.
        if hasattr(self, 'gap') and hasattr(self, 'linear'):
            out = self.gap(out)
            out = out.view(out.size(0), -1)
            out = self.linear(out)
        return out
def create_RepVGG(deploy=False, use_checkpoint=False):
    """Build the RepVGG configuration used by this backbone."""
    settings = dict(
        num_blocks=[2, 4, 14, 1],
        num_classes=1000,
        width_multiplier=[1, 1, 1, 2.5],
        override_groups_map=None,
        deploy=deploy,
        use_checkpoint=use_checkpoint,
    )
    return RepVGG(**settings)

# Converts a RepVGG model, or a bigger model that has RepVGG as a component.
#   model = create_RepVGG(deploy=False)
#   ... train the model or load weights ...
#   repvgg_model_convert(model, save_path='repvgg_deploy.pth')
# Pass do_copy=True to keep the original model untouched.
def repvgg_model_convert(model:torch.nn.Module, save_path=None, do_copy=True):
    """Switch every sub-module that offers ``switch_to_deploy`` to deploy form.

    Args:
        model: the network to convert.
        save_path: when given, the converted ``state_dict`` is saved there.
        do_copy: convert a deep copy and leave ``model`` unchanged.

    Returns:
        The converted model (the copy when ``do_copy`` is true).
    """
    converted = copy.deepcopy(model) if do_copy else model
    for submodule in converted.modules():
        if not hasattr(submodule, 'switch_to_deploy'):
            continue
        submodule.switch_to_deploy()
    if save_path is None:
        return converted
    torch.save(converted.state_dict(), save_path)
    return converted
def reparameter(matcher):
    """Switch every re-parameterisable layer of ``matcher`` to deploy form.

    Visits the backbone stem (``layer0``), each block of backbone
    ``layer1`` to ``layer3``, and each layer of the fine-preprocess heads
    ``layer2_outconv2`` and ``layer1_outconv2``. Anything without a
    ``switch_to_deploy`` method is left untouched.

    Returns:
        The same ``matcher`` object, modified in place.
    """
    def deploy_if_possible(candidate):
        if hasattr(candidate, 'switch_to_deploy'):
            candidate.switch_to_deploy()

    backbone = matcher.backbone
    deploy_if_possible(backbone.layer0)

    for stage in (backbone.layer1, backbone.layer2, backbone.layer3):
        for block in stage:
            deploy_if_possible(block)

    fine = matcher.fine_preprocess
    for head in (fine.layer2_outconv2, fine.layer1_outconv2):
        for layer in head:
            deploy_if_possible(layer)

    return matcher
Local Feature CNN + data.update({ + 'bs': data['image0'].size(0), + 'hw0_i': data['image0'].shape[2:], 'hw1_i': data['image1'].shape[2:] + }) + + if data['hw0_i'] == data['hw1_i']: # faster & better BN convergence + ret_dict = self.backbone(torch.cat([data['image0'], data['image1']], dim=0)) + feats_c = ret_dict['feats_c'] + data.update({ + 'feats_x2': ret_dict['feats_x2'], + 'feats_x1': ret_dict['feats_x1'], + }) + (feat_c0, feat_c1) = feats_c.split(data['bs']) + else: # handle different input shapes + ret_dict0, ret_dict1 = self.backbone(data['image0']), self.backbone(data['image1']) + feat_c0 = ret_dict0['feats_c'] + feat_c1 = ret_dict1['feats_c'] + data.update({ + 'feats_x2_0': ret_dict0['feats_x2'], + 'feats_x1_0': ret_dict0['feats_x1'], + 'feats_x2_1': ret_dict1['feats_x2'], + 'feats_x1_1': ret_dict1['feats_x1'], + }) + + + mul = self.config['resolution'][0] // self.config['resolution'][1] + data.update({ + 'hw0_c': feat_c0.shape[2:], 'hw1_c': feat_c1.shape[2:], + 'hw0_f': [feat_c0.shape[2] * mul, feat_c0.shape[3] * mul] , + 'hw1_f': [feat_c1.shape[2] * mul, feat_c1.shape[3] * mul] + }) + + # 2. coarse-level loftr module + mask_c0 = mask_c1 = None # mask is useful in training + if 'mask0' in data: + mask_c0, mask_c1 = data['mask0'], data['mask1'] + + feat_c0, feat_c1 = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1) + + feat_c0 = rearrange(feat_c0, 'n c h w -> n (h w) c') + feat_c1 = rearrange(feat_c1, 'n c h w -> n (h w) c') + + # detect NaN during mixed precision training + if self.config['replace_nan'] and (torch.any(torch.isnan(feat_c0)) or torch.any(torch.isnan(feat_c1))): + detect_NaN(feat_c0, feat_c1) + + # 3. 
def load_state_dict(self, state_dict, *args, **kwargs):
    """Load weights, accepting checkpoints whose keys carry a ``matcher.`` prefix.

    Keys starting with ``matcher.`` have that prefix removed before loading;
    all other keys are passed through unchanged. The caller's ``state_dict``
    is not modified.

    Args:
        state_dict: mapping of parameter names to tensors.
        *args, **kwargs: forwarded to ``nn.Module.load_state_dict``.

    Returns:
        Whatever ``nn.Module.load_state_dict`` returns.
    """
    prefix = 'matcher.'
    # Work on a shallow copy: popping and re-inserting keys in the caller's
    # dict silently rewrote an object this method does not own.
    renamed = state_dict.copy()
    for key in list(renamed.keys()):
        if key.startswith(prefix):
            renamed[key.replace(prefix, '', 1)] = renamed.pop(key)

    # OrderedDict.copy() drops instance attributes; keep the version metadata
    # that nn.Module.load_state_dict reads from `_metadata`.
    metadata = getattr(state_dict, '_metadata', None)
    if metadata is not None:
        renamed._metadata = metadata

    # The class derives directly from nn.Module, so this is the parent implementation.
    return nn.Module.load_state_dict(self, renamed, *args, **kwargs)
def conv1x1(in_planes, out_planes, stride=1):
    """Bias-free 1x1 convolution (no padding)."""
    layer = nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                      kernel_size=1, stride=stride, padding=0, bias=False)
    return layer


def conv3x3(in_planes, out_planes, stride=1):
    """Bias-free 3x3 convolution with one pixel of padding."""
    layer = nn.Conv2d(in_channels=in_planes, out_channels=out_planes,
                      kernel_size=3, stride=stride, padding=1, bias=False)
    return layer
data.update({'W': W}) + if data['b_ids'].shape[0] == 0: + feat0 = torch.empty(0, self.W**2, self.fine_d_model, device=feat_c0.device) + feat1 = torch.empty(0, self.W**2, self.fine_d_model, device=feat_c0.device) + return feat0, feat1 + + if data['hw0_i'] == data['hw1_i']: + feat_c = rearrange(torch.cat([feat_c0, feat_c1], 0), 'b (h w) c -> b c h w', h=data['hw0_c'][0]) # 1/8 feat + x2 = data['feats_x2'] # 1/4 feat + x1 = data['feats_x1'] # 1/2 feat + del data['feats_x2'], data['feats_x1'] + + # 1. fine feature extraction + x1 = self.inter_fpn(feat_c, x2, x1, stride) + feat_f0, feat_f1 = torch.chunk(x1, 2, dim=0) + + # 2. unfold(crop) all local windows + feat_f0 = F.unfold(feat_f0, kernel_size=(W, W), stride=stride, padding=0) + feat_f0 = rearrange(feat_f0, 'n (c ww) l -> n l ww c', ww=W**2) + feat_f1 = F.unfold(feat_f1, kernel_size=(W+2, W+2), stride=stride, padding=1) + feat_f1 = rearrange(feat_f1, 'n (c ww) l -> n l ww c', ww=(W+2)**2) + + # 3. select only the predicted matches + feat_f0 = feat_f0[data['b_ids'], data['i_ids']] # [n, ww, cf] + feat_f1 = feat_f1[data['b_ids'], data['j_ids']] + + return feat_f0, feat_f1 + else: # handle different input shapes + feat_c0, feat_c1 = rearrange(feat_c0, 'b (h w) c -> b c h w', h=data['hw0_c'][0]), rearrange(feat_c1, 'b (h w) c -> b c h w', h=data['hw1_c'][0]) # 1/8 feat + x2_0, x2_1 = data['feats_x2_0'], data['feats_x2_1'] # 1/4 feat + x1_0, x1_1 = data['feats_x1_0'], data['feats_x1_1'] # 1/2 feat + del data['feats_x2_0'], data['feats_x1_0'], data['feats_x2_1'], data['feats_x1_1'] + + # 1. fine feature extraction + feat_f0, feat_f1 = self.inter_fpn(feat_c0, x2_0, x1_0, stride), self.inter_fpn(feat_c1, x2_1, x1_1, stride) + + # 2. 
def crop_feature(query, key, value, x_mask, source_mask):
    """Crop query/key/value to the valid (unpadded) top-left region of their masks.

    The valid height and width are read from the first batch element of each
    mask: the sum along column 0 gives the height, the sum along row 0 gives
    the width.

    Returns:
        ``(query, key, value, mask_h0, mask_w0)`` where ``mask_h0`` /
        ``mask_w0`` are the valid height / width of ``x_mask``.
    """
    mask_h0, mask_w0 = x_mask[0].sum(-2)[0], x_mask[0].sum(-1)[0]
    mask_h1, mask_w1 = source_mask[0].sum(-2)[0], source_mask[0].sum(-1)[0]
    query = query[:, :mask_h0, :mask_w0, :]
    key = key[:, :mask_h1, :mask_w1, :]
    value = value[:, :mask_h1, :mask_w1, :]
    return query, key, value, mask_h0, mask_w0

def pad_feature(m, mask_h0, mask_w0, x_mask):
    """Zero-pad a cropped attention output back to the full mask resolution.

    Args:
        m: tensor of size ``(bs, L, H, D)`` with ``L == mask_h0 * mask_w0``.
        mask_h0, mask_w0: height / width of the cropped region.
        x_mask: mask whose last two dimensions give the full height / width.

    Returns:
        Tensor of size ``(bs, full_h, full_w, H, D)`` holding ``m`` in its
        top-left corner and zeros elsewhere.
    """
    bs, L, H, D = m.size()
    mask_h0, mask_w0 = int(mask_h0), int(mask_w0)
    full_h, full_w = x_mask.size(-2), x_mask.size(-1)
    m = m.view(bs, mask_h0, mask_w0, H, D)
    # The two directions are padded independently. The former if/elif padded
    # at most one of them and built the height padding with the full width,
    # so torch.cat failed whenever both dimensions had been cropped.
    if mask_h0 != full_h:
        m = torch.cat([m, m.new_zeros(bs, full_h - mask_h0, mask_w0, H, D)], dim=1)
    if mask_w0 != full_w:
        m = torch.cat([m, m.new_zeros(bs, full_h, full_w - mask_w0, H, D)], dim=2)
    return m
def _forward(self, query, key, value, q_mask=None, kv_mask=None):
    """Attend over one batch, cropping to the valid region when masks are given.

    With a mask, query/key/value are cropped first and the result is
    zero-padded back to the mask resolution afterwards. The attention itself
    is always called without masks.
    """
    masked = q_mask is not None
    if masked:
        query, key, value, mask_h0, mask_w0 = crop_feature(query, key, value, q_mask, kv_mask)

    # The fused attention path expects heads before the sequence axis.
    if self.flash:
        split_heads = 'n h w (nhead d) -> n nhead (h w) d'
    else:
        split_heads = 'n h w (nhead d) -> n (h w) nhead d'
    query, key, value = [
        rearrange(t, split_heads, nhead=self.nhead, d=self.dim)
        for t in (query, key, value)
    ]

    m = self.attention(query, key, value, q_mask=None, kv_mask=None)

    if self.flash:
        m = rearrange(m, 'n nhead L d -> n L nhead d', nhead=self.nhead, d=self.dim)

    if masked:
        m = pad_feature(m, mask_h0, mask_w0, q_mask)

    return m
+ Args: + if FLASH_AVAILABLE: # pytorch scaled_dot_product_attention + queries: [N, H, L, D] + keys: [N, H, S, D] + values: [N, H, S, D] + else: + queries: [N, L, H, D] + keys: [N, S, H, D] + values: [N, S, H, D] + q_mask: [N, L] + kv_mask: [N, S] + Returns: + queried_values: (N, L, H, D) + """ + bs = query.size(0) + if bs == 1 or q_mask is None: + m = self._forward(query, key, value, q_mask=q_mask, kv_mask=kv_mask) + else: # for faster trainning with padding mask while batch size > 1 + m_list = [] + for i in range(bs): + m_list.append(self._forward(query[i:i+1], key[i:i+1], value[i:i+1], q_mask=q_mask[i:i+1], kv_mask=kv_mask[i:i+1])) + m = torch.cat(m_list, dim=0) + return m \ No newline at end of file diff --git a/third_party/EfficientLoFTR/src/loftr/loftr_module/transformer.py b/third_party/EfficientLoFTR/src/loftr/loftr_module/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..fd6eaeda7a4ac360c812d07c5c5757717bb39f3e --- /dev/null +++ b/third_party/EfficientLoFTR/src/loftr/loftr_module/transformer.py @@ -0,0 +1,164 @@ +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +from .linear_attention import Attention, crop_feature, pad_feature +from einops.einops import rearrange +from collections import OrderedDict +from ..utils.position_encoding import RoPEPositionEncodingSine +import numpy as np +from loguru import logger + +class AG_RoPE_EncoderLayer(nn.Module): + def __init__(self, + d_model, + nhead, + agg_size0=4, + agg_size1=4, + no_flash=False, + rope=False, + npe=None, + fp32=False, + ): + super(AG_RoPE_EncoderLayer, self).__init__() + + self.dim = d_model // nhead + self.nhead = nhead + self.agg_size0, self.agg_size1 = agg_size0, agg_size1 + self.rope = rope + + # aggregate and position encoding + self.aggregate = nn.Conv2d(d_model, d_model, kernel_size=agg_size0, padding=0, stride=agg_size0, bias=False, groups=d_model) if self.agg_size0 != 1 else nn.Identity() + self.max_pool = 
torch.nn.MaxPool2d(kernel_size=self.agg_size1, stride=self.agg_size1) if self.agg_size1 != 1 else nn.Identity() + self.rope_pos_enc = RoPEPositionEncodingSine(d_model, max_shape=(256, 256), npe=npe, ropefp16=True) + + # multi-head attention + self.q_proj = nn.Linear(d_model, d_model, bias=False) + self.k_proj = nn.Linear(d_model, d_model, bias=False) + self.v_proj = nn.Linear(d_model, d_model, bias=False) + self.attention = Attention(no_flash, self.nhead, self.dim, fp32) + self.merge = nn.Linear(d_model, d_model, bias=False) + + # feed-forward network + self.mlp = nn.Sequential( + nn.Linear(d_model*2, d_model*2, bias=False), + nn.LeakyReLU(inplace = True), + nn.Linear(d_model*2, d_model, bias=False), + ) + + # norm + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + + def forward(self, x, source, x_mask=None, source_mask=None): + """ + Args: + x (torch.Tensor): [N, C, H0, W0] + source (torch.Tensor): [N, C, H1, W1] + x_mask (torch.Tensor): [N, H0, W0] (optional) (L = H0*W0) + source_mask (torch.Tensor): [N, H1, W1] (optional) (S = H1*W1) + """ + bs, C, H0, W0 = x.size() + H1, W1 = source.size(-2), source.size(-1) + + # Aggragate feature + assert x_mask is None and source_mask is None + query, source = self.norm1(self.aggregate(x).permute(0,2,3,1)), self.norm1(self.max_pool(source).permute(0,2,3,1)) # [N, H, W, C] + if x_mask is not None: + x_mask, source_mask = map(lambda x: self.max_pool(x.float()).bool(), [x_mask, source_mask]) + query, key, value = self.q_proj(query), self.k_proj(source), self.v_proj(source) + + # Positional encoding + if self.rope: + query = self.rope_pos_enc(query) + key = self.rope_pos_enc(key) + + # multi-head attention handle padding mask + m = self.attention(query, key, value, q_mask=x_mask, kv_mask=source_mask) + m = self.merge(m.reshape(bs, -1, self.nhead*self.dim)) # [N, L, C] + + # Upsample feature + m = rearrange(m, 'b (h w) c -> b c h w', h=H0 // self.agg_size0, w=W0 // self.agg_size0) # [N, C, H0, W0] + if 
class LocalFeatureTransformer(nn.Module):
    """A Local Feature Transformer (LoFTR) module.

    Stacks ``AG_RoPE_EncoderLayer`` layers in the order given by
    ``config['coarse']['layer_names']``; each name is ``'self'`` or ``'cross'``.
    """

    def __init__(self, config):
        super(LocalFeatureTransformer, self).__init__()

        self.full_config = config
        # Full precision unless mixed precision or half precision is requested.
        self.fp32 = not (config['mp'] or config['half'])
        config = config['coarse']
        self.d_model = config['d_model']
        self.nhead = config['nhead']
        self.layer_names = config['layer_names']
        self.agg_size0, self.agg_size1 = config['agg_size0'], config['agg_size1']
        self.rope = config['rope']

        self_layer = AG_RoPE_EncoderLayer(config['d_model'], config['nhead'], config['agg_size0'], config['agg_size1'],
                                          config['no_flash'], config['rope'], config['npe'], self.fp32)
        # Cross-attention layers are built with the rope flag set to False.
        cross_layer = AG_RoPE_EncoderLayer(config['d_model'], config['nhead'], config['agg_size0'], config['agg_size1'],
                                           config['no_flash'], False, config['npe'], self.fp32)
        self.layers = nn.ModuleList([copy.deepcopy(self_layer) if _ == 'self' else copy.deepcopy(cross_layer) for _ in self.layer_names])
        self._reset_parameters()

    def _reset_parameters(self):
        """Xavier-initialise every parameter with more than one dimension."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    @staticmethod
    def _pad_bottom_right(feat, height, width):
        """Zero-pad the last two dimensions of ``feat`` up to ``(height, width)``."""
        pad_h = height - feat.size(-2)
        pad_w = width - feat.size(-1)
        if pad_h or pad_w:
            # F.pad order: (left, right) of the last dim, then (top, bottom).
            feat = F.pad(feat, (0, pad_w, 0, pad_h))
        return feat

    def forward(self, feat0, feat1, mask0=None, mask1=None, data=None):
        """
        Args:
            feat0 (torch.Tensor): [N, C, H, W]
            feat1 (torch.Tensor): [N, C, H, W]
            mask0 (torch.Tensor): optional padding mask; its last two
                dimensions are read as height and width
            mask1 (torch.Tensor): optional padding mask, same layout
            data: unused
        Returns:
            (feat0, feat1) after all layers, at the input resolution.
        Raises:
            KeyError: if a layer name is neither 'self' nor 'cross'.
        """
        bs = feat0.shape[0]

        feature_cropped = False
        if bs == 1 and mask0 is not None and mask1 is not None:
            # Single-pair fast path: crop to the valid region instead of masking.
            mask_H0, mask_W0, mask_H1, mask_W1 = mask0.size(-2), mask0.size(-1), mask1.size(-2), mask1.size(-1)
            mask_h0, mask_w0, mask_h1, mask_w1 = mask0[0].sum(-2)[0], mask0[0].sum(-1)[0], mask1[0].sum(-2)[0], mask1[0].sum(-1)[0]
            # Round the valid size down to a multiple of the aggregation size.
            mask_h0, mask_w0, mask_h1, mask_w1 = mask_h0//self.agg_size0*self.agg_size0, mask_w0//self.agg_size0*self.agg_size0, mask_h1//self.agg_size1*self.agg_size1, mask_w1//self.agg_size1*self.agg_size1
            feat0 = feat0[:, :, :mask_h0, :mask_w0]
            feat1 = feat1[:, :, :mask_h1, :mask_w1]
            feature_cropped = True
            # The layers run on the cropped features and need no mask.
            mask0, mask1 = None, None

        for layer, name in zip(self.layers, self.layer_names):
            if name == 'self':
                feat0 = layer(feat0, feat0, mask0, mask0)
                feat1 = layer(feat1, feat1, mask1, mask1)
            elif name == 'cross':
                feat0 = layer(feat0, feat1, mask0, mask1)
                feat1 = layer(feat1, feat0, mask1, mask0)
            else:
                raise KeyError

        if feature_cropped:
            # Restore the original resolution. Height and width are padded
            # independently: the rounding above can shrink both, which the
            # former if/elif padding could not undo (torch.cat shape mismatch).
            feat0 = self._pad_bottom_right(feat0, mask_H0, mask_W0)
            feat1 = self._pad_bottom_right(feat1, mask_H1, mask_W1)

        return feat0, feat1
# --- third_party/EfficientLoFTR/src/loftr/utils/coarse_matching.py ---
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

INF = 1e9  # fill value used to suppress masked similarities on the fp32 path


def mask_border(m, b: int, v):
    """Overwrite a border of width ``b`` with ``v`` on the four spatial axes, in place.

    Args:
        m (torch.Tensor): [N, H0, W0, H1, W1]
        b (int): border width; nothing happens when ``b <= 0``
        v (m.dtype): value written into the border
    """
    if b <= 0:
        return

    # leading border of each spatial axis
    m[:, :b] = v
    m[:, :, :b] = v
    m[:, :, :, :b] = v
    m[:, :, :, :, :b] = v
    # trailing border of each spatial axis
    m[:, -b:] = v
    m[:, :, -b:] = v
    m[:, :, :, -b:] = v
    m[:, :, :, :, -b:] = v


def mask_border_with_padding(m, bd, v, p_m0, p_m1):
    """Like :func:`mask_border`, but the trailing border follows each sample's valid extent.

    The valid height/width of every sample is derived from the padding masks
    ``p_m0`` / ``p_m1`` (largest column sum / row sum), and everything from
    ``extent - bd`` onwards is overwritten with ``v``. ``m`` is modified in place.

    Args:
        m (torch.Tensor): [N, H0, W0, H1, W1]
        bd (int): border width; nothing happens when ``bd <= 0``
        v (m.dtype): value written into the border
        p_m0, p_m1 (torch.Tensor): padding masks, indexed as [N, H, W]
    """
    if bd <= 0:
        return

    m[:, :bd] = v
    m[:, :, :bd] = v
    m[:, :, :, :bd] = v
    m[:, :, :, :, :bd] = v

    h0s, w0s = p_m0.sum(1).max(-1)[0].int(), p_m0.sum(-1).max(-1)[0].int()
    h1s, w1s = p_m1.sum(1).max(-1)[0].int(), p_m1.sum(-1).max(-1)[0].int()
    for b_idx, (h0, w0, h1, w1) in enumerate(zip(h0s, w0s, h1s, w1s)):
        m[b_idx, h0 - bd:] = v
        m[b_idx, :, w0 - bd:] = v
        m[b_idx, :, :, h1 - bd:] = v
        m[b_idx, :, :, :, w1 - bd:] = v


def compute_max_candidates(p_m0, p_m1):
    """Compute the max candidates of all pairs within a batch.

    For every pair the candidate count is ``min(h0 * w0, h1 * w1)`` of the valid
    extents read from the padding masks; the counts are summed over the batch.

    Args:
        p_m0, p_m1 (torch.Tensor): padded masks
    Returns:
        torch.Tensor: scalar tensor holding the summed candidate count
    """
    h0s, w0s = p_m0.sum(1).max(-1)[0], p_m0.sum(-1).max(-1)[0]
    h1s, w1s = p_m1.sum(1).max(-1)[0], p_m1.sum(-1).max(-1)[0]
    per_pair = torch.min(torch.stack([h0s * w0s, h1s * w1s], -1), -1)[0]
    return torch.sum(per_pair)


class CoarseMatching(nn.Module):
    """Coarse-level matching: similarity matrix -> mutual-nearest matches above a threshold."""

    def __init__(self, config):
        """Read the coarse-matching settings from ``config`` (a dict-like with the keys below)."""
        super().__init__()
        self.config = config
        # general config
        self.thr = config['thr']
        self.border_rm = config['border_rm']
        self.temperature = config['dsmax_temperature']
        self.skip_softmax = config['skip_softmax']
        self.fp16matmul = config['fp16matmul']
        # -- # for training fine-level LoFTR
        self.train_coarse_percent = config['train_coarse_percent']
        self.train_pad_num_gt_min = config['train_pad_num_gt_min']

    def forward(self, feat_c0, feat_c1, data, mask_c0=None, mask_c1=None):
        """
        Args:
            feat0 (torch.Tensor): [N, L, C]
            feat1 (torch.Tensor): [N, S, C]
            data (dict)
            mask_c0 (torch.Tensor): [N, L] (optional)
            mask_c1 (torch.Tensor): [N, S] (optional)
        Update:
            data (dict): {
                'conf_matrix' (torch.Tensor): [N, L, S],
                'b_ids' (torch.Tensor): [M'],
                'i_ids' (torch.Tensor): [M'],
                'j_ids' (torch.Tensor): [M'],
                'm_bids' (torch.Tensor): [M],
                'mkpts0_c' (torch.Tensor): [M, 2],
                'mkpts1_c' (torch.Tensor): [M, 2],
                'mconf' (torch.Tensor): [M]}
            NOTE: M' != M during training.
        """
        # normalize by sqrt(C)
        feat_c0, feat_c1 = (feat / feat.shape[-1] ** .5 for feat in (feat_c0, feat_c1))

        if self.fp16matmul:
            sim_matrix = torch.einsum("nlc,nsc->nls", feat_c0, feat_c1) / self.temperature
            del feat_c0, feat_c1
            if mask_c0 is not None:
                # -1e4 rather than -INF: the latter is not representable in fp16
                sim_matrix = sim_matrix.masked_fill(
                    ~(mask_c0[..., None] * mask_c1[:, None]).bool(), -1e4)
        else:
            # keep the matmul out of autocast on this path
            with torch.autocast(enabled=False, device_type='cuda'):
                sim_matrix = torch.einsum("nlc,nsc->nls", feat_c0, feat_c1) / self.temperature
                del feat_c0, feat_c1
                if mask_c0 is not None:
                    sim_matrix = sim_matrix.float().masked_fill(
                        ~(mask_c0[..., None] * mask_c1[:, None]).bool(), -INF)

        if not self.skip_softmax:
            # dual-softmax over both matching directions
            sim_matrix = F.softmax(sim_matrix, 1) * F.softmax(sim_matrix, 2)

        data.update({'conf_matrix': sim_matrix})

        # predict coarse matches from conf_matrix
        data.update(**self.get_coarse_match(sim_matrix, data))

    @torch.no_grad()
    def get_coarse_match(self, conf_matrix, data):
        """
        Args:
            conf_matrix (torch.Tensor): [N, L, S]
            data (dict): with keys ['hw0_i', 'hw1_i', 'hw0_c', 'hw1_c']
        Returns:
            coarse_matches (dict): {
                'b_ids' (torch.Tensor): [M'],
                'i_ids' (torch.Tensor): [M'],
                'j_ids' (torch.Tensor): [M'],
                'm_bids' (torch.Tensor): [M],
                'mkpts0_c' (torch.Tensor): [M, 2],
                'mkpts1_c' (torch.Tensor): [M, 2],
                'mconf' (torch.Tensor): [M]}
        """
        h0c, w0c = data['hw0_c'][0], data['hw0_c'][1]
        h1c, w1c = data['hw1_c'][0], data['hw1_c'][1]
        _device = conf_matrix.device

        # 1. confidence thresholding
        mask = conf_matrix > self.thr
        # 5-D view sharing storage with `mask`: the border helpers write in place,
        # so the flat [N, L, S] mask sees the cleared borders directly.
        mask_5d = mask.view(mask.size(0), h0c, w0c, h1c, w1c)
        if 'mask0' not in data:
            mask_border(mask_5d, self.border_rm, False)
        else:
            mask_border_with_padding(mask_5d, self.border_rm, False,
                                     data['mask0'], data['mask1'])

        # 2. mutual nearest
        mask = mask \
            * (conf_matrix == conf_matrix.max(dim=2, keepdim=True)[0]) \
            * (conf_matrix == conf_matrix.max(dim=1, keepdim=True)[0])

        # 3. find all valid coarse matches
        # this only works when at most one `True` in each row
        mask_v, all_j_ids = mask.max(dim=2)
        b_ids, i_ids = torch.where(mask_v)
        j_ids = all_j_ids[b_ids, i_ids]
        mconf = conf_matrix[b_ids, i_ids, j_ids]

        # 4. Random sampling of training samples for fine-level LoFTR
        # (optional) pad samples with gt coarse-level matches
        if self.training:
            # NOTE:
            # The sampling is performed across all pairs in a batch without manually balancing
            # #samples for fine-level increases w.r.t. batch_size
            if 'mask0' not in data:
                num_candidates_max = mask.size(0) * max(mask.size(1), mask.size(2))
            else:
                num_candidates_max = compute_max_candidates(data['mask0'], data['mask1'])
            num_matches_train = int(num_candidates_max * self.train_coarse_percent)
            num_matches_pred = len(b_ids)
            assert self.train_pad_num_gt_min < num_matches_train, "min-num-gt-pad should be less than num-train-matches"

            # pred_indices is to select from prediction
            if num_matches_pred <= num_matches_train - self.train_pad_num_gt_min:
                pred_indices = torch.arange(num_matches_pred, device=_device)
            else:
                pred_indices = torch.randint(
                    num_matches_pred,
                    (num_matches_train - self.train_pad_num_gt_min, ),
                    device=_device)

            # gt_pad_indices is to select from gt padding. e.g. max(3787-4800, 200)
            gt_pad_indices = torch.randint(
                len(data['spv_b_ids']),
                (max(num_matches_train - num_matches_pred,
                     self.train_pad_num_gt_min), ),
                device=_device)
            mconf_gt = torch.zeros(len(data['spv_b_ids']), device=_device)  # set conf of gt paddings to all zero

            def _pred_plus_gt(pred, gt):
                # sampled predictions followed by the sampled ground-truth padding
                return torch.cat([pred[pred_indices], gt[gt_pad_indices]], dim=0)

            b_ids = _pred_plus_gt(b_ids, data['spv_b_ids'])
            i_ids = _pred_plus_gt(i_ids, data['spv_i_ids'])
            j_ids = _pred_plus_gt(j_ids, data['spv_j_ids'])
            mconf = _pred_plus_gt(mconf, mconf_gt)

        # These matches select patches that feed into fine-level network
        coarse_matches = {'b_ids': b_ids, 'i_ids': i_ids, 'j_ids': j_ids}

        # 5. Update with matches in original image resolution
        scale = data['hw0_i'][0] / data['hw0_c'][0]
        scale0 = scale * data['scale0'][b_ids] if 'scale0' in data else scale
        scale1 = scale * data['scale1'][b_ids] if 'scale1' in data else scale
        # flat index -> (x, y) on the coarse grid, then up to image resolution
        mkpts0_c = torch.stack(
            [i_ids % data['hw0_c'][1], i_ids // data['hw0_c'][1]],
            dim=1) * scale0
        mkpts1_c = torch.stack(
            [j_ids % data['hw1_c'][1], j_ids // data['hw1_c'][1]],
            dim=1) * scale1

        predicted = mconf != 0  # mconf == 0 => gt matches
        # These matches are the current prediction (for visualization)
        coarse_matches.update({
            'm_bids': b_ids[predicted],
            'mkpts0_c': mkpts0_c[predicted],
            'mkpts1_c': mkpts1_c[predicted],
            'mconf': mconf[predicted]
        })

        return coarse_matches


# --- third_party/EfficientLoFTR/src/loftr/utils/fine_matching.py ---
class FineMatching(nn.Module):
    """FineMatching with s2d paradigm"""

    def __init__(self, config):
        """Read the fine-matching settings from the full model ``config``."""
        super().__init__()
        self.config = config
        self.local_regress_temperature = config['match_fine']['local_regress_temperature']
        self.local_regress_slicedim = config['match_fine']['local_regress_slicedim']
        self.fp16 = config['half']

    def forward(self, feat_0, feat_1, data):
        """
        Args:
            feat0 (torch.Tensor): [M, WW, C]
            feat1 (torch.Tensor): [M, WW, C]
            data (dict)
        Update:
            data (dict):{
                'expec_f' (torch.Tensor): [M, 3],
                'mkpts0_f' (torch.Tensor): [M, 2],
                'mkpts1_f' (torch.Tensor): [M, 2]}
        """
        M, WW, C = feat_0.shape
        W = int(math.sqrt(WW))
        scale = data['hw0_i'][0] / data['hw0_f'][0]
        self.M, self.W, self.WW, self.C, self.scale = M, W, WW, C, scale

        # corner case: if no coarse matches found
        if M == 0:
            assert not self.training, "M is always > 0 while training, see coarse_matching.py"
            data.update({
                'conf_matrix_f': torch.empty(0, WW, WW, device=feat_0.device),
                'mkpts0_f': data['mkpts0_c'],
                'mkpts1_f': data['mkpts1_c'],
            })
            return

        # compute pixel-level confidence matrix
        slicedim = self.local_regress_slicedim
        # NOTE(review): extent of this autocast block was reconstructed from
        # whitespace-collapsed source - confirm it ends after the two einsums.
        with torch.autocast(enabled=True, device_type='cuda'):
            # the last `slicedim` channels feed the second-stage regression
            feat_f0, feat_f1 = feat_0[..., :-slicedim], feat_1[..., :-slicedim]
            feat_ff0, feat_ff1 = feat_0[..., -slicedim:], feat_1[..., -slicedim:]
            feat_f0, feat_f1 = feat_f0 / C**.5, feat_f1 / C**.5
            conf_matrix_f = torch.einsum('mlc,mrc->mlr', feat_f0, feat_f1)
            conf_matrix_ff = torch.einsum('mlc,mrc->mlr', feat_ff0, feat_ff1 / slicedim**.5)

        softmax_matrix_f = F.softmax(conf_matrix_f, 1) * F.softmax(conf_matrix_f, 2)
        # drop the one-pixel rim of the (W+2) x (W+2) right-hand window
        softmax_matrix_f = softmax_matrix_f.reshape(M, self.WW, self.W+2, self.W+2)
        softmax_matrix_f = softmax_matrix_f[..., 1:-1, 1:-1].reshape(M, self.WW, self.WW)

        # for fine-level supervision
        # NOTE(review): nesting reconstructed from whitespace-collapsed source -
        # confirm that 'conf_matrix_f' is only stored while training.
        if self.training:
            data.update({'sim_matrix_ff': conf_matrix_ff})
            data.update({'conf_matrix_f': softmax_matrix_f})

        # compute pixel-level absolute kpt coords
        self.get_fine_ds_match(softmax_matrix_f, data)

        # kornia is only needed once there are matches to refine
        from kornia.geometry.subpix import dsnt
        from kornia.utils.grid import create_meshgrid

        # generate second-stage 3x3 grid
        idx_l, idx_r = data['idx_l'], data['idx_r']
        m_ids = torch.arange(M, device=idx_l.device, dtype=torch.long).unsqueeze(-1)
        m_ids = m_ids[:len(data['mconf'])]
        idx_r_iids, idx_r_jids = idx_r // W, idx_r % W

        m_ids, idx_l = m_ids.reshape(-1), idx_l.reshape(-1)
        idx_r_iids, idx_r_jids = idx_r_iids.reshape(-1), idx_r_jids.reshape(-1)
        delta = create_meshgrid(3, 3, True, conf_matrix_ff.device).to(torch.long)  # [1, 3, 3, 2]

        m_ids = m_ids[..., None, None].expand(-1, 3, 3)
        idx_l = idx_l[..., None, None].expand(-1, 3, 3)  # [m, k, 3, 3]

        idx_r_iids = idx_r_iids[..., None, None].expand(-1, 3, 3) + delta[None, ..., 1]
        idx_r_jids = idx_r_jids[..., None, None].expand(-1, 3, 3) + delta[None, ..., 0]

        if idx_l.numel() == 0:
            data.update({
                'mkpts0_f': data['mkpts0_c'],
                'mkpts1_f': data['mkpts1_c'],
            })
            return

        # compute second-stage heatmap
        conf_matrix_ff = conf_matrix_ff.reshape(M, self.WW, self.W+2, self.W+2)
        conf_matrix_ff = conf_matrix_ff[m_ids, idx_l, idx_r_iids, idx_r_jids]
        conf_matrix_ff = conf_matrix_ff.reshape(-1, 9)
        conf_matrix_ff = F.softmax(conf_matrix_ff / self.local_regress_temperature, -1)
        heatmap = conf_matrix_ff.reshape(-1, 3, 3)

        # compute coordinates from heatmap
        coords_normalized = dsnt.spatial_expectation2d(heatmap[None], True)[0]

        # image-1 rescaling is keyed on 'scale1' (the key that is actually read)
        if 'scale1' not in data:
            scale1 = scale
        elif data['bs'] == 1:
            scale1 = scale * data['scale1']
        else:
            scale1 = scale * data['scale1'][data['b_ids']][:len(data['mconf']), ...][:, None, :].expand(-1, -1, 2).reshape(-1, 2)

        # compute subpixel-level absolute kpt coords
        self.get_fine_match_local(coords_normalized, data, scale1)

    def get_fine_match_local(self, coords_normed, data, scale1):
        """Store the final matches: image-0 points are kept, image-1 points get the local offset.

        Args:
            coords_normed (torch.Tensor): offsets from the 3x3 heatmap, as returned by
                ``dsnt.spatial_expectation2d`` with normalized coordinates
            data (dict): must hold 'mkpts0_c' and 'mkpts1_c'
            scale1: scale applied to the offset (float or tensor)
        Update:
            data (dict): {'mkpts0_f', 'mkpts1_f'}
        """
        mkpts0_c, mkpts1_c = data['mkpts0_c'], data['mkpts1_c']

        # 3 // 2 is the half-width of the 3x3 window
        data.update({
            "mkpts0_f": mkpts0_c,
            "mkpts1_f": mkpts1_c + (coords_normed * (3 // 2) * scale1)
        })

    @torch.no_grad()
    def get_fine_ds_match(self, conf_matrix, data):
        """Pick the best (left, right) cell pair per match and move the coarse points onto it.

        Args:
            conf_matrix (torch.Tensor): [M, WW, WW]
            data (dict): must hold 'mconf', 'mkpts0_c', 'mkpts1_c' (and 'b_ids' when
                'scale0' / 'scale1' are given)
        Update:
            data (dict): {'idx_l', 'idx_r', 'mkpts0_c', 'mkpts1_c'} - the coarse
            keypoint entries are overwritten with the refined positions.
        """
        # kornia is only needed on this code path
        from kornia.utils.grid import create_meshgrid

        W, WW, scale = self.W, self.WW, self.scale
        m, _, _ = conf_matrix.shape

        conf_matrix = conf_matrix.reshape(m, -1)[:len(data['mconf']), ...]
        _, idx = torch.max(conf_matrix, dim=-1)
        idx = idx[:, None]
        idx_l, idx_r = idx // WW, idx % WW

        data.update({'idx_l': idx_l, 'idx_r': idx_r})

        if self.fp16:
            grid = create_meshgrid(W, W, False, conf_matrix.device, dtype=torch.float16) - W // 2 + 0.5  # kornia >= 0.5.1
        else:
            grid = create_meshgrid(W, W, False, conf_matrix.device) - W // 2 + 0.5
        grid = grid.reshape(1, -1, 2).expand(m, -1, -1)
        delta_l = torch.gather(grid, 1, idx_l.unsqueeze(-1).expand(-1, -1, 2))
        delta_r = torch.gather(grid, 1, idx_r.unsqueeze(-1).expand(-1, -1, 2))

        scale0 = scale * data['scale0'][data['b_ids']] if 'scale0' in data else scale
        # keyed on 'scale1' itself, not on 'scale0'
        scale1 = scale * data['scale1'][data['b_ids']] if 'scale1' in data else scale

        n_pred = len(data['mconf'])
        if torch.is_tensor(scale0) and scale0.numel() > 1:  # per-match tensor scale
            scale0 = scale0[:n_pred, ...][:, None, :]
        if torch.is_tensor(scale1) and scale1.numel() > 1:
            scale1 = scale1[:n_pred, ...][:, None, :]
        mkpts0_f = (data['mkpts0_c'][:, None, :] + (delta_l * scale0)).reshape(-1, 2)
        mkpts1_f = (data['mkpts1_c'][:, None, :] + (delta_r * scale1)).reshape(-1, 2)

        data.update({
            "mkpts0_c": mkpts0_f,
            "mkpts1_c": mkpts1_f
        })
# --- third_party/EfficientLoFTR/src/loftr/utils/full_config.py ---
from yacs.config import CfgNode as CN


def lower_config(yacs_cfg):
    """Recursively turn a yacs ``CfgNode`` into a plain dict with lower-cased keys.

    Anything that is not a ``CfgNode`` (leaf values) is returned unchanged.
    """
    if isinstance(yacs_cfg, CN):
        return {key.lower(): lower_config(value) for key, value in yacs_cfg.items()}
    return yacs_cfg


# Defaults of the "full" model variant.
_CN = CN()
_CN.BACKBONE_TYPE = 'RepVGG'
_CN.ALIGN_CORNER = False
_CN.RESOLUTION = (8, 1)
_CN.FINE_WINDOW_SIZE = 8  # window_size in fine_level, must be even
_CN.MP = False
_CN.REPLACE_NAN = True
_CN.HALF = False

# 1. LoFTR-backbone (local feature CNN) config
_CN.BACKBONE = CN()
_CN.BACKBONE.BLOCK_DIMS = [64, 128, 256]  # s1, s2, s3

# 2. LoFTR-coarse module config
_CN.COARSE = CN()
_CN.COARSE.D_MODEL = 256
_CN.COARSE.D_FFN = 256
_CN.COARSE.NHEAD = 8
_CN.COARSE.LAYER_NAMES = ['self', 'cross'] * 4
_CN.COARSE.AGG_SIZE0 = 4
_CN.COARSE.AGG_SIZE1 = 4
_CN.COARSE.NO_FLASH = False
_CN.COARSE.ROPE = True
_CN.COARSE.NPE = [832, 832, 832, 832]  # [832, 832, long_side, long_side] Suggest setting based on the long side of the input image, especially when the long_side > 832

# 3. Coarse-Matching config
_CN.MATCH_COARSE = CN()
_CN.MATCH_COARSE.THR = 0.2  # recommend 0.2 for full model and 25 for optimized model
_CN.MATCH_COARSE.BORDER_RM = 2
_CN.MATCH_COARSE.DSMAX_TEMPERATURE = 0.1
_CN.MATCH_COARSE.SKIP_SOFTMAX = False  # False for full model and True for optimized model
_CN.MATCH_COARSE.FP16MATMUL = False  # False for full model and True for optimized model
_CN.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.2  # training tricks: save GPU memory
_CN.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200  # training tricks: avoid DDP deadlock

# 4.
# Fine-Matching config
_CN.MATCH_FINE = CN()
_CN.MATCH_FINE.LOCAL_REGRESS_TEMPERATURE = 10.0  # use 10.0 as fine local regress temperature, not 1.0
_CN.MATCH_FINE.LOCAL_REGRESS_SLICEDIM = 8

full_default_cfg = lower_config(_CN)


# --- third_party/EfficientLoFTR/src/loftr/utils/geometry.py ---
import torch


@torch.no_grad()
def warp_kpts(kpts0, depth0, depth1, T_0to1, K0, K1):
    """Warp kpts0 from I0 to I1 with depth, K and Rt.

    Also checks covisibility and depth consistency.
    Depth is consistent if relative error < 0.2 (hard-coded).

    Args:
        kpts0 (torch.Tensor): [N, L, 2] - <x, y>,
        depth0 (torch.Tensor): [N, H, W],
        depth1 (torch.Tensor): [N, H, W],
        T_0to1 (torch.Tensor): [N, 3, 4],
        K0 (torch.Tensor): [N, 3, 3],
        K1 (torch.Tensor): [N, 3, 3],
    Returns:
        calculable_mask (torch.Tensor): [N, L]
        warped_keypoints0 (torch.Tensor): [N, L, 2] <x0_hat, y1_hat>
    """
    pixels0 = kpts0.round().long()

    # Sample depth at the rounded keypoints; zero depth means "not calculable"
    depth_at_kpts0 = torch.stack(
        [depth0[b, pixels0[b, :, 1], pixels0[b, :, 0]] for b in range(kpts0.shape[0])],
        dim=0,
    )  # (N, L)
    nonzero_mask = depth_at_kpts0 != 0

    # Unproject into camera-0 coordinates
    homogeneous = torch.cat([kpts0, torch.ones_like(kpts0[:, :, [0]])], dim=-1)
    kpts0_h = homogeneous * depth_at_kpts0[..., None]  # (N, L, 3)
    kpts0_cam = K0.inverse() @ kpts0_h.transpose(2, 1)  # (N, 3, L)

    # Rigid transform into camera 1
    w_kpts0_cam = T_0to1[:, :3, :3] @ kpts0_cam + T_0to1[:, :3, [3]]  # (N, 3, L)
    depth_computed = w_kpts0_cam[:, 2, :]

    # Project into image 1
    w_kpts0_h = (K1 @ w_kpts0_cam).transpose(2, 1)  # (N, L, 3)
    w_kpts0 = w_kpts0_h[:, :, :2] / (w_kpts0_h[:, :, [2]] + 1e-4)  # (N, L, 2), +1e-4 to avoid zero depth

    # Covisibility: strictly inside image 1
    h, w = depth1.shape[1:3]
    xs, ys = w_kpts0[:, :, 0], w_kpts0[:, :, 1]
    covisible_mask = (xs > 0) * (xs < w-1) * (ys > 0) * (ys < h-1)
    pixels1 = w_kpts0.long()
    pixels1[~covisible_mask, :] = 0  # safe index for points that fall outside

    depth_at_warped = torch.stack(
        [depth1[b, pixels1[b, :, 1], pixels1[b, :, 0]] for b in range(pixels1.shape[0])],
        dim=0,
    )  # (N, L)
    relative_error = ((depth_at_warped - depth_computed) / depth_at_warped).abs()
    consistent_mask = relative_error < 0.2
    valid_mask = nonzero_mask * covisible_mask * consistent_mask

    return valid_mask, w_kpts0


# --- third_party/EfficientLoFTR/src/loftr/utils/opt_config.py ---
from yacs.config import CfgNode as CN


def lower_config(yacs_cfg):
    """Recursively turn a yacs ``CfgNode`` into a plain dict with lower-cased keys.

    Anything that is not a ``CfgNode`` (leaf values) is returned unchanged.
    """
    if isinstance(yacs_cfg, CN):
        return {key.lower(): lower_config(value) for key, value in yacs_cfg.items()}
    return yacs_cfg


# Defaults of the "opt" (optimized) model variant.
_CN = CN()
_CN.BACKBONE_TYPE = 'RepVGG'
_CN.ALIGN_CORNER = False
_CN.RESOLUTION = (8, 1)
_CN.FINE_WINDOW_SIZE = 8  # window_size in fine_level, must be even
_CN.MP = False
_CN.REPLACE_NAN = True
_CN.HALF = False

# 1. LoFTR-backbone (local feature CNN) config
_CN.BACKBONE = CN()
_CN.BACKBONE.BLOCK_DIMS = [64, 128, 256]  # s1, s2, s3

# 2. LoFTR-coarse module config
_CN.COARSE = CN()
_CN.COARSE.D_MODEL = 256
_CN.COARSE.D_FFN = 256
_CN.COARSE.NHEAD = 8
_CN.COARSE.LAYER_NAMES = ['self', 'cross'] * 4
_CN.COARSE.AGG_SIZE0 = 4
_CN.COARSE.AGG_SIZE1 = 4
_CN.COARSE.NO_FLASH = False
_CN.COARSE.ROPE = True
_CN.COARSE.NPE = [832, 832, 832, 832]  # [832, 832, long_side, long_side] Suggest setting based on the long side of the input image, especially when the long_side > 832

# 3.
Coarse-Matching config +_CN.MATCH_COARSE = CN() +_CN.MATCH_COARSE.THR = 25 # recommend 0.2 for full model and 25 for optimized model +_CN.MATCH_COARSE.BORDER_RM = 2 +_CN.MATCH_COARSE.DSMAX_TEMPERATURE = 0.1 +_CN.MATCH_COARSE.SKIP_SOFTMAX = True # False for full model and True for optimized model +_CN.MATCH_COARSE.FP16MATMUL = True # False for full model and True for optimized model +_CN.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.2 # training tricks: save GPU memory +_CN.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200 # training tricks: avoid DDP deadlock + +# 4. Fine-Matching config +_CN.MATCH_FINE = CN() +_CN.MATCH_FINE.LOCAL_REGRESS_TEMPERATURE = 10.0 # use 10.0 as fine local regress temperature, not 1.0 +_CN.MATCH_FINE.LOCAL_REGRESS_SLICEDIM = 8 + +opt_default_cfg = lower_config(_CN) diff --git a/third_party/EfficientLoFTR/src/loftr/utils/position_encoding.py b/third_party/EfficientLoFTR/src/loftr/utils/position_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..6431d0e2ce468fad5b0f0f6838c2ae2e5c089b32 --- /dev/null +++ b/third_party/EfficientLoFTR/src/loftr/utils/position_encoding.py @@ -0,0 +1,50 @@ +import math +import torch +from torch import nn + +class RoPEPositionEncodingSine(nn.Module): + """ + This is a sinusoidal position encoding that generalized to 2-dimensional images + """ + + def __init__(self, d_model, max_shape=(256, 256), npe=None, ropefp16=True): + """ + Args: + max_shape (tuple): for 1/8 featmap, the max length of 256 corresponds to 2048 pixels + """ + super().__init__() + + i_position = torch.ones(max_shape).cumsum(0).float().unsqueeze(-1) # [H, 1] + j_position = torch.ones(max_shape).cumsum(1).float().unsqueeze(-1) # [W, 1] + + assert npe is not None + train_res_H, train_res_W, test_res_H, test_res_W = npe[0], npe[1], npe[2], npe[3] # train_res_H, train_res_W, test_res_H, test_res_W + i_position, j_position = i_position * train_res_H / test_res_H, j_position * train_res_W / test_res_W + + div_term = 
torch.exp(torch.arange(0, d_model//4, 1).float() * (-math.log(10000.0) / (d_model//4))) + div_term = div_term[None, None, :] # [1, 1, C//4] + + sin = torch.zeros(*max_shape, d_model//2, dtype=torch.float16 if ropefp16 else torch.float32) + cos = torch.zeros(*max_shape, d_model//2, dtype=torch.float16 if ropefp16 else torch.float32) + sin[:, :, 0::2] = torch.sin(i_position * div_term).half() if ropefp16 else torch.sin(i_position * div_term) + sin[:, :, 1::2] = torch.sin(j_position * div_term).half() if ropefp16 else torch.sin(j_position * div_term) + cos[:, :, 0::2] = torch.cos(i_position * div_term).half() if ropefp16 else torch.cos(i_position * div_term) + cos[:, :, 1::2] = torch.cos(j_position * div_term).half() if ropefp16 else torch.cos(j_position * div_term) + + sin = sin.repeat_interleave(2, dim=-1) + cos = cos.repeat_interleave(2, dim=-1) + + self.register_buffer('sin', sin.unsqueeze(0), persistent=False) # [1, H, W, C//2] + self.register_buffer('cos', cos.unsqueeze(0), persistent=False) # [1, H, W, C//2] + + def forward(self, x, ratio=1): + """ + Args: + x: [N, H, W, C] + """ + return (x * self.cos[:, :x.size(1), :x.size(2), :]) + (self.rotate_half(x) * self.sin[:, :x.size(1), :x.size(2), :]) + + def rotate_half(self, x): + x = x.unflatten(-1, (-1, 2)) + x1, x2 = x.unbind(dim=-1) + return torch.stack((-x2, x1), dim=-1).flatten(start_dim=-2) \ No newline at end of file diff --git a/third_party/EfficientLoFTR/src/optimizers/__init__.py b/third_party/EfficientLoFTR/src/optimizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1db2285352586c250912bdd2c4ae5029620ab5f --- /dev/null +++ b/third_party/EfficientLoFTR/src/optimizers/__init__.py @@ -0,0 +1,42 @@ +import torch +from torch.optim.lr_scheduler import MultiStepLR, CosineAnnealingLR, ExponentialLR + + +def build_optimizer(model, config): + name = config.TRAINER.OPTIMIZER + lr = config.TRAINER.TRUE_LR + + if name == "adam": + return torch.optim.Adam(model.parameters(), 
lr=lr, weight_decay=config.TRAINER.ADAM_DECAY) + elif name == "adamw": + return torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=config.TRAINER.ADAMW_DECAY) + else: + raise ValueError(f"TRAINER.OPTIMIZER = {name} is not a valid optimizer!") + + +def build_scheduler(config, optimizer): + """ + Returns: + scheduler (dict):{ + 'scheduler': lr_scheduler, + 'interval': 'step', # or 'epoch' + 'monitor': 'val_f1', (optional) + 'frequency': x, (optional) + } + """ + scheduler = {'interval': config.TRAINER.SCHEDULER_INTERVAL} + name = config.TRAINER.SCHEDULER + + if name == 'MultiStepLR': + scheduler.update( + {'scheduler': MultiStepLR(optimizer, config.TRAINER.MSLR_MILESTONES, gamma=config.TRAINER.MSLR_GAMMA)}) + elif name == 'CosineAnnealing': + scheduler.update( + {'scheduler': CosineAnnealingLR(optimizer, config.TRAINER.COSA_TMAX)}) + elif name == 'ExponentialLR': + scheduler.update( + {'scheduler': ExponentialLR(optimizer, config.TRAINER.ELR_GAMMA)}) + else: + raise NotImplementedError() + + return scheduler diff --git a/third_party/EfficientLoFTR/src/utils/augment.py b/third_party/EfficientLoFTR/src/utils/augment.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c5d3e11b6fe083aaeff7555bb7ce3a4bfb755d --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/augment.py @@ -0,0 +1,55 @@ +import albumentations as A + + +class DarkAug(object): + """ + Extreme dark augmentation aiming at Aachen Day-Night + """ + + def __init__(self) -> None: + self.augmentor = A.Compose([ + A.RandomBrightnessContrast(p=0.75, brightness_limit=(-0.6, 0.0), contrast_limit=(-0.5, 0.3)), + A.Blur(p=0.1, blur_limit=(3, 9)), + A.MotionBlur(p=0.2, blur_limit=(3, 25)), + A.RandomGamma(p=0.1, gamma_limit=(15, 65)), + A.HueSaturationValue(p=0.1, val_shift_limit=(-100, -40)) + ], p=0.75) + + def __call__(self, x): + return self.augmentor(image=x)['image'] + + +class MobileAug(object): + """ + Random augmentations aiming at images of mobile/handhold devices. 
+ """ + + def __init__(self): + self.augmentor = A.Compose([ + A.MotionBlur(p=0.25), + A.ColorJitter(p=0.5), + A.RandomRain(p=0.1), # random occlusion + A.RandomSunFlare(p=0.1), + A.JpegCompression(p=0.25), + A.ISONoise(p=0.25) + ], p=1.0) + + def __call__(self, x): + return self.augmentor(image=x)['image'] + + +def build_augmentor(method=None, **kwargs): + if method is not None: + raise NotImplementedError('Using of augmentation functions are not supported yet!') + if method == 'dark': + return DarkAug() + elif method == 'mobile': + return MobileAug() + elif method is None: + return None + else: + raise ValueError(f'Invalid augmentation method: {method}') + + +if __name__ == '__main__': + augmentor = build_augmentor('FDA') diff --git a/third_party/EfficientLoFTR/src/utils/comm.py b/third_party/EfficientLoFTR/src/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..26ec9517cc47e224430106d8ae9aa99a3fe49167 --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/comm.py @@ -0,0 +1,265 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +""" +[Copied from detectron2] +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +""" + +import functools +import logging +import numpy as np +import pickle +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". 
+""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. 
+ """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. 
+ + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. 
+ """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + + Returns: + a dict with the same keys as input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/third_party/EfficientLoFTR/src/utils/dataloader.py b/third_party/EfficientLoFTR/src/utils/dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..6da37b880a290c2bb3ebb028d0c8dab592acc5c1 --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/dataloader.py @@ -0,0 +1,23 @@ +import numpy as np + + +# --- PL-DATAMODULE --- + +def get_local_split(items: list, world_size: int, rank: int, seed: int): + """ The local rank only loads a split of the dataset. 
""" + n_items = len(items) + items_permute = np.random.RandomState(seed).permutation(items) + if n_items % world_size == 0: + padded_items = items_permute + else: + padding = np.random.RandomState(seed).choice( + items, + world_size - (n_items % world_size), + replace=True) + padded_items = np.concatenate([items_permute, padding]) + assert len(padded_items) % world_size == 0, \ + f'len(padded_items): {len(padded_items)}; world_size: {world_size}; len(padding): {len(padding)}' + n_per_rank = len(padded_items) // world_size + local_items = padded_items[n_per_rank * rank: n_per_rank * (rank+1)] + + return local_items diff --git a/third_party/EfficientLoFTR/src/utils/dataset.py b/third_party/EfficientLoFTR/src/utils/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..37831292d08b5e9f13eeb0dee64ae8882f52a63f --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/dataset.py @@ -0,0 +1,186 @@ +import io +from loguru import logger + +import cv2 +import numpy as np +import h5py +import torch +from numpy.linalg import inv + + +try: + # for internel use only + from .client import MEGADEPTH_CLIENT, SCANNET_CLIENT +except Exception: + MEGADEPTH_CLIENT = SCANNET_CLIENT = None + +# --- DATA IO --- + +def load_array_from_s3( + path, client, cv_type, + use_h5py=False, +): + byte_str = client.Get(path) + try: + if not use_h5py: + raw_array = np.fromstring(byte_str, np.uint8) + data = cv2.imdecode(raw_array, cv_type) + else: + f = io.BytesIO(byte_str) + data = np.array(h5py.File(f, 'r')['/depth']) + except Exception as ex: + print(f"==> Data loading failure: {path}") + raise ex + + assert data is not None + return data + + +def imread_gray(path, augment_fn=None, client=SCANNET_CLIENT): + cv_type = cv2.IMREAD_GRAYSCALE if augment_fn is None \ + else cv2.IMREAD_COLOR + if str(path).startswith('s3://'): + image = load_array_from_s3(str(path), client, cv_type) + else: + image = cv2.imread(str(path), cv_type) + + if augment_fn is not None: + image = 
cv2.imread(str(path), cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = augment_fn(image) + image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) + return image # (h, w) + + +def get_resized_wh(w, h, resize=None): + if resize is not None: # resize the longer edge + scale = resize / max(h, w) + w_new, h_new = int(round(w*scale)), int(round(h*scale)) + else: + w_new, h_new = w, h + return w_new, h_new + + +def get_divisible_wh(w, h, df=None): + if df is not None: + w_new, h_new = map(lambda x: int(x // df * df), [w, h]) + else: + w_new, h_new = w, h + return w_new, h_new + + +def pad_bottom_right(inp, pad_size, ret_mask=False): + assert isinstance(pad_size, int) and pad_size >= max(inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}" + mask = None + if inp.ndim == 2: + padded = np.zeros((pad_size, pad_size), dtype=inp.dtype) + padded[:inp.shape[0], :inp.shape[1]] = inp + if ret_mask: + mask = np.zeros((pad_size, pad_size), dtype=bool) + mask[:inp.shape[0], :inp.shape[1]] = True + elif inp.ndim == 3: + padded = np.zeros((inp.shape[0], pad_size, pad_size), dtype=inp.dtype) + padded[:, :inp.shape[1], :inp.shape[2]] = inp + if ret_mask: + mask = np.zeros((inp.shape[0], pad_size, pad_size), dtype=bool) + mask[:, :inp.shape[1], :inp.shape[2]] = True + else: + raise NotImplementedError() + return padded, mask + + +# --- MEGADEPTH --- + +def read_megadepth_gray(path, resize=None, df=None, padding=False, augment_fn=None): + """ + Args: + resize (int, optional): the longer edge of resized images. None for no resize. + padding (bool): If set to 'True', zero-pad resized images to squared size. 
+ augment_fn (callable, optional): augments images with pre-defined visual effects + Returns: + image (torch.tensor): (1, h, w) + mask (torch.tensor): (h, w) + scale (torch.tensor): [w/w_new, h/h_new] + """ + # read image + image = imread_gray(path, augment_fn, client=MEGADEPTH_CLIENT) + + # resize image + w, h = image.shape[1], image.shape[0] + w_new, h_new = get_resized_wh(w, h, resize) + w_new, h_new = get_divisible_wh(w_new, h_new, df) + + image = cv2.resize(image, (w_new, h_new)) + scale = torch.tensor([w/w_new, h/h_new], dtype=torch.float) + + if padding: # padding + pad_to = max(h_new, w_new) + image, mask = pad_bottom_right(image, pad_to, ret_mask=True) + else: + mask = None + + image = torch.from_numpy(image).float()[None] / 255 # (h, w) -> (1, h, w) and normalized + if mask is not None: + mask = torch.from_numpy(mask) + + return image, mask, scale + + +def read_megadepth_depth(path, pad_to=None): + if str(path).startswith('s3://'): + depth = load_array_from_s3(path, MEGADEPTH_CLIENT, None, use_h5py=True) + else: + depth = np.array(h5py.File(path, 'r')['depth']) + if pad_to is not None: + depth, _ = pad_bottom_right(depth, pad_to, ret_mask=False) + depth = torch.from_numpy(depth).float() # (h, w) + return depth + + +# --- ScanNet --- + +def read_scannet_gray(path, resize=(640, 480), augment_fn=None): + """ + Args: + resize (tuple): align image to depthmap, in (w, h). 
+ augment_fn (callable, optional): augments images with pre-defined visual effects + Returns: + image (torch.tensor): (1, h, w) + mask (torch.tensor): (h, w) + scale (torch.tensor): [w/w_new, h/h_new] + """ + # read and resize image + image = imread_gray(path, augment_fn) + image = cv2.resize(image, resize) + + # (h, w) -> (1, h, w) and normalized + image = torch.from_numpy(image).float()[None] / 255 + return image + + +def read_scannet_depth(path): + if str(path).startswith('s3://'): + depth = load_array_from_s3(str(path), SCANNET_CLIENT, cv2.IMREAD_UNCHANGED) + else: + depth = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) + depth = depth / 1000 + depth = torch.from_numpy(depth).float() # (h, w) + return depth + + +def read_scannet_pose(path): + """ Read ScanNet's Camera2World pose and transform it to World2Camera. + + Returns: + pose_w2c (np.ndarray): (4, 4) + """ + cam2world = np.loadtxt(path, delimiter=' ') + world2cam = inv(cam2world) + return world2cam + + +def read_scannet_intrinsic(path): + """ Read ScanNet's intrinsic matrix and return the 3x3 matrix. 
+ """ + intrinsic = np.loadtxt(path, delimiter=' ') + return intrinsic[:-1, :-1] diff --git a/third_party/EfficientLoFTR/src/utils/metrics.py b/third_party/EfficientLoFTR/src/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..1cc37896f363567d1f91d630def7bb717569a4ed --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/metrics.py @@ -0,0 +1,264 @@ +import torch +import cv2 +import numpy as np +from collections import OrderedDict +from loguru import logger +from kornia.geometry.epipolar import numeric +from kornia.geometry.conversions import convert_points_to_homogeneous +import pprint + + +# --- METRICS --- + +def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0): + # angle error between 2 vectors + t_gt = T_0to1[:3, 3] + n = np.linalg.norm(t) * np.linalg.norm(t_gt) + t_err = np.rad2deg(np.arccos(np.clip(np.dot(t, t_gt) / n, -1.0, 1.0))) + t_err = np.minimum(t_err, 180 - t_err) # handle E ambiguity + if np.linalg.norm(t_gt) < ignore_gt_t_thr: # pure rotation is challenging + t_err = 0 + + # angle error between 2 rotation matrices + R_gt = T_0to1[:3, :3] + cos = (np.trace(np.dot(R.T, R_gt)) - 1) / 2 + cos = np.clip(cos, -1., 1.) # handle numercial errors + R_err = np.rad2deg(np.abs(np.arccos(cos))) + + return t_err, R_err + + +def symmetric_epipolar_distance(pts0, pts1, E, K0, K1): + """Squared symmetric epipolar distance. + This can be seen as a biased estimation of the reprojection error. 
+ Args: + pts0 (torch.Tensor): [N, 2] + E (torch.Tensor): [3, 3] + """ + pts0 = (pts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None] + pts1 = (pts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None] + pts0 = convert_points_to_homogeneous(pts0) + pts1 = convert_points_to_homogeneous(pts1) + + Ep0 = pts0 @ E.T # [N, 3] + p1Ep0 = torch.sum(pts1 * Ep0, -1) # [N,] + Etp1 = pts1 @ E # [N, 3] + + d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2) + 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2)) # N + return d + + +def compute_symmetrical_epipolar_errors(data): + """ + Update: + data (dict):{"epi_errs": [M]} + """ + Tx = numeric.cross_product_matrix(data['T_0to1'][:, :3, 3]) + E_mat = Tx @ data['T_0to1'][:, :3, :3] + + m_bids = data['m_bids'] + pts0 = data['mkpts0_f'] + pts1 = data['mkpts1_f'] + + epi_errs = [] + for bs in range(Tx.size(0)): + mask = m_bids == bs + epi_errs.append( + symmetric_epipolar_distance(pts0[mask], pts1[mask], E_mat[bs], data['K0'][bs], data['K1'][bs])) + epi_errs = torch.cat(epi_errs, dim=0) + + data.update({'epi_errs': epi_errs}) + + +def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999): + if len(kpts0) < 5: + return None + # normalize keypoints + kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None] + kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None] + + # normalize ransac threshold + ransac_thr = thresh / np.mean([K0[0, 0], K1[1, 1], K0[0, 0], K1[1, 1]]) + + # compute pose with cv2 + E, mask = cv2.findEssentialMat( + kpts0, kpts1, np.eye(3), threshold=ransac_thr, prob=conf, method=cv2.RANSAC) + if E is None: + print("\nE is None while trying to recover pose.\n") + return None + + # recover pose from E + best_num_inliers = 0 + ret = None + for _E in np.split(E, len(E) / 3): + n, R, t, _ = cv2.recoverPose(_E, kpts0, kpts1, np.eye(3), 1e9, mask=mask) + if n > best_num_inliers: + ret = (R, t[:, 0], mask.ravel() > 0) + best_num_inliers = n + + return ret + + +def estimate_lo_pose(kpts0, kpts1, K0, 
K1, thresh, conf=0.99999): + from .warppers import Camera, Pose + import poselib + camera0, camera1 = Camera.from_calibration_matrix(K0).float(), Camera.from_calibration_matrix(K1).float() + pts0, pts1 = kpts0, kpts1 + + M, info = poselib.estimate_relative_pose( + pts0, + pts1, + camera0.to_cameradict(), + camera1.to_cameradict(), + { + "max_epipolar_error": thresh, + }, + ) + success = M is not None and ( ((M.t != [0., 0., 0.]).all()) or ((M.q != [1., 0., 0., 0.]).all()) ) + if success: + M = Pose.from_Rt(torch.tensor(M.R), torch.tensor(M.t)) # .to(pts0) + # print(M) + else: + M = Pose.from_4x4mat(torch.eye(4).numpy()) # .to(pts0) + # print(M) + + estimation = { + "success": success, + "M_0to1": M, + "inliers": torch.tensor(info.pop("inliers")), # .to(pts0), + **info, + } + return estimation + + +def compute_pose_errors(data, config): + """ + Update: + data (dict):{ + "R_errs" List[float]: [N] + "t_errs" List[float]: [N] + "inliers" List[np.ndarray]: [N] + } + """ + pixel_thr = config.TRAINER.RANSAC_PIXEL_THR # 0.5 + conf = config.TRAINER.RANSAC_CONF # 0.99999 + RANSAC = config.TRAINER.POSE_ESTIMATION_METHOD + data.update({'R_errs': [], 't_errs': [], 'inliers': []}) + + m_bids = data['m_bids'].cpu().numpy() + pts0 = data['mkpts0_f'].cpu().numpy() + pts1 = data['mkpts1_f'].cpu().numpy() + K0 = data['K0'].cpu().numpy() + K1 = data['K1'].cpu().numpy() + T_0to1 = data['T_0to1'].cpu().numpy() + + for bs in range(K0.shape[0]): + mask = m_bids == bs + if config.LOFTR.EVAL_TIMES >= 1: + bpts0, bpts1 = pts0[mask], pts1[mask] + R_list, T_list, inliers_list = [], [], [] + # for _ in range(config.LOFTR.EVAL_TIMES): + for _ in range(5): + shuffling = np.random.permutation(np.arange(len(bpts0))) + if _ >= config.LOFTR.EVAL_TIMES: + continue + bpts0 = bpts0[shuffling] + bpts1 = bpts1[shuffling] + + if RANSAC == 'RANSAC': + ret = estimate_pose(bpts0, bpts1, K0[bs], K1[bs], pixel_thr, conf=conf) + if ret is None: + R_list.append(np.inf) + T_list.append(np.inf) + 
inliers_list.append(np.array([]).astype(bool)) + else: + R, t, inliers = ret + t_err, R_err = relative_pose_error(T_0to1[bs], R, t, ignore_gt_t_thr=0.0) + R_list.append(R_err) + T_list.append(t_err) + inliers_list.append(inliers) + + elif RANSAC == 'LO-RANSAC': + est = estimate_lo_pose(bpts0, bpts1, K0[bs], K1[bs], pixel_thr, conf=conf) + if not est["success"]: + R_list.append(90) + T_list.append(90) + inliers_list.append(np.array([]).astype(bool)) + else: + M = est["M_0to1"] + inl = est["inliers"].numpy() + t_error, r_error = relative_pose_error(T_0to1[bs], M.R, M.t, ignore_gt_t_thr=0.0) + R_list.append(r_error) + T_list.append(t_error) + inliers_list.append(inl) + else: + raise ValueError(f"Unknown RANSAC method: {RANSAC}") + + data['R_errs'].append(R_list) + data['t_errs'].append(T_list) + data['inliers'].append(inliers_list[0]) + + +# --- METRIC AGGREGATION --- + +def error_auc(errors, thresholds): + """ + Args: + errors (list): [N,] + thresholds (list) + """ + errors = [0] + sorted(list(errors)) + recall = list(np.linspace(0, 1, len(errors))) + + aucs = [] + thresholds = [5, 10, 20] + for thr in thresholds: + last_index = np.searchsorted(errors, thr) + y = recall[:last_index] + [recall[last_index-1]] + x = errors[:last_index] + [thr] + aucs.append(np.trapz(y, x) / thr) + + return {f'auc@{t}': auc for t, auc in zip(thresholds, aucs)} + + +def epidist_prec(errors, thresholds, ret_dict=False): + precs = [] + for thr in thresholds: + prec_ = [] + for errs in errors: + correct_mask = errs < thr + prec_.append(np.mean(correct_mask) if len(correct_mask) > 0 else 0) + precs.append(np.mean(prec_) if len(prec_) > 0 else 0) + if ret_dict: + return {f'prec@{t:.0e}': prec for t, prec in zip(thresholds, precs)} + else: + return precs + + +def aggregate_metrics(metrics, epi_err_thr=5e-4, config=None): + """ Aggregate metrics for the whole dataset: + (This method should be called once per dataset) + 1. AUC of the pose error (angular) at the threshold [5, 10, 20] + 2. 
Mean matching precision at the threshold 5e-4(ScanNet), 1e-4(MegaDepth) + """ + # filter duplicates + unq_ids = OrderedDict((iden, id) for id, iden in enumerate(metrics['identifiers'])) + unq_ids = list(unq_ids.values()) + logger.info(f'Aggregating metrics over {len(unq_ids)} unique items...') + + # pose auc + angular_thresholds = [5, 10, 20] + + if config.LOFTR.EVAL_TIMES >= 1: + pose_errors = np.max(np.stack([metrics['R_errs'], metrics['t_errs']]), axis=0).reshape(-1, config.LOFTR.EVAL_TIMES)[unq_ids].reshape(-1) + else: + pose_errors = np.max(np.stack([metrics['R_errs'], metrics['t_errs']]), axis=0)[unq_ids] + aucs = error_auc(pose_errors, angular_thresholds) # (auc@5, auc@10, auc@20) + + # matching precision + dist_thresholds = [epi_err_thr] + precs = epidist_prec(np.array(metrics['epi_errs'], dtype=object)[unq_ids], dist_thresholds, True) # (prec@err_thr) + + u_num_mathces = np.array(metrics['num_matches'], dtype=object)[unq_ids] + num_matches = {f'num_matches': u_num_mathces.mean() } + return {**aucs, **precs, **num_matches} diff --git a/third_party/EfficientLoFTR/src/utils/misc.py b/third_party/EfficientLoFTR/src/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..604b9bc93e0fb92a9750fe72f3d692edc84207b5 --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/misc.py @@ -0,0 +1,106 @@ +import os +import contextlib +import joblib +from typing import Union +from loguru import _Logger, logger +from itertools import chain + +import torch +from yacs.config import CfgNode as CN +from pytorch_lightning.utilities import rank_zero_only + + +def lower_config(yacs_cfg): + if not isinstance(yacs_cfg, CN): + return yacs_cfg + return {k.lower(): lower_config(v) for k, v in yacs_cfg.items()} + + +def upper_config(dict_cfg): + if not isinstance(dict_cfg, dict): + return dict_cfg + return {k.upper(): upper_config(v) for k, v in dict_cfg.items()} + + +def log_on(condition, message, level): + if condition: + assert level in ['INFO', 'DEBUG', 
'WARNING', 'ERROR', 'CRITICAL'] + logger.log(level, message) + + +def get_rank_zero_only_logger(logger: _Logger): + if rank_zero_only.rank == 0: + return logger + else: + for _level in logger._core.levels.keys(): + level = _level.lower() + setattr(logger, level, + lambda x: None) + logger._log = lambda x: None + return logger + + +def setup_gpus(gpus: Union[str, int]) -> int: + """ A temporary fix for pytorch-lighting 1.3.x """ + gpus = str(gpus) + gpu_ids = [] + + if ',' not in gpus: + n_gpus = int(gpus) + return n_gpus if n_gpus != -1 else torch.cuda.device_count() + else: + gpu_ids = [i.strip() for i in gpus.split(',') if i != ''] + + # setup environment variables + visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') + if visible_devices is None: + os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(i) for i in gpu_ids) + visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') + logger.warning(f'[Temporary Fix] manually set CUDA_VISIBLE_DEVICES when specifying gpus to use: {visible_devices}') + else: + logger.warning('[Temporary Fix] CUDA_VISIBLE_DEVICES already set by user or the main process.') + return len(gpu_ids) + + +def flattenList(x): + return list(chain(*x)) + + +@contextlib.contextmanager +def tqdm_joblib(tqdm_object): + """Context manager to patch joblib to report into tqdm progress bar given as argument + + Usage: + with tqdm_joblib(tqdm(desc="My calculation", total=10)) as progress_bar: + Parallel(n_jobs=16)(delayed(sqrt)(i**2) for i in range(10)) + + When iterating over a generator, directly use of tqdm is also a solutin (but monitor the task queuing, instead of finishing) + ret_vals = Parallel(n_jobs=args.world_size)( + delayed(lambda x: _compute_cov_score(pid, *x))(param) + for param in tqdm(combinations(image_ids, 2), + desc=f'Computing cov_score of [{pid}]', + total=len(image_ids)*(len(image_ids)-1)/2)) + Src: https://stackoverflow.com/a/58936697 + """ + class 
TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, *args, **kwargs): + tqdm_object.update(n=self.batch_size) + return super().__call__(*args, **kwargs) + + old_batch_callback = joblib.parallel.BatchCompletionCallBack + joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback + try: + yield tqdm_object + finally: + joblib.parallel.BatchCompletionCallBack = old_batch_callback + tqdm_object.close() + +def detect_NaN(feat_0, feat_1): + logger.info(f'NaN detected in feature') + logger.info(f"#NaN in feat_0: {torch.isnan(feat_0).int().sum()}, #NaN in feat_1: {torch.isnan(feat_1).int().sum()}") + feat_0[torch.isnan(feat_0)] = 0 + feat_1[torch.isnan(feat_1)] = 0 diff --git a/third_party/EfficientLoFTR/src/utils/plotting.py b/third_party/EfficientLoFTR/src/utils/plotting.py new file mode 100644 index 0000000000000000000000000000000000000000..9d4260c7487cfbc76dda94c589957601cea972d4 --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/plotting.py @@ -0,0 +1,154 @@ +import bisect +import numpy as np +import matplotlib.pyplot as plt +import matplotlib + +import torch + +def _compute_conf_thresh(data): + dataset_name = data['dataset_name'][0].lower() + if dataset_name == 'scannet': + thr = 5e-4 + elif dataset_name == 'megadepth': + thr = 1e-4 + else: + raise ValueError(f'Unknown dataset: {dataset_name}') + return thr + + +# --- VISUALIZATION --- # + +def make_matching_figure( + img0, img1, mkpts0, mkpts1, color, + kpts0=None, kpts1=None, text=[], dpi=75, path=None): + # draw image pair + assert mkpts0.shape[0] == mkpts1.shape[0], f'mkpts0: {mkpts0.shape[0]} v.s. 
mkpts1: {mkpts1.shape[0]}' + fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi) + axes[0].imshow(img0, cmap='gray') + axes[1].imshow(img1, cmap='gray') + for i in range(2): # clear all frames + axes[i].get_yaxis().set_ticks([]) + axes[i].get_xaxis().set_ticks([]) + for spine in axes[i].spines.values(): + spine.set_visible(False) + plt.tight_layout(pad=1) + + if kpts0 is not None: + assert kpts1 is not None + axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c='w', s=2) + axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c='w', s=2) + + # draw matches + if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0: + fig.canvas.draw() + transFigure = fig.transFigure.inverted() + fkpts0 = transFigure.transform(axes[0].transData.transform(mkpts0)) + fkpts1 = transFigure.transform(axes[1].transData.transform(mkpts1)) + fig.lines = [matplotlib.lines.Line2D((fkpts0[i, 0], fkpts1[i, 0]), + (fkpts0[i, 1], fkpts1[i, 1]), + transform=fig.transFigure, c=color[i], linewidth=1) + for i in range(len(mkpts0))] + + axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color, s=4) + axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color, s=4) + + # put txts + txt_color = 'k' if img0[:100, :200].mean() > 200 else 'w' + fig.text( + 0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes, + fontsize=15, va='top', ha='left', color=txt_color) + + # save or return figure + if path: + plt.savefig(str(path), bbox_inches='tight', pad_inches=0) + plt.close() + else: + return fig + + +def _make_evaluation_figure(data, b_id, alpha='dynamic'): + b_mask = data['m_bids'] == b_id + conf_thr = _compute_conf_thresh(data) + + img0 = (data['image0'][b_id][0].cpu().numpy() * 255).round().astype(np.int32) + img1 = (data['image1'][b_id][0].cpu().numpy() * 255).round().astype(np.int32) + kpts0 = data['mkpts0_f'][b_mask].cpu().numpy() + kpts1 = data['mkpts1_f'][b_mask].cpu().numpy() + + # for megadepth, we visualize matches on the resized image + if 'scale0' in data: + kpts0 = kpts0 / data['scale0'][b_id].cpu().numpy()[[1, 0]] + kpts1 
= kpts1 / data['scale1'][b_id].cpu().numpy()[[1, 0]] + + epi_errs = data['epi_errs'][b_mask].cpu().numpy() + correct_mask = epi_errs < conf_thr + precision = np.mean(correct_mask) if len(correct_mask) > 0 else 0 + n_correct = np.sum(correct_mask) + n_gt_matches = int(data['conf_matrix_gt'][b_id].sum().cpu()) + recall = 0 if n_gt_matches == 0 else n_correct / (n_gt_matches) + # recall might be larger than 1, since the calculation of conf_matrix_gt + # uses groundtruth depths and camera poses, but epipolar distance is used here. + + # matching info + if alpha == 'dynamic': + alpha = dynamic_alpha(len(correct_mask)) + color = error_colormap(epi_errs, conf_thr, alpha=alpha) + + text = [ + f'#Matches {len(kpts0)}', + f'Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}', + f'Recall({conf_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}' + ] + + # make the figure + figure = make_matching_figure(img0, img1, kpts0, kpts1, + color, text=text) + return figure + +def _make_confidence_figure(data, b_id): + # TODO: Implement confidence figure + raise NotImplementedError() + +def make_matching_figures(data, config, mode='evaluation'): + """ Make matching figures for a batch. + + Args: + data (Dict): a batch updated by PL_LoFTR. 
+ config (Dict): matcher config + Returns: + figures (Dict[str, List[plt.figure]] + """ + assert mode in ['evaluation', 'confidence', 'gt'] # 'confidence' + figures = {mode: []} + for b_id in range(data['image0'].size(0)): + if mode == 'evaluation': + fig = _make_evaluation_figure( + data, b_id, + alpha=config.TRAINER.PLOT_MATCHES_ALPHA) + elif mode == 'confidence': + fig = _make_confidence_figure(data, b_id) + else: + raise ValueError(f'Unknown plot mode: {mode}') + figures[mode].append(fig) + return figures + + +def dynamic_alpha(n_matches, + milestones=[0, 300, 1000, 2000], + alphas=[1.0, 0.8, 0.4, 0.2]): + if n_matches == 0: + return 1.0 + ranges = list(zip(alphas, alphas[1:] + [None])) + loc = bisect.bisect_right(milestones, n_matches) - 1 + _range = ranges[loc] + if _range[1] is None: + return _range[0] + return _range[1] + (milestones[loc + 1] - n_matches) / ( + milestones[loc + 1] - milestones[loc]) * (_range[0] - _range[1]) + + +def error_colormap(err, thr, alpha=1.0): + assert alpha <= 1.0 and alpha > 0, f"Invaid alpha value: {alpha}" + x = 1 - np.clip(err / (thr * 2), 0, 1) + return np.clip( + np.stack([2-x*2, x*2, np.zeros_like(x), np.ones_like(x)*alpha], -1), 0, 1) \ No newline at end of file diff --git a/third_party/EfficientLoFTR/src/utils/profiler.py b/third_party/EfficientLoFTR/src/utils/profiler.py new file mode 100644 index 0000000000000000000000000000000000000000..6d21ed79fb506ef09c75483355402c48a195aaa9 --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/profiler.py @@ -0,0 +1,39 @@ +import torch +from pytorch_lightning.profiler import SimpleProfiler, PassThroughProfiler +from contextlib import contextmanager +from pytorch_lightning.utilities import rank_zero_only + + +class InferenceProfiler(SimpleProfiler): + """ + This profiler records duration of actions with cuda.synchronize() + Use this in test time. 
+ """ + + def __init__(self): + super().__init__() + self.start = rank_zero_only(self.start) + self.stop = rank_zero_only(self.stop) + self.summary = rank_zero_only(self.summary) + + @contextmanager + def profile(self, action_name: str) -> None: + try: + torch.cuda.synchronize() + self.start(action_name) + yield action_name + finally: + torch.cuda.synchronize() + self.stop(action_name) + + +def build_profiler(name): + if name == 'inference': + return InferenceProfiler() + elif name == 'pytorch': + from pytorch_lightning.profiler import PyTorchProfiler + return PyTorchProfiler(use_cuda=True, profile_memory=True, row_limit=100) + elif name is None: + return PassThroughProfiler() + else: + raise ValueError(f'Invalid profiler: {name}') diff --git a/third_party/EfficientLoFTR/src/utils/warppers.py b/third_party/EfficientLoFTR/src/utils/warppers.py new file mode 100644 index 0000000000000000000000000000000000000000..a2f33b78f0d1645b3eefc8c9c6dbe14f24f7b2d6 --- /dev/null +++ b/third_party/EfficientLoFTR/src/utils/warppers.py @@ -0,0 +1,426 @@ +""" +Convenience classes for an SE3 pose and a pinhole Camera with lens distortion. +Based on PyTorch tensors: differentiable, batched, with GPU support. +Modified from: https://github.com/cvg/glue-factory/blob/scannet1500/gluefactory/geometry/wrappers.py +""" + +import functools +import inspect +import math +from typing import Dict, List, NamedTuple, Optional, Tuple, Union + +import numpy as np +import torch + +from .warppers_utils import ( + J_distort_points, + distort_points, + skew_symmetric, + so3exp_map, + to_homogeneous, +) + + +def autocast(func): + """Cast the inputs of a TensorWrapper method to PyTorch tensors + if they are numpy arrays. Use the device and dtype of the wrapper. 
+ """ + + @functools.wraps(func) + def wrap(self, *args): + device = torch.device("cpu") + dtype = None + if isinstance(self, TensorWrapper): + if self._data is not None: + device = self.device + dtype = self.dtype + elif not inspect.isclass(self) or not issubclass(self, TensorWrapper): + raise ValueError(self) + + cast_args = [] + for arg in args: + if isinstance(arg, np.ndarray): + arg = torch.from_numpy(arg) + arg = arg.to(device=device, dtype=dtype) + cast_args.append(arg) + return func(self, *cast_args) + + return wrap + + +class TensorWrapper: + _data = None + + @autocast + def __init__(self, data: torch.Tensor): + self._data = data + + @property + def shape(self): + return self._data.shape[:-1] + + @property + def device(self): + return self._data.device + + @property + def dtype(self): + return self._data.dtype + + def __getitem__(self, index): + return self.__class__(self._data[index]) + + def __setitem__(self, index, item): + self._data[index] = item.data + + def to(self, *args, **kwargs): + return self.__class__(self._data.to(*args, **kwargs)) + + def cpu(self): + return self.__class__(self._data.cpu()) + + def cuda(self): + return self.__class__(self._data.cuda()) + + def pin_memory(self): + return self.__class__(self._data.pin_memory()) + + def float(self): + return self.__class__(self._data.float()) + + def double(self): + return self.__class__(self._data.double()) + + def detach(self): + return self.__class__(self._data.detach()) + + @classmethod + def stack(cls, objects: List, dim=0, *, out=None): + data = torch.stack([obj._data for obj in objects], dim=dim, out=out) + return cls(data) + + @classmethod + def __torch_function__(self, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + if func is torch.stack: + return self.stack(*args, **kwargs) + else: + return NotImplemented + + +class Pose(TensorWrapper): + def __init__(self, data: torch.Tensor): + assert data.shape[-1] == 12 + super().__init__(data) + + @classmethod + 
@autocast + def from_Rt(cls, R: torch.Tensor, t: torch.Tensor): + """Pose from a rotation matrix and translation vector. + Accepts numpy arrays or PyTorch tensors. + + Args: + R: rotation matrix with shape (..., 3, 3). + t: translation vector with shape (..., 3). + """ + assert R.shape[-2:] == (3, 3) + assert t.shape[-1] == 3 + assert R.shape[:-2] == t.shape[:-1] + data = torch.cat([R.flatten(start_dim=-2), t], -1) + return cls(data) + + @classmethod + @autocast + def from_aa(cls, aa: torch.Tensor, t: torch.Tensor): + """Pose from an axis-angle rotation vector and translation vector. + Accepts numpy arrays or PyTorch tensors. + + Args: + aa: axis-angle rotation vector with shape (..., 3). + t: translation vector with shape (..., 3). + """ + assert aa.shape[-1] == 3 + assert t.shape[-1] == 3 + assert aa.shape[:-1] == t.shape[:-1] + return cls.from_Rt(so3exp_map(aa), t) + + @classmethod + def from_4x4mat(cls, T: torch.Tensor): + """Pose from an SE(3) transformation matrix. + Args: + T: transformation matrix with shape (..., 4, 4). 
+ """ + assert T.shape[-2:] == (4, 4) + R, t = T[..., :3, :3], T[..., :3, 3] + return cls.from_Rt(R, t) + + @classmethod + def from_colmap(cls, image: NamedTuple): + """Pose from a COLMAP Image.""" + return cls.from_Rt(image.qvec2rotmat(), image.tvec) + + @property + def R(self) -> torch.Tensor: + """Underlying rotation matrix with shape (..., 3, 3).""" + rvec = self._data[..., :9] + return rvec.reshape(rvec.shape[:-1] + (3, 3)) + + @property + def t(self) -> torch.Tensor: + """Underlying translation vector with shape (..., 3).""" + return self._data[..., -3:] + + def inv(self) -> "Pose": + """Invert an SE(3) pose.""" + R = self.R.transpose(-1, -2) + t = -(R @ self.t.unsqueeze(-1)).squeeze(-1) + return self.__class__.from_Rt(R, t) + + def compose(self, other: "Pose") -> "Pose": + """Chain two SE(3) poses: T_B2C.compose(T_A2B) -> T_A2C.""" + R = self.R @ other.R + t = self.t + (self.R @ other.t.unsqueeze(-1)).squeeze(-1) + return self.__class__.from_Rt(R, t) + + @autocast + def transform(self, p3d: torch.Tensor) -> torch.Tensor: + """Transform a set of 3D points. + Args: + p3d: 3D points, numpy array or PyTorch tensor with shape (..., 3). + """ + assert p3d.shape[-1] == 3 + # assert p3d.shape[:-2] == self.shape # allow broadcasting + return p3d @ self.R.transpose(-1, -2) + self.t.unsqueeze(-2) + + def __mul__(self, p3D: torch.Tensor) -> torch.Tensor: + """Transform a set of 3D points: T_A2B * p3D_A -> p3D_B.""" + return self.transform(p3D) + + def __matmul__( + self, other: Union["Pose", torch.Tensor] + ) -> Union["Pose", torch.Tensor]: + """Transform a set of 3D points: T_A2B * p3D_A -> p3D_B. 
+ or chain two SE(3) poses: T_B2C @ T_A2B -> T_A2C.""" + if isinstance(other, self.__class__): + return self.compose(other) + else: + return self.transform(other) + + @autocast + def J_transform(self, p3d_out: torch.Tensor): + # [[1,0,0,0,-pz,py], + # [0,1,0,pz,0,-px], + # [0,0,1,-py,px,0]] + J_t = torch.diag_embed(torch.ones_like(p3d_out)) + J_rot = -skew_symmetric(p3d_out) + J = torch.cat([J_t, J_rot], dim=-1) + return J # N x 3 x 6 + + def numpy(self) -> Tuple[np.ndarray]: + return self.R.numpy(), self.t.numpy() + + def magnitude(self) -> Tuple[torch.Tensor]: + """Magnitude of the SE(3) transformation. + Returns: + dr: rotation anngle in degrees. + dt: translation distance in meters. + """ + trace = torch.diagonal(self.R, dim1=-1, dim2=-2).sum(-1) + cos = torch.clamp((trace - 1) / 2, -1, 1) + dr = torch.acos(cos).abs() / math.pi * 180 + dt = torch.norm(self.t, dim=-1) + return dr, dt + + def __repr__(self): + return f"Pose: {self.shape} {self.dtype} {self.device}" + + +class Camera(TensorWrapper): + eps = 1e-4 + + def __init__(self, data: torch.Tensor): + assert data.shape[-1] in {6, 8, 10} + super().__init__(data) + + @classmethod + def from_colmap(cls, camera: Union[Dict, NamedTuple]): + """Camera from a COLMAP Camera tuple or dictionary. 
+ We use the corner-convetion from COLMAP (center of top left pixel is (0.5, 0.5)) + """ + if isinstance(camera, tuple): + camera = camera._asdict() + + model = camera["model"] + params = camera["params"] + + if model in ["OPENCV", "PINHOLE", "RADIAL"]: + (fx, fy, cx, cy), params = np.split(params, [4]) + elif model in ["SIMPLE_PINHOLE", "SIMPLE_RADIAL"]: + (f, cx, cy), params = np.split(params, [3]) + fx = fy = f + if model == "SIMPLE_RADIAL": + params = np.r_[params, 0.0] + else: + raise NotImplementedError(model) + + data = np.r_[camera["width"], camera["height"], fx, fy, cx, cy, params] + return cls(data) + + @classmethod + @autocast + def from_calibration_matrix(cls, K: torch.Tensor): + cx, cy = K[..., 0, 2], K[..., 1, 2] + fx, fy = K[..., 0, 0], K[..., 1, 1] + data = torch.stack([2 * cx, 2 * cy, fx, fy, cx, cy], -1) + return cls(data) + + @autocast + def calibration_matrix(self): + K = torch.zeros( + *self._data.shape[:-1], + 3, + 3, + device=self._data.device, + dtype=self._data.dtype, + ) + K[..., 0, 2] = self._data[..., 4] + K[..., 1, 2] = self._data[..., 5] + K[..., 0, 0] = self._data[..., 2] + K[..., 1, 1] = self._data[..., 3] + K[..., 2, 2] = 1.0 + return K + + @property + def size(self) -> torch.Tensor: + """Size (width height) of the images, with shape (..., 2).""" + return self._data[..., :2] + + @property + def f(self) -> torch.Tensor: + """Focal lengths (fx, fy) with shape (..., 2).""" + return self._data[..., 2:4] + + @property + def c(self) -> torch.Tensor: + """Principal points (cx, cy) with shape (..., 2).""" + return self._data[..., 4:6] + + @property + def dist(self) -> torch.Tensor: + """Distortion parameters, with shape (..., {0, 2, 4}).""" + return self._data[..., 6:] + + @autocast + def scale(self, scales: torch.Tensor): + """Update the camera parameters after resizing an image.""" + s = scales + data = torch.cat([self.size * s, self.f * s, self.c * s, self.dist], -1) + return self.__class__(data) + + def crop(self, left_top: 
Tuple[float], size: Tuple[int]): + """Update the camera parameters after cropping an image.""" + left_top = self._data.new_tensor(left_top) + size = self._data.new_tensor(size) + data = torch.cat([size, self.f, self.c - left_top, self.dist], -1) + return self.__class__(data) + + @autocast + def in_image(self, p2d: torch.Tensor): + """Check if 2D points are within the image boundaries.""" + assert p2d.shape[-1] == 2 + # assert p2d.shape[:-2] == self.shape # allow broadcasting + size = self.size.unsqueeze(-2) + valid = torch.all((p2d >= 0) & (p2d <= (size - 1)), -1) + return valid + + @autocast + def project(self, p3d: torch.Tensor) -> Tuple[torch.Tensor]: + """Project 3D points into the camera plane and check for visibility.""" + z = p3d[..., -1] + valid = z > self.eps + z = z.clamp(min=self.eps) + p2d = p3d[..., :-1] / z.unsqueeze(-1) + return p2d, valid + + def J_project(self, p3d: torch.Tensor): + x, y, z = p3d[..., 0], p3d[..., 1], p3d[..., 2] + zero = torch.zeros_like(z) + z = z.clamp(min=self.eps) + J = torch.stack([1 / z, zero, -x / z**2, zero, 1 / z, -y / z**2], dim=-1) + J = J.reshape(p3d.shape[:-1] + (2, 3)) + return J # N x 2 x 3 + + @autocast + def distort(self, pts: torch.Tensor) -> Tuple[torch.Tensor]: + """Distort normalized 2D coordinates + and check for validity of the distortion model. 
+ """ + assert pts.shape[-1] == 2 + # assert pts.shape[:-2] == self.shape # allow broadcasting + return distort_points(pts, self.dist) + + def J_distort(self, pts: torch.Tensor): + return J_distort_points(pts, self.dist) # N x 2 x 2 + + @autocast + def denormalize(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert normalized 2D coordinates into pixel coordinates.""" + return p2d * self.f.unsqueeze(-2) + self.c.unsqueeze(-2) + + @autocast + def normalize(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert normalized 2D coordinates into pixel coordinates.""" + return (p2d - self.c.unsqueeze(-2)) / self.f.unsqueeze(-2) + + def J_denormalize(self): + return torch.diag_embed(self.f).unsqueeze(-3) # 1 x 2 x 2 + + @autocast + def cam2image(self, p3d: torch.Tensor) -> Tuple[torch.Tensor]: + """Transform 3D points into 2D pixel coordinates.""" + p2d, visible = self.project(p3d) + p2d, mask = self.distort(p2d) + p2d = self.denormalize(p2d) + valid = visible & mask & self.in_image(p2d) + return p2d, valid + + def J_world2image(self, p3d: torch.Tensor): + p2d_dist, valid = self.project(p3d) + J = self.J_denormalize() @ self.J_distort(p2d_dist) @ self.J_project(p3d) + return J, valid + + @autocast + def image2cam(self, p2d: torch.Tensor) -> torch.Tensor: + """Convert 2D pixel corrdinates to 3D points with z=1""" + assert self._data.shape + p2d = self.normalize(p2d) + # iterative undistortion + return to_homogeneous(p2d) + + def to_cameradict(self, camera_model: Optional[str] = None) -> List[Dict]: + data = self._data.clone() + if data.dim() == 1: + data = data.unsqueeze(0) + assert data.dim() == 2 + b, d = data.shape + if camera_model is None: + camera_model = {6: "PINHOLE", 8: "RADIAL", 10: "OPENCV"}[d] + cameras = [] + for i in range(b): + if camera_model.startswith("SIMPLE_"): + params = [x.item() for x in data[i, 3 : min(d, 7)]] + else: + params = [x.item() for x in data[i, 2:]] + cameras.append( + { + "model": camera_model, + "width": int(data[i, 0].item()), + 
def to_homogeneous(points):
    """Append a coordinate equal to 1 along the last dimension.

    Args:
        points: torch.Tensor or numpy.ndarray with size (..., N).

    Returns:
        An object of the same kind as the input with size (..., N+1).

    Raises:
        ValueError: if ``points`` is neither a torch.Tensor nor a
            numpy.ndarray.
    """
    if isinstance(points, torch.Tensor):
        ones = points.new_ones(points.shape[:-1] + (1,))
        return torch.cat((points, ones), dim=-1)
    if isinstance(points, np.ndarray):
        ones = np.ones(points.shape[:-1] + (1,), dtype=points.dtype)
        return np.concatenate((points, ones), axis=-1)
    raise ValueError
def skew_symmetric(v):
    """Build the cross-product matrix of a (batched) vector of size (..., 3).

    Returns:
        Tensor of size (..., 3, 3) laid out as
        ``[[0, -v2, v1], [v2, 0, -v0], [-v1, v0, 0]]``.
    """
    v0, v1, v2 = v[..., 0], v[..., 1], v[..., 2]
    zero = torch.zeros_like(v0)
    rows = [
        torch.stack((zero, -v2, v1), dim=-1),
        torch.stack((v2, zero, -v0), dim=-1),
        torch.stack((-v1, v0, zero), dim=-1),
    ]
    return torch.stack(rows, dim=-2)
def get_image_coords(img):
    """Return the (x, y) coordinates of every pixel center of ``img``.

    Args:
        img: tensor whose last two dimensions are (height, width).

    Returns:
        float32 tensor of shape (1, height, width, 2) on ``img.device``; the
        entry at row ``i``, column ``j`` is ``(j + 0.5, i + 0.5)``.
    """
    height, width = img.shape[-2:]
    rows, cols = torch.meshgrid(
        torch.arange(height, dtype=torch.float32, device=img.device),
        torch.arange(width, dtype=torch.float32, device=img.device),
        indexing="ij",
    )
    # x (column index) comes first, y (row index) second.
    grid = torch.stack((cols, rows), dim=-1)
    return grid.unsqueeze(0) + 0.5
+from src.utils.profiler import build_profiler + +from src.lightning.data import MultiSceneDataModule +from src.lightning.lightning_loftr import PL_LoFTR + +import torch + +def parse_args(): + # init a costum parser which will be added into pl.Trainer parser + # check documentation: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-flags + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + 'data_cfg_path', type=str, help='data config path') + parser.add_argument( + 'main_cfg_path', type=str, help='main config path') + parser.add_argument( + '--ckpt_path', type=str, default="weights/indoor_ds.ckpt", help='path to the checkpoint') + parser.add_argument( + '--dump_dir', type=str, default=None, help="if set, the matching results will be dump to dump_dir") + parser.add_argument( + '--profiler_name', type=str, default=None, help='options: [inference, pytorch], or leave it unset') + parser.add_argument( + '--batch_size', type=int, default=1, help='batch_size per gpu') + parser.add_argument( + '--num_workers', type=int, default=2) + parser.add_argument( + '--thr', type=float, default=None, help='modify the coarse-level matching threshold.') + parser.add_argument( + '--pixel_thr', type=float, default=None, help='modify the RANSAC threshold.') + parser.add_argument( + '--ransac', type=str, default=None, help='modify the RANSAC method') + parser.add_argument( + '--scannetX', type=int, default=None, help='ScanNet resize X') + parser.add_argument( + '--scannetY', type=int, default=None, help='ScanNet resize Y') + parser.add_argument( + '--megasize', type=int, default=None, help='MegaDepth resize') + parser.add_argument( + '--npe', action='store_true', default=False, help='') + parser.add_argument( + '--fp32', action='store_true', default=False, help='') + parser.add_argument( + '--ransac_times', type=int, default=None, help='repeat ransac multiple times for more robust evaluation') + 
def inplace_relu(m):
    """Set ``m.inplace = True`` when the class name of ``m`` contains 'ReLU'.

    Objects whose class name does not contain 'ReLU' are left untouched.
    """
    if 'ReLU' in type(m).__name__:
        m.inplace = True
config.LOFTR.MATCH_COARSE.BORDER_RM = args.rmbd + + if args.pixel_thr is not None: + config.TRAINER.RANSAC_PIXEL_THR = args.pixel_thr + + if args.ransac is not None: + config.TRAINER.POSE_ESTIMATION_METHOD = args.ransac + if args.ransac == 'LO-RANSAC' and config.TRAINER.RANSAC_PIXEL_THR == 0.5: + config.TRAINER.RANSAC_PIXEL_THR = 2.0 + + if args.fp32: + config.LOFTR.MP = False + + if args.half: + config.LOFTR.HALF = True + config.DATASET.FP16 = True + else: + config.LOFTR.HALF = False + config.DATASET.FP16 = False + + if args.flash: + config.LOFTR.COARSE.NO_FLASH = False + + loguru_logger.info(f"Args and config initialized!") + + # lightning module + profiler = build_profiler(args.profiler_name) + model = PL_LoFTR(config, pretrained_ckpt=args.ckpt_path, profiler=profiler, dump_dir=args.dump_dir) + loguru_logger.info(f"LoFTR-lightning initialized!") + + # lightning data + data_module = MultiSceneDataModule(args, config) + loguru_logger.info(f"DataModule initialized!") + + # lightning trainer + trainer = pl.Trainer.from_argparse_args(args, replace_sampler_ddp=False, logger=False) + + loguru_logger.info(f"Start testing!") + trainer.test(model, datamodule=data_module, verbose=False) \ No newline at end of file diff --git a/ui/config.yaml b/ui/config.yaml index 22fe08bc00873d1b4417f31c3f975eec0fb83040..d94cc3f67789b454c248b10468b9b2354ba358a9 100644 --- a/ui/config.yaml +++ b/ui/config.yaml @@ -35,7 +35,7 @@ matcher_zoo: name: Mast3R #dispaly name source: "CVPR 2024" github: https://github.com/naver/mast3r - paper: https://arxiv.org/abs/2312.14132 + paper: https://arxiv.org/abs/2406.09756 project: https://dust3r.europe.naverlabs.com display: true DUSt3R: @@ -91,6 +91,16 @@ matcher_zoo: paper: https://arxiv.org/pdf/2104.00680 project: https://zju3dv.github.io/loftr display: true + eloftr: + matcher: eloftr + dense: true + info: + name: Efficient LoFTR #dispaly name + source: "CVPR 2024" + github: https://github.com/zju3dv/efficientloftr + paper: 
https://zju3dv.github.io/efficientloftr/files/EfficientLoFTR.pdf + project: https://zju3dv.github.io/efficientloftr + display: true cotr: enable: false skip_ci: true