diff --git a/.gitignore b/.gitignore index 8cbe2e58034837efc5de2fa55d0963db97d83ad0..f89f210402661daa3b47ed2bfee122048b61ad94 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,7 @@ third_party/REKD Dockerfile hloc/matchers/dedode.py gradio_cached_examples + +hloc/matchers/quadtree.py +third_party/QuadTreeAttention +desktop.ini diff --git a/app.py b/app.py index c20e864d07dd835c81bc43f124a25e2340c74825..923d3beacda50dfc1292a7e0f17889a04fc62442 100644 --- a/app.py +++ b/app.py @@ -14,6 +14,9 @@ This Space demonstrates [Image Matching WebUI](https://github.com/Vincentqyw/ima 🔎 For more details about supported local features and matchers, please refer to https://github.com/Vincentqyw/image-matching-webui +🚀 All algorithms run on CPU for inference on HF, causing slow speeds and high latency. For faster inference, please download the [source code](https://github.com/Vincentqyw/image-matching-webui) for local deployment or check [openxlab space](https://github.com/Vincentqyw/image-matching-webui) and [direct URL](https://g-app-center-083997-7409-n9elr1.openxlab.space) + +🐛 Your feedback is valuable to me. Please do not hesitate to report any bugs [here](https://github.com/Vincentqyw/image-matching-webui/issues). 
""" @@ -102,9 +105,9 @@ def run(config): ) with gr.Row(): - button_reset = gr.Button(label="Reset", value="Reset") + button_reset = gr.Button(value="Reset") button_run = gr.Button( - label="Run Match", value="Run Match", variant="primary" + value="Run Match", variant="primary" ) with gr.Accordion("Advanced Setting", open=False): @@ -242,7 +245,7 @@ def run(config): ) with gr.Accordion("Open for More: Geometry info", open=False): geometry_result = gr.JSON(label="Reconstructed Geometry") - + # callbacks match_image_src.change( fn=ui_change_imagebox, diff --git a/common/utils.py b/common/utils.py index 4a0e19a035b64ddb32972a5e48bcdabf197afba6..0610c340a8483b918e61004eca0516782753247d 100644 --- a/common/utils.py +++ b/common/utils.py @@ -48,6 +48,7 @@ def gen_examples(): pairs = list(combinations(imgs_list, 2)) selected = random.sample(range(len(pairs)), count) return [pairs[i] for i in selected] + # image pair path path = "datasets/sacre_coeur/mapping" pairs = gen_images_pairs(path, len(example_matchers)) @@ -176,7 +177,10 @@ def compute_geom( if H is not None: geo_info["Homography"] = H.tolist() _, H1, H2 = cv2.stereoRectifyUncalibrated( - mkpts0.reshape(-1, 2), mkpts1.reshape(-1, 2), F, imgSize=(w1, h1) + mkpts0.reshape(-1, 2), + mkpts1.reshape(-1, 2), + F, + imgSize=(w1, h1), ) geo_info["H1"] = H1.tolist() geo_info["H2"] = H2.tolist() @@ -504,6 +508,11 @@ matcher_zoo = { "config_feature": extract_features.confs["d2net-ss"], "dense": False, }, + "rord": { + "config": match_features.confs["NN-mutual"], + "config_feature": extract_features.confs["rord"], + "dense": False, + }, # "d2net-ms": { # "config": match_features.confs["NN-mutual"], # "config_feature": extract_features.confs["d2net-ms"], diff --git a/hloc/extract_features.py b/hloc/extract_features.py index 8e8dc822d6a2415d0fa48e4fb3223a44d9ce8d2a..24932f73f59d804af103dd5fb7c3ca983958333b 100644 --- a/hloc/extract_features.py +++ b/hloc/extract_features.py @@ -115,6 +115,18 @@ confs = { "resize_max": 1600, }, 
}, + "rord": { + "output": "feats-rord-ss-n5000-r1600", + "model": { + "name": "rord", + "multiscale": False, + "max_keypoints": 5000, + }, + "preprocessing": { + "grayscale": False, + "resize_max": 1600, + }, + }, "rootsift": { "output": "feats-rootsift-n5000-r1600", "model": { diff --git a/hloc/extractors/alike.py b/hloc/extractors/alike.py index f7086186c7dcf828d81f19a8bdcc40214f9f7d21..dcfe4542301eaf0b0092d5e166e59915d033db57 100644 --- a/hloc/extractors/alike.py +++ b/hloc/extractors/alike.py @@ -1,6 +1,5 @@ import sys from pathlib import Path -import subprocess import torch from ..utils.base_model import BaseModel diff --git a/hloc/extractors/d2net.py b/hloc/extractors/d2net.py index 93921626c3b49daa2b243dd0b5f540b38f244cec..c6760acb9d3b036b5325a2e3ec2a30a70fb2684b 100644 --- a/hloc/extractors/d2net.py +++ b/hloc/extractors/d2net.py @@ -10,7 +10,6 @@ sys.path.append(str(d2net_path)) from lib.model_test import D2Net as _D2Net from lib.pyramid import process_multiscale - class D2Net(BaseModel): default_conf = { "model_name": "d2_tf.pth", diff --git a/hloc/extractors/darkfeat.py b/hloc/extractors/darkfeat.py index 6256317f20dd19b0f5e777ae0c78669e4a2e1bd5..80cee30c2327e49efea8ad615496c992a9c6291e 100644 --- a/hloc/extractors/darkfeat.py +++ b/hloc/extractors/darkfeat.py @@ -1,11 +1,8 @@ import sys from pathlib import Path import subprocess -import logging - from ..utils.base_model import BaseModel - -logger = logging.getLogger(__name__) +from .. import logger darkfeat_path = Path(__file__).parent / "../../third_party/DarkFeat" sys.path.append(str(darkfeat_path)) diff --git a/hloc/extractors/dedode.py b/hloc/extractors/dedode.py index c73577bd7be4b7998c884b92349d8f15ab3646c4..801333b32d53da36671733c0311127a902ce6195 100644 --- a/hloc/extractors/dedode.py +++ b/hloc/extractors/dedode.py @@ -1,10 +1,10 @@ import sys from pathlib import Path import subprocess -import logging import torch from PIL import Image from ..utils.base_model import BaseModel +from .. 
import logger import torchvision.transforms as transforms dedode_path = Path(__file__).parent / "../../third_party/DeDoDe" @@ -14,8 +14,6 @@ from DeDoDe import dedode_detector_L, dedode_descriptor_B from DeDoDe.utils import to_pixel_coords device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -logger = logging.getLogger(__name__) - class DeDoDe(BaseModel): default_conf = { diff --git a/hloc/extractors/example.py b/hloc/extractors/example.py index 75d06aef4b2522789308cf42564669915d92f961..6a7ff418b6227d54f192da3629b9f16ceb2d7fca 100644 --- a/hloc/extractors/example.py +++ b/hloc/extractors/example.py @@ -2,7 +2,7 @@ import sys from pathlib import Path import subprocess import torch -import logging +from .. import logger from ..utils.base_model import BaseModel @@ -12,8 +12,6 @@ sys.path.append(str(example_path)) # import some modules here device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -logger = logging.getLogger(__name__) - class Example(BaseModel): # change to your default configs diff --git a/hloc/extractors/fire_local.py b/hloc/extractors/fire_local.py index 6efb7389ba2a591b84c6b4dffdd26467cb1a4f96..b66ea57428e444237c6a0f7207e3c0d10ed48be8 100644 --- a/hloc/extractors/fire_local.py +++ b/hloc/extractors/fire_local.py @@ -1,13 +1,12 @@ from pathlib import Path import subprocess -import logging import sys import torch import torchvision.transforms as tvf from ..utils.base_model import BaseModel +from .. 
import logger -logger = logging.getLogger(__name__) fire_path = Path(__file__).parent / "../../third_party/fire" sys.path.append(str(fire_path)) diff --git a/hloc/extractors/netvlad.py b/hloc/extractors/netvlad.py index be642eb7d0af55b3184ca7ed441af76f41f4029a..81bc63bc74bc51c1d8de55ee6393bc3371fc7657 100644 --- a/hloc/extractors/netvlad.py +++ b/hloc/extractors/netvlad.py @@ -1,6 +1,5 @@ from pathlib import Path import subprocess -import logging import numpy as np import torch import torch.nn as nn @@ -9,8 +8,7 @@ import torchvision.models as models from scipy.io import loadmat from ..utils.base_model import BaseModel - -logger = logging.getLogger(__name__) +from .. import logger EPS = 1e-6 diff --git a/hloc/extractors/rord.py b/hloc/extractors/rord.py new file mode 100644 index 0000000000000000000000000000000000000000..6338ab6ba3edccf3b8366aded834bdcc9437939b --- /dev/null +++ b/hloc/extractors/rord.py @@ -0,0 +1,75 @@ +import sys +from pathlib import Path +import subprocess +import torch + +from ..utils.base_model import BaseModel +from .. 
rord_path = Path(__file__).parent / "../../third_party/RoRD"
sys.path.append(str(rord_path))
from lib.model_test import D2Net as _RoRD
from lib.pyramid import process_multiscale


class RoRD(BaseModel):
    """Local feature extractor wrapping the third-party RoRD network.

    The checkpoint is looked up under ``conf["checkpoint_dir"]`` and, when it
    is missing, downloaded with the ``gdown`` command-line tool (first
    directly, then once more through ``proxy``).
    """

    default_conf = {
        "model_name": "rord.pth",
        "checkpoint_dir": rord_path / "models",
        "use_relu": True,
        "multiscale": False,
        "max_keypoints": 1024,
    }
    required_inputs = ["image"]
    # Download location for each supported checkpoint file name.
    weight_urls = {
        "rord.pth": "https://drive.google.com/uc?id=12414ZGKwgPAjNTGtNrlB4VV9l7W76B2o&confirm=t",
    }
    # Proxy used for the second download attempt when the direct one fails.
    proxy = "http://localhost:1080"

    def _download_weights(self, link, model_path):
        """Download ``link`` to ``model_path`` with gdown, retrying via proxy.

        Raises:
            subprocess.CalledProcessError: if both the direct and the proxied
                ``gdown`` invocations exit with a non-zero status.
        """
        # parents=True so a fresh checkout without the third-party directory
        # tree does not fail before the download even starts.
        model_path.parent.mkdir(parents=True, exist_ok=True)
        cmd_wo_proxy = ["gdown", link, "-O", str(model_path)]
        cmd = cmd_wo_proxy + ["--proxy", self.proxy]
        logger.info(f"Downloading the RoRD model with `{cmd_wo_proxy}`.")
        try:
            subprocess.run(cmd_wo_proxy, check=True)
        except subprocess.CalledProcessError:
            logger.info(f"Downloading the RoRD model with `{cmd}`.")
            try:
                subprocess.run(cmd, check=True)
            except subprocess.CalledProcessError:
                logger.error("Failed to download the RoRD model.")
                raise

    def _init(self, conf):
        """Ensure the checkpoint exists locally and build the network.

        Args:
            conf: configuration mapping; reads ``checkpoint_dir``,
                ``model_name`` and ``use_relu``.

        Raises:
            KeyError: if the checkpoint is missing and ``model_name`` has no
                entry in ``weight_urls``.
            subprocess.CalledProcessError: if the download fails.
        """
        model_path = conf["checkpoint_dir"] / conf["model_name"]
        if not model_path.exists():
            # Resolve the URL only when a download is really needed, so a
            # locally provided checkpoint with a custom name still works.
            self._download_weights(
                self.weight_urls[conf["model_name"]], model_path
            )
        self.net = _RoRD(
            model_file=model_path, use_relu=conf["use_relu"], use_cuda=False
        )
        # Logged only after the network object has been constructed.
        logger.info("RoRD model loaded.")

    def _forward(self, data):
        """Extract keypoints, scores and descriptors from ``data["image"]``.

        Returns:
            dict with ``keypoints``, ``scores`` and ``descriptors`` tensors,
            each with a leading singleton dimension; descriptors are
            transposed before that dimension is added.
        """
        image = data["image"]
        image = image.flip(1)  # RGB -> BGR
        norm = image.new_tensor([103.939, 116.779, 123.68])
        image = image * 255 - norm.view(1, 3, 1, 1)  # caffe normalization

        if self.conf["multiscale"]:
            keypoints, scores, descriptors = process_multiscale(image, self.net)
        else:
            keypoints, scores, descriptors = process_multiscale(
                image, self.net, scales=[1]
            )
        keypoints = keypoints[:, [1, 0]]  # (x, y) and remove the scale

        # Keep the highest-scoring keypoints. A falsy limit (0 or None)
        # keeps everything; None previously crashed on the unary minus.
        max_keypoints = self.conf["max_keypoints"]
        idxs = scores.argsort()
        if max_keypoints:
            idxs = idxs[-max_keypoints:]
        keypoints = keypoints[idxs, :2]
        descriptors = descriptors[idxs]
        scores = scores[idxs]

        return {
            "keypoints": torch.from_numpy(keypoints)[None],
            "scores": torch.from_numpy(scores)[None],
            "descriptors": torch.from_numpy(descriptors.T)[None],
        }
import logger sys.path.append(str(Path(__file__).parent / "../../third_party")) from DKM.dkm import DKMv3_outdoor dkm_path = Path(__file__).parent / "../../third_party/DKM" device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -logger = logging.getLogger(__name__) - class DKMv3(BaseModel): default_conf = { diff --git a/hloc/matchers/gluestick.py b/hloc/matchers/gluestick.py index 1a9d8907ed9b82193100ad1fedc6a9333165d7f5..093ba3665c95ac881ae22682497fb5af5722a55b 100644 --- a/hloc/matchers/gluestick.py +++ b/hloc/matchers/gluestick.py @@ -1,11 +1,9 @@ import sys from pathlib import Path import subprocess -import logging import torch from ..utils.base_model import BaseModel - -logger = logging.getLogger(__name__) +from .. import logger gluestick_path = Path(__file__).parent / "../../third_party/GlueStick" sys.path.append(str(gluestick_path)) diff --git a/hloc/matchers/lightglue.py b/hloc/matchers/lightglue.py index c25e23394721cbc409f1952be2b3c0c31806fab2..f4983d0bc5513f60c14a99e84d08617d751c0115 100644 --- a/hloc/matchers/lightglue.py +++ b/hloc/matchers/lightglue.py @@ -1,9 +1,8 @@ import sys from pathlib import Path -import logging from ..utils.base_model import BaseModel +from .. import logger -logger = logging.getLogger(__name__) lightglue_path = Path(__file__).parent / "../../third_party/LightGlue" sys.path.append(str(lightglue_path)) from lightglue import LightGlue as LG diff --git a/hloc/matchers/roma.py b/hloc/matchers/roma.py index 96a830aeaed79c36bf81b666a234be1d9d5a0a13..1fe913270978346a124f17b57ca3d22c53d43d72 100644 --- a/hloc/matchers/roma.py +++ b/hloc/matchers/roma.py @@ -1,10 +1,10 @@ import sys from pathlib import Path import subprocess -import logging import torch from PIL import Image from ..utils.base_model import BaseModel +from .. 
import logger roma_path = Path(__file__).parent / "../../third_party/Roma" sys.path.append(str(roma_path)) @@ -12,8 +12,6 @@ sys.path.append(str(roma_path)) from roma.models.model_zoo.roma_models import roma_model device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -logger = logging.getLogger(__name__) - class Roma(BaseModel): default_conf = { diff --git a/hloc/matchers/sgmnet.py b/hloc/matchers/sgmnet.py index f39d4fe1ca6008d656cb6ab4d2a8d02a33ef52b9..7c5d1bd79ba7695fb9836a6228ccecab7d9f55c9 100644 --- a/hloc/matchers/sgmnet.py +++ b/hloc/matchers/sgmnet.py @@ -1,12 +1,12 @@ import sys from pathlib import Path import subprocess -import logging import torch from PIL import Image from collections import OrderedDict, namedtuple from ..utils.base_model import BaseModel from ..utils import do_system +from .. import logger sgmnet_path = Path(__file__).parent / "../../third_party/SGMNet" sys.path.append(str(sgmnet_path)) @@ -14,7 +14,6 @@ sys.path.append(str(sgmnet_path)) from sgmnet import matcher as SGM_Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -logger = logging.getLogger(__name__) class SGMNet(BaseModel): diff --git a/hloc/matchers/sold2.py b/hloc/matchers/sold2.py index 5feafc6e7b41aa3ef4c7e19e0736d0a58bd73c54..62e953ff7ca53076b887419bb25ec20c069677c6 100644 --- a/hloc/matchers/sold2.py +++ b/hloc/matchers/sold2.py @@ -4,18 +4,13 @@ from ..utils.base_model import BaseModel import torch from ..utils.base_model import BaseModel +from .. 
import logger +import subprocess sold2_path = Path(__file__).parent / "../../third_party/SOLD2" sys.path.append(str(sold2_path)) from sold2.model.line_matcher import LineMatcher -from sold2.misc.visualize_util import ( - plot_images, - plot_lines, - plot_line_matches, - plot_color_line_matches, - plot_keypoints, -) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -36,9 +31,21 @@ class SOLD2(BaseModel): "image1", ] + weight_urls = { + "sold2_wireframe.tar": "https://www.polybox.ethz.ch/index.php/s/blOrW89gqSLoHOk/download", + } # Initialize the line matcher def _init(self, conf): checkpoint_path = conf["checkpoint_dir"] / conf["weights"] + + # Download the model. + if not checkpoint_path.exists(): + checkpoint_path.parent.mkdir(exist_ok=True) + link = self.weight_urls[conf["weights"]] + cmd = ["wget", link, "-O", str(checkpoint_path)] + logger.info(f"Downloading the SOLD2 model with `{cmd}`.") + subprocess.run(cmd, check=True) + mode = "dynamic" # 'dynamic' or 'static' match_config = { "model_cfg": { diff --git a/hloc/utils/__init__.py b/hloc/utils/__init__.py index 5d5079d59ac615ce4c3d4b2e9e869eca9a4c411c..7c1e6e13ec689af7d948e5155ca773ee038df7bb 100644 --- a/hloc/utils/__init__.py +++ b/hloc/utils/__init__.py @@ -1,7 +1,7 @@ import os import logging - -logger = logging.getLogger(__name__) +import sys +from .. import logger def do_system(cmd, verbose=False): diff --git a/third_party/RoRD/LICENSE b/third_party/RoRD/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..02fe94a0cae3c3fcff8250a082bf233987e09388 --- /dev/null +++ b/third_party/RoRD/LICENSE @@ -0,0 +1,251 @@ +Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree to be +bound by the terms and conditions of this Creative Commons +Attribution-NonCommercial-NoDerivatives 4.0 International Public License +("Public License"). 
To the extent this Public License may be interpreted as a +contract, You are granted the Licensed Rights in consideration of Your +acceptance of these terms and conditions, and the Licensor grants You such +rights in consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + +Section 1 – Definitions. + + a. Adapted Material means material subject to Copyright and Similar Rights + that is derived from or based upon the Licensed Material and in which + the Licensed Material is translated, altered, arranged, transformed, or + otherwise modified in a manner requiring permission under the Copyright + and Similar Rights held by the Licensor. For purposes of this Public + License, where the Licensed Material is a musical work, performance, or + sound recording, Adapted Material is always produced where the Licensed + Material is synched in timed relation with a moving image. + b. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or categorized. + For purposes of this Public License, the rights specified in Section + 2(b)(1)-(2) are not Copyright and Similar Rights. + c. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright Treaty + adopted on December 20, 1996, and/or similar international agreements. + d. Exceptions and Limitations means fair use, fair dealing, and/or any + other exception or limitation to Copyright and Similar Rights that + applies to Your use of the Licensed Material. + e. Licensed Material means the artistic or literary work, database, or + other material to which the Licensor applied this Public License. + f. 
Licensed Rights means the rights granted to You subject to the terms + and conditions of this Public License, which are limited to all + Copyright and Similar Rights that apply to Your use of the Licensed + Material and that the Licensor has authority to license. + g. Licensor means the individual(s) or entity(ies) granting rights under + this Public License. + h. NonCommercial means not primarily intended for or directed towards + commercial advantage or monetary compensation. For purposes of this + Public License, the exchange of the Licensed Material for other + material subject to Copyright and Similar Rights by digital + file-sharing or similar means is NonCommercial provided there is no + payment of monetary compensation in connection with the exchange. + i. Share means to provide material to the public by any means or process + that requires permission under the Licensed Rights, such as + reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the public + may access the material from a place and at a time individually chosen + by them. + j. Sui Generis Database Rights means rights other than copyright resulting + from Directive 96/9/EC of the European Parliament and of the Council of + 11 March 1996 on the legal protection of databases, as amended and/or + succeeded, as well as other essentially equivalent rights anywhere in + the world. + k. You means the individual or entity exercising the Licensed Rights under + this Public License. Your has a corresponding meaning. + +Section 2 – Scope. + + a. License grant. + 1. Subject to the terms and conditions of this Public License, the + Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to exercise + the Licensed Rights in the Licensed Material to: + A. 
reproduce and Share the Licensed Material, in whole or in part, + for NonCommercial purposes only; and + B. produce and reproduce, but not Share, Adapted Material for + NonCommercial purposes only. + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public License + does not apply, and You do not need to comply with its terms and + conditions. + 3. Term. The term of this Public License is specified in Section 6(a). + 4. Media and formats; technical modifications allowed. The Licensor + authorizes You to exercise the Licensed Rights in all media and + formats whether now known or hereafter created, and to make + technical modifications necessary to do so. The Licensor waives + and/or agrees not to assert any right or authority to forbid You + from making technical modifications necessary to exercise the + Licensed Rights, including technical modifications necessary to + circumvent Effective Technological Measures. For purposes of this + Public License, simply making modifications authorized by this + Section 2(a)(4) never produces Adapted Material. + 5. Downstream recipients. + A. Offer from the Licensor – Licensed Material. Every recipient of + the Licensed Material automatically receives an offer from the + Licensor to exercise the Licensed Rights under the terms and + conditions of this Public License. + B. No downstream restrictions. You may not offer or impose any + additional or different terms or conditions on, or apply any + Effective Technological Measures to, the Licensed Material if + doing so restricts exercise of the Licensed Rights by any + recipient of the Licensed Material. + 6. No endorsement. 
Nothing in this Public License constitutes or may + be construed as permission to assert or imply that You are, or that + Your use of the Licensed Material is, connected with, or sponsored, + endorsed, or granted official status by, the Licensor or others + designated to receive attribution as provided in Section + 3(a)(1)(A)(i). + + b. Other rights. + 1. Moral rights, such as the right of integrity, are not licensed + under this Public License, nor are publicity, privacy, and/or other + similar personality rights; however, to the extent possible, the + Licensor waives and/or agrees not to assert any such rights held by + the Licensor to the limited extent necessary to allow You to + exercise the Licensed Rights, but not otherwise. + 2. Patent and trademark rights are not licensed under this Public + License. + 3. To the extent possible, the Licensor waives any right to collect + royalties from You for the exercise of the Licensed Rights, whether + directly or through a collecting society under any voluntary or + waivable statutory or compulsory licensing scheme. In all other + cases the Licensor expressly reserves any right to collect such + royalties, including when the Licensed Material is used other than + for NonCommercial purposes. + +Section 3 – License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material, You must: + A. retain the following if it is supplied by the Licensor with the + Licensed Material: + i. identification of the creator(s) of the Licensed Material + and any others designated to receive attribution, in any + reasonable manner requested by the Licensor (including by + pseudonym if designated); + ii. a copyright notice; + iii. a notice that refers to this Public License; + iv. a notice that refers to the disclaimer of warranties; + v. a URI or hyperlink to the Licensed Material to the extent + reasonably practicable; + B. 
indicate if You modified the Licensed Material and retain an + indication of any previous modifications; and + C. indicate the Licensed Material is licensed under this Public + License, and include the text of, or the URI or hyperlink to, + this Public License. + + For the avoidance of doubt, You do not have permission under this + Public License to Share Adapted Material. + + 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable + manner based on the medium, means, and context in which You Share + the Licensed Material. For example, it may be reasonable to satisfy + the conditions by providing a URI or hyperlink to a resource that + includes the required information. + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent reasonably + practicable. + +Section 4 – Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that apply to +Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right to + extract, reuse, reproduce, and Share all or a substantial portion of + the contents of the database for NonCommercial purposes only and + provided You do not Share Adapted Material; + b. if You include all or a substantial portion of the database contents in + a database in which You have Sui Generis Database Rights, then the + database in which You have Sui Generis Database Rights (but not its + individual contents) is Adapted Material; and + c. You must comply with the conditions in Section 3(a) if You Share all or + a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not replace +Your obligations under this Public License where the Licensed Rights include +other Copyright and Similar Rights. + +Section 5 – Disclaimer of Warranties and Limitation of Liability. + + a. 
Unless otherwise separately undertaken by the Licensor, to the extent + possible, the Licensor offers the Licensed Material as-is and + as-available, and makes no representations or warranties of any kind + concerning the Licensed Material, whether express, implied, statutory, + or other. This includes, without limitation, warranties of title, + merchantability, fitness for a particular purpose, non-infringement, + absence of latent or other defects, accuracy, or the presence or + absence of errors, whether or not known or discoverable. Where + disclaimers of warranties are not allowed in full or in part, this + disclaimer may not apply to You. + b. To the extent possible, in no event will the Licensor be liable to You + on any legal theory (including, without limitation, negligence) or + otherwise for any direct, special, indirect, incidental, consequential, + punitive, exemplary, or other losses, costs, expenses, or damages + arising out of this Public License or use of the Licensed Material, + even if the Licensor has been advised of the possibility of such + losses, costs, expenses, or damages. Where a limitation of liability is + not allowed in full or in part, this limitation may not apply to You. + c. The disclaimer of warranties and limitation of liability provided above + shall be interpreted in a manner that, to the extent possible, most + closely approximates an absolute disclaimer and waiver of all + liability. + +Section 6 – Term and Termination. + + a. This Public License applies for the term of the Copyright and Similar + Rights licensed here. However, if You fail to comply with this Public + License, then Your rights under this Public License terminate + automatically. + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + 1. automatically as of the date the violation is cured, provided it is + cured within 30 days of Your discovery of the violation; or + 2. 
upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any right + the Licensor may have to seek remedies for Your violations of this + Public License. + + c. For the avoidance of doubt, the Licensor may also offer the Licensed + Material under separate terms or conditions or stop distributing the + Licensed Material at any time; however, doing so will not terminate + this Public License. + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. + +Section 7 – Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different terms or + conditions communicated by You unless expressly agreed. + b. Any arrangements, understandings, or agreements regarding the Licensed + Material not stated herein are separate from and independent of the + terms and conditions of this Public License. + +Section 8 – Interpretation. + + a. For the avoidance of doubt, this Public License does not, and shall not + be interpreted to, reduce, limit, restrict, or impose conditions on any + use of the Licensed Material that could lawfully be made without + permission under this Public License. + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the minimum + extent necessary to make it enforceable. If the provision cannot be + reformed, it shall be severed from this Public License without + affecting the enforceability of the remaining terms and conditions. + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the Licensor. + d. Nothing in this Public License constitutes or may be interpreted as a + limitation upon, or waiver of, any privileges and immunities that apply + to the Licensor or You, including from the legal processes of any + jurisdiction or authority. 
\ No newline at end of file diff --git a/third_party/RoRD/assets/register_ortho.jpg b/third_party/RoRD/assets/register_ortho.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2cad7ed34c9f5dd5d6bf1c3a960f823730721a7e --- /dev/null +++ b/third_party/RoRD/assets/register_ortho.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae058933d1602e685c225a2593554a406f921cbda1f9a9a5a9292d30fe71c6e +size 76704 diff --git a/third_party/RoRD/assets/register_persp.jpg b/third_party/RoRD/assets/register_persp.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d6e4211f4a8122863bb43c1d0fbf46b3c3f1d3e9 --- /dev/null +++ b/third_party/RoRD/assets/register_persp.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76f0ca084e821269944e3ae1ace05ef8324393dfcc2c9193a67cd0f80391c9a +size 126866 diff --git a/third_party/RoRD/assets/register_pointcloud.jpg b/third_party/RoRD/assets/register_pointcloud.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8c4aba614bd70187c0175f64b6457f11fab94a4b --- /dev/null +++ b/third_party/RoRD/assets/register_pointcloud.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b726bf5a13ae292597b7c8b94fc3d1c7356fd6dfcceeb17b93bff74f65b3d19 +size 234238 diff --git a/third_party/RoRD/assets/rord_evalRT.jpg b/third_party/RoRD/assets/rord_evalRT.jpg new file mode 100644 index 0000000000000000000000000000000000000000..de7741abd191a2ad21e2fe46fc5d9d7726733d43 --- /dev/null +++ b/third_party/RoRD/assets/rord_evalRT.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b9055e5fc89083a0966a73de15539db38b62732cdff7846abe9ed68ca589ac3 +size 92889 diff --git a/third_party/RoRD/assets/rord_extract.jpg b/third_party/RoRD/assets/rord_extract.jpg new file mode 100644 index 0000000000000000000000000000000000000000..877bb0e65a22d1eaedbc7142ee7d694bd8c74e99 --- /dev/null +++ b/third_party/RoRD/assets/rord_extract.jpg @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedae008701ea04b4cf14ed15ace850a7b284df3103beceb4c45851725754d3f +size 113350 diff --git a/third_party/RoRD/assets/sift_extract.jpg b/third_party/RoRD/assets/sift_extract.jpg new file mode 100644 index 0000000000000000000000000000000000000000..41a85cf5fe74e92b7bc736d307963e49ad394b0e --- /dev/null +++ b/third_party/RoRD/assets/sift_extract.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9121c9d4e507232b3eb60f949194b48fcad6ac7c8f51f0f6ddeaf8d8269064b +size 78577 diff --git a/third_party/RoRD/assets/teaser2.jpg b/third_party/RoRD/assets/teaser2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c490a518adda63885069ce1a58fe124cf061ed3e --- /dev/null +++ b/third_party/RoRD/assets/teaser2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c76bf17da04332b8bdd1449d5896bfbcc277a73e7f937ede316a97d41624c3e +size 976363 diff --git a/third_party/RoRD/configs/camera.txt b/third_party/RoRD/configs/camera.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d6de117b66c81716a59ff42a6c969a0f0ec989f --- /dev/null +++ b/third_party/RoRD/configs/camera.txt @@ -0,0 +1,3 @@ +382.1996765136719 381.8395690917969 312.7102355957031 247.72047424316406 1000.0 + + diff --git a/third_party/RoRD/configs/train_scenes.txt b/third_party/RoRD/configs/train_scenes.txt new file mode 100644 index 0000000000000000000000000000000000000000..60aaa16bf2f9879dee1bf6bb318614d0b3c772ea --- /dev/null +++ b/third_party/RoRD/configs/train_scenes.txt @@ -0,0 +1,7 @@ +temple_nara_japan +brandenburg_gate +taj_mahal +buckingham_palace +grand_place_brussels +hagia_sophia_interior +westminster_abbey diff --git a/third_party/RoRD/configs/train_scenes_small.txt b/third_party/RoRD/configs/train_scenes_small.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f9438732d66b02540804289ed91e96ff14af035 --- /dev/null +++ 
b/third_party/RoRD/configs/train_scenes_small.txt @@ -0,0 +1 @@ +brandenburg_gate \ No newline at end of file diff --git a/third_party/RoRD/demo/__init__.py b/third_party/RoRD/demo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/third_party/RoRD/demo/depth/depth1_1.png b/third_party/RoRD/demo/depth/depth1_1.png new file mode 100644 index 0000000000000000000000000000000000000000..74af097adb6d9a522da12eb65623cd4ba3909912 --- /dev/null +++ b/third_party/RoRD/demo/depth/depth1_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b99487f8fd54fd5fc15e84d24f15287972f98eaf24a44b3daf1c6374e51b6cc +size 171080 diff --git a/third_party/RoRD/demo/depth/depth1_2.png b/third_party/RoRD/demo/depth/depth1_2.png new file mode 100644 index 0000000000000000000000000000000000000000..cfe80e710e2c53f1bb4fe3ec42953fe1df79a8a2 --- /dev/null +++ b/third_party/RoRD/demo/depth/depth1_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006b4531f1c204d846dda996a3cca93fcf81c74e01aad68183a5382531564659 +size 192951 diff --git a/third_party/RoRD/demo/depth/depth2_1.png b/third_party/RoRD/demo/depth/depth2_1.png new file mode 100644 index 0000000000000000000000000000000000000000..5687616e374ce2791cad1a6c99b34b5f7ab9aa12 --- /dev/null +++ b/third_party/RoRD/demo/depth/depth2_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd153d4d09f95b25361088c315dbcc92d6e97b329e27af35b2f4dde10433a743 +size 198731 diff --git a/third_party/RoRD/demo/depth/depth2_2.png b/third_party/RoRD/demo/depth/depth2_2.png new file mode 100644 index 0000000000000000000000000000000000000000..85590c7426910323839dc15a22c411f6904b9331 --- /dev/null +++ b/third_party/RoRD/demo/depth/depth2_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd35728a0e7a507e5b8ed8c9725925c0d78a756663dd3e87622e6970afdc64b8 +size 198050 diff --git 
a/third_party/RoRD/demo/depth/depth3_1.png b/third_party/RoRD/demo/depth/depth3_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9abd8d04a57daa704fe009682bbfc64ff5312eda --- /dev/null +++ b/third_party/RoRD/demo/depth/depth3_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80870cd890fdddf6307924f702653a9797cd0e3dc8775a0750253e58967b0993 +size 238663 diff --git a/third_party/RoRD/demo/depth/depth3_2.png b/third_party/RoRD/demo/depth/depth3_2.png new file mode 100644 index 0000000000000000000000000000000000000000..46ae367fdecae8b5c87f1760d4d69323bcd7bc26 --- /dev/null +++ b/third_party/RoRD/demo/depth/depth3_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2bb7f780db03e1a0898bfa16bf5466dc04f8d9e755d59ce9151425b80fce13 +size 279470 diff --git a/third_party/RoRD/demo/register.py b/third_party/RoRD/demo/register.py new file mode 100644 index 0000000000000000000000000000000000000000..ba626920887639c6c95f869231d8080de64c2ee8 --- /dev/null +++ b/third_party/RoRD/demo/register.py @@ -0,0 +1,265 @@ +import numpy as np +import copy +import argparse +import os, sys +import open3d as o3d +from sys import argv +from PIL import Image +import math +import cv2 +import torch + +sys.path.append("../") +from lib.extractMatchTop import getPerspKeypoints, getPerspKeypointsEnsemble, siftMatching +from lib.model_test import D2Net + +#### Cuda #### +use_cuda = torch.cuda.is_available() +device = torch.device('cuda:0' if use_cuda else 'cpu') + +#### Argument Parsing #### +parser = argparse.ArgumentParser(description='RoRD ICP evaluation') + +parser.add_argument( + '--rgb1', type=str, default = 'rgb/rgb2_1.jpg', + help='path to the rgb image1' +) +parser.add_argument( + '--rgb2', type=str, default = 'rgb/rgb2_2.jpg', + help='path to the rgb image2' +) + +parser.add_argument( + '--depth1', type=str, default = 'depth/depth2_1.png', + help='path to the depth image1' +) + +parser.add_argument( + '--depth2', 
type=str, default = 'depth/depth2_2.png', + help='path to the depth image2' +) + +parser.add_argument( + '--model_rord', type=str, default = '../models/rord.pth', + help='path to the RoRD model for evaluation' +) + +parser.add_argument( + '--model_d2', type=str, + help='path to the vanilla D2-Net model for evaluation' +) + +parser.add_argument( + '--model_ens', action='store_true', + help='ensemble model of RoRD + D2-Net' +) + +parser.add_argument( + '--sift', action='store_true', + help='Sift' +) + +parser.add_argument( + '--camera_file', type=str, default='../configs/camera.txt', + help='path to the camera intrinsics file. In order: focal_x, focal_y, center_x, center_y, scaling_factor.' +) + +parser.add_argument( + '--viz3d', action='store_true', + help='visualize the pointcloud registrations' +) + +args = parser.parse_args() + +if args.model_ens: # Change default paths accordingly for ensemble + model1_ens = '../../models/rord.pth' + model2_ens = '../../models/d2net.pth' + +def draw_registration_result(source, target, transformation): + source_temp = copy.deepcopy(source) + target_temp = copy.deepcopy(target) + source_temp.transform(transformation) + + target_temp += source_temp + # print("Saved registered PointCloud.") + # o3d.io.write_point_cloud("registered.pcd", target_temp) + + trgSph.append(source_temp); trgSph.append(target_temp) + axis1 = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + axis2 = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + axis2.transform(transformation) + trgSph.append(axis1); trgSph.append(axis2) + print("Showing registered PointCloud.") + o3d.visualization.draw_geometries(trgSph) + + +def readDepth(depthFile): + depth = Image.open(depthFile) + if depth.mode != "I": + raise Exception("Depth image is not in intensity format") + + return np.asarray(depth) + +def readCamera(camera): + with open (camera, "rt") as file: + contents = file.read().split() + + focalX = 
float(contents[0]) + focalY = float(contents[1]) + centerX = float(contents[2]) + centerY = float(contents[3]) + scalingFactor = float(contents[4]) + + return focalX, focalY, centerX, centerY, scalingFactor + +def getPointCloud(rgbFile, depthFile, pts): + thresh = 15.0 + + depth = readDepth(depthFile) + rgb = Image.open(rgbFile) + + points = [] + colors = [] + + corIdx = [-1]*len(pts) + corPts = [None]*len(pts) + ptIdx = 0 + + for v in range(depth.shape[0]): + for u in range(depth.shape[1]): + Z = depth[v, u] / scalingFactor + if Z==0: continue + if (Z > thresh): continue + + X = (u - centerX) * Z / focalX + Y = (v - centerY) * Z / focalY + + points.append((X, Y, Z)) + colors.append(rgb.getpixel((u, v))) + + if((u, v) in pts): + # print("Point found.") + index = pts.index((u, v)) + corIdx[index] = ptIdx + corPts[index] = (X, Y, Z) + + ptIdx = ptIdx+1 + + points = np.asarray(points) + colors = np.asarray(colors) + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + pcd.colors = o3d.utility.Vector3dVector(colors/255) + + return pcd, corIdx, corPts + + +def convertPts(A): + X = A[0]; Y = A[1] + + x = []; y = [] + + for i in range(len(X)): + x.append(int(float(X[i]))) + + for i in range(len(Y)): + y.append(int(float(Y[i]))) + + pts = [] + for i in range(len(x)): + pts.append((x[i], y[i])) + + return pts + + +def getSphere(pts): + sphs = [] + + for ele in pts: + if(ele is not None): + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.03) + sphere.paint_uniform_color([0.9, 0.2, 0]) + + trans = np.identity(4) + trans[0, 3] = ele[0] + trans[1, 3] = ele[1] + trans[2, 3] = ele[2] + + sphere.transform(trans) + sphs.append(sphere) + + return sphs + + +def get3dCor(src, trg): + corr = [] + + for sId, tId in zip(src, trg): + if(sId != -1 and tId != -1): + corr.append((sId, tId)) + + corr = np.asarray(corr) + + return corr + +if __name__ == "__main__": + + focalX, focalY, centerX, centerY, scalingFactor = readCamera(args.camera_file) + + 
rgb_name_src = os.path.basename(args.rgb1) + H_name_src = os.path.splitext(rgb_name_src)[0] + '.npy' + srcH = os.path.join(os.path.dirname(args.rgb1), H_name_src) + rgb_name_trg = os.path.basename(args.rgb2) + H_name_trg = os.path.splitext(rgb_name_trg)[0] + '.npy' + trgH = os.path.join(os.path.dirname(args.rgb2), H_name_trg) + + use_cuda = torch.cuda.is_available() + device = torch.device('cuda:0' if use_cuda else 'cpu') + model1 = D2Net(model_file=args.model_d2) + model1 = model1.to(device) + model2 = D2Net(model_file=args.model_rord) + model2 = model2.to(device) + + if args.model_rord: + srcPts, trgPts, matchImg, matchImgOrtho = getPerspKeypoints(args.rgb1, args.rgb2, srcH, trgH, model2, device) + elif args.model_d2: + srcPts, trgPts, matchImg, matchImgOrtho = getPerspKeypoints(args.rgb1, args.rgb2, srcH, trgH, model1, device) + elif args.model_ens: + model1 = D2Net(model_file=model1_ens) + model1 = model1.to(device) + model2 = D2Net(model_file=model2_ens) + model2 = model2.to(device) + srcPts, trgPts, matchImg, matchImgOrtho = getPerspKeypointsEnsemble(model1, model2, args.rgb1, args.rgb2, srcH, trgH, device) + elif args.sift: + srcPts, trgPts, matchImg, matchImgOrtho = siftMatching(args.rgb1, args.rgb2, srcH, trgH, device) + + #### Visualization #### + print("\nShowing matches in perspective and orthographic view. 
Press q\n") + cv2.imshow('Orthographic view', matchImgOrtho) + cv2.imshow('Perspective view', matchImg) + cv2.waitKey() + + srcPts = convertPts(srcPts) + trgPts = convertPts(trgPts) + + srcCld, srcIdx, srcCor = getPointCloud(args.rgb1, args.depth1, srcPts) + trgCld, trgIdx, trgCor = getPointCloud(args.rgb2, args.depth2, trgPts) + + srcSph = getSphere(srcCor) + trgSph = getSphere(trgCor) + axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + srcSph.append(srcCld); srcSph.append(axis) + trgSph.append(trgCld); trgSph.append(axis) + + corr = get3dCor(srcIdx, trgIdx) + + p2p = o3d.registration.TransformationEstimationPointToPoint() + trans_init = p2p.compute_transformation(srcCld, trgCld, o3d.utility.Vector2iVector(corr)) + print("Transformation matrix: \n", trans_init) + + if args.viz3d: + # o3d.visualization.draw_geometries(srcSph) + # o3d.visualization.draw_geometries(trgSph) + + draw_registration_result(srcCld, trgCld, trans_init) diff --git a/third_party/RoRD/demo/rgb/rgb1_1.jpg b/third_party/RoRD/demo/rgb/rgb1_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ac08fea8afae713813fbb8d5e0f6291ac55cd4de --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb1_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1d39690370373d343f7b5346d0680be4bf193db345116d6f2278239da4580b +size 76742 diff --git a/third_party/RoRD/demo/rgb/rgb1_1.npy b/third_party/RoRD/demo/rgb/rgb1_1.npy new file mode 100644 index 0000000000000000000000000000000000000000..aaa6c824cd9087e1342ac896a1aa2ed8370e517d --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb1_1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99396bb9e7c265b8bad5237806d37d8fd9d92a772e118f6de22668f1db011948 +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb1_2.jpg b/third_party/RoRD/demo/rgb/rgb1_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ec7f52ffecd5ec5d34ccacc626290e6d078308b5 --- /dev/null 
+++ b/third_party/RoRD/demo/rgb/rgb1_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8478af0cab017dfaf2c6d45831e67a0adfd882d02bb87379580c00098b1afa4a +size 76020 diff --git a/third_party/RoRD/demo/rgb/rgb1_2.npy b/third_party/RoRD/demo/rgb/rgb1_2.npy new file mode 100644 index 0000000000000000000000000000000000000000..69b70ad0364f2a0ae3e2f671f698a6c59d93fbb4 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb1_2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29bb750adcb50b497192ecbd554cf3cd74c3f1c9809d41994c5acd1654179f2 +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb2_1.jpg b/third_party/RoRD/demo/rgb/rgb2_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1b26f116a12245cbceb55c8726a1f8f58b527aeb --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb2_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a0652bcfbcf9cf6bb75768c9d0950705fb41fa75bb3c410ca13a046ec70c95 +size 103685 diff --git a/third_party/RoRD/demo/rgb/rgb2_1.npy b/third_party/RoRD/demo/rgb/rgb2_1.npy new file mode 100644 index 0000000000000000000000000000000000000000..2a77c3943b215939dd8b46b65f57efbeb3d35052 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb2_1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b911f2c3962789f99f31fc78313262ec3fa257b9dd8887d318f69fa7a303c04 +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb2_2.jpg b/third_party/RoRD/demo/rgb/rgb2_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..63aa0e8c6c504f0e9b1e44953b071b9ff7bbc839 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb2_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94b8cfc6f73be41d900a4600c35ac76b098e04375f57b3c32ccadb8f7d00660 +size 110673 diff --git a/third_party/RoRD/demo/rgb/rgb2_2.npy b/third_party/RoRD/demo/rgb/rgb2_2.npy new file mode 100644 index 
0000000000000000000000000000000000000000..72a268c76638ee131cf2f826c6ddfe27ca309c24 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb2_2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b82950927db25768fe129af2138f535faf3da2789c87e7c98957c90d8423f2 +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb3_1.jpg b/third_party/RoRD/demo/rgb/rgb3_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..13e95db092537577a9b045e685fc7a13ea1e5855 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb3_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e07ba17dfe649b98893a347596ec029e133b1696301b844c39c2c8fa54f994 +size 104833 diff --git a/third_party/RoRD/demo/rgb/rgb3_1.npy b/third_party/RoRD/demo/rgb/rgb3_1.npy new file mode 100644 index 0000000000000000000000000000000000000000..cf99b52bcf4ab4844b53976408f3256af900c551 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb3_1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3262c4ce815dad042112aed3f3a082806fc3dab62ee6bd02492ec94abbf6987 +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb3_2.jpg b/third_party/RoRD/demo/rgb/rgb3_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e80041ea9ca13373901e672c8e9475a02ab3aa6f --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb3_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b18c61d052100474665df7de431ed9bc7ee11ff1df56998c85822087b6e2bee +size 97519 diff --git a/third_party/RoRD/demo/rgb/rgb3_2.npy b/third_party/RoRD/demo/rgb/rgb3_2.npy new file mode 100644 index 0000000000000000000000000000000000000000..215edebc6b5de17302cbe1ca676c0aeeaa1a2d98 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb3_2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ead41093b722414997f4bd93c092c0962564fd8fa3f0749953d7de810c44a55 +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb4_1.jpg b/third_party/RoRD/demo/rgb/rgb4_1.jpg new 
file mode 100644 index 0000000000000000000000000000000000000000..778d47bdf569fa5032a899de2f5d664d5f9ffef8 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb4_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab274e73fc3b1359755b52e07ae2cc414edb62a798228513f2ac7209fefd4e0 +size 139284 diff --git a/third_party/RoRD/demo/rgb/rgb4_1.npy b/third_party/RoRD/demo/rgb/rgb4_1.npy new file mode 100644 index 0000000000000000000000000000000000000000..5f13bbda0cd07c0d5aadb2a40538bddd5ab70ee2 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb4_1.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3c0d4d53277bea29afafcf03e5dcd4c970b9669f1e802b0c79bcafb2fcfe8d +size 200 diff --git a/third_party/RoRD/demo/rgb/rgb4_2.jpg b/third_party/RoRD/demo/rgb/rgb4_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1a1bfd02a50f172e0534d3b7218fa41122d703ca --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb4_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:767d9b0c3c4691abac9ad8288e1f26d21fbae2b36bf8b8eb6ea882af4631846c +size 115978 diff --git a/third_party/RoRD/demo/rgb/rgb4_2.npy b/third_party/RoRD/demo/rgb/rgb4_2.npy new file mode 100644 index 0000000000000000000000000000000000000000..48848cd735401e302bf4ac6970afa0800c6d9a95 --- /dev/null +++ b/third_party/RoRD/demo/rgb/rgb4_2.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5322abcec53e89387d666a7ab594cbc97a80b860876b4db2d053772d0727a95b +size 200 diff --git a/third_party/RoRD/demo/rgb/teaser.jpg b/third_party/RoRD/demo/rgb/teaser.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b2b7ec4a4687230b5d899dcade68d03881d80d87 --- /dev/null +++ b/third_party/RoRD/demo/rgb/teaser.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a06efb176f53c816f568f63d5aa3b1f054fa2e22607fc15776d94e3d216eaab +size 462702 diff --git a/third_party/RoRD/evaluation/DiverseView/evalRT.py 
b/third_party/RoRD/evaluation/DiverseView/evalRT.py new file mode 100644 index 0000000000000000000000000000000000000000..d0be9aef58e408668112e0587a03b2b33012a342 --- /dev/null +++ b/third_party/RoRD/evaluation/DiverseView/evalRT.py @@ -0,0 +1,307 @@ +import numpy as np +import argparse +import copy +import os, sys +import open3d as o3d +from sys import argv, exit +from PIL import Image +import math +from tqdm import tqdm +import cv2 + + +sys.path.append("../../") + +from lib.extractMatchTop import getPerspKeypoints, getPerspKeypointsEnsemble, siftMatching +import pandas as pd + + +import torch +from lib.model_test import D2Net + +#### Cuda #### +use_cuda = torch.cuda.is_available() +device = torch.device('cuda:0' if use_cuda else 'cpu') + +#### Argument Parsing #### +parser = argparse.ArgumentParser(description='RoRD ICP evaluation on a DiverseView dataset sequence.') + +parser.add_argument('--dataset', type=str, default='/scratch/udit/realsense/RoRD_data/preprocessed/', + help='path to the dataset folder') + +parser.add_argument('--sequence', type=str, default='data1') + +parser.add_argument( + '--output_dir', type=str, default='out', + help='output directory for RT estimates' +) + +parser.add_argument( + '--model_rord', type=str, help='path to the RoRD model for evaluation' +) + +parser.add_argument( + '--model_d2', type=str, help='path to the vanilla D2-Net model for evaluation' +) + +parser.add_argument( + '--model_ens', action='store_true', + help='ensemble model of RoRD + D2-Net' +) + +parser.add_argument( + '--sift', action='store_true', + help='Sift' +) + +parser.add_argument( + '--viz3d', action='store_true', + help='visualize the pointcloud registrations' +) + +parser.add_argument( + '--log_interval', type=int, default=9, + help='Matched image logging interval' +) + +parser.add_argument( + '--camera_file', type=str, default='../../configs/camera.txt', + help='path to the camera intrinsics file. 
In order: focal_x, focal_y, center_x, center_y, scaling_factor.' +) + +parser.add_argument( + '--persp', action='store_true', default=False, + help='Feature matching on perspective images.' +) + +parser.set_defaults(fp16=False) +args = parser.parse_args() + + +if args.model_ens: # Change default paths accordingly for ensemble + model1_ens = '../../models/rord.pth' + model2_ens = '../../models/d2net.pth' + +def draw_registration_result(source, target, transformation): + source_temp = copy.deepcopy(source) + target_temp = copy.deepcopy(target) + source_temp.transform(transformation) + trgSph.append(source_temp); trgSph.append(target_temp) + axis1 = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + axis2 = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + axis2.transform(transformation) + trgSph.append(axis1); trgSph.append(axis2) + o3d.visualization.draw_geometries(trgSph) + +def readDepth(depthFile): + depth = Image.open(depthFile) + if depth.mode != "I": + raise Exception("Depth image is not in intensity format") + + return np.asarray(depth) + +def readCamera(camera): + with open (camera, "rt") as file: + contents = file.read().split() + + focalX = float(contents[0]) + focalY = float(contents[1]) + centerX = float(contents[2]) + centerY = float(contents[3]) + scalingFactor = float(contents[4]) + + return focalX, focalY, centerX, centerY, scalingFactor + + +def getPointCloud(rgbFile, depthFile, pts): + thresh = 15.0 + + depth = readDepth(depthFile) + rgb = Image.open(rgbFile) + + points = [] + colors = [] + + corIdx = [-1]*len(pts) + corPts = [None]*len(pts) + ptIdx = 0 + + for v in range(depth.shape[0]): + for u in range(depth.shape[1]): + Z = depth[v, u] / scalingFactor + if Z==0: continue + if (Z > thresh): continue + + X = (u - centerX) * Z / focalX + Y = (v - centerY) * Z / focalY + + points.append((X, Y, Z)) + colors.append(rgb.getpixel((u, v))) + + if((u, v) in pts): + index = pts.index((u, v)) + 
corIdx[index] = ptIdx + corPts[index] = (X, Y, Z) + + ptIdx = ptIdx+1 + + points = np.asarray(points) + colors = np.asarray(colors) + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + pcd.colors = o3d.utility.Vector3dVector(colors/255) + + return pcd, corIdx, corPts + + +def convertPts(A): + X = A[0]; Y = A[1] + + x = []; y = [] + + for i in range(len(X)): + x.append(int(float(X[i]))) + + for i in range(len(Y)): + y.append(int(float(Y[i]))) + + pts = [] + for i in range(len(x)): + pts.append((x[i], y[i])) + + return pts + + +def getSphere(pts): + sphs = [] + + for element in pts: + if(element is not None): + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.03) + sphere.paint_uniform_color([0.9, 0.2, 0]) + + trans = np.identity(4) + trans[0, 3] = element[0] + trans[1, 3] = element[1] + trans[2, 3] = element[2] + + sphere.transform(trans) + sphs.append(sphere) + + return sphs + + +def get3dCor(src, trg): + corr = [] + + for sId, tId in zip(src, trg): + if(sId != -1 and tId != -1): + corr.append((sId, tId)) + + corr = np.asarray(corr) + + return corr + +if __name__ == "__main__": + camera_file = args.camera_file + rgb_csv = args.dataset + args.sequence + '/rtImagesRgb.csv' + depth_csv = args.dataset + args.sequence + '/rtImagesDepth.csv' + + os.makedirs(os.path.join(args.output_dir, 'vis'), exist_ok=True) + dir_name = args.output_dir + os.makedirs(args.output_dir, exist_ok=True) + + focalX, focalY, centerX, centerY, scalingFactor = readCamera(camera_file) + + df_rgb = pd.read_csv(rgb_csv) + df_dep = pd.read_csv(depth_csv) + + model1 = D2Net(model_file=args.model_d2).to(device) + model2 = D2Net(model_file=args.model_rord).to(device) + + queryId = 0 + for im_q, dep_q in tqdm(zip(df_rgb['query'], df_dep['query']), total=df_rgb.shape[0]): + filter_list = [] + dbId = 0 + for im_d, dep_d in tqdm(zip(df_rgb.iteritems(), df_dep.iteritems()), total=df_rgb.shape[1]): + if im_d[0] == 'query': + continue + rgb_name_src = 
os.path.basename(im_q) + H_name_src = os.path.splitext(rgb_name_src)[0] + '.npy' + srcH = args.dataset + args.sequence + '/rgb/' + H_name_src + rgb_name_trg = os.path.basename(im_d[1][1]) + H_name_trg = os.path.splitext(rgb_name_trg)[0] + '.npy' + trgH = args.dataset + args.sequence + '/rgb/' + H_name_trg + + srcImg = srcH.replace('.npy', '.jpg') + trgImg = trgH.replace('.npy', '.jpg') + + if args.model_rord: + if args.persp: + srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, HFile1=None, HFile2=None, model=model2, device=device) + else: + srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, srcH, trgH, model2, device) + + elif args.model_d2: + if args.persp: + srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, HFile1=None, HFile2=None, model=model2, device=device) + else: + srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, srcH, trgH, model1, device) + + elif args.model_ens: + model1 = D2Net(model_file=model1_ens) + model1 = model1.to(device) + model2 = D2Net(model_file=model2_ens) + model2 = model2.to(device) + srcPts, trgPts, matchImg = getPerspKeypointsEnsemble(model1, model2, srcImg, trgImg, srcH, trgH, device) + + elif args.sift: + if args.persp: + srcPts, trgPts, matchImg, _ = siftMatching(srcImg, trgImg, HFile1=None, HFile2=None, device=device) + else: + srcPts, trgPts, matchImg, _ = siftMatching(srcImg, trgImg, srcH, trgH, device) + + if(isinstance(srcPts, list) == True): + print(np.identity(4)) + filter_list.append(np.identity(4)) + continue + + + srcPts = convertPts(srcPts) + trgPts = convertPts(trgPts) + + depth_name_src = os.path.dirname(os.path.dirname(args.dataset)) + '/' + dep_q + depth_name_trg = os.path.dirname(os.path.dirname(args.dataset)) + '/' + dep_d[1][1] + + srcCld, srcIdx, srcCor = getPointCloud(srcImg, depth_name_src, srcPts) + trgCld, trgIdx, trgCor = getPointCloud(trgImg, depth_name_trg, trgPts) + + srcSph = getSphere(srcCor) + trgSph = getSphere(trgCor) + axis = 
o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0]) + srcSph.append(srcCld); srcSph.append(axis) + trgSph.append(trgCld); trgSph.append(axis) + + corr = get3dCor(srcIdx, trgIdx) + + p2p = o3d.pipelines.registration.TransformationEstimationPointToPoint() + trans_init = p2p.compute_transformation(srcCld, trgCld, o3d.utility.Vector2iVector(corr)) + # print(trans_init) + filter_list.append(trans_init) + + if args.viz3d: + o3d.visualization.draw_geometries(srcSph) + o3d.visualization.draw_geometries(trgSph) + draw_registration_result(srcCld, trgCld, trans_init) + + if(dbId%args.log_interval == 0): + cv2.imwrite(os.path.join(args.output_dir, 'vis') + "/matchImg.%02d.%02d.jpg"%(queryId, dbId//args.log_interval), matchImg) + dbId += 1 + + + RT = np.stack(filter_list).transpose(1,2,0) + + np.save(os.path.join(dir_name, str(queryId) + '.npy'), RT) + queryId += 1 + print('-----check-------', RT.shape) diff --git a/third_party/RoRD/extractMatch.py b/third_party/RoRD/extractMatch.py new file mode 100644 index 0000000000000000000000000000000000000000..b413dde1334b52fef294fb0c10c2acfe5b901534 --- /dev/null +++ b/third_party/RoRD/extractMatch.py @@ -0,0 +1,195 @@ +import argparse + +import numpy as np + +import imageio + +import torch + +from tqdm import tqdm +import time +import scipy +import scipy.io +import scipy.misc +import os +import sys + +from lib.model_test import D2Net +from lib.utils import preprocess_image +from lib.pyramid import process_multiscale + +import cv2 +import matplotlib.pyplot as plt +from PIL import Image +from skimage.feature import match_descriptors +from skimage.measure import ransac +from skimage.transform import ProjectiveTransform, AffineTransform +import pydegensac + + +parser = argparse.ArgumentParser(description='Feature extraction script') +parser.add_argument('imgs', type=str, nargs=2) +parser.add_argument( + '--preprocessing', type=str, default='caffe', + help='image preprocessing (caffe or torch)' +) + 
+parser.add_argument( + '--model_file', type=str, + help='path to the full model' +) + +parser.add_argument( + '--no-relu', dest='use_relu', action='store_false', + help='remove ReLU after the dense feature extraction module' +) +parser.set_defaults(use_relu=True) + +parser.add_argument( + '--sift', dest='use_sift', action='store_true', + help='Show sift matching as well' +) +parser.set_defaults(use_sift=False) + + +def extract(image, args, model, device): + if len(image.shape) == 2: + image = image[:, :, np.newaxis] + image = np.repeat(image, 3, -1) + + input_image = preprocess_image( + image, + preprocessing=args.preprocessing + ) + with torch.no_grad(): + keypoints, scores, descriptors = process_multiscale( + torch.tensor( + input_image[np.newaxis, :, :, :].astype(np.float32), + device=device + ), + model, + scales=[1] + ) + + keypoints = keypoints[:, [1, 0, 2]] + + feat = {} + feat['keypoints'] = keypoints + feat['scores'] = scores + feat['descriptors'] = descriptors + + return feat + + +def rordMatching(image1, image2, feat1, feat2, matcher="BF"): + if(matcher == "BF"): + + t0 = time.time() + bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + matches = bf.match(feat1['descriptors'], feat2['descriptors']) + matches = sorted(matches, key=lambda x:x.distance) + t1 = time.time() + print("Time to extract matches: ", t1-t0) + + print("Number of raw matches:", len(matches)) + + match1 = [m.queryIdx for m in matches] + match2 = [m.trainIdx for m in matches] + + keypoints_left = feat1['keypoints'][match1, : 2] + keypoints_right = feat2['keypoints'][match2, : 2] + + np.random.seed(0) + + t0 = time.time() + + H, inliers = pydegensac.findHomography(keypoints_left, keypoints_right, 10.0, 0.99, 10000) + + t1 = time.time() + print("Time for ransac: ", t1-t0) + + n_inliers = np.sum(inliers) + print('Number of inliers: %d.' 
% n_inliers) + + inlier_keypoints_left = [cv2.KeyPoint(point[0], point[1], 1) for point in keypoints_left[inliers]] + inlier_keypoints_right = [cv2.KeyPoint(point[0], point[1], 1) for point in keypoints_right[inliers]] + placeholder_matches = [cv2.DMatch(idx, idx, 1) for idx in range(n_inliers)] + + draw_params = dict(matchColor = (0,255,0), + singlePointColor = (255,0,0), + # matchesMask = matchesMask, + flags = 0) + image3 = cv2.drawMatches(image1, inlier_keypoints_left, image2, inlier_keypoints_right, placeholder_matches, None, **draw_params) + + plt.figure(figsize=(20, 20)) + plt.imshow(image3) + plt.axis('off') + plt.show() + + +def siftMatching(img1, img2): + img1 = np.array(cv2.cvtColor(np.array(img1), cv2.COLOR_BGR2RGB)) + img2 = np.array(cv2.cvtColor(np.array(img2), cv2.COLOR_BGR2RGB)) + + # surf = cv2.xfeatures2d.SURF_create(100) + surf = cv2.xfeatures2d.SIFT_create() + + kp1, des1 = surf.detectAndCompute(img1, None) + kp2, des2 = surf.detectAndCompute(img2, None) + + FLANN_INDEX_KDTREE = 0 + index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) + search_params = dict(checks = 50) + flann = cv2.FlannBasedMatcher(index_params, search_params) + matches = flann.knnMatch(des1,des2,k=2) + good = [] + for m, n in matches: + if m.distance < 0.7*n.distance: + good.append(m) + + src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1, 2) + dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1, 2) + + model, inliers = pydegensac.findHomography(src_pts, dst_pts, 10.0, 0.99, 10000) + + n_inliers = np.sum(inliers) + print('Number of inliers: %d.' 
% n_inliers) + + inlier_keypoints_left = [cv2.KeyPoint(point[0], point[1], 1) for point in src_pts[inliers]] + inlier_keypoints_right = [cv2.KeyPoint(point[0], point[1], 1) for point in dst_pts[inliers]] + placeholder_matches = [cv2.DMatch(idx, idx, 1) for idx in range(n_inliers)] + image3 = cv2.drawMatches(img1, inlier_keypoints_left, img2, inlier_keypoints_right, placeholder_matches, None) + + cv2.imshow('Matches', image3) + cv2.waitKey(0) + + src_pts = np.float32([ inlier_keypoints_left[m.queryIdx].pt for m in placeholder_matches ]).reshape(-1, 2) + dst_pts = np.float32([ inlier_keypoints_right[m.trainIdx].pt for m in placeholder_matches ]).reshape(-1, 2) + + return src_pts, dst_pts + + +if __name__ == '__main__': + use_cuda = torch.cuda.is_available() + device = torch.device("cuda:0" if use_cuda else "cpu") + args = parser.parse_args() + + model = D2Net( + model_file=args.model_file, + use_relu=args.use_relu, + use_cuda=use_cuda + ) + + image1 = np.array(Image.open(args.imgs[0])) + image2 = np.array(Image.open(args.imgs[1])) + + print('--\nRoRD\n--') + feat1 = extract(image1, args, model, device) + feat2 = extract(image2, args, model, device) + print("Features extracted.") + + rordMatching(image1, image2, feat1, feat2, matcher="BF") + + if(args.use_sift): + print('--\nSIFT\n--') + siftMatching(image1, image2) diff --git a/third_party/RoRD/requirements.txt b/third_party/RoRD/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b1ccf515e5c18ac9dfb51d110e9f225de8fb3dab --- /dev/null +++ b/third_party/RoRD/requirements.txt @@ -0,0 +1,11 @@ +torch==1.7.0 +torchvision==0.8.1 +opencv-python==3.4.2.16 +opencv-contrib-python==3.4.2.16 +pydegensac +tqdm +imageio +scipy +numpy +scikit-image +open3d==0.9.0.0 diff --git a/third_party/RoRD/scripts/getRTImages.py b/third_party/RoRD/scripts/getRTImages.py new file mode 100644 index 0000000000000000000000000000000000000000..6972c349c0dc2c046c67e194ba79ea6d7da725bd --- /dev/null +++ 
b/third_party/RoRD/scripts/getRTImages.py @@ -0,0 +1,54 @@ +import os +import re +from sys import argv, exit +import csv +import numpy as np + + +def natural_sort(l): + convert = lambda text: int(text) if text.isdigit() else text.lower() + alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] + return sorted(l, key = alphanum_key) + + +def getPairs(imgs): + queryIdxs = np.linspace(start=0, stop=len(imgs)-1, num=10).astype(int).tolist() + databaseIdxs = np.linspace(start=10, stop=len(imgs)-10, num=100).astype(int).tolist() + + queryImgs = [imgs[idx] for idx in queryIdxs] + databaseImgs = [imgs[idx] for idx in databaseIdxs] + + return queryImgs, databaseImgs + + +def writeCSV(qImgs, dImgs): + with open('rtImagesDepth.csv', 'w', newline='') as file: + writer = csv.writer(file) + + title = [] + title.append('query') + + for i in range(len(dImgs)): + title.append('data' + str(i+1)) + + writer.writerow(title) + + for qImg in qImgs: + row = [] + row.append(qImg) + + for dImg in dImgs: + row.append(dImg) + + writer.writerow(row) + + +if __name__ == '__main__': + rgbDir = argv[1] + rgbImgs = natural_sort([file for file in os.listdir(rgbDir) if (file.find("jpg") != -1 or file.find("png") != -1)]) + + rgbImgs = [os.path.join(rgbDir, img) for img in rgbImgs] + + queryImgs, databaseImgs = getPairs(rgbImgs) + + writeCSV(queryImgs, databaseImgs) \ No newline at end of file diff --git a/third_party/RoRD/scripts/metricRT.py b/third_party/RoRD/scripts/metricRT.py new file mode 100644 index 0000000000000000000000000000000000000000..99a323b269e79d4c8f179bae3227224beff57f6c --- /dev/null +++ b/third_party/RoRD/scripts/metricRT.py @@ -0,0 +1,63 @@ +import numpy as np +import re +import os +import argparse + + +def natural_sort(l): + convert = lambda text: int(text) if text.isdigit() else text.lower() + alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] + + return sorted(l, key = alphanum_key) + + +def angular_distance_np(R_hat, R): + # 
measure the angular distance between two rotation matrice + # R1,R2: [n, 3, 3] + if R_hat.shape == (3,3): + R_hat = R_hat[np.newaxis,:] + if R.shape == (3,3): + R = R[np.newaxis,:] + n = R.shape[0] + trace_idx = [0,4,8] + trace = np.matmul(R_hat, R.transpose(0,2,1)).reshape(n,-1)[:,trace_idx].sum(1) + metric = np.arccos(((trace - 1)/2).clip(-1,1)) / np.pi * 180.0 + + return metric + + +def main(): + parser = argparse.ArgumentParser(description='Rotation and translation metric.') + parser.add_argument('--trans1', type=str) + parser.add_argument('--trans2', type=str) + + args = parser.parse_args() + + transFiles1 = natural_sort([file for file in os.listdir(args.trans1) if (file.find("npy") != -1 )]) + transFiles1 = [os.path.join(args.trans1, img) for img in transFiles1] + + transFiles2 = natural_sort([file for file in os.listdir(args.trans2) if (file.find("npy") != -1 )]) + transFiles2 = [os.path.join(args.trans2, img) for img in transFiles2] + + # print(len(transFiles1), transFiles1) + # print(len(transFiles2), transFiles2) + + for T1_file, T2_file in zip(transFiles1, transFiles2): + T1 = np.load(T1_file) + T2 = np.load(T2_file) + print("Shapes: ", T1.shape, T2.shape) + + for i in range(T1.shape[2]): + R1 = T1[:3, :3, i] + R2 = T2[:3, :3, i] + t1 = T1[:4, -1, i] + t2 = T2[:4, -1, i] + + R_norm = angular_distance_np(R1.reshape(1,3,3), R2.reshape(1,3,3))[0] + + print("R norm:", R_norm) + exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/third_party/RoRD/trainPT_ipr.py b/third_party/RoRD/trainPT_ipr.py new file mode 100644 index 0000000000000000000000000000000000000000..f730bbb52338509956e9979ddb07d5bef0bd57d0 --- /dev/null +++ b/third_party/RoRD/trainPT_ipr.py @@ -0,0 +1,225 @@ +import argparse +import numpy as np +import os +import sys + +import shutil + +import torch +import torch.optim as optim + +from torch.utils.data import DataLoader + +from tqdm import tqdm + +import warnings + +from lib.exceptions import NoGradientError 
+from lib.losses.lossPhotoTourism import loss_function +from lib.model import D2Net +from lib.dataloaders.datasetPhotoTourism_ipr import PhotoTourismIPR + + +# CUDA +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if use_cuda else "cpu") + +# Seed +torch.manual_seed(1) +if use_cuda: + torch.cuda.manual_seed(1) +np.random.seed(1) + +# Argument parsing +parser = argparse.ArgumentParser(description='Training script') + +parser.add_argument( + '--dataset_path', type=str, default="/scratch/udit/phototourism/", + help='path to the dataset' +) + +parser.add_argument( + '--preprocessing', type=str, default='caffe', + help='image preprocessing (caffe or torch)' +) + +parser.add_argument( + '--init_model', type=str, default='models/d2net.pth', + help='path to the initial model' +) + +parser.add_argument( + '--num_epochs', type=int, default=10, + help='number of training epochs' +) +parser.add_argument( + '--lr', type=float, default=1e-3, + help='initial learning rate' +) +parser.add_argument( + '--batch_size', type=int, default=1, + help='batch size' +) +parser.add_argument( + '--num_workers', type=int, default=16, + help='number of workers for data loading' +) + +parser.add_argument( + '--log_interval', type=int, default=250, + help='loss logging interval' +) + +parser.add_argument( + '--log_file', type=str, default='log.txt', + help='loss logging file' +) + +parser.add_argument( + '--plot', dest='plot', action='store_true', + help='plot training pairs' +) +parser.set_defaults(plot=False) + +parser.add_argument( + '--checkpoint_directory', type=str, default='checkpoints', + help='directory for training checkpoints' +) +parser.add_argument( + '--checkpoint_prefix', type=str, default='rord', + help='prefix for training checkpoints' +) + +args = parser.parse_args() +print(args) + +# Creating CNN model +model = D2Net( + model_file=args.init_model, + use_cuda=False +) +model = model.to(device) + +# Optimizer +optimizer = optim.Adam( + filter(lambda p: 
p.requires_grad, model.parameters()), lr=args.lr +) + +training_dataset = PhotoTourismIPR( + base_path=args.dataset_path, + preprocessing=args.preprocessing +) +training_dataset.build_dataset() + +training_dataloader = DataLoader( + training_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers +) + +# Define epoch function +def process_epoch( + epoch_idx, + model, loss_function, optimizer, dataloader, device, + log_file, args, train=True, plot_path=None +): + epoch_losses = [] + + torch.set_grad_enabled(train) + + progress_bar = tqdm(enumerate(dataloader), total=len(dataloader)) + for batch_idx, batch in progress_bar: + if train: + optimizer.zero_grad() + + batch['train'] = train + batch['epoch_idx'] = epoch_idx + batch['batch_idx'] = batch_idx + batch['batch_size'] = args.batch_size + batch['preprocessing'] = args.preprocessing + batch['log_interval'] = args.log_interval + + try: + loss = loss_function(model, batch, device, plot=args.plot, plot_path=plot_path) + except NoGradientError: + # print("failed") + continue + + current_loss = loss.data.cpu().numpy()[0] + epoch_losses.append(current_loss) + + progress_bar.set_postfix(loss=('%.4f' % np.mean(epoch_losses))) + + if batch_idx % args.log_interval == 0: + log_file.write('[%s] epoch %d - batch %d / %d - avg_loss: %f\n' % ( + 'train' if train else 'valid', + epoch_idx, batch_idx, len(dataloader), np.mean(epoch_losses) + )) + + if train: + loss.backward() + optimizer.step() + + log_file.write('[%s] epoch %d - avg_loss: %f\n' % ( + 'train' if train else 'valid', + epoch_idx, + np.mean(epoch_losses) + )) + log_file.flush() + + return np.mean(epoch_losses) + + +# Create the checkpoint directory +checkpoint_directory = os.path.join(args.checkpoint_directory, args.checkpoint_prefix) +if os.path.isdir(checkpoint_directory): + print('[Warning] Checkpoint directory already exists.') +else: + os.makedirs(checkpoint_directory, exist_ok=True) + +# Open the log file for writing +log_file = 
os.path.join(checkpoint_directory,args.log_file) +if os.path.exists(log_file): + print('[Warning] Log file already exists.') +log_file = open(log_file, 'a+') + +# Create the folders for plotting if need be +plot_path=None +if args.plot: + plot_path = os.path.join(checkpoint_directory,'train_vis') + if os.path.isdir(plot_path): + print('[Warning] Plotting directory already exists.') + else: + os.makedirs(plot_path, exist_ok=True) + + +# Initialize the history +train_loss_history = [] + +# Start the training +for epoch_idx in range(1, args.num_epochs + 1): + # Process epoch + train_loss_history.append( + process_epoch( + epoch_idx, + model, loss_function, optimizer, training_dataloader, device, + log_file, args, train=True, plot_path=plot_path + ) + ) + + # Save the current checkpoint + checkpoint_path = os.path.join( + checkpoint_directory, + '%02d.pth' % (epoch_idx) + ) + checkpoint = { + 'args': args, + 'epoch_idx': epoch_idx, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'train_loss_history': train_loss_history, + } + torch.save(checkpoint, checkpoint_path) + +# Close the log file +log_file.close() diff --git a/third_party/RoRD/trainers/trainPT_combined.py b/third_party/RoRD/trainers/trainPT_combined.py new file mode 100644 index 0000000000000000000000000000000000000000..a32fcf00937a451195270bc5f2e3e4f43af36237 --- /dev/null +++ b/third_party/RoRD/trainers/trainPT_combined.py @@ -0,0 +1,289 @@ + +import argparse +import numpy as np +import os +import sys +sys.path.append("../") + +import shutil + +import torch +import torch.optim as optim + +from torch.utils.data import DataLoader + +from tqdm import tqdm + +import warnings + +# from lib.dataset import MegaDepthDataset + +from lib.exceptions import NoGradientError +from lib.loss import loss_function as orig_loss +from lib.losses.lossPhotoTourism import loss_function as ipr_loss +from lib.model import D2Net +from lib.dataloaders.datasetPhotoTourism_combined import PhotoTourismCombined + + 
+# CUDA +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:1" if use_cuda else "cpu") + +# Seed +torch.manual_seed(1) +if use_cuda: + torch.cuda.manual_seed(1) +np.random.seed(1) + +# Argument parsing +parser = argparse.ArgumentParser(description='Training script') + +parser.add_argument( + '--dataset_path', type=str, default="/scratch/udit/phototourism/", + help='path to the dataset' +) +# parser.add_argument( +# '--scene_info_path', type=str, required=True, +# help='path to the processed scenes' +# ) + +parser.add_argument( + '--preprocessing', type=str, default='caffe', + help='image preprocessing (caffe or torch)' +) + +parser.add_argument( + '--model_file', type=str, default='models/d2_ots.pth', + help='path to the full model' +) + +parser.add_argument( + '--num_epochs', type=int, default=10, + help='number of training epochs' +) +parser.add_argument( + '--lr', type=float, default=1e-3, + help='initial learning rate' +) +parser.add_argument( + '--batch_size', type=int, default=1, + help='batch size' +) +parser.add_argument( + '--num_workers', type=int, default=16, + help='number of workers for data loading' +) + +parser.add_argument( + '--use_validation', dest='use_validation', action='store_true', + help='use the validation split' +) +parser.set_defaults(use_validation=False) + +parser.add_argument( + '--log_interval', type=int, default=250, + help='loss logging interval' +) + +parser.add_argument( + '--log_file', type=str, default='log.txt', + help='loss logging file' +) + +parser.add_argument( + '--plot', dest='plot', action='store_true', + help='plot training pairs' +) +parser.set_defaults(plot=False) + +parser.add_argument( + '--checkpoint_directory', type=str, default='checkpoints', + help='directory for training checkpoints' +) +parser.add_argument( + '--checkpoint_prefix', type=str, default='d2', + help='prefix for training checkpoints' +) + +args = parser.parse_args() +print(args) + +# Creating CNN model +model = D2Net( + 
model_file=args.model_file, + use_cuda=False +) +model = model.to(device) + +# Optimizer +optimizer = optim.Adam( + filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr +) + +# Dataset +if args.use_validation: + validation_dataset = PhotoTourismCombined( + # scene_list_path='megadepth_utils/valid_scenes.txt', + # scene_info_path=args.scene_info_path, + base_path=args.dataset_path, + train=False, + preprocessing=args.preprocessing, + pairs_per_scene=25 + ) + # validation_dataset.build_dataset() + validation_dataloader = DataLoader( + validation_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers + ) + +training_dataset = PhotoTourismCombined( + # scene_list_path='megadepth_utils/train_scenes.txt', + # scene_info_path=args.scene_info_path, + base_path=args.dataset_path, + preprocessing=args.preprocessing +) +# training_dataset.build_dataset() + +training_dataloader = DataLoader( + training_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers +) + + +# Define epoch function +def process_epoch( + epoch_idx, + model, loss_function, optimizer, dataloader, device, + log_file, args, train=True, plot_path=None +): + epoch_losses = [] + + torch.set_grad_enabled(train) + + progress_bar = tqdm(enumerate(dataloader), total=len(dataloader)) + for batch_idx, (batch,method) in progress_bar: + if train: + optimizer.zero_grad() + + batch['train'] = train + batch['epoch_idx'] = epoch_idx + batch['batch_idx'] = batch_idx + batch['batch_size'] = args.batch_size + batch['preprocessing'] = args.preprocessing + batch['log_interval'] = args.log_interval + + try: + loss = loss_function[method](model, batch, device, plot=args.plot, plot_path=plot_path) + except NoGradientError: + # print("failed") + continue + + current_loss = loss.data.cpu().numpy()[0] + epoch_losses.append(current_loss) + + progress_bar.set_postfix(loss=('%.4f' % np.mean(epoch_losses))) + + if batch_idx % args.log_interval == 0: + log_file.write('[%s] epoch %d - batch %d / %d 
- avg_loss: %f\n' % ( + 'train' if train else 'valid', + epoch_idx, batch_idx, len(dataloader), np.mean(epoch_losses) + )) + + if train: + loss.backward() + optimizer.step() + + log_file.write('[%s] epoch %d - avg_loss: %f\n' % ( + 'train' if train else 'valid', + epoch_idx, + np.mean(epoch_losses) + )) + log_file.flush() + + return np.mean(epoch_losses) + + +# Create the checkpoint directory +checkpoint_directory = os.path.join(args.checkpoint_directory, args.checkpoint_prefix) +if os.path.isdir(checkpoint_directory): + print('[Warning] Checkpoint directory already exists.') +else: + os.makedirs(checkpoint_directory, exist_ok=True) + +# Open the log file for writing +log_file = os.path.join(checkpoint_directory,args.log_file) +if os.path.exists(log_file): + print('[Warning] Log file already exists.') +log_file = open(log_file, 'a+') + +# Create the folders for plotting if need be +plot_path=None +if args.plot: + plot_path = os.path.join(checkpoint_directory,'train_vis') + if os.path.isdir(plot_path): + print('[Warning] Plotting directory already exists.') + else: + os.makedirs(plot_path, exist_ok=True) + + +# Initialize the history +train_loss_history = [] +validation_loss_history = [] +if args.use_validation: + min_validation_loss = process_epoch( + 0, + model, [orig_loss, ipr_loss], optimizer, validation_dataloader, device, + log_file, args, + train=False + ) + +# Start the training +for epoch_idx in range(1, args.num_epochs + 1): + # Process epoch + train_loss_history.append( + process_epoch( + epoch_idx, + model, [orig_loss, ipr_loss], optimizer, training_dataloader, device, + log_file, args, train=True, plot_path=plot_path + ) + ) + + if args.use_validation: + validation_loss_history.append( + process_epoch( + epoch_idx, + model, [orig_loss, ipr_loss], optimizer, validation_dataloader, device, + log_file, args, + train=False + ) + ) + + # Save the current checkpoint + checkpoint_path = os.path.join( + checkpoint_directory, + '%02d.pth' % (epoch_idx) + ) + 
checkpoint = { + 'args': args, + 'epoch_idx': epoch_idx, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'train_loss_history': train_loss_history, + 'validation_loss_history': validation_loss_history + } + torch.save(checkpoint, checkpoint_path) + if ( + args.use_validation and + validation_loss_history[-1] < min_validation_loss + ): + min_validation_loss = validation_loss_history[-1] + best_checkpoint_path = os.path.join( + checkpoint_directory, + '%s.best.pth' % args.checkpoint_prefix + ) + shutil.copy(checkpoint_path, best_checkpoint_path) + +# Close the log file +log_file.close()