File size: 2,129 Bytes
4d9207d
9223079
 
4d9207d
9223079
 
d46c0a9
9223079
4d9207d
d46c0a9
631fac6
4d9207d
 
 
9223079
d46c0a9
9223079
 
 
 
 
 
2b78237
9223079
 
 
 
 
 
 
 
 
4d9207d
ed369cd
9223079
 
 
 
 
 
 
 
4d9207d
9223079
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b78237
 
 
 
 
9223079
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import subprocess
import sys
from pathlib import Path

import torch

from hloc import logger

from ..utils.base_model import BaseModel

# Directory of the third-party D2-Net sources, resolved relative to this file.
d2net_path = Path(__file__).parent / "../../third_party/d2net"
# Must run before the two imports below: the top-level `lib` package is
# presumably provided by that directory (verify against the checkout), so it
# only becomes importable once the directory is on sys.path.
sys.path.append(str(d2net_path))
from lib.model_test import D2Net as _D2Net
from lib.pyramid import process_multiscale


class D2Net(BaseModel):
    """Wrapper exposing the third-party D2-Net extractor as a ``BaseModel``.

    Configuration keys (see ``default_conf``):
        model_name: file name of the checkpoint, also used as the last
            component of the download URL.
        checkpoint_dir: directory in which the checkpoint is looked up and,
            if missing, downloaded to.
        use_relu: forwarded unchanged to the underlying D2-Net model.
        multiscale: when true, ``process_multiscale`` runs with its own
            default scales; otherwise it is restricted to ``scales=[1]``.
        max_keypoints: keep only this many highest-scoring keypoints;
            ``0`` or ``None`` keeps all of them.
    """

    default_conf = {
        "model_name": "d2_tf.pth",
        "checkpoint_dir": d2net_path / "models",
        "use_relu": True,
        "multiscale": False,
        "max_keypoints": 1024,
    }
    required_inputs = ["image"]

    def _init(self, conf):
        """Make sure the checkpoint is on disk, then build the network.

        Args:
            conf: mapping providing at least ``checkpoint_dir``,
                ``model_name`` and ``use_relu``.

        Raises:
            subprocess.CalledProcessError: if ``wget`` exits with an error.
            OSError: if ``wget`` cannot be started or the file cannot be
                moved into place.
        """
        # Path() lets a configuration override supply the directory as a str.
        model_file = Path(conf["checkpoint_dir"]) / conf["model_name"]
        if not model_file.exists():
            model_file.parent.mkdir(parents=True, exist_ok=True)
            # Download under a scratch name and rename only on success.
            # wget creates its -O target even when the transfer fails, and a
            # truncated file at the final path would pass the exists() check
            # on every later run and be loaded as a broken checkpoint.
            partial_file = model_file.with_name(model_file.name + ".part")
            cmd = [
                "wget",
                "--quiet",
                "https://dusmanu.com/files/d2-net/" + conf["model_name"],
                "-O",
                str(partial_file),
            ]
            try:
                subprocess.run(cmd, check=True)
                partial_file.replace(model_file)
            finally:
                # No-op after a successful rename; otherwise removes leftovers
                # (also on KeyboardInterrupt) while the error keeps propagating.
                if partial_file.exists():
                    partial_file.unlink()

        self.net = _D2Net(
            model_file=model_file, use_relu=conf["use_relu"], use_cuda=False
        )
        logger.info("Load D2Net model done.")

    def _forward(self, data):
        """Extract keypoints, scores and descriptors from ``data["image"]``.

        Args:
            data: mapping with an ``"image"`` tensor. The channel flip on
                dim 1 and the ``(1, 3, 1, 1)`` mean broadcast imply a
                (B, 3, H, W) layout; the ``* 255`` suggests values in
                [0, 1] -- TODO confirm against the caller.

        Returns:
            Dict with ``"keypoints"``, ``"scores"`` and ``"descriptors"``
            tensors, each with a leading dimension of size 1 added. The
            descriptors are transposed before that dimension is added.
        """
        image = data["image"]
        image = image.flip(1)  # RGB -> BGR
        norm = image.new_tensor([103.939, 116.779, 123.68])
        image = image * 255 - norm.view(1, 3, 1, 1)  # caffe normalization

        if self.conf["multiscale"]:
            keypoints, scores, descriptors = process_multiscale(image, self.net)
        else:
            keypoints, scores, descriptors = process_multiscale(
                image, self.net, scales=[1]
            )
        keypoints = keypoints[:, [1, 0]]  # (x, y) and remove the scale

        # argsort is ascending, so the best-scoring entries are at the end.
        idxs = scores.argsort()
        max_keypoints = self.conf["max_keypoints"]
        if max_keypoints:  # 0 or None: keep every keypoint
            idxs = idxs[-max_keypoints:]
        keypoints = keypoints[idxs, :2]
        descriptors = descriptors[idxs]
        scores = scores[idxs]

        # torch.from_numpy requires NumPy inputs, i.e. process_multiscale is
        # expected to hand back NumPy arrays.
        return {
            "keypoints": torch.from_numpy(keypoints)[None],
            "scores": torch.from_numpy(scores)[None],
            "descriptors": torch.from_numpy(descriptors.T)[None],
        }