File size: 4,981 Bytes
dbf8b7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import pickle
import h5py
import numpy as np
import torch
from dkm.utils import *
from PIL import Image
from tqdm import tqdm


class Yfcc100mBenchmark:
    """Relative-pose benchmark over four YFCC100M test scenes.

    For every image pair listed for a scene, the model produces dense
    matches, a subset is sampled, a relative pose is estimated from it and
    compared with the ground-truth relative pose. The per-pair pose errors
    are aggregated into AUC and mAP figures.
    """

    def __init__(self, data_root="data/yfcc100m_test") -> None:
        """Store the dataset root and the fixed list of scene names.

        Args:
            data_root: Directory that ``benchmark`` reads
                ``yfcc_test_pairs_with_gt.txt``, ``pairs/`` and
                ``yfcc100m/`` from.
        """
        self.scenes = [
            "buckingham_palace",
            "notre_dame_front_facade",
            "reichstag",
            "sacre_coeur",
        ]
        self.data_root = data_root

    @staticmethod
    def _read_lines(path):
        """Return the content of the text file at *path* split on newlines."""
        with open(path, "r") as f:
            return f.read().split("\n")

    @staticmethod
    def _load_pose(path):
        """Read ``K``, ``R`` and ``t`` from the HDF5 calibration file at *path*.

        The file is closed before returning. The values are passed through
        ``np.asarray`` while the file is still open so they stay usable
        afterwards even if ``get_pose`` were to hand back lazily-read HDF5
        datasets (a no-op when it already returns NumPy arrays).
        """
        with h5py.File(path, "r") as calib:
            K, R, t, _, _ = get_pose(calib)
            return np.asarray(K), np.asarray(R), np.asarray(t)

    def benchmark(self, model, r=2):
        """Run the benchmark and return AUC / mAP pose metrics.

        Args:
            model: Object providing ``train(flag)``, ``match(im1, im2)`` and
                ``sample(matches, certainty, num)``.
            r: The dense certainty is raised to the power ``1 / r`` before
                matches are sampled.

        Returns:
            dict with keys ``auc_5``, ``auc_10``, ``auc_20``, ``map_5``,
            ``map_10`` and ``map_20``.

        Raises:
            OSError: If one of the dataset files cannot be opened.
        """
        model.train(False)
        with torch.no_grad():
            data_root = self.data_root
            with open(f"{data_root}/yfcc_test_pairs_with_gt.txt", "r") as f:
                meta_info = f.readlines()
            tot_e_pose = []
            for scene_ind, scene in enumerate(self.scenes):
                # NOTE: pickle.load can execute arbitrary code; only point
                # data_root at trusted dataset files.
                with open(
                    f"{data_root}/pairs/{scene}-te-1000-pairs.pkl", "rb"
                ) as f:
                    pairs = np.array(pickle.load(f))
                scene_dir = f"{data_root}/yfcc100m/{scene}/test/"
                calibs = self._read_lines(scene_dir + "calibration.txt")
                images = self._read_lines(scene_dir + "images.txt")
                # Visit every pair exactly once, in random order.
                pair_inds = np.random.choice(
                    range(len(pairs)), size=len(pairs), replace=False
                )
                for pairind in tqdm(pair_inds):
                    idx1, idx2 = pairs[pairind]
                    # The meta file is indexed with a stride of 1000 lines
                    # per scene, matching the "te-1000-pairs" pair files.
                    params = meta_info[1000 * scene_ind + pairind].split()
                    # Fields 2 and 3 are per-image rotations in units of 90 degrees.
                    rot1, rot2 = int(params[2]), int(params[3])
                    K1, R1, t1 = self._load_pose(scene_dir + calibs[idx1])
                    K2, R2, t2 = self._load_pose(scene_dir + calibs[idx2])

                    R, t = compute_relative_pose(R1, t1, R2, t2)
                    im1 = images[idx1]
                    im2 = images[idx2]
                    im1 = Image.open(scene_dir + im1).rotate(rot1 * 90, expand=True)
                    w1, h1 = im1.size
                    im2 = Image.open(scene_dir + im2).rotate(rot2 * 90, expand=True)
                    w2, h2 = im2.size
                    # Keep the intrinsics consistent with the rotated images.
                    K1 = rotate_intrinsic(K1, rot1)
                    K2 = rotate_intrinsic(K2, rot2)

                    dense_matches, dense_certainty = model.match(im1, im2)
                    dense_certainty = dense_certainty ** (1 / r)
                    sparse_matches, sparse_confidence = model.sample(
                        dense_matches, dense_certainty, 10000
                    )
                    # Rescale sizes and intrinsics so that the shorter image
                    # side corresponds to 480 pixels.
                    scale1 = 480 / min(w1, h1)
                    scale2 = 480 / min(w2, h2)
                    w1, h1 = scale1 * w1, scale1 * h1
                    w2, h2 = scale2 * w2, scale2 * h2
                    K1 = K1 * scale1
                    K2 = K2 * scale2

                    # Columns 0:2 belong to image 1 and columns 2: to image 2.
                    # Each coordinate is scaled by half the (rescaled) image
                    # width/height with no offset added.
                    # NOTE(review): presumably the matches are normalized to
                    # [-1, 1], giving centre-origin pixel coordinates — confirm.
                    kpts1 = sparse_matches[:, :2]
                    kpts1 = np.stack(
                        (w1 * kpts1[:, 0] / 2, h1 * kpts1[:, 1] / 2), axis=-1
                    )
                    kpts2 = sparse_matches[:, 2:]
                    kpts2 = np.stack(
                        (w2 * kpts2[:, 0] / 2, h2 * kpts2[:, 1] / 2), axis=-1
                    )
                    try:
                        threshold = 1.0
                        # Express the threshold relative to the magnitude of
                        # the top-left 2x2 blocks of both intrinsics.
                        norm_threshold = threshold / (
                            np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
                        )
                        R_est, t_est, mask = estimate_pose(
                            kpts1,
                            kpts2,
                            K1[:2, :2],
                            K2[:2, :2],
                            norm_threshold,
                            conf=0.9999999,
                        )
                        T1_to_2 = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2, R, t)
                        e_pose = max(e_t, e_R)
                    except Exception:
                        # Best effort: a failed estimation is scored as a
                        # 90 degree error. `Exception` (rather than a bare
                        # except) lets KeyboardInterrupt/SystemExit abort
                        # the run instead of being counted as a failure.
                        e_t, e_R = 90, 90
                        e_pose = max(e_t, e_R)
                    tot_e_pose.append(e_pose)
            tot_e_pose = np.array(tot_e_pose)
            thresholds = [5, 10, 20]
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            # mAP at a threshold is the mean accuracy over the 5-degree
            # steps up to and including that threshold.
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }