|
import os.path as osp |
|
import numpy as np |
|
import torch |
|
from roma.utils import * |
|
from PIL import Image |
|
from tqdm import tqdm |
|
|
|
|
|
class ScanNetBenchmark:
    """Relative-pose benchmark over the image pairs listed in ``test.npz``.

    For every pair the model produces dense matches, a sparse subset is
    sampled, and a relative pose is estimated several times on shuffled
    correspondences. The per-trial pose errors are aggregated into AUC and
    mAP scores.
    """

    def __init__(self, data_root="data/scannet") -> None:
        """Remember where the dataset lives.

        Args:
            data_root: Directory that contains ``test.npz`` and the
                ``scans_test/<scene>/color`` and
                ``scans_test/<scene>/intrinsic`` folders read by
                :meth:`benchmark`.
        """
        self.data_root = data_root

    @staticmethod
    def _read_intrinsics(path):
        """Parse a whitespace-separated matrix text file into a 2-D array."""
        # The context manager closes the file; the original left it open.
        with open(path, "r") as handle:
            rows = [
                np.array([float(token) for token in line.split()])
                for line in handle.read().split("\n")
                # Skip blank and whitespace-only lines so they cannot yield
                # an empty row that would break np.stack.
                if line.strip()
            ]
        return np.stack(rows)

    @staticmethod
    def _image_size(path):
        """Return ``(width, height)`` of the image at *path* and close it."""
        with Image.open(path) as image:
            return image.size

    @staticmethod
    def _to_pixel_coords(normalized_xy, width, height, offset=0.5):
        """Map ``[-1, 1]`` coordinates to pixel coordinates.

        Column 0 is scaled by *width*, column 1 by *height*, and *offset* is
        subtracted from both.
        """
        return np.stack(
            (
                width * (normalized_xy[:, 0] + 1) / 2 - offset,
                height * (normalized_xy[:, 1] + 1) / 2 - offset,
            ),
            axis=-1,
        )

    def benchmark(self, model, model_name=None):
        """Run the benchmark and return aggregated pose metrics.

        Args:
            model: Object exposing ``train(False)``, ``match(path_a, path_b)``
                and ``sample(dense_matches, dense_certainty, n)``.
            model_name: Unused; kept so existing callers keep working.

        Returns:
            dict with keys ``auc_5``, ``auc_10``, ``auc_20`` (from
            ``pose_auc`` at thresholds 5/10/20) and ``map_5``, ``map_10``,
            ``map_20`` (means of the fractions of trials whose pose error is
            below 5, 10, 15 and 20).

        Note:
            ``estimate_pose``, ``compute_pose_error`` and ``pose_auc`` are not
            defined in this file; presumably they come from the
            ``roma.utils`` star import -- confirm.
        """
        model.train(False)
        with torch.no_grad():
            # Close the .npz archive once both arrays have been read.
            with np.load(osp.join(self.data_root, "test.npz")) as test_set:
                pairs, rel_pose = test_set["name"], test_set["rel_pose"]

            tot_e_pose = []
            # The intrinsics file is per scene, so parse it once per scene
            # instead of once per pair.
            intrinsics_by_scene = {}

            # Visit every pair exactly once, in random order.
            pair_inds = np.random.permutation(len(pairs))
            for pairind in tqdm(pair_inds, smoothing=0.9):
                scene = pairs[pairind]
                scene_name = f"scene0{scene[0]}_00"
                scene_dir = osp.join(self.data_root, "scans_test", scene_name)
                im_A_path = osp.join(scene_dir, "color", f"{scene[2]}.jpg")
                im_B_path = osp.join(scene_dir, "color", f"{scene[3]}.jpg")

                # Only the image sizes are needed here; the model receives
                # the paths and loads the images itself.
                w1, h1 = self._image_size(im_A_path)
                w2, h2 = self._image_size(im_B_path)

                T_gt = rel_pose[pairind].reshape(3, 4)
                R, t = T_gt[:3, :3], T_gt[:3, 3]

                if scene_name not in intrinsics_by_scene:
                    intrinsics_by_scene[scene_name] = self._read_intrinsics(
                        osp.join(scene_dir, "intrinsic", "intrinsic_color.txt")
                    )
                K = intrinsics_by_scene[scene_name]

                dense_matches, dense_certainty = model.match(im_A_path, im_B_path)
                sparse_matches, _ = model.sample(
                    dense_matches, dense_certainty, 5000
                )
                # NumPy cannot consume tensors that live on an accelerator or
                # require grad; move them to host memory first. NumPy input
                # passes through unchanged.
                if torch.is_tensor(sparse_matches):
                    sparse_matches = sparse_matches.detach().cpu().numpy()

                # Work in a frame where the shorter image side is 480 pixels.
                scale1 = 480 / min(w1, h1)
                scale2 = 480 / min(w2, h2)
                w1, h1 = scale1 * w1, scale1 * h1
                w2, h2 = scale2 * w2, scale2 * h2
                # Multiplication allocates new arrays, so the cached K is
                # never mutated.
                # NOTE(review): this scales every entry of K, including the
                # bottom row(s); presumably estimate_pose only reads the top
                # rows -- confirm.
                K1 = K * scale1
                K2 = K * scale2

                kpts1 = self._to_pixel_coords(sparse_matches[:, :2], w1, h1)
                kpts2 = self._to_pixel_coords(sparse_matches[:, 2:], w2, h2)

                # Loop-invariant, so computed once per pair.
                norm_threshold = 0.5 / (
                    np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
                )

                # Estimate the pose several times on shuffled correspondences.
                for _ in range(5):
                    shuffling = np.random.permutation(len(kpts1))
                    kpts1 = kpts1[shuffling]
                    kpts2 = kpts2[shuffling]
                    try:
                        R_est, t_est, _ = estimate_pose(
                            kpts1,
                            kpts2,
                            K1,
                            K2,
                            norm_threshold,
                            conf=0.99999,
                        )
                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                    except Exception as e:
                        # Deliberate best-effort: a failed estimation is
                        # reported and scored with the fallback error of 90.
                        print(repr(e))
                        e_t, e_R = 90, 90
                    # Record each trial exactly once. The original appended
                    # the error twice, double-counting trials in the
                    # statistics.
                    tot_e_pose.append(max(e_t, e_R))

            tot_e_pose = np.array(tot_e_pose)
            thresholds = [5, 10, 20]
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }
|
|