# Vincentqyw
# add: roma
# c608946
# raw
# history blame
# 5.5 kB
import os.path as osp
import numpy as np
import torch
from roma.utils import *
from PIL import Image
from tqdm import tqdm
class ScanNetBenchmark:
    """Relative-pose benchmark over the image pairs listed in ``test.npz``.

    For every pair the matcher under test produces correspondences, a relative
    pose is estimated from them with ``estimate_pose`` and compared against the
    stored ground-truth pose with ``compute_pose_error``.
    """

    def __init__(self, data_root="data/scannet") -> None:
        """Store the dataset root.

        Args:
            data_root: Directory containing ``test.npz`` and ``scans_test/``.
        """
        self.data_root = data_root

    @staticmethod
    def _image_size(path):
        """Return ``(width, height)`` of the image at *path* and close the file."""
        with Image.open(path) as image:
            return image.size

    @staticmethod
    def _load_intrinsics(path):
        """Parse a whitespace-separated matrix text file into a 2-D float array."""
        with open(path, "r") as handle:
            lines = handle.read().split("\n")
        # Blank (or whitespace-only) lines carry no row and are skipped.
        return np.stack(
            [
                np.array([float(value) for value in line.split()])
                for line in lines
                if line.strip()
            ]
        )

    @staticmethod
    def _to_pixels(coords, width, height, offset=0.5):
        """Map (x, y) columns so that -1 -> -offset and +1 -> size - offset."""
        return np.stack(
            (
                width * (coords[:, 0] + 1) / 2 - offset,
                height * (coords[:, 1] + 1) / 2 - offset,
            ),
            axis=-1,
        )

    def benchmark(self, model, model_name=None):
        """Run the benchmark and return pose AUC / mAP summary metrics.

        Args:
            model: Matcher exposing ``train(False)``,
                ``match(path_a, path_b) -> (dense_matches, dense_certainty)``
                and ``sample(dense_matches, dense_certainty, 5000)
                -> (sparse_matches, sparse_certainty)``. ``sparse_matches`` is
                indexed as an ``(N, 4)`` array whose columns are
                ``(x_A, y_A, x_B, y_B)``.
            model_name: Unused; kept for interface compatibility.

        Returns:
            dict with keys ``auc_5``, ``auc_10``, ``auc_20``, ``map_5``,
            ``map_10`` and ``map_20``.

        Notes:
            * The model is switched out of training mode and is not switched
              back afterwards.
            * Pair order and the per-trial shuffles draw from NumPy's global
              random state.
            * Each pair contributes exactly five pose errors, one per trial.
              The last trial is no longer appended a second time, which
              previously gave it double weight in the metrics.
        """
        model.train(False)
        with torch.no_grad():
            # NpzFile keeps the archive open until closed; the arrays are
            # materialised on indexing, so they remain valid afterwards.
            with np.load(osp.join(self.data_root, "test.npz")) as archive:
                pairs, rel_pose = archive["name"], archive["rel_pose"]

            tot_e_pose = []
            pair_inds = np.random.choice(
                range(len(pairs)), size=len(pairs), replace=False
            )
            for pairind in tqdm(pair_inds, smoothing=0.9):
                scene = pairs[pairind]
                scene_name = f"scene0{scene[0]}_00"
                scene_dir = osp.join(self.data_root, "scans_test", scene_name)
                im_A_path = osp.join(scene_dir, "color", f"{scene[2]}.jpg")
                im_B_path = osp.join(scene_dir, "color", f"{scene[3]}.jpg")

                # Only the image sizes are needed here; the matcher is handed
                # the file paths.
                w1, h1 = self._image_size(im_A_path)
                w2, h2 = self._image_size(im_B_path)

                # Ground truth is stored flattened; rows are [R | t].
                T_gt = rel_pose[pairind].reshape(3, 4)
                R, t = T_gt[:3, :3], T_gt[:3, 3]

                K = self._load_intrinsics(
                    osp.join(scene_dir, "intrinsic", "intrinsic_color.txt")
                )

                dense_matches, dense_certainty = model.match(im_A_path, im_B_path)
                sparse_matches, _ = model.sample(
                    dense_matches, dense_certainty, 5000
                )
                # NOTE(review): model.sample presumably returns a torch tensor,
                # possibly on an accelerator; np.stack cannot consume those, so
                # convert explicitly. NumPy inputs pass through untouched.
                if torch.is_tensor(sparse_matches):
                    sparse_matches = sparse_matches.detach().cpu().numpy()

                # Evaluate at a resolution whose shorter side is 480 px and
                # scale the intrinsics by the same factor.
                scale1 = 480 / min(w1, h1)
                scale2 = 480 / min(w2, h2)
                w1, h1 = scale1 * w1, scale1 * h1
                w2, h2 = scale2 * w2, scale2 * h2
                K1 = K * scale1
                K2 = K * scale2

                kpts1 = self._to_pixels(sparse_matches[:, :2], w1, h1)
                kpts2 = self._to_pixels(sparse_matches[:, 2:], w2, h2)

                # Five estimates per pair, each on a reshuffled copy of the
                # same correspondences.
                for _ in range(5):
                    shuffling = np.random.permutation(np.arange(len(kpts1)))
                    kpts1 = kpts1[shuffling]
                    kpts2 = kpts2[shuffling]
                    try:
                        norm_threshold = 0.5 / (
                            np.mean(np.abs(K1[:2, :2]))
                            + np.mean(np.abs(K2[:2, :2]))
                        )
                        R_est, t_est, _ = estimate_pose(
                            kpts1,
                            kpts2,
                            K1,
                            K2,
                            norm_threshold,
                            conf=0.99999,
                        )
                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                        e_pose = max(e_t, e_R)
                    except Exception as e:
                        # Deliberate best-effort: a failed estimate is scored
                        # with a fixed error of 90 instead of aborting the run.
                        print(repr(e))
                        e_pose = 90
                    tot_e_pose.append(e_pose)

            tot_e_pose = np.array(tot_e_pose)
            auc = pose_auc(tot_e_pose, [5, 10, 20])
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": acc_5,
                "map_10": np.mean([acc_5, acc_10]),
                "map_20": np.mean([acc_5, acc_10, acc_15, acc_20]),
            }