Realcat
fix: eloftr
63f3cf2
raw
history blame
4.99 kB
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File pram -> twelve_scenes
@IDE PyCharm
@Author fx221@cam.ac.uk
@Date 29/01/2024 14:37
=================================================='''
import os
import os.path as osp
import numpy as np
from colmap_utils.read_write_model import read_model
import torchvision.transforms as tvt
from dataset.basicdataset import BasicDataset
class TwelveScenes(BasicDataset):
def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='12Scenes',
nfeatures=1024,
query_p3d_fn=None,
train=True,
with_aug=False,
min_inliers=0,
max_inliers=4096,
random_inliers=False,
jitter_params=None,
scale_params=None,
image_dim=3,
query_info_path=None,
sample_ratio=1,
):
self.landmark_path = osp.join(landmark_path, scene)
self.dataset_path = osp.join(dataset_path, scene)
self.n_class = n_class
self.dataset = dataset + '/' + scene
self.nfeatures = nfeatures
self.with_aug = with_aug
self.jitter_params = jitter_params
self.scale_params = scale_params
self.image_dim = image_dim
self.train = train
self.min_inliers = min_inliers
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
self.random_inliers = random_inliers
self.image_prefix = ''
train_transforms = []
if self.with_aug:
train_transforms.append(tvt.ColorJitter(
brightness=jitter_params['brightness'],
contrast=jitter_params['contrast'],
saturation=jitter_params['saturation'],
hue=jitter_params['hue']))
if jitter_params['blur'] > 0:
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
self.train_transforms = tvt.Compose(train_transforms)
if train:
self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
# only for testing of query images
if not self.train:
data = np.load(query_p3d_fn, allow_pickle=True)[()]
self.img_p3d = data
else:
self.img_p3d = {}
with open(osp.join(self.dataset_path, 'split.txt'), 'r') as f:
l = f.readline()
l = l.strip().split(' ') # sequence0 [frames=357] [start=0 ; end=356], first sequence for testing
start_img_id = l[-3].split('=')[-1]
end_img_id = l[-1].split('=')[-1][:-1]
test_start_img_id = int(start_img_id)
test_end_img_id = int(end_img_id)
self.img_fns = []
fns = os.listdir(osp.join(self.dataset_path, 'data'))
fns = sorted(fns)
nf = 0
for fn in fns:
if fn.find('jpg') >= 0: # frame-001098.color.jpg
frame_id = int(fn.split('.')[0].split('-')[-1])
if not train and frame_id > test_end_img_id:
continue
if train and frame_id <= test_end_img_id:
continue
if train and 'data' + '/' + fn not in self.name_to_id.keys():
continue
if not train and 'data' + '/' + fn not in self.img_p3d.keys():
continue
if nf % sample_ratio == 0:
self.img_fns.append('data' + '/' + fn)
nf += 1
print('Load {} images from {} for {}...'.format(len(self.img_fns),
self.dataset, 'training' if train else 'eval'))
data = np.load(osp.join(self.landmark_path,
'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
allow_pickle=True)[()]
p3d_id = data['id']
seg_id = data['label']
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
xyzs = data['xyz']
self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
# with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
# lines = f.readlines()
# for l in lines:
# l = l.strip().split()
# self.mean_xyz = np.array([float(v) for v in l[:3]])
# self.scale_xyz = np.array([float(v) for v in l[3:]])
if not train:
self.query_info = self.read_query_info(path=query_info_path)
self.nfeatures = nfeatures
self.feature_dir = osp.join(self.landmark_path, 'feats')
self.feats = {}