# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# Dataloader for preprocessed scannet++
# dataset at https://github.com/scannetpp/scannetpp - non-commercial research and educational purposes
# https://kaldir.vc.in.tum.de/scannetpp/static/scannetpp-terms-of-use.pdf
# See datasets_preprocess/preprocess_scannetpp.py
# --------------------------------------------------------
import os.path as osp

import cv2
import numpy as np

from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset
from dust3r.utils.image import imread_cv2


class ScanNetpp(BaseStereoViewDataset):
    def __init__(self, *args, ROOT, **kwargs):
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        assert self.split == 'train'
        self.loaded_data = self._load_data()

    def _load_data(self):
        with np.load(osp.join(self.ROOT, 'all_metadata.npz')) as data:
            self.scenes = data['scenes']
            self.sceneids = data['sceneids']
            self.images = data['images']
            self.intrinsics = data['intrinsics'].astype(np.float32)
            self.trajectories = data['trajectories'].astype(np.float32)
            self.pairs = data['pairs'][:, :2].astype(int)

    def __len__(self):
        return len(self.pairs)

    def _get_views(self, idx, resolution, rng):
        image_idx1, image_idx2 = self.pairs[idx]

        views = []
        for view_idx in [image_idx1, image_idx2]:
            scene_id = self.sceneids[view_idx]
            scene_dir = osp.join(self.ROOT, self.scenes[scene_id])

            intrinsics = self.intrinsics[view_idx]
            camera_pose = self.trajectories[view_idx]
            basename = self.images[view_idx]

            # Load RGB image
            rgb_image = imread_cv2(osp.join(scene_dir, 'images', basename + '.jpg'))
            # Load depthmap
            depthmap = imread_cv2(osp.join(scene_dir, 'depth', basename + '.png'), cv2.IMREAD_UNCHANGED)
            depthmap = depthmap.astype(np.float32) / 1000
            depthmap[~np.isfinite(depthmap)] = 0  # invalid

            rgb_image, depthmap, intrinsics = self._crop_resize_if_necessary(
                rgb_image, depthmap, intrinsics, resolution, rng=rng, info=view_idx)

            views.append(dict(
                img=rgb_image,
                depthmap=depthmap.astype(np.float32),
                camera_pose=camera_pose.astype(np.float32),
                camera_intrinsics=intrinsics.astype(np.float32),
                dataset='ScanNet++',
                label=self.scenes[scene_id] + '_' + basename,
                instance=f'{str(idx)}_{str(view_idx)}',
            ))
        return views


if __name__ == "__main__":
    from dust3r.datasets.base.base_stereo_view_dataset import view_name
    from dust3r.viz import SceneViz, auto_cam_size
    from dust3r.utils.image import rgb

    dataset = ScanNetpp(split='train', ROOT="data/scannetpp_processed", resolution=224, aug_crop=16)

    for idx in np.random.permutation(len(dataset)):
        views = dataset[idx]
        assert len(views) == 2
        print(view_name(views[0]), view_name(views[1]))
        viz = SceneViz()
        poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1]]
        cam_size = max(auto_cam_size(poses), 0.001)
        for view_idx in [0, 1]:
            pts3d = views[view_idx]['pts3d']
            valid_mask = views[view_idx]['valid_mask']
            colors = rgb(views[view_idx]['img'])
            viz.add_pointcloud(pts3d, colors, valid_mask)
            viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
                           focal=views[view_idx]['camera_intrinsics'][0, 0],
                           color=(idx * 255, (1 - idx) * 255, 0),
                           image=colors,
                           cam_size=cam_size)
        viz.show()