# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# Dataloader for preprocessed arkitscenes
# dataset at https://github.com/apple/ARKitScenes - Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License https://github.com/apple/ARKitScenes/tree/main?tab=readme-ov-file#license
# See datasets_preprocess/preprocess_arkitscenes.py
# --------------------------------------------------------
import os.path as osp
import cv2
import numpy as np

from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset
from dust3r.utils.image import imread_cv2


class ARKitScenes(BaseStereoViewDataset):
    """Image-pair dataset over the preprocessed ARKitScenes data.

    Each item is a pair of views (RGB image, depthmap, camera pose and
    intrinsics) selected by one row of the ``pairs`` array stored in
    ``<ROOT>/<split dir>/all_metadata.npz``.
    """

    def __init__(self, *args, split, ROOT, **kwargs):
        """Set up the dataset and load its metadata.

        Args:
            *args: Forwarded unchanged to ``BaseStereoViewDataset.__init__``.
            split: ``"train"`` or ``"test"``; mapped to the on-disk
                directory names ``"Training"`` and ``"Test"``.
            ROOT: Root directory of the preprocessed dataset.
            **kwargs: Forwarded unchanged to ``BaseStereoViewDataset.__init__``.

        Raises:
            ValueError: If ``split`` is neither ``"train"`` nor ``"test"``.
        """
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        # self.split is assigned after the base initialiser runs, so whatever
        # the base class may have stored there is replaced by the directory name.
        if split == "train":
            self.split = "Training"
        elif split == "test":
            self.split = "Test"
        else:
            raise ValueError(
                f"Unknown split {split!r}; expected 'train' or 'test'")

        # _load_data() stores the arrays on self and returns nothing, so
        # loaded_data is None; the attribute is kept for backward compatibility.
        self.loaded_data = self._load_data(self.split)

    def _load_data(self, split):
        """Read ``all_metadata.npz`` for ``split`` and store its arrays on self.

        Sets ``scenes``, ``sceneids``, ``images``, ``intrinsics``,
        ``trajectories`` and ``pairs``.

        Args:
            split: Name of the split directory below ``self.ROOT``.
        """
        metadata_path = osp.join(self.ROOT, split, 'all_metadata.npz')
        # The context manager closes the archive once the arrays are read.
        with np.load(metadata_path) as data:
            self.scenes = data['scenes']
            self.sceneids = data['sceneids']
            self.images = data['images']
            self.intrinsics = data['intrinsics'].astype(np.float32)
            self.trajectories = data['trajectories'].astype(np.float32)
            # Only the first two columns (the two image indices) are kept.
            self.pairs = data['pairs'][:, :2].astype(int)

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.pairs)

    def _get_views(self, idx, resolution, rng):
        """Load the two views of pair ``idx``.

        Args:
            idx: Row index into ``self.pairs``.
            resolution: Passed through to ``_crop_resize_if_necessary``.
            rng: Passed through to ``_crop_resize_if_necessary``.

        Returns:
            A list of two dicts, one per view, with keys ``img``,
            ``depthmap``, ``camera_pose``, ``camera_intrinsics``,
            ``dataset``, ``label`` and ``instance``.
        """
        image_idx1, image_idx2 = self.pairs[idx]

        views = []
        for view_idx in [image_idx1, image_idx2]:
            scene_id = self.sceneids[view_idx]
            scene_dir = osp.join(self.ROOT, self.split, self.scenes[scene_id])

            intrinsics = self.intrinsics[view_idx]
            camera_pose = self.trajectories[view_idx]
            basename = self.images[view_idx]

            # Load RGB image (metadata holds the .png name; RGB is stored as .jpg)
            rgb_image = imread_cv2(
                osp.join(scene_dir, 'vga_wide', basename.replace('.png', '.jpg')))
            # Load depthmap
            depthmap = imread_cv2(
                osp.join(scene_dir, 'lowres_depth', basename),
                cv2.IMREAD_UNCHANGED)
            # NOTE(review): presumably converts millimetres to metres -- confirm
            # against datasets_preprocess/preprocess_arkitscenes.py.
            depthmap = depthmap.astype(np.float32) / 1000
            depthmap[~np.isfinite(depthmap)] = 0  # invalid

            rgb_image, depthmap, intrinsics = self._crop_resize_if_necessary(
                rgb_image, depthmap, intrinsics, resolution, rng=rng, info=view_idx)

            views.append(dict(
                img=rgb_image,
                depthmap=depthmap.astype(np.float32),
                camera_pose=camera_pose.astype(np.float32),
                camera_intrinsics=intrinsics.astype(np.float32),
                dataset='arkitscenes',
                label=self.scenes[scene_id] + '_' + basename,
                instance=f'{idx}_{view_idx}',
            ))

        return views


if __name__ == "__main__":
    # Visual sanity check: show the point clouds and cameras of random pairs.
    from dust3r.datasets.base.base_stereo_view_dataset import view_name
    from dust3r.viz import SceneViz, auto_cam_size
    from dust3r.utils.image import rgb

    dataset = ARKitScenes(split='train', ROOT="data/arkitscenes_processed",
                          resolution=224, aug_crop=16)

    for idx in np.random.permutation(len(dataset)):
        views = dataset[idx]
        assert len(views) == 2
        print(view_name(views[0]), view_name(views[1]))
        viz = SceneViz()
        poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1]]
        cam_size = max(auto_cam_size(poses), 0.001)
        for view_idx in [0, 1]:
            pts3d = views[view_idx]['pts3d']
            valid_mask = views[view_idx]['valid_mask']
            colors = rgb(views[view_idx]['img'])
            viz.add_pointcloud(pts3d, colors, valid_mask)
            # Colour by view (0 -> green, 1 -> red). The dataset index `idx`
            # is not limited to 0/1 and would give out-of-range components.
            viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
                           focal=views[view_idx]['camera_intrinsics'][0, 0],
                           color=(view_idx * 255, (1 - view_idx) * 255, 0),
                           image=colors,
                           cam_size=cam_size)
        viz.show()