import os.path as osp
import json
import itertools
from collections import deque

import cv2
import numpy as np

from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset
from dust3r.utils.image import imread_cv2


class Co3d(BaseStereoViewDataset):
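    """CO3Dv2 two-view dataset.

    Reads sequences preprocessed under ROOT as <category>/<sequence>/{images,depths,masks}/
    with per-frame files named frameXXXXXX.* (see the path helpers below), plus a
    selected_seqs_<split>.json file at ROOT listing the frames kept for each sequence.
    Each dataset item yields two views of the same sequence taken a few frames apart.
    """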
    def __init__(self, mask_bg=True, *args, ROOT, **kwargs):
        self.ROOT = ROOT
        super().__init__(*args, **kwargs)
        assert mask_bg in (True, False, 'rand')
        self.mask_bg = mask_bg
        self.dataset_label = 'Co3d_v2'

        # load the list of frames selected for this split, per (category, sequence)
        with open(osp.join(self.ROOT, f'selected_seqs_{self.split}.json'), 'r') as f:
            self.scenes = json.load(f)
        # drop empty categories and flatten to {(category, sequence): selected frames}
        self.scenes = {k: v for k, v in self.scenes.items() if len(v) > 0}
        self.scenes = {(k, k2): v2 for k, v in self.scenes.items()
                       for k2, v2 in v.items()}
        self.scene_list = list(self.scenes.keys())

        # pre-compute all frame-index pairs (i, j) over (up to) 100 views per sequence,
        # keeping only pairs whose gap is a multiple of 5 frames, up to 30
        self.combinations = [(i, j)
                             for i, j in itertools.combinations(range(100), 2)
                             if 0 < abs(i - j) <= 30 and abs(i - j) % 5 == 0]
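        # with 100 candidate frame indices and gaps of 5, 10, ..., 30, the list above holds
        # (100-5) + (100-10) + ... + (100-30) = 495 pairs, so an item index decomposes as
        # idx = scene_idx * len(self.combinations) + pair_idx (see __len__ and _get_views)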

        # per-scene, per-resolution record of frames found to have no valid depth
        self.invalidate = {scene: {} for scene in self.scene_list}

    def __len__(self):
        return len(self.scene_list) * len(self.combinations)

    def _get_metadatapath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06n}.npz')

    def _get_impath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'images', f'frame{view_idx:06n}.jpg')

    def _get_depthpath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'depths', f'frame{view_idx:06n}.jpg.geometric.png')

    def _get_maskpath(self, obj, instance, view_idx):
        return osp.join(self.ROOT, obj, instance, 'masks', f'frame{view_idx:06n}.png')
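    # e.g. for view_idx=10 these helpers resolve to .../images/frame000010.npz,
    # .../images/frame000010.jpg, .../depths/frame000010.jpg.geometric.png and
    # .../masks/frame000010.png under ROOT/<category>/<sequence>/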

    def _read_depthmap(self, depthpath, input_metadata):
        # depth is stored as a 16-bit PNG normalized by the per-frame maximum depth
        depthmap = imread_cv2(depthpath, cv2.IMREAD_UNCHANGED)
        depthmap = (depthmap.astype(np.float32) / 65535) * np.nan_to_num(input_metadata['maximum_depth'])
        return depthmap

    def _get_views(self, idx, resolution, rng):
        # decode the flat index into a (scene, frame pair) combination
        obj, instance = self.scene_list[idx // len(self.combinations)]
        image_pool = self.scenes[obj, instance]
        im1_idx, im2_idx = self.combinations[idx % len(self.combinations)]

        last = len(image_pool) - 1

        # lazily initialize the per-resolution validity flags for this scene
        if resolution not in self.invalidate[obj, instance]:
            self.invalidate[obj, instance][resolution] = [False for _ in range(len(image_pool))]

        # decide now whether the background is masked out for this pair
        mask_bg = (self.mask_bg == True) or (self.mask_bg == 'rand' and rng.choice(2))

        views = []
        # jitter the two nominal frame indices by up to 4 frames, clipped to the pool
        imgs_idxs = [max(0, min(im_idx + rng.integers(-4, 5), last)) for im_idx in [im2_idx, im1_idx]]
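        # e.g. a nominal index of 10 can land anywhere in [6, 14], since rng.integers(-4, 5)
        # draws from -4..4; this keeps the pair roughly at the requested frame gap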
        imgs_idxs = deque(imgs_idxs)
        while len(imgs_idxs) > 0:
            im_idx = imgs_idxs.pop()

            if self.invalidate[obj, instance][resolution][im_idx]:
                # this frame was previously found to have no valid depth: walk the pool
                # in a random direction until a still-valid frame is found
                random_direction = 2 * rng.choice(2) - 1
                for offset in range(1, len(image_pool)):
                    tentative_im_idx = (im_idx + (random_direction * offset)) % len(image_pool)
                    if not self.invalidate[obj, instance][resolution][tentative_im_idx]:
                        im_idx = tentative_im_idx
                        break

            view_idx = image_pool[im_idx]

            impath = self._get_impath(obj, instance, view_idx)
            depthpath = self._get_depthpath(obj, instance, view_idx)

            # load camera parameters (camera-to-world pose and intrinsics) from the metadata
            metadata_path = self._get_metadatapath(obj, instance, view_idx)
            input_metadata = np.load(metadata_path)
            camera_pose = input_metadata['camera_pose'].astype(np.float32)
            intrinsics = input_metadata['camera_intrinsics'].astype(np.float32)

            # load image and depth
            rgb_image = imread_cv2(impath)
            depthmap = self._read_depthmap(depthpath, input_metadata)

            if mask_bg:
                # load the object mask and zero out the background depth
                maskpath = self._get_maskpath(obj, instance, view_idx)
                maskmap = imread_cv2(maskpath, cv2.IMREAD_UNCHANGED).astype(np.float32)
                maskmap = (maskmap / 255.0) > 0.1

                depthmap *= maskmap

            rgb_image, depthmap, intrinsics = self._crop_resize_if_necessary(
                rgb_image, depthmap, intrinsics, resolution, rng=rng, info=impath)
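            # (_crop_resize_if_necessary is presumably provided by BaseStereoViewDataset;
            # it crops/resizes to the target resolution and rescales the intrinsics to match)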

            num_valid = (depthmap > 0.0).sum()
            if num_valid == 0:
                # degenerate view with no valid depth left: remember it and retry
                # this slot with a neighbouring frame
                self.invalidate[obj, instance][resolution][im_idx] = True
                imgs_idxs.append(im_idx)
                continue

            views.append(dict(
                img=rgb_image,
                depthmap=depthmap,
                camera_pose=camera_pose,
                camera_intrinsics=intrinsics,
                dataset=self.dataset_label,
                label=osp.join(obj, instance),
                instance=osp.split(impath)[1],
            ))
        return views
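
    # each view carries the raw image, metric depthmap, intrinsics and camera-to-world pose;
    # the pts3d / valid_mask fields used by the demo below are presumably derived from these
    # in BaseStereoViewDataset's __getitem__ post-processing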


if __name__ == "__main__":
    from dust3r.datasets.base.base_stereo_view_dataset import view_name
    from dust3r.viz import SceneViz, auto_cam_size
    from dust3r.utils.image import rgb

    dataset = Co3d(split='train', ROOT="data/co3d_subset_processed", resolution=224, aug_crop=16)

    for idx in np.random.permutation(len(dataset)):
        views = dataset[idx]
        assert len(views) == 2
        print(view_name(views[0]), view_name(views[1]))
        viz = SceneViz()
        poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1]]
        cam_size = max(auto_cam_size(poses), 0.001)
        for view_idx in [0, 1]:
            pts3d = views[view_idx]['pts3d']
            valid_mask = views[view_idx]['valid_mask']
            colors = rgb(views[view_idx]['img'])
            viz.add_pointcloud(pts3d, colors, valid_mask)
            viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
                           focal=views[view_idx]['camera_intrinsics'][0, 0],
                           color=(view_idx * 255, (1 - view_idx) * 255, 0),  # view 0 green, view 1 red
                           image=colors,
                           cam_size=cam_size)
        viz.show()
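
    # running this file directly assumes a preprocessed CO3D subset at
    # data/co3d_subset_processed and shows one random pair at a time in the SceneViz viewer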