Spaces:
Running
Running
File size: 5,337 Bytes
e02ffe6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
from os import path as osp
from typing import Dict
from unicodedata import name
import numpy as np
import torch
import torch.utils as utils
from numpy.linalg import inv
from src.utils.dataset import (
read_scannet_gray,
read_scannet_depth,
read_scannet_pose,
read_scannet_intrinsic
)
class ScanNetDataset(utils.data.Dataset):
    """Image-pair dataset covering the pairs listed in one ScanNet ``.npz`` file.

    Each item is a dict holding two grayscale images, their depth maps
    (train/val only), the shared intrinsic matrix, the relative poses in both
    directions and the scale factors relating the resized images to the
    640x480 base resolution.
    """

    # (width, height) that the scale factors in ``__getitem__`` are defined
    # against; also used as the resize target when ``img_resize`` is None.
    _BASE_WH = (640, 480)

    def __init__(self,
                 root_dir,
                 npz_path,
                 intrinsic_path,
                 mode='train',
                 min_overlap_score=0.4,
                 augment_fn=None,
                 pose_dir=None,
                 img_resize=None,
                 fp16=False,
                 **kwargs):
        """Manage one scene of ScanNet Dataset.

        Args:
            root_dir (str): ScanNet root directory that contains scene folders.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            intrinsic_path (str): path to depth-camera intrinsic file (an ``.npz`` archive
                indexed by scene name).
            mode (str): options are ['train', 'val', 'test'].
            min_overlap_score (float): pairs whose 'score' entry is not strictly greater
                than this value are dropped. Only applied when the npz file has a 'score'
                entry and ``mode`` is neither 'val' nor 'test'.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
                Stored only when ``mode == 'train'``; otherwise replaced by None.
            pose_dir (str): ScanNet root directory that contains all poses.
                (we use a separate (optional) pose_dir since we store images and poses separately.)
                Defaults to ``root_dir``.
            img_resize (sequence, optional): ``(width, height)`` the images are resized to.
                When None, the 640x480 base resolution is used.
            fp16 (bool): if True, images, depths and scales are returned in half precision.
            **kwargs: accepted and ignored.
        """
        super().__init__()
        self.root_dir = root_dir
        self.pose_dir = pose_dir if pose_dir is not None else root_dir
        self.mode = mode

        # prepare data_names, intrinsics and extrinsics(T)
        with np.load(npz_path) as data:
            self.data_names = data['name']
            # Overlap filtering must be skipped for BOTH evaluation modes.
            # (The previous `['val' or 'test']` collapsed to `['val']`, so
            # test pairs were filtered by mistake.)
            if 'score' in data.files and mode not in ('val', 'test'):
                kept_mask = data['score'] > min_overlap_score
                self.data_names = self.data_names[kept_mask]

        # Materialise every array while the archive is open, then close it so
        # the underlying file handle is not leaked.
        with np.load(intrinsic_path) as intrinsics:
            self.intrinsics = dict(intrinsics)

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None
        self.fp16 = fp16
        self.img_resize = img_resize

    def __len__(self):
        """Return the number of image pairs kept for this scene."""
        return len(self.data_names)

    def _read_abs_pose(self, scene_name, name):
        """Read the pose stored at ``<pose_dir>/<scene_name>/pose/<name>.txt``."""
        pth = osp.join(self.pose_dir,
                       scene_name,
                       'pose', f'{name}.txt')
        return read_scannet_pose(pth)

    def _compute_rel_pose(self, scene_name, name0, name1):
        """Return ``pose1 @ inv(pose0)`` for the two named frames of a scene."""
        pose0 = self._read_abs_pose(scene_name, name0)
        pose1 = self._read_abs_pose(scene_name, name1)
        return np.matmul(pose1, inv(pose0))  # (4, 4)

    def __getitem__(self, idx):
        """Load the ``idx``-th image pair and its geometry.

        Returns:
            dict: images, depths, relative poses, intrinsics, scale factors and
            identifying metadata for the pair (see the keys at the bottom).
        """
        data_name = self.data_names[idx]
        scene_name, scene_sub_name, stem_name_0, stem_name_1 = data_name
        scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'

        # Fall back to the base resolution when no resize target was given;
        # previously `img_resize=None` crashed on the indexing below.
        resize = self.img_resize if self.img_resize is not None else self._BASE_WH

        # read the grayscale images, resized to `resize`
        img_name0 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_0}.jpg')
        img_name1 = osp.join(self.root_dir, scene_name, 'color', f'{stem_name_1}.jpg')

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0 = read_scannet_gray(img_name0, resize=resize, augment_fn=None)
        image1 = read_scannet_gray(img_name1, resize=resize, augment_fn=None)

        # read the depthmaps (presumably stored as (480, 640) -- confirm
        # against read_scannet_depth); test mode gets empty placeholders.
        if self.mode in ['train', 'val']:
            depth0 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_0}.png'))
            depth1 = read_scannet_depth(osp.join(self.root_dir, scene_name, 'depth', f'{stem_name_1}.png'))
        else:
            depth0 = depth1 = torch.tensor([])

        # read the intrinsic of depthmap; both views share one matrix
        K_0 = K_1 = torch.tensor(self.intrinsics[scene_name].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1),
                              dtype=torch.float32)
        T_1to0 = T_0to1.inverse()

        # scale factors mapping resized-image coordinates back to 640x480
        h_new, w_new = resize[1], resize[0]
        scale0 = torch.tensor([640/w_new, 480/h_new], dtype=torch.float)
        scale1 = torch.tensor([640/w_new, 480/h_new], dtype=torch.float)

        if self.fp16:
            # Poses and intrinsics intentionally stay in float32.
            image0, image1, depth0, depth1, scale0, scale1 = (
                t.half() for t in (image0, image1, depth0, depth1, scale0, scale1)
            )

        data = {
            'image0': image0,   # (1, h, w)
            'depth0': depth0,   # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,   # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'ScanNet',
            'scene_id': scene_name,
            'pair_id': idx,
            'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'),
                           osp.join(scene_name, 'color', f'{stem_name_1}.jpg'))
        }

        return data