File size: 5,982 Bytes
404d2af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os.path as osp
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from loguru import logger
from src.utils.dataset import read_megadepth_gray, read_megadepth_depth
class MegaDepthDataset(Dataset):
    """Image-pair dataset for a single MegaDepth scene.

    Each item is a dict describing one image pair of the scene: the two
    grayscale images, their depth maps (train/val only), intrinsics,
    relative poses, resize scales and, when images are padded, coarse-level
    validity masks.
    """

    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='train',
                 min_overlap_score=0.4,
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 depth_padding=False,
                 augment_fn=None,
                 **kwargs):
        """
        Manage one scene(npz_path) of MegaDepth dataset.

        Args:
            root_dir (str): megadepth root directory that has `phoenix`.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['train', 'val', 'test']
            min_overlap_score (float): how much a pair should have in common. In range of [0, 1].
                Set to 0 when testing (a non-zero value is reset to 0 with a warning in test mode).
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                This is useful during training with batches and testing with memory intensive algorithms.
                In 'val' mode this value is overridden with 864.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
                Only kept in 'train' mode.
            **kwargs: optional extras. `coarse_scale` (float, default 0.125) is the
                scale factor applied to the padding masks returned by `__getitem__`.

        Raises:
            AssertionError: in 'train' mode when `img_resize` is None or either
                padding flag is disabled.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        # Strip only the file extension; splitting on the first '.' would
        # truncate paths such as "./index/0001.npz" or "scene_info_0.1_0.7/0001.npz".
        self.scene_id = osp.splitext(npz_path)[0]

        # prepare scene_info and pair_info
        if mode == 'test' and min_overlap_score != 0:
            logger.warning("You are using `min_overlap_score`!=0 in test mode. Set to 0.")
            min_overlap_score = 0

        loaded = np.load(npz_path, allow_pickle=True)
        pair_infos = loaded['pair_infos'].copy()
        # `np.load` returns a read-only NpzFile for real .npz archives, which
        # does not support `del`. Copy everything except `pair_infos` into a
        # plain dict so both archive-backed and dict-backed inputs work.
        self.scene_info = {key: loaded[key] for key in loaded.keys() if key != 'pair_infos'}
        # Release the archive's file handle (a plain dict has no `close`).
        close_archive = getattr(loaded, 'close', None)
        if callable(close_archive):
            close_archive()
        self.pair_infos = [pair_info for pair_info in pair_infos if pair_info[1] > min_overlap_score]

        # parameters for image resizing, padding and depthmap padding
        if mode == 'train':
            # Kept as an assert so the exception type seen by callers is unchanged.
            assert img_resize is not None and img_padding and depth_padding, \
                "train mode requires `img_resize`, `img_padding` and `depth_padding`"
        self.img_resize = img_resize
        if mode == 'val':
            # Validation always uses a fixed resize, regardless of `img_resize`.
            self.img_resize = 864
        self.df = df
        self.img_padding = img_padding
        self.depth_max_size = 2000 if depth_padding else None  # the upperbound of depthmaps size in megadepth.

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None
        # `kwargs` is a dict: it must be read with `.get`, not `getattr`,
        # otherwise a caller-supplied `coarse_scale` is silently ignored.
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)

    def __len__(self):
        """Return the number of image pairs that passed the overlap filter."""
        return len(self.pair_infos)

    def __getitem__(self, idx):
        """Load the `idx`-th image pair of the scene.

        Args:
            idx (int): index into the filtered pair list.

        Returns:
            dict: images, depths (empty tensors outside train/val), relative
            poses `T_0to1`/`T_1to0`, intrinsics `K0`/`K1`, resize scales,
            identification fields, and `mask0`/`mask1` when the reader
            returned padding masks and a coarse scale is configured.
        """
        (idx0, idx1), _overlap_score, _central_matches = self.pair_infos[idx]
        image_paths = self.scene_info['image_paths']

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, image_paths[idx0])
        img_name1 = osp.join(self.root_dir, image_paths[idx1])

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0, mask0, scale0 = read_megadepth_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None)
        # np.random.choice([self.augment_fn, None], p=[0.5, 0.5]))
        image1, mask1, scale1 = read_megadepth_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None)
        # np.random.choice([self.augment_fn, None], p=[0.5, 0.5]))

        # read depth. shape: (h, w)
        if self.mode in ['train', 'val']:
            depth_paths = self.scene_info['depth_paths']
            depth0 = read_megadepth_depth(
                osp.join(self.root_dir, depth_paths[idx0]), pad_to=self.depth_max_size)
            depth1 = read_megadepth_depth(
                osp.join(self.root_dir, depth_paths[idx1]), pad_to=self.depth_max_size)
        else:
            # Depth is not loaded in test mode; empty placeholders are returned.
            depth0 = depth1 = torch.tensor([])

        # read intrinsics of original size
        intrinsics = self.scene_info['intrinsics']
        K_0 = torch.tensor(intrinsics[idx0].copy(), dtype=torch.float).reshape(3, 3)
        K_1 = torch.tensor(intrinsics[idx1].copy(), dtype=torch.float).reshape(3, 3)

        # read and compute relative poses
        T0 = self.scene_info['poses'][idx0]
        T1 = self.scene_info['poses'][idx1]
        T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
        T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'MegaDepth',
            'scene_id': self.scene_id,
            'pair_id': idx,
            'pair_names': (image_paths[idx0], image_paths[idx1]),
        }

        # for LoFTR training
        # Masks are only emitted when the reader produced them (img_padding is
        # True) and a coarse scale is configured; this avoids referencing
        # undefined mask tensors when `coarse_scale` is falsy.
        if mask0 is not None and self.coarse_scale:
            # Stack both masks into one (1, 2, h, w) float batch so a single
            # nearest-neighbour resize handles them together.
            stacked_masks = torch.stack([mask0, mask1], dim=0)[None].float()
            ts_mask_0, ts_mask_1 = F.interpolate(stacked_masks,
                                                 scale_factor=self.coarse_scale,
                                                 mode='nearest',
                                                 recompute_scale_factor=False)[0].bool()
            data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data
|