import os

import h5py
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm

from lib.utils import preprocess_image


class MegaDepthDataset(Dataset):
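    """MegaDepth image-pair dataset.

    Samples `pairs_per_scene` image pairs per scene whose covisibility
    (3D-point overlap ratio) lies in [min_overlap_ratio, max_overlap_ratio]
    and whose depth-scale ratio is at most max_scale_ratio. Each item
    provides two image crops together with their depth maps, intrinsics,
    poses and crop offsets.
    """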

    def __init__(
        self,
        scene_list_path='megadepth_utils/train_scenes.txt',
        scene_info_path='/local/dataset/megadepth/scene_info',
        base_path='/local/dataset/megadepth',
        train=True,
        preprocessing=None,
        min_overlap_ratio=.5,
        max_overlap_ratio=1,
        max_scale_ratio=np.inf,
        pairs_per_scene=100,
        image_size=256
    ):
        with open(scene_list_path, 'r') as f:
            self.scenes = [line.strip('\n') for line in f]

        self.scene_info_path = scene_info_path
        self.base_path = base_path

        self.train = train
        self.preprocessing = preprocessing

        self.min_overlap_ratio = min_overlap_ratio
        self.max_overlap_ratio = max_overlap_ratio
        self.max_scale_ratio = max_scale_ratio

        self.pairs_per_scene = pairs_per_scene
        self.image_size = image_size

        # Filled by build_dataset().
        self.dataset = []

    def build_dataset(self):
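        """(Re)sample image pairs for every scene.

        For training, this is meant to be called once per epoch to draw a
        fresh set of pairs; for validation, the RNG is seeded so the same
        pairs are drawn every time.
        """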
        self.dataset = []
        if not self.train:
            # Fix the RNG so validation pairs are reproducible; the previous
            # state is restored at the end of this method.
            np_random_state = np.random.get_state()
            np.random.seed(42)
            print('Building the validation dataset...')
        else:
            print('Building a new training dataset...')
        for scene in tqdm(self.scenes, total=len(self.scenes)):
            scene_info_path = os.path.join(
                self.scene_info_path, '%s.npz' % scene
            )
            if not os.path.exists(scene_info_path):
                continue
            scene_info = np.load(scene_info_path, allow_pickle=True)
            overlap_matrix = scene_info['overlap_matrix']
            scale_ratio_matrix = scene_info['scale_ratio_matrix']

            # Keep image pairs whose covisibility and scale ratio are within
            # the configured bounds.
            valid = np.logical_and(
                np.logical_and(
                    overlap_matrix >= self.min_overlap_ratio,
                    overlap_matrix <= self.max_overlap_ratio
                ),
                scale_ratio_matrix <= self.max_scale_ratio
            )

            pairs = np.vstack(np.where(valid))
            try:
                selected_ids = np.random.choice(
                    pairs.shape[1], self.pairs_per_scene
                )
            except ValueError:
                # No valid pair in this scene.
                continue

            image_paths = scene_info['image_paths']
            depth_paths = scene_info['depth_paths']
            points3D_id_to_2D = scene_info['points3D_id_to_2D']
            points3D_id_to_ndepth = scene_info['points3D_id_to_ndepth']
            intrinsics = scene_info['intrinsics']
            poses = scene_info['poses']

            for pair_idx in selected_ids:
                idx1 = pairs[0, pair_idx]
                idx2 = pairs[1, pair_idx]
                # 3D points observed in both images.
                matches = np.array(list(
                    points3D_id_to_2D[idx1].keys() &
                    points3D_id_to_2D[idx2].keys()
                ))

                # Discard matches whose normalized-depth ratio exceeds the
                # maximum scale ratio.
                matches_nd1 = np.array([
                    points3D_id_to_ndepth[idx1][match] for match in matches
                ])
                matches_nd2 = np.array([
                    points3D_id_to_ndepth[idx2][match] for match in matches
                ])
                scale_ratio = np.maximum(
                    matches_nd1 / matches_nd2, matches_nd2 / matches_nd1
                )
                matches = matches[
                    np.where(scale_ratio <= self.max_scale_ratio)[0]
                ]
                if matches.size == 0:
                    # All common points were filtered out; skip this pair.
                    continue

                # Use one randomly chosen common 3D point as the crop center;
                # its (x, y) 2D coordinates are swapped to (row, col) order.
                point3D_id = np.random.choice(matches)
                point2D1 = points3D_id_to_2D[idx1][point3D_id]
                point2D2 = points3D_id_to_2D[idx2][point3D_id]
                nd1 = points3D_id_to_ndepth[idx1][point3D_id]
                nd2 = points3D_id_to_ndepth[idx2][point3D_id]
                central_match = np.array([
                    point2D1[1], point2D1[0],
                    point2D2[1], point2D2[0]
                ])
                self.dataset.append({
                    'image_path1': image_paths[idx1],
                    'depth_path1': depth_paths[idx1],
                    'intrinsics1': intrinsics[idx1],
                    'pose1': poses[idx1],
                    'image_path2': image_paths[idx2],
                    'depth_path2': depth_paths[idx2],
                    'intrinsics2': intrinsics[idx2],
                    'pose2': poses[idx2],
                    'central_match': central_match,
                    'scale_ratio': max(nd1 / nd2, nd2 / nd1)
                })
        np.random.shuffle(self.dataset)
        if not self.train:
            # Restore the RNG state saved at the top of this method.
            np.random.set_state(np_random_state)

    def __len__(self):
        return len(self.dataset)

    def recover_pair(self, pair_metadata):
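        """Load a pair's images and depth maps and crop both around the
        central match.
        """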
        depth_path1 = os.path.join(
            self.base_path, pair_metadata['depth_path1']
        )
        # Depth maps are stored as HDF5 files with a single '/depth' dataset.
        with h5py.File(depth_path1, 'r') as hdf5_file:
            depth1 = np.array(hdf5_file['/depth'])
        assert np.min(depth1) >= 0
        image_path1 = os.path.join(
            self.base_path, pair_metadata['image_path1']
        )
        image1 = Image.open(image_path1)
        if image1.mode != 'RGB':
            image1 = image1.convert('RGB')
        image1 = np.array(image1)
        assert image1.shape[:2] == depth1.shape[:2]
        intrinsics1 = pair_metadata['intrinsics1']
        pose1 = pair_metadata['pose1']

        depth_path2 = os.path.join(
            self.base_path, pair_metadata['depth_path2']
        )
        with h5py.File(depth_path2, 'r') as hdf5_file:
            depth2 = np.array(hdf5_file['/depth'])
        assert np.min(depth2) >= 0
        image_path2 = os.path.join(
            self.base_path, pair_metadata['image_path2']
        )
        image2 = Image.open(image_path2)
        if image2.mode != 'RGB':
            image2 = image2.convert('RGB')
        image2 = np.array(image2)
        assert image2.shape[:2] == depth2.shape[:2]
        intrinsics2 = pair_metadata['intrinsics2']
        pose2 = pair_metadata['pose2']

        central_match = pair_metadata['central_match']
        image1, bbox1, image2, bbox2 = self.crop(
            image1, image2, central_match
        )

        # Crop the depth maps with the same windows as the images.
        depth1 = depth1[
            bbox1[0] : bbox1[0] + self.image_size,
            bbox1[1] : bbox1[1] + self.image_size
        ]
        depth2 = depth2[
            bbox2[0] : bbox2[0] + self.image_size,
            bbox2[1] : bbox2[1] + self.image_size
        ]

        return (
            image1, depth1, intrinsics1, pose1, bbox1,
            image2, depth2, intrinsics2, pose2, bbox2
        )

    def crop(self, image1, image2, central_match):
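        """Crop an image_size x image_size window around the match in each
        image, clamped so the windows stay inside the image bounds.
        """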
        bbox1_i = max(int(central_match[0]) - self.image_size // 2, 0)
        if bbox1_i + self.image_size >= image1.shape[0]:
            bbox1_i = image1.shape[0] - self.image_size
        bbox1_j = max(int(central_match[1]) - self.image_size // 2, 0)
        if bbox1_j + self.image_size >= image1.shape[1]:
            bbox1_j = image1.shape[1] - self.image_size

        bbox2_i = max(int(central_match[2]) - self.image_size // 2, 0)
        if bbox2_i + self.image_size >= image2.shape[0]:
            bbox2_i = image2.shape[0] - self.image_size
        bbox2_j = max(int(central_match[3]) - self.image_size // 2, 0)
        if bbox2_j + self.image_size >= image2.shape[1]:
            bbox2_j = image2.shape[1] - self.image_size

        return (
            image1[
                bbox1_i : bbox1_i + self.image_size,
                bbox1_j : bbox1_j + self.image_size
            ],
            np.array([bbox1_i, bbox1_j]),
            image2[
                bbox2_i : bbox2_i + self.image_size,
                bbox2_j : bbox2_j + self.image_size
            ],
            np.array([bbox2_i, bbox2_j])
        )

    def __getitem__(self, idx):
        (
            image1, depth1, intrinsics1, pose1, bbox1,
            image2, depth2, intrinsics2, pose2, bbox2
        ) = self.recover_pair(self.dataset[idx])

        image1 = preprocess_image(image1, preprocessing=self.preprocessing)
        image2 = preprocess_image(image2, preprocessing=self.preprocessing)

        return {
            'image1': torch.from_numpy(image1.astype(np.float32)),
            'depth1': torch.from_numpy(depth1.astype(np.float32)),
            'intrinsics1': torch.from_numpy(intrinsics1.astype(np.float32)),
            'pose1': torch.from_numpy(pose1.astype(np.float32)),
            'bbox1': torch.from_numpy(bbox1.astype(np.float32)),
            'image2': torch.from_numpy(image2.astype(np.float32)),
            'depth2': torch.from_numpy(depth2.astype(np.float32)),
            'intrinsics2': torch.from_numpy(intrinsics2.astype(np.float32)),
            'pose2': torch.from_numpy(pose2.astype(np.float32)),
            'bbox2': torch.from_numpy(bbox2.astype(np.float32))
        }
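
if __name__ == '__main__':
    # Minimal usage sketch: assumes the default MegaDepth scene_info archives
    # and scene list exist at the paths above, and that lib.utils is
    # importable. build_dataset() must be called before iterating; during
    # training it is typically re-run at the start of every epoch to draw
    # fresh pairs.
    from torch.utils.data import DataLoader

    dataset = MegaDepthDataset(train=False)
    dataset.build_dataset()
    loader = DataLoader(dataset, batch_size=2, num_workers=0)
    batch = next(iter(loader))
    print({key: tuple(value.shape) for key, value in batch.items()})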