Browse files- +17 -6
- dust3r/utils/ +5 -19
@@ -79,29 +79,40 @@ def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud
79 |
80 |
81 |
def generate_monocular_depth_maps(img_list, depth_prior_name):
    """Predict a monocular depth map for each image and cache it on disk.

    For every path in ``img_list`` an ``.npz`` archive is written next to the
    image, named by replacing the ``.png`` / ``.jpg`` extension with
    ``_pred_depth_<depth_prior_name>.npz``. This is the file name that
    ``load_images`` later opens and reads the ``depth`` (and, for
    ``'depthpro'``, ``focallength_px``) entries from.

    Args:
        img_list: Iterable of image file paths.
        depth_prior_name: ``'depthpro'`` or ``'depthanything'``. Any other
            value makes the function a no-op.

    Returns:
        None. The results are only written to disk.
    """
    if depth_prior_name == 'depthpro':
        model, transform = depth_pro.create_model_and_transforms(device='cuda')
        # NOTE(review): lines are missing from this view right after model
        # creation; upstream depth_pro examples call model.eval() here - confirm.
        for image_path in tqdm(img_list):
            path_depthpro = image_path.replace('.png','_pred_depth_depthpro.npz').replace('.jpg','_pred_depth_depthpro.npz')
            image, _, f_px = depth_pro.load_rgb(image_path)
            image = transform(image)
            # Run inference.
            prediction = model.infer(image, f_px=f_px)
            depth = prediction["depth"].cpu()  # Depth in [m].
            # Store the focal length alongside the depth: the loader reads
            # 'focallength_px' from this archive for the depthpro prior.
            np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())

    elif depth_prior_name == 'depthanything':
        pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Large-hf",device='cuda')
        for image_path in tqdm(img_list):
            path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
            # The right-hand side of the original `image = ...` statement is
            # missing from this view. The depth-estimation pipeline accepts a
            # local file path directly, so the path is passed as-is.
            # NOTE(review): confirm the original did no extra preprocessing.
            depth = pipe(image_path)["predicted_depth"].numpy()
            # Only the depth is saved for this prior; the loader falls back to
            # a fixed focal length when the prior is not 'depthpro'.
            np.savez_compressed(path_depthanything, depth=depth)
99 |
100 |
101 |
102 |
def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
103 |
generate_monocular_depth_maps(filelist, depth_prior_name)
104 |
imgs = load_images(filelist, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
105 |
pairs = []
106 |
pairs.append((imgs[0], imgs[1]))
107 |
output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
79 |
80 |
81 |
def generate_monocular_depth_maps(img_list, depth_prior_name):
    """Predict a monocular depth map for each image and return them in memory.

    Args:
        img_list: Iterable of image file paths.
        depth_prior_name: ``'depthpro'`` or ``'depthanything'``.

    Returns:
        A ``(depth_list, focallength_px_list)`` tuple holding one entry per
        input image, in input order. Both lists are empty when
        ``depth_prior_name`` matches neither supported prior.
    """
    depth_list = []
    focallength_px_list = []

    if depth_prior_name == 'depthpro':
        model, transform = depth_pro.create_model_and_transforms(device='cuda')
        # NOTE(review): lines are missing from this view right after model
        # creation; upstream depth_pro examples call model.eval() here - confirm.
        for image_path in tqdm(img_list):
            image, _, f_px = depth_pro.load_rgb(image_path)
            image = transform(image)
            # Run inference.
            prediction = model.infer(image, f_px=f_px)
            # Collect the results instead of dropping them, so the caller
            # receives one depth map and one focal length per image.
            depth_list.append(prediction["depth"].cpu())  # Depth in [m].
            focallength_px_list.append(prediction["focallength_px"].cpu())

    elif depth_prior_name == 'depthanything':
        pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Large-hf",device='cuda')
        for image_path in tqdm(img_list):
            # The right-hand side of the original `image = ...` statement is
            # missing from this view. The depth-estimation pipeline accepts a
            # local file path directly, so the path is passed as-is.
            # NOTE(review): confirm the original did no extra preprocessing.
            depth_list.append(pipe(image_path)["predicted_depth"].numpy())
            # No focal length is taken from this pipeline; the fixed value
            # 200 is used for every image.
            focallength_px_list.append(200)

    return depth_list, focallength_px_list
111 |
112 |
113 |
def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
114 |
depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
115 |
imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
116 |
pairs = []
117 |
pairs.append((imgs[0], imgs[1]))
118 |
output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
@@ -243,7 +243,7 @@ def normalize_pointcloud(point_cloud):
243 |
normalized_point_cloud = (point_cloud - min_vals) / (max_vals - min_vals)
244 |
return normalized_point_cloud
245 |
246 |
def load_images(folder_or_list, size, square_ok=False, verbose=True, dynamic_mask_root=None, crop=True, fps=0, traj_format="sintel", start=0, interval=30, depth_prior_name='depthpro'):
247 |
"""Open and convert all images or videos in a list or folder to proper input format for DUSt3R."""
248 |
if isinstance(folder_or_list, str):
249 |
if verbose:
@@ -272,30 +272,16 @@ def load_images(folder_or_list, size, square_ok=False, verbose=True, dynamic_mas
272 |
imgs = []
273 |
# Sort items by their names
274 |
#start = 0
275 |
folder_content = sorted(folder_content, key=lambda x: x.split('/')[-1])[start : start + interval]
276 |
# print(start,interval,len(folder_content))
277 |
for path in folder_content:
278 |
full_path = os.path.join(root, path)
279 |
if path.lower().endswith(supported_images_extensions):
280 |
# Process image files
281 |
img = exif_transpose('RGB')
282 |
283 |
284 |
285 |
elif traj_format in ["tum", "tartanair"]:
286 |
pred_depth = np.load(full_path.replace('rgb_50','rgb_50_depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
287 |
elif traj_format in ["bonn"]:
288 |
pred_depth = np.load(full_path.replace('rgb_110','rgb_110_depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
289 |
elif traj_format in ["davis"]:
290 |
pred_depth = np.load(full_path.replace('JPEGImages','depth_prediction_' + depth_prior_name).replace('.jpg', '.npz').replace('480p', '1080p'))
291 |
292 |
pred_depth = np.load(full_path.replace('.png','_pred_depth_' + depth_prior_name + '.npz').replace('.jpg','_pred_depth_' + depth_prior_name + '.npz'), allow_pickle=True)
293 |
294 |
if depth_prior_name == 'depthpro':
295 |
focal_length_px = pred_depth['focallength_px']
296 |
297 |
focal_length_px = 200
298 |
pred_depth1 = pred_depth['depth']
299 |
300 |
if len(pred_depth1.shape) == 3:
301 |
pred_depth1 = np.squeeze(pred_depth1)
243 |
normalized_point_cloud = (point_cloud - min_vals) / (max_vals - min_vals)
244 |
return normalized_point_cloud
245 |
246 |
def load_images(folder_or_list, depth_list, focallength_px_list, size, square_ok=False, verbose=True, dynamic_mask_root=None, crop=True, fps=0, traj_format="sintel", start=0, interval=30, depth_prior_name='depthpro'):
247 |
"""Open and convert all images or videos in a list or folder to proper input format for DUSt3R."""
248 |
if isinstance(folder_or_list, str):
249 |
if verbose:
272 |
imgs = []
273 |
# Sort items by their names
274 |
#start = 0
275 |
#folder_content = sorted(folder_content, key=lambda x: x.split('/')[-1])[start : start + interval]
276 |
# print(start,interval,len(folder_content))
277 |
for i, path in enumerate(folder_content):
278 |
full_path = os.path.join(root, path)
279 |
if path.lower().endswith(supported_images_extensions):
280 |
# Process image files
281 |
img = exif_transpose('RGB')
282 |
283 |
pred_depth = depth_list[i]
284 |
focal_length_px = focallength_px_list[i]
285 |
286 |
if len(pred_depth1.shape) == 3:
287 |
pred_depth1 = np.squeeze(pred_depth1)