cyun9286 committed on
Commit
04d1115
·
1 Parent(s): e23a503
Files changed (2) hide show
  1. app.py +17 -6
  2. dust3r/utils/image_pose.py +5 -19
app.py CHANGED
@@ -79,29 +79,40 @@ def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud
79
  cam_color=cam_color)
80
 
81
  def generate_monocular_depth_maps(img_list, depth_prior_name):
 
 
 
82
  if depth_prior_name=='depthpro':
83
  model, transform = depth_pro.create_model_and_transforms(device='cuda')
84
  model.eval()
 
85
  for image_path in tqdm(img_list):
86
- path_depthpro = image_path.replace('.png','_pred_depth_depthpro.npz').replace('.jpg','_pred_depth_depthpro.npz')
87
  image, _, f_px = depth_pro.load_rgb(image_path)
88
  image = transform(image)
89
  # Run inference.
90
  prediction = model.infer(image, f_px=f_px)
91
  depth = prediction["depth"].cpu() # Depth in [m].
92
- np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
 
 
 
93
  elif depth_prior_name=='depthanything':
94
  pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Large-hf",device='cuda')
95
  for image_path in tqdm(img_list):
96
- path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
97
  image = Image.open(image_path)
98
  depth = pipe(image)["predicted_depth"].numpy()
99
- np.savez_compressed(path_depthanything, depth=depth)
 
 
 
 
100
 
101
  @spaces.GPU(duration=180)
102
  def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
103
- generate_monocular_depth_maps(filelist, depth_prior_name)
104
- imgs = load_images(filelist, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
105
  pairs = []
106
  pairs.append((imgs[0], imgs[1]))
107
  output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
 
79
  cam_color=cam_color)
80
 
81
  def generate_monocular_depth_maps(img_list, depth_prior_name):
82
+ depth_list = []
83
+ focallength_px_list = []
84
+
85
  if depth_prior_name=='depthpro':
86
  model, transform = depth_pro.create_model_and_transforms(device='cuda')
87
  model.eval()
88
+
89
  for image_path in tqdm(img_list):
90
+ #path_depthpro = image_path.replace('.png','_pred_depth_depthpro.npz').replace('.jpg','_pred_depth_depthpro.npz')
91
  image, _, f_px = depth_pro.load_rgb(image_path)
92
  image = transform(image)
93
  # Run inference.
94
  prediction = model.infer(image, f_px=f_px)
95
  depth = prediction["depth"].cpu() # Depth in [m].
96
+ focallength_px=prediction["focallength_px"].cpu()
97
+ depth_list.append(depth)
98
+ focallength_px_list.append(focallength_px)
99
+ #np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
100
  elif depth_prior_name=='depthanything':
101
  pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Large-hf",device='cuda')
102
  for image_path in tqdm(img_list):
103
+ #path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
104
  image = Image.open(image_path)
105
  depth = pipe(image)["predicted_depth"].numpy()
106
+ focallength_px = 200
107
+ depth_list.append(depth)
108
+ focallength_px_list.append(focallength_px)
109
+ #np.savez_compressed(path_depthanything, depth=depth)
110
+ return depth_list, focallength_px_list
111
 
112
  @spaces.GPU(duration=180)
113
  def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
114
+ depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
115
+ imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
116
  pairs = []
117
  pairs.append((imgs[0], imgs[1]))
118
  output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
dust3r/utils/image_pose.py CHANGED
@@ -243,7 +243,7 @@ def normalize_pointcloud(point_cloud):
243
  normalized_point_cloud = (point_cloud - min_vals) / (max_vals - min_vals)
244
  return normalized_point_cloud
245
 
246
- def load_images(folder_or_list, size, square_ok=False, verbose=True, dynamic_mask_root=None, crop=True, fps=0, traj_format="sintel", start=0, interval=30, depth_prior_name='depthpro'):
247
  """Open and convert all images or videos in a list or folder to proper input format for DUSt3R."""
248
  if isinstance(folder_or_list, str):
249
  if verbose:
@@ -272,30 +272,16 @@ def load_images(folder_or_list, size, square_ok=False, verbose=True, dynamic_mas
272
  imgs = []
273
  # Sort items by their names
274
  #start = 0
275
- folder_content = sorted(folder_content, key=lambda x: x.split('/')[-1])[start : start + interval]
276
  # print(start,interval,len(folder_content))
277
- for path in folder_content:
278
  full_path = os.path.join(root, path)
279
  if path.lower().endswith(supported_images_extensions):
280
  # Process image files
281
  img = exif_transpose(PIL.Image.open(full_path)).convert('RGB')
282
 
283
- if traj_format == 'sintel':
284
- pred_depth = np.load(full_path.replace('final','depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
285
- elif traj_format in ["tum", "tartanair"]:
286
- pred_depth = np.load(full_path.replace('rgb_50','rgb_50_depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
287
- elif traj_format in ["bonn"]:
288
- pred_depth = np.load(full_path.replace('rgb_110','rgb_110_depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
289
- elif traj_format in ["davis"]:
290
- pred_depth = np.load(full_path.replace('JPEGImages','depth_prediction_' + depth_prior_name).replace('.jpg', '.npz').replace('480p', '1080p'))
291
- else:
292
- pred_depth = np.load(full_path.replace('.png','_pred_depth_' + depth_prior_name + '.npz').replace('.jpg','_pred_depth_' + depth_prior_name + '.npz'), allow_pickle=True)
293
- #print(pred_depth)
294
- if depth_prior_name == 'depthpro':
295
- focal_length_px = pred_depth['focallength_px']
296
- else:
297
- focal_length_px = 200
298
- pred_depth1 = pred_depth['depth']
299
 
300
  if len(pred_depth1.shape) == 3:
301
  pred_depth1 = np.squeeze(pred_depth1)
 
243
  normalized_point_cloud = (point_cloud - min_vals) / (max_vals - min_vals)
244
  return normalized_point_cloud
245
 
246
+ def load_images(folder_or_list, depth_list, focallength_px_list, size, square_ok=False, verbose=True, dynamic_mask_root=None, crop=True, fps=0, traj_format="sintel", start=0, interval=30, depth_prior_name='depthpro'):
247
  """Open and convert all images or videos in a list or folder to proper input format for DUSt3R."""
248
  if isinstance(folder_or_list, str):
249
  if verbose:
 
272
  imgs = []
273
  # Sort items by their names
274
  #start = 0
275
+ #folder_content = sorted(folder_content, key=lambda x: x.split('/')[-1])[start : start + interval]
276
  # print(start,interval,len(folder_content))
277
+ for i, path in enumerate(folder_content):
278
  full_path = os.path.join(root, path)
279
  if path.lower().endswith(supported_images_extensions):
280
  # Process image files
281
  img = exif_transpose(PIL.Image.open(full_path)).convert('RGB')
282
 
283
+ pred_depth = depth_list[i]
284
+ focal_length_px = focallength_px_list[i]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
 
286
  if len(pred_depth1.shape) == 3:
287
  pred_depth1 = np.squeeze(pred_depth1)