Spaces:

cyun9286
/

Align3R

Running on Zero

App Files Files Community

cyun9286 committed on Dec 13, 2024

Commit

04d1115

1 Parent(s): e23a503

commit

Browse files

Files changed (2) hide show

app.py +17 -6
dust3r/utils/image_pose.py +5 -19

app.py CHANGED Viewed

@@ -79,29 +79,40 @@ def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud
                                         cam_color=cam_color)
 def generate_monocular_depth_maps(img_list, depth_prior_name):
     if depth_prior_name=='depthpro':
         model, transform = depth_pro.create_model_and_transforms(device='cuda')
         model.eval()
         for image_path in tqdm(img_list):
-          path_depthpro = image_path.replace('.png','_pred_depth_depthpro.npz').replace('.jpg','_pred_depth_depthpro.npz')
           image, _, f_px = depth_pro.load_rgb(image_path)
           image = transform(image)
           # Run inference.
           prediction = model.infer(image, f_px=f_px)
           depth = prediction["depth"].cpu()  # Depth in [m].
-          np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
     elif depth_prior_name=='depthanything':
         pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Large-hf",device='cuda')
         for image_path in tqdm(img_list):
-          path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
           image = Image.open(image_path)
           depth = pipe(image)["predicted_depth"].numpy()
-          np.savez_compressed(path_depthanything, depth=depth)
 @spaces.GPU(duration=180)
 def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
-    generate_monocular_depth_maps(filelist, depth_prior_name)
-    imgs = load_images(filelist, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
     pairs = []
     pairs.append((imgs[0], imgs[1]))
     output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)

                                         cam_color=cam_color)
 def generate_monocular_depth_maps(img_list, depth_prior_name):
     """Predict a monocular depth map and a focal length for each image.

     Predictions are collected in memory and returned in the same order as
     ``img_list``; nothing is written to disk.

     Args:
         img_list: Paths of the input images.
         depth_prior_name: ``'depthpro'`` or ``'depthanything'``. Any other
             value runs no model and yields two empty lists.

     Returns:
         A ``(depth_list, focallength_px_list)`` tuple holding one entry per
         processed image.
     """
     depth_list = []
     focallength_px_list = []
     if depth_prior_name == 'depthpro':
         model, transform = depth_pro.create_model_and_transforms(device='cuda')
         model.eval()
         for image_path in tqdm(img_list):
             image, _, f_px = depth_pro.load_rgb(image_path)
             image = transform(image)
             # Run inference.
             prediction = model.infer(image, f_px=f_px)
             depth_list.append(prediction["depth"].cpu())  # Depth in [m].
             focallength_px_list.append(prediction["focallength_px"].cpu())
     elif depth_prior_name == 'depthanything':
         pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Large-hf", device='cuda')
         for image_path in tqdm(img_list):
             image = Image.open(image_path)
             depth_list.append(pipe(image)["predicted_depth"].numpy())
             # No focal length is read from the pipeline output in this
             # branch; the fixed value 200 is recorded for every image.
             focallength_px_list.append(200)
     # Return at function level: when this statement lived inside the
     # 'depthanything' branch, every other prior name fell through and the
     # caller's tuple-unpack failed on an implicit None.
     return depth_list, focallength_px_list
 @spaces.GPU(duration=180)
 def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
+    depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
+    imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
     pairs = []
     pairs.append((imgs[0], imgs[1]))
     output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)

dust3r/utils/image_pose.py CHANGED Viewed

@@ -243,7 +243,7 @@ def normalize_pointcloud(point_cloud):
     normalized_point_cloud = (point_cloud - min_vals) / (max_vals - min_vals)
     return normalized_point_cloud
-def load_images(folder_or_list, size, square_ok=False, verbose=True, dynamic_mask_root=None, crop=True, fps=0, traj_format="sintel", start=0, interval=30, depth_prior_name='depthpro'):
     """Open and convert all images or videos in a list or folder to proper input format for DUSt3R."""
     if isinstance(folder_or_list, str):
         if verbose:
@@ -272,30 +272,16 @@ def load_images(folder_or_list, size, square_ok=False, verbose=True, dynamic_mas
     imgs = []
     # Sort items by their names
     #start = 0
-    folder_content = sorted(folder_content, key=lambda x: x.split('/')[-1])[start : start + interval]
     # print(start,interval,len(folder_content))
-    for path in folder_content:
         full_path = os.path.join(root, path)
         if path.lower().endswith(supported_images_extensions):
             # Process image files
             img = exif_transpose(PIL.Image.open(full_path)).convert('RGB')
-            if traj_format == 'sintel':
-              pred_depth = np.load(full_path.replace('final','depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
-            elif traj_format in ["tum", "tartanair"]:
-              pred_depth = np.load(full_path.replace('rgb_50','rgb_50_depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
-            elif traj_format in ["bonn"]:
-                pred_depth = np.load(full_path.replace('rgb_110','rgb_110_depth_prediction_' + depth_prior_name).replace('.png', '.npz'))
-            elif traj_format in ["davis"]:
-                pred_depth = np.load(full_path.replace('JPEGImages','depth_prediction_' + depth_prior_name).replace('.jpg', '.npz').replace('480p', '1080p'))
-            else:
-                pred_depth = np.load(full_path.replace('.png','_pred_depth_' + depth_prior_name + '.npz').replace('.jpg','_pred_depth_' + depth_prior_name + '.npz'), allow_pickle=True)
-            #print(pred_depth)
-            if depth_prior_name == 'depthpro':
-              focal_length_px = pred_depth['focallength_px']
-            else:
-              focal_length_px = 200
-            pred_depth1 = pred_depth['depth']
             if len(pred_depth1.shape) == 3:
                 pred_depth1 = np.squeeze(pred_depth1)

     normalized_point_cloud = (point_cloud - min_vals) / (max_vals - min_vals)
     return normalized_point_cloud
+def load_images(folder_or_list, depth_list, focallength_px_list, size, square_ok=False, verbose=True, dynamic_mask_root=None, crop=True, fps=0, traj_format="sintel", start=0, interval=30, depth_prior_name='depthpro'):
     """Open and convert all images or videos in a list or folder to proper input format for DUSt3R."""
     if isinstance(folder_or_list, str):
         if verbose:
     imgs = []
     # Sort items by their names
     #start = 0
+    #folder_content = sorted(folder_content, key=lambda x: x.split('/')[-1])[start : start + interval]
     # print(start,interval,len(folder_content))
+    for i, path in enumerate(folder_content):
         full_path = os.path.join(root, path)
         if path.lower().endswith(supported_images_extensions):
             # Process image files
             img = exif_transpose(PIL.Image.open(full_path)).convert('RGB')
+            pred_depth = depth_list[i]
+            focal_length_px = focallength_px_list[i]
             if len(pred_depth1.shape) == 3:
                 pred_depth1 = np.squeeze(pred_depth1)