Spaces:

cyun9286
/

Align3R

Running on Zero

App Files Community

cyun9286 committed on Dec 13, 2024

Commit

b2eead7

1 Parent(s): 3046a80

1

Browse files

Files changed (2) hide show

.gitignore +4 -3
app.py +25 -12

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
-# *.pth
-# *.pt
-.gitignore

+*.pth
+*.pt
+.gitignore
+*.glb

app.py CHANGED Viewed

@@ -21,14 +21,14 @@ from PIL import Image
 from dust3r.inference import inference
 from dust3r.model import AsymmetricCroCo3DStereo
 from dust3r.image_pairs import make_pairs
-from dust3r.utils.image_pose import load_images, rgb, enlarge_seg_masks
 from dust3r.utils.device import to_numpy
 from dust3r.cloud_opt_flow import global_aligner, GlobalAlignerMode
 import matplotlib.pyplot as pl
 from transformers import pipeline
 from dust3r.utils.viz_demo import convert_scene_output_to_glb
 import depth_pro
-import spaces
 from huggingface_hub import hf_hub_download
 pl.ion()
@@ -95,8 +95,9 @@ def generate_monocular_depth_maps(img_list, depth_prior_name):
           image = transform(image)
           # Run inference.
           prediction = model.infer(image, f_px=f_px)
-          depth = prediction["depth"].cpu()  # Depth in [m].
           focallength_px=prediction["focallength_px"].cpu()
           depth_list.append(depth)
           focallength_px_list.append(focallength_px)
           #np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
@@ -105,30 +106,49 @@ def generate_monocular_depth_maps(img_list, depth_prior_name):
         for image_path in tqdm(img_list):
           #path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
           image = Image.open(image_path)
           depth = pipe(image)["predicted_depth"].numpy()
           focallength_px = 200
           depth_list.append(depth)
           focallength_px_list.append(focallength_px)
           #np.savez_compressed(path_depthanything, depth=depth)
     return depth_list, focallength_px_list
-@spaces.GPU(duration=180)
 def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
     depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
     imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
     pairs = []
     pairs.append((imgs[0], imgs[1]))
     output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
     mode = GlobalAlignerMode.PairViewer
     scene = global_aligner(output, device=device, mode=mode, verbose=not silent)
     save_folder = './output'
-    outfile = get_3D_model_from_scene(save_folder, silent, scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, show_cam)
     return outfile
 def run_example(snapshot, matching_conf_thr, min_conf_thr, cam_size, as_pointcloud, shared_intrinsics, filelist, **kw):
     return local_get_reconstructed_scene(filelist, cam_size, **kw)
 css = """.gradio-container {margin: 0 !important; min-width: 100%};"""
 title = "Align3R Demo"
@@ -149,13 +169,6 @@ with gradio.Blocks(css=css, title=title, delete_cache=(gradio_delete_cache, grad
             depth_prior_name = gradio.Dropdown(
             ["Depth Pro", "Depth Anything V2"], label="monocular depth estimation model", info="Select the monocular depth estimation model.")
             min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.1, minimum=0.0, maximum=20, step=0.01)
-            if depth_prior_name == "Depth Pro":
-              weights_path = "cyun9286/Align3R_DepthPro_ViTLarge_BaseDecoder_512_dpt"
-            else:
-              weights_path = "cyun9286/Align3R_DepthAnythingV2_ViTLarge_BaseDecoder_512_dpt"
-            device = 'cuda' if torch.cuda.is_available() else 'cpu'
-            model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)
         with gradio.Row():
             as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
             mask_sky = gradio.Checkbox(value=False, label="Mask sky")

 from dust3r.inference import inference
 from dust3r.model import AsymmetricCroCo3DStereo
 from dust3r.image_pairs import make_pairs
+from dust3r.utils.image_pose import load_images, rgb, enlarge_seg_masks, resize_numpy_image
 from dust3r.utils.device import to_numpy
 from dust3r.cloud_opt_flow import global_aligner, GlobalAlignerMode
 import matplotlib.pyplot as pl
 from transformers import pipeline
 from dust3r.utils.viz_demo import convert_scene_output_to_glb
 import depth_pro
+# import spaces
 from huggingface_hub import hf_hub_download
 pl.ion()
           image = transform(image)
           # Run inference.
           prediction = model.infer(image, f_px=f_px)
+          depth = prediction["depth"].cpu().numpy()  # Depth in [m].
           focallength_px=prediction["focallength_px"].cpu()
+          depth = resize_numpy_image(depth, image.size)
           depth_list.append(depth)
           focallength_px_list.append(focallength_px)
           #np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
         for image_path in tqdm(img_list):
           #path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
           image = Image.open(image_path)
+          #print(image.size)
           depth = pipe(image)["predicted_depth"].numpy()
+          depth = cv2.resize(depth[0], image.size, interpolation=cv2.INTER_LANCZOS4)
           focallength_px = 200
           depth_list.append(depth)
           focallength_px_list.append(focallength_px)
           #np.savez_compressed(path_depthanything, depth=depth)
     return depth_list, focallength_px_list
+# @spaces.GPU(duration=180)
 def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
     depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
     imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
     pairs = []
     pairs.append((imgs[0], imgs[1]))
+    pairs.append((imgs[1], imgs[0]))
+    if depth_prior_name == "Depth Pro":
+      weights_path = "cyun9286/Align3R_DepthPro_ViTLarge_BaseDecoder_512_dpt"
+    else:
+      weights_path = "cyun9286/Align3R_DepthAnythingV2_ViTLarge_BaseDecoder_512_dpt"
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)
     output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
     mode = GlobalAlignerMode.PairViewer
     scene = global_aligner(output, device=device, mode=mode, verbose=not silent)
     save_folder = './output'
+    os.makedirs(save_folder, exist_ok=True)
+    outfile = get_3D_model_from_scene(save_folder, silent, scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size)
     return outfile
 def run_example(snapshot, matching_conf_thr, min_conf_thr, cam_size, as_pointcloud, shared_intrinsics, filelist, **kw):
     return local_get_reconstructed_scene(filelist, cam_size, **kw)
+# filelist = ['/home/lipeng/ljh_code/Video_Depth_CVPR2025-main/Align3R/data/davis/DAVIS/JPEGImages/480p/bear/00000.jpg', '/home/lipeng/ljh_code/Video_Depth_CVPR2025-main/Align3R/data/davis/DAVIS/JPEGImages/480p/bear/00010.jpg']
+# min_conf_thr = 1.1
+# as_pointcloud = True
+# mask_sky = False
+# clean_depth = True
+# transparent_cams = False
+# cam_size = 0.2
+# depth_prior_name = 'Depth Anything V2'
+# local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name)
 css = """.gradio-container {margin: 0 !important; min-width: 100%};"""
 title = "Align3R Demo"
             depth_prior_name = gradio.Dropdown(
             ["Depth Pro", "Depth Anything V2"], label="monocular depth estimation model", info="Select the monocular depth estimation model.")
             min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.1, minimum=0.0, maximum=20, step=0.01)
         with gradio.Row():
             as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
             mask_sky = gradio.Checkbox(value=False, label="Mask sky")