cyun9286 committed on
Commit
b2eead7
1 Parent(s): 3046a80
Files changed (2) hide show
  1. .gitignore +4 -3
  2. app.py +25 -12
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
- # *.pth
2
- # *.pt
3
- .gitignore
 
 
1
+ *.pth
2
+ *.pt
3
+ .gitignore
4
+ *.glb
app.py CHANGED
@@ -21,14 +21,14 @@ from PIL import Image
21
  from dust3r.inference import inference
22
  from dust3r.model import AsymmetricCroCo3DStereo
23
  from dust3r.image_pairs import make_pairs
24
- from dust3r.utils.image_pose import load_images, rgb, enlarge_seg_masks
25
  from dust3r.utils.device import to_numpy
26
  from dust3r.cloud_opt_flow import global_aligner, GlobalAlignerMode
27
  import matplotlib.pyplot as pl
28
  from transformers import pipeline
29
  from dust3r.utils.viz_demo import convert_scene_output_to_glb
30
  import depth_pro
31
- import spaces
32
  from huggingface_hub import hf_hub_download
33
  pl.ion()
34
 
@@ -95,8 +95,9 @@ def generate_monocular_depth_maps(img_list, depth_prior_name):
95
  image = transform(image)
96
  # Run inference.
97
  prediction = model.infer(image, f_px=f_px)
98
- depth = prediction["depth"].cpu() # Depth in [m].
99
  focallength_px=prediction["focallength_px"].cpu()
 
100
  depth_list.append(depth)
101
  focallength_px_list.append(focallength_px)
102
  #np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
@@ -105,30 +106,49 @@ def generate_monocular_depth_maps(img_list, depth_prior_name):
105
  for image_path in tqdm(img_list):
106
  #path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
107
  image = Image.open(image_path)
 
108
  depth = pipe(image)["predicted_depth"].numpy()
 
109
  focallength_px = 200
110
  depth_list.append(depth)
111
  focallength_px_list.append(focallength_px)
112
  #np.savez_compressed(path_depthanything, depth=depth)
113
  return depth_list, focallength_px_list
114
 
115
- @spaces.GPU(duration=180)
116
  def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
117
  depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
118
  imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
119
  pairs = []
120
  pairs.append((imgs[0], imgs[1]))
 
 
 
 
 
 
 
121
  output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
122
  mode = GlobalAlignerMode.PairViewer
123
  scene = global_aligner(output, device=device, mode=mode, verbose=not silent)
124
  save_folder = './output'
125
- outfile = get_3D_model_from_scene(save_folder, silent, scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, show_cam)
 
126
 
127
  return outfile
128
 
129
 
130
  def run_example(snapshot, matching_conf_thr, min_conf_thr, cam_size, as_pointcloud, shared_intrinsics, filelist, **kw):
131
  return local_get_reconstructed_scene(filelist, cam_size, **kw)
 
 
 
 
 
 
 
 
 
132
 
133
  css = """.gradio-container {margin: 0 !important; min-width: 100%};"""
134
  title = "Align3R Demo"
@@ -149,13 +169,6 @@ with gradio.Blocks(css=css, title=title, delete_cache=(gradio_delete_cache, grad
149
  depth_prior_name = gradio.Dropdown(
150
  ["Depth Pro", "Depth Anything V2"], label="monocular depth estimation model", info="Select the monocular depth estimation model.")
151
  min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.1, minimum=0.0, maximum=20, step=0.01)
152
-
153
- if depth_prior_name == "Depth Pro":
154
- weights_path = "cyun9286/Align3R_DepthPro_ViTLarge_BaseDecoder_512_dpt"
155
- else:
156
- weights_path = "cyun9286/Align3R_DepthAnythingV2_ViTLarge_BaseDecoder_512_dpt"
157
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
158
- model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)
159
  with gradio.Row():
160
  as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
161
  mask_sky = gradio.Checkbox(value=False, label="Mask sky")
 
21
  from dust3r.inference import inference
22
  from dust3r.model import AsymmetricCroCo3DStereo
23
  from dust3r.image_pairs import make_pairs
24
+ from dust3r.utils.image_pose import load_images, rgb, enlarge_seg_masks, resize_numpy_image
25
  from dust3r.utils.device import to_numpy
26
  from dust3r.cloud_opt_flow import global_aligner, GlobalAlignerMode
27
  import matplotlib.pyplot as pl
28
  from transformers import pipeline
29
  from dust3r.utils.viz_demo import convert_scene_output_to_glb
30
  import depth_pro
31
+ # import spaces
32
  from huggingface_hub import hf_hub_download
33
  pl.ion()
34
 
 
95
  image = transform(image)
96
  # Run inference.
97
  prediction = model.infer(image, f_px=f_px)
98
+ depth = prediction["depth"].cpu().numpy() # Depth in [m].
99
  focallength_px=prediction["focallength_px"].cpu()
100
+ depth = resize_numpy_image(depth, image.size)
101
  depth_list.append(depth)
102
  focallength_px_list.append(focallength_px)
103
  #np.savez_compressed(path_depthpro, depth=depth, focallength_px=prediction["focallength_px"].cpu())
 
106
  for image_path in tqdm(img_list):
107
  #path_depthanything = image_path.replace('.png','_pred_depth_depthanything.npz').replace('.jpg','_pred_depth_depthanything.npz')
108
  image = Image.open(image_path)
109
+ #print(image.size)
110
  depth = pipe(image)["predicted_depth"].numpy()
111
+ depth = cv2.resize(depth[0], image.size, interpolation=cv2.INTER_LANCZOS4)
112
  focallength_px = 200
113
  depth_list.append(depth)
114
  focallength_px_list.append(focallength_px)
115
  #np.savez_compressed(path_depthanything, depth=depth)
116
  return depth_list, focallength_px_list
117
 
118
+ # @spaces.GPU(duration=180)
119
  def local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name, **kw):
120
  depth_list, focallength_px_list = generate_monocular_depth_maps(filelist, depth_prior_name)
121
  imgs = load_images(filelist, depth_list, focallength_px_list, size=image_size, verbose=not silent,traj_format='custom', depth_prior_name=depth_prior_name)
122
  pairs = []
123
  pairs.append((imgs[0], imgs[1]))
124
+ pairs.append((imgs[1], imgs[0]))
125
+ if depth_prior_name == "Depth Pro":
126
+ weights_path = "cyun9286/Align3R_DepthPro_ViTLarge_BaseDecoder_512_dpt"
127
+ else:
128
+ weights_path = "cyun9286/Align3R_DepthAnythingV2_ViTLarge_BaseDecoder_512_dpt"
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)
131
  output = inference(pairs, model, device, batch_size=batch_size, verbose=not silent)
132
  mode = GlobalAlignerMode.PairViewer
133
  scene = global_aligner(output, device=device, mode=mode, verbose=not silent)
134
  save_folder = './output'
135
+ os.makedirs(save_folder, exist_ok=True)
136
+ outfile = get_3D_model_from_scene(save_folder, silent, scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size)
137
 
138
  return outfile
139
 
140
 
141
  def run_example(snapshot, matching_conf_thr, min_conf_thr, cam_size, as_pointcloud, shared_intrinsics, filelist, **kw):
142
  return local_get_reconstructed_scene(filelist, cam_size, **kw)
143
+ # filelist = ['/home/lipeng/ljh_code/Video_Depth_CVPR2025-main/Align3R/data/davis/DAVIS/JPEGImages/480p/bear/00000.jpg', '/home/lipeng/ljh_code/Video_Depth_CVPR2025-main/Align3R/data/davis/DAVIS/JPEGImages/480p/bear/00010.jpg']
144
+ # min_conf_thr = 1.1
145
+ # as_pointcloud = True
146
+ # mask_sky = False
147
+ # clean_depth = True
148
+ # transparent_cams = False
149
+ # cam_size = 0.2
150
+ # depth_prior_name = 'Depth Anything V2'
151
+ # local_get_reconstructed_scene(filelist, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, depth_prior_name)
152
 
153
  css = """.gradio-container {margin: 0 !important; min-width: 100%};"""
154
  title = "Align3R Demo"
 
169
  depth_prior_name = gradio.Dropdown(
170
  ["Depth Pro", "Depth Anything V2"], label="monocular depth estimation model", info="Select the monocular depth estimation model.")
171
  min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.1, minimum=0.0, maximum=20, step=0.01)
 
 
 
 
 
 
 
172
  with gradio.Row():
173
  as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
174
  mask_sky = gradio.Checkbox(value=False, label="Mask sky")