MackinationsAi committed on
Commit
2a3f6b2
1 Parent(s): 6149be1

Upload 4 files

Browse files
README.md ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+
4
+ language:
5
+ - en
6
+ pipeline_tag: depth-estimation
7
+ tags:
8
+ - depth
9
+ - relative depth
10
+ ---
11
+
12
+ # Depth-Anything-V2-Large
13
+
14
+ ## Introduction
15
+ Depth Anything V2 is trained from 595K synthetic labeled images & 62M+ real unlabeled images, providing the most capable monocular depth estimation (MDE) model with the following features:
16
+ - more fine-grained details than Depth Anything V1
17
+ - more robust than Depth Anything V1 & SD-based models (e.g., Marigold, Geowizard)
18
+ - more efficient (10x faster) & more lightweight than SD-based models
19
+ - impressive fine-tuned performance with our pre-trained models
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ git clone https://github.com/MackinationsAi/Upgraded-Depth-Anything-V2.git
25
+ cd Upgraded-Depth-Anything-V2
26
+ one_click_install.bat
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ Please refer to the [README.md](https://github.com/MackinationsAi/Upgraded-Depth-Anything-V2/blob/main/README.md) for actual usage.
32
+
33
+ ## Test Code
34
+
35
+ ```bash
36
+ cd Upgraded-Depth-Anything-V2
37
+ venv\scripts\activate
38
+ python test.py /path/to/your/image.jpg
39
+ ```
40
+ Before running the command above, create a test.py script using the code below (the input image can be a .jpg or .png file):
41
+
42
+ ```python
43
+ import cv2
44
+ import torch
45
+ import numpy as np
46
+ import os
47
+ import argparse
48
+
49
+ from safetensors.torch import load_file
50
+ from depth_anything_v2.dpt import DepthAnythingV2
51
+
52
+ # Argument parser for input image path
53
+ parser = argparse.ArgumentParser(description="Depth map inference using DepthAnythingV2 model.")
54
+ parser.add_argument("input_image_path", type=str, help="Path to the input image")
55
+ args = parser.parse_args()
56
+
57
+ # Determine the directory of this script
58
+ script_dir = os.path.dirname(os.path.abspath(__file__))
59
+
60
+ # Set output path relative to the script directory
61
+ output_image_path = os.path.join(script_dir, "base_udav2_hf-code-test.png")
62
+ checkpoint_path = os.path.join(script_dir, "checkpoints", "depth_anything_v2_vitl.safetensors")
63
+
64
+ # Device selection: CUDA, MPS, or CPU
65
+ if torch.cuda.is_available():
65
+     device = torch.device('cuda')
66
+ elif torch.backends.mps.is_available():
67
+     device = torch.device('mps')
68
+ else:
69
+     device = torch.device('cpu')
71
+
72
+ model = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024])
73
+
74
+ state_dict = load_file(checkpoint_path, device='cpu')
75
+
76
+ model.load_state_dict(state_dict)
77
+ model.to(device)
78
+ model.eval()
79
+
80
+ # Load the input image
81
+ raw_img = cv2.imread(args.input_image_path)
82
+
83
+ # Infer the depth map
84
+ depth = model.infer_image(raw_img) # HxW raw depth map
85
+
86
+ # Normalize the depth map to 0-255 for saving as an image
87
+ depth_normalized = cv2.normalize(depth, None, 0, 255, cv2.NORM_MINMAX)
88
+ depth_normalized = depth_normalized.astype(np.uint8)
89
+
90
+ cv2.imwrite(output_image_path, depth_normalized)
91
+ print(f"Depth map saved at {output_image_path}")
92
+ ```
93
+
94
+ ## Citation
95
+
96
+ If you find this project useful, please consider citing [MackinationsAi](https://github.com/MackinationsAi/) & the following:
97
+
98
+ ```bibtex
99
+ @article{depth_anything_v2,
100
+ title={Depth Anything V2},
101
+ author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
102
+ journal={arXiv:2406.09414},
103
+ year={2024}
104
+ }
105
+
106
+ @inproceedings{depth_anything_v1,
107
+ title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
108
+ author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
109
+ booktitle={CVPR},
110
+ year={2024}
111
+ }
config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "DepthAnythingV2ForDepthEstimation"
5
+ ],
6
+ "backbone": null,
7
+ "backbone_config": {
8
+ "architectures": [
9
+ "Dinov2Model"
10
+ ],
11
+ "hidden_size": 1024,
12
+ "image_size": 518,
13
+ "model_type": "dinov2",
14
+ "num_attention_heads": 16,
15
+ "num_hidden_layers": 24,
16
+ "out_features": [
17
+ "stage21",
18
+ "stage22",
19
+ "stage23",
20
+ "stage24"
21
+ ],
22
+ "out_indices": [
23
+ 21,
24
+ 22,
25
+ 23,
26
+ 24
27
+ ],
28
+ "patch_size": 14,
29
+ "reshape_hidden_states": false,
30
+ "stage_names": [
31
+ "stem",
32
+ "stage1",
33
+ "stage2",
34
+ "stage3",
35
+ "stage4",
36
+ "stage5",
37
+ "stage6",
38
+ "stage7",
39
+ "stage8",
40
+ "stage9",
41
+ "stage10",
42
+ "stage11",
43
+ "stage12",
44
+ "stage13",
45
+ "stage14",
46
+ "stage15",
47
+ "stage16",
48
+ "stage17",
49
+ "stage18",
50
+ "stage19",
51
+ "stage20",
52
+ "stage21",
53
+ "stage22",
54
+ "stage23",
55
+ "stage24"
56
+ ],
57
+ "torch_dtype": "float32"
58
+ },
59
+ "fusion_hidden_size": 256,
60
+ "head_hidden_size": 32,
61
+ "head_in_index": -1,
62
+ "initializer_range": 0.02,
63
+ "model_type": "depth_anything_v2",
64
+ "neck_hidden_sizes": [
65
+ 256,
66
+ 512,
67
+ 1024,
68
+ 1024
69
+ ],
70
+ "patch_size": 14,
71
+ "reassemble_factors": [
72
+ 4,
73
+ 2,
74
+ 1,
75
+ 0.5
76
+ ],
77
+ "reassemble_hidden_size": 1024,
78
+ "torch_dtype": "float32",
79
+ "transformers_version": null,
80
+ "use_pretrained_backbone": false
81
+ }
depth_anything_v2_vitl.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f075a9099f94bae54a5bfe21a1423346429309bae40abb85b9935985b1f35a09
3
+ size 670674810
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_pad": false,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "ensure_multiple_of": 14,
7
+ "image_mean": [
8
+ 0.485,
9
+ 0.456,
10
+ 0.406
11
+ ],
12
+ "image_processor_type": "DPTImageProcessor",
13
+ "image_std": [
14
+ 0.229,
15
+ 0.224,
16
+ 0.225
17
+ ],
18
+ "keep_aspect_ratio": true,
19
+ "resample": 3,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 518,
23
+ "width": 518
24
+ },
25
+ "size_divisor": null
26
+ }