Upload 4 files

Browse files

Files changed (4) hide show

README.md +111 -0
config.json +81 -0
depth_anything_v2_vitl.safetensors +3 -0
preprocessor_config.json +26 -0

README.md ADDED Viewed

	@@ -0,0 +1,111 @@

+---
+license: cc-by-nc-4.0
+language:
+- en
+pipeline_tag: depth-estimation
+tags:
+- depth
+- relative depth
+---
+# Depth-Anything-V2-Large
+## Introduction
+Depth Anything V2 is trained from 595K synthetic labeled images & 62M+ real unlabeled images, providing the most capable monocular depth estimation (MDE) model with the following features:
+- more fine-grained details than Depth Anything V1
+- more robust than Depth Anything V1 & SD-based models (e.g., Marigold, Geowizard)
+- more efficient (10x faster) & more lightweight than SD-based models
+- impressive fine-tuned performance with our pre-trained models
+## Installation
+```bash
+git clone https://github.com/MackinationsAi/Upgraded-Depth-Anything-V2.git
+cd Upgraded-Depth-Anything-V2
+one_click_install.bat
+```
+## Usage
+Please refer to the [README.md](https://github.com/MackinationsAi/Upgraded-Depth-Anything-V2/blob/main/README.md) for actual usage.
+## Test Code
+The input image may be a `.jpg` or `.png` file.
+```bash
+cd Upgraded-Depth-Anything-V2
+venv\scripts\activate
+python test.py /path/to/your/image.jpg
+```
+Create a test.py script using the code below:
+```python
+import cv2
+import torch
+import numpy as np
+import os
+import argparse
+from safetensors.torch import load_file
+from depth_anything_v2.dpt import DepthAnythingV2
+# Argument parser for input image path
+parser = argparse.ArgumentParser(description="Depth map inference using DepthAnythingV2 model.")
+parser.add_argument("input_image_path", type=str, help="Path to the input image")
+args = parser.parse_args()
+# Determine the directory of this script
+script_dir = os.path.dirname(os.path.abspath(__file__))
+# Set output path relative to the script directory
+output_image_path = os.path.join(script_dir, "base_udav2_hf-code-test.png")
+checkpoint_path = os.path.join(script_dir, "checkpoints", "depth_anything_v2_vitl.safetensors")
+# Device selection: CUDA, MPS, or CPU
+if torch.cuda.is_available():
+    device = torch.device('cuda')
+elif torch.backends.mps.is_available():
+    device = torch.device('mps')
+else:
+    device = torch.device('cpu')
+model = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024])
+state_dict = load_file(checkpoint_path, device='cpu')
+model.load_state_dict(state_dict)
+model.to(device)
+model.eval()
+# Load the input image
+raw_img = cv2.imread(args.input_image_path)
+# Infer the depth map
+depth = model.infer_image(raw_img)  # HxW raw depth map
+# Normalize the depth map to 0-255 for saving as an image
+depth_normalized = cv2.normalize(depth, None, 0, 255, cv2.NORM_MINMAX)
+depth_normalized = depth_normalized.astype(np.uint8)
+cv2.imwrite(output_image_path, depth_normalized)
+print(f"Depth map saved at {output_image_path}")
+```
+## Citation
+If you find this project useful, please consider citing [MackinationsAi](https://github.com/MackinationsAi/) & the following:
+```bibtex
+@article{depth_anything_v2,
+  title={Depth Anything V2},
+  author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
+  journal={arXiv:2406.09414},
+  year={2024}
+}
+@inproceedings{depth_anything_v1,
+  title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
+  author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
+  booktitle={CVPR},
+  year={2024}
+}
+```

config.json ADDED Viewed

	@@ -0,0 +1,81 @@

+{
+  "_commit_hash": null,
+  "architectures": [
+    "DepthAnythingV2ForDepthEstimation"
+  ],
+  "backbone": null,
+  "backbone_config": {
+    "architectures": [
+      "Dinov2Model"
+    ],
+    "hidden_size": 1024,
+    "image_size": 518,
+    "model_type": "dinov2",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "out_features": [
+      "stage21",
+      "stage22",
+      "stage23",
+      "stage24"
+    ],
+    "out_indices": [
+      21,
+      22,
+      23,
+      24
+    ],
+    "patch_size": 14,
+    "reshape_hidden_states": false,
+    "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4",
+      "stage5",
+      "stage6",
+      "stage7",
+      "stage8",
+      "stage9",
+      "stage10",
+      "stage11",
+      "stage12",
+      "stage13",
+      "stage14",
+      "stage15",
+      "stage16",
+      "stage17",
+      "stage18",
+      "stage19",
+      "stage20",
+      "stage21",
+      "stage22",
+      "stage23",
+      "stage24"
+    ],
+    "torch_dtype": "float32"
+  },
+  "fusion_hidden_size": 256,
+  "head_hidden_size": 32,
+  "head_in_index": -1,
+  "initializer_range": 0.02,
+  "model_type": "depth_anything_v2",
+  "neck_hidden_sizes": [
+    256,
+    512,
+    1024,
+    1024
+  ],
+  "patch_size": 14,
+  "reassemble_factors": [
+    4,
+    2,
+    1,
+    0.5
+  ],
+  "reassemble_hidden_size": 1024,
+  "torch_dtype": "float32",
+  "transformers_version": null,
+  "use_pretrained_backbone": false
+}

depth_anything_v2_vitl.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f075a9099f94bae54a5bfe21a1423346429309bae40abb85b9935985b1f35a09
+size 670674810

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "do_normalize": true,
+  "do_pad": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "ensure_multiple_of": 14,
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "DPTImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "keep_aspect_ratio": true,
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 518,
+    "width": 518
+  },
+  "size_divisor": null
+}