Spaces: Running on Zero

wzhouxiff committed
Commit 38e3f9b
Parent(s): init

This view is limited to 50 files because it contains too many changes.
- .gitignore +4 -0
- ZoeDepth/.gitignore +148 -0
- ZoeDepth/LICENSE +21 -0
- ZoeDepth/README.md +248 -0
- ZoeDepth/environment.yml +26 -0
- ZoeDepth/evaluate.py +160 -0
- ZoeDepth/hubconf.py +154 -0
- ZoeDepth/sanity.py +98 -0
- ZoeDepth/sanity_hub.py +43 -0
- ZoeDepth/train_mix.py +179 -0
- ZoeDepth/train_mono.py +174 -0
- ZoeDepth/train_test_inputs/kitti_eigen_test_files_with_gt.txt +0 -0
- ZoeDepth/train_test_inputs/kitti_eigen_train_files_with_gt.txt +0 -0
- ZoeDepth/train_test_inputs/nyudepthv2_test_files_with_gt.txt +654 -0
- ZoeDepth/train_test_inputs/nyudepthv2_train_files_with_gt.txt +0 -0
- ZoeDepth/ui/app.py +66 -0
- ZoeDepth/ui/gradio_depth_pred.py +52 -0
- ZoeDepth/ui/gradio_im_to_3d.py +93 -0
- ZoeDepth/ui/gradio_pano_to_3d.py +120 -0
- ZoeDepth/ui/ui_requirements.txt +2 -0
- ZoeDepth/zoedepth/data/__init__.py +24 -0
- ZoeDepth/zoedepth/data/data_mono.py +573 -0
- ZoeDepth/zoedepth/data/ddad.py +117 -0
- ZoeDepth/zoedepth/data/diml_indoor_test.py +125 -0
- ZoeDepth/zoedepth/data/diml_outdoor_test.py +114 -0
- ZoeDepth/zoedepth/data/diode.py +125 -0
- ZoeDepth/zoedepth/data/hypersim.py +138 -0
- ZoeDepth/zoedepth/data/ibims.py +81 -0
- ZoeDepth/zoedepth/data/preprocess.py +154 -0
- ZoeDepth/zoedepth/data/sun_rgbd_loader.py +106 -0
- ZoeDepth/zoedepth/data/transforms.py +481 -0
- ZoeDepth/zoedepth/data/vkitti.py +151 -0
- ZoeDepth/zoedepth/data/vkitti2.py +187 -0
- ZoeDepth/zoedepth/models/__init__.py +24 -0
- ZoeDepth/zoedepth/models/base_models/__init__.py +24 -0
- ZoeDepth/zoedepth/models/base_models/midas.py +377 -0
- ZoeDepth/zoedepth/models/builder.py +51 -0
- ZoeDepth/zoedepth/models/depth_model.py +152 -0
- ZoeDepth/zoedepth/models/layers/attractor.py +208 -0
- ZoeDepth/zoedepth/models/layers/dist_layers.py +121 -0
- ZoeDepth/zoedepth/models/layers/localbins_layers.py +169 -0
- ZoeDepth/zoedepth/models/layers/patch_transformer.py +91 -0
- ZoeDepth/zoedepth/models/model_io.py +92 -0
- ZoeDepth/zoedepth/models/zoedepth/__init__.py +31 -0
- ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json +58 -0
- ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json +22 -0
- ZoeDepth/zoedepth/models/zoedepth/zoedepth_v1.py +250 -0
- ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py +31 -0
- ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json +67 -0
- ZoeDepth/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py +333 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
+outputs/
+ckpt/
+checkpoints/*
+__pycache__/
ZoeDepth/.gitignore
ADDED
@@ -0,0 +1,148 @@
+*.png
+**.gif
+.vscode/
+*.rdb
+**.xml
+wandb/
+slurm/
+tmp/
+.logs/
+checkpoints/
+external_jobs/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+ptlflow_logs/
+output/
+log/
+.idea/
+# C extensions
+*.so
+results/
+**.DS_Store
+**.pt
+demo/
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+~shortcuts/
+**/wandb_logs/
+**.db
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
ZoeDepth/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Intelligent Systems Lab Org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
ZoeDepth/README.md
ADDED
@@ -0,0 +1,248 @@
+# **ZoeDepth: Combining relative and metric depth** (Official implementation) <!-- omit in toc -->
+[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/isl-org/ZoeDepth)
+[![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/shariqfarooq/ZoeDepth)
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) ![PyTorch](https://img.shields.io/badge/PyTorch_v1.10.1-EE4C2C?&logo=pytorch&logoColor=white)
+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/zoedepth-zero-shot-transfer-by-combining/monocular-depth-estimation-on-nyu-depth-v2)](https://paperswithcode.com/sota/monocular-depth-estimation-on-nyu-depth-v2?p=zoedepth-zero-shot-transfer-by-combining)
+
+>#### [ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth](https://arxiv.org/abs/2302.12288)
+> ##### [Shariq Farooq Bhat](https://shariqfarooq123.github.io), [Reiner Birkl](https://www.researchgate.net/profile/Reiner-Birkl), [Diana Wofk](https://dwofk.github.io/), [Peter Wonka](http://peterwonka.net/), [Matthias Müller](https://matthias.pw/)
+
+[[Paper]](https://arxiv.org/abs/2302.12288)
+
+![teaser](assets/zoedepth-teaser.png)
+
+## **Table of Contents** <!-- omit in toc -->
+- [**Usage**](#usage)
+  - [Using torch hub](#using-torch-hub)
+  - [Using local copy](#using-local-copy)
+    - [Using local torch hub](#using-local-torch-hub)
+    - [or load the models manually](#or-load-the-models-manually)
+  - [Using ZoeD models to predict depth](#using-zoed-models-to-predict-depth)
+- [**Environment setup**](#environment-setup)
+- [**Sanity checks** (Recommended)](#sanity-checks-recommended)
+- [Model files](#model-files)
+- [**Evaluation**](#evaluation)
+  - [Evaluating official models](#evaluating-official-models)
+  - [Evaluating local checkpoint](#evaluating-local-checkpoint)
+- [**Training**](#training)
+- [**Gradio demo**](#gradio-demo)
+- [**Citation**](#citation)
+
+
+## **Usage**
+It is recommended to fetch the latest [MiDaS repo](https://github.com/isl-org/MiDaS) via torch hub before proceeding:
+```python
+import torch
+
+torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)  # Triggers fresh download of MiDaS repo
+```
+### **ZoeDepth models** <!-- omit in toc -->
+### Using torch hub
+```python
+import torch
+
+repo = "isl-org/ZoeDepth"
+# Zoe_N
+model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)
+
+# Zoe_K
+model_zoe_k = torch.hub.load(repo, "ZoeD_K", pretrained=True)
+
+# Zoe_NK
+model_zoe_nk = torch.hub.load(repo, "ZoeD_NK", pretrained=True)
+```
+### Using local copy
+Clone this repo:
+```bash
+git clone https://github.com/isl-org/ZoeDepth.git && cd ZoeDepth
+```
+#### Using local torch hub
+You can use a local source for torch hub to load the ZoeDepth models, for example:
+```python
+import torch
+
+# Zoe_N
+model_zoe_n = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True)
+```
+
+#### or load the models manually
+```python
+from zoedepth.models.builder import build_model
+from zoedepth.utils.config import get_config
+
+# ZoeD_N
+conf = get_config("zoedepth", "infer")
+model_zoe_n = build_model(conf)
+
+# ZoeD_K
+conf = get_config("zoedepth", "infer", config_version="kitti")
+model_zoe_k = build_model(conf)
+
+# ZoeD_NK
+conf = get_config("zoedepth_nk", "infer")
+model_zoe_nk = build_model(conf)
+```
+
+### Using ZoeD models to predict depth
+```python
+##### sample prediction
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+zoe = model_zoe_n.to(DEVICE)
+
+
+# Local file
+from PIL import Image
+image = Image.open("/path/to/image.jpg").convert("RGB")  # load
+depth_numpy = zoe.infer_pil(image)  # as numpy
+
+depth_pil = zoe.infer_pil(image, output_type="pil")  # as 16-bit PIL Image
+
+depth_tensor = zoe.infer_pil(image, output_type="tensor")  # as torch tensor
+
+
+# Tensor
+from zoedepth.utils.misc import pil_to_batched_tensor
+X = pil_to_batched_tensor(image).to(DEVICE)
+depth_tensor = zoe.infer(X)
+
+
+# From URL
+from zoedepth.utils.misc import get_image_from_url
+
+# Example URL
+URL = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU"
+
+image = get_image_from_url(URL)  # fetch
+depth = zoe.infer_pil(image)
+
+# Save raw
+from zoedepth.utils.misc import save_raw_16bit
+fpath = "/path/to/output.png"
+save_raw_16bit(depth, fpath)
+
+# Colorize output
+from zoedepth.utils.misc import colorize
+
+colored = colorize(depth)
+
+# save colored output
+fpath_colored = "/path/to/output_colored.png"
+Image.fromarray(colored).save(fpath_colored)
+```
+
+## **Environment setup**
+The project depends on:
+- [pytorch](https://pytorch.org/) (Main framework)
+- [timm](https://timm.fast.ai/) (Backbone helper for MiDaS)
+- pillow, matplotlib, scipy, h5py, opencv (utilities)
+
+Install the environment using `environment.yml`:
+
+Using [mamba](https://github.com/mamba-org/mamba) (fastest):
+```bash
+mamba env create -n zoe --file environment.yml
+mamba activate zoe
+```
+Using conda:
+
+```bash
+conda env create -n zoe --file environment.yml
+conda activate zoe
+```
+
+## **Sanity checks** (Recommended)
+Check if models can be loaded:
+```bash
+python sanity_hub.py
+```
+Try a demo prediction pipeline:
+```bash
+python sanity.py
+```
+This will save a file `pred.png` in the root folder, showing the RGB input and the corresponding predicted depth side-by-side.
+## Model files
+Models are defined under the `models/` folder, with `models/<model_name>_<version>.py` containing model definitions and `models/config_<model_name>.json` containing configuration.
+
+Single metric head models (Zoe_N and Zoe_K from the paper) share a common definition and live under `models/zoedepth`, whereas the multi-headed model (Zoe_NK) is defined under `models/zoedepth_nk`.
+## **Evaluation**
+Download the required dataset and change the `DATASETS_CONFIG` dictionary in `utils/config.py` accordingly.
+### Evaluating official models
+On NYU-Depth-v2 for example:
+
+For ZoeD_N:
+```bash
+python evaluate.py -m zoedepth -d nyu
+```
+
+For ZoeD_NK:
+```bash
+python evaluate.py -m zoedepth_nk -d nyu
+```
+
+### Evaluating local checkpoint
+```bash
+python evaluate.py -m zoedepth --pretrained_resource="local::/path/to/local/ckpt.pt" -d nyu
+```
+Pretrained resources are prefixed with `url::` to indicate that the weights should be fetched from a URL, or `local::` to indicate that the path is a local file. Refer to `models/model_io.py` for details.
+
+The dataset name should match the corresponding key in `utils.config.DATASETS_CONFIG`.
+
+## **Training**
+Download the training datasets as per the instructions given [here](https://github.com/cleinc/bts/tree/master/pytorch#nyu-depvh-v2). Then, to train a single-head model on NYU-Depth-v2:
+```bash
+python train_mono.py -m zoedepth --pretrained_resource=""
+```
+
+For training the Zoe-NK model:
+```bash
+python train_mix.py -m zoedepth_nk --pretrained_resource=""
+```
+## **Gradio demo**
+We provide a UI demo built using [gradio](https://gradio.app/). To get started, install the UI requirements:
+```bash
+pip install -r ui/ui_requirements.txt
+```
+Then launch the gradio UI:
+```bash
+python -m ui.app
+```
+
+The UI is also hosted on HuggingFace🤗 [here](https://huggingface.co/spaces/shariqfarooq/ZoeDepth).
+## **Citation**
+```
+@misc{https://doi.org/10.48550/arxiv.2302.12288,
+  doi = {10.48550/ARXIV.2302.12288},
+
+  url = {https://arxiv.org/abs/2302.12288},
+
+  author = {Bhat, Shariq Farooq and Birkl, Reiner and Wofk, Diana and Wonka, Peter and Müller, Matthias},
+
+  keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+
+  title = {ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth},
+
+  publisher = {arXiv},
+
+  year = {2023},
+
+  copyright = {arXiv.org perpetual, non-exclusive license}
+}
+
+```
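Editor's aside: the Gradio demo section above ships an image-to-3D UI (`ui/gradio_im_to_3d.py`, added later in this commit). The sketch below shows the underlying idea only: back-projecting a ZoeDepth metric depth map into a point cloud through a pinhole camera model. The focal length and principal point here are illustrative assumptions, not the values used by the bundled UI, and the model loading mirrors the README usage above.

```python
# Sketch: unproject a ZoeDepth metric depth map into a colored point cloud.
# Assumed pinhole intrinsics (FL, cx, cy); the bundled ui/gradio_im_to_3d.py
# has its own implementation and may differ.
import numpy as np
import torch
from PIL import Image

zoe = torch.hub.load("isl-org/ZoeDepth", "ZoeD_N", pretrained=True).eval()

image = Image.open("/path/to/image.jpg").convert("RGB")
depth = zoe.infer_pil(image)              # (H, W) metric depth in meters

H, W = depth.shape
FL = 715.0873                             # assumed focal length in pixels
cx, cy = W / 2.0, H / 2.0                 # assumed principal point

u, v = np.meshgrid(np.arange(W), np.arange(H))
x = (u - cx) * depth / FL
y = (v - cy) * depth / FL

points = np.stack([x, y, depth], axis=-1).reshape(-1, 3)   # N x 3 points
colors = np.asarray(image).reshape(-1, 3) / 255.0          # matching RGB
print(points.shape, colors.shape)
```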
ZoeDepth/environment.yml
ADDED
@@ -0,0 +1,26 @@
+name: zoe
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+dependencies:
+  - cuda=11.7.1
+  - h5py=3.7.0
+  - hdf5=1.12.2
+  - matplotlib=3.6.2
+  - matplotlib-base=3.6.2
+  - numpy=1.24.1
+  - opencv=4.6.0
+  - pip=22.3.1
+  - python=3.9.7
+  - pytorch=1.13.1
+  - pytorch-cuda=11.7
+  - pytorch-mutex=1.0
+  - scipy=1.10.0
+  - torchaudio=0.13.1
+  - torchvision=0.14.1
+  - pip:
+    - huggingface-hub==0.11.1
+    - timm==0.6.12
+    - tqdm==4.64.1
+    - wandb==0.13.9
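Editor's aside: after creating the `zoe` environment from the file above, a quick way to confirm that the pinned PyTorch/CUDA stack resolved correctly is a short check like the one below (a minimal sketch; the repository's own `sanity_hub.py`, shown further down, is the fuller test).

```python
# Quick environment check: verify the pinned torch/CUDA/timm stack imports and sees the GPU.
import torch
import torchvision
import timm

print("torch", torch.__version__, "| cuda available:", torch.cuda.is_available())
print("torchvision", torchvision.__version__, "| timm", timm.__version__)
```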
ZoeDepth/evaluate.py
ADDED
@@ -0,0 +1,160 @@
+# MIT License
+
+# Copyright (c) 2022 Intelligent Systems Lab Org
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# File author: Shariq Farooq Bhat
+
+import argparse
+from pprint import pprint
+
+import torch
+from zoedepth.utils.easydict import EasyDict as edict
+from tqdm import tqdm
+
+from zoedepth.data.data_mono import DepthDataLoader
+from zoedepth.models.builder import build_model
+from zoedepth.utils.arg_utils import parse_unknown
+from zoedepth.utils.config import change_dataset, get_config, ALL_EVAL_DATASETS, ALL_INDOOR, ALL_OUTDOOR
+from zoedepth.utils.misc import (RunningAverageDict, colors, compute_metrics,
+                                 count_parameters)
+
+
+@torch.no_grad()
+def infer(model, images, **kwargs):
+    """Inference with flip augmentation"""
+    # images.shape = N, C, H, W
+    def get_depth_from_prediction(pred):
+        if isinstance(pred, torch.Tensor):
+            pred = pred  # pass
+        elif isinstance(pred, (list, tuple)):
+            pred = pred[-1]
+        elif isinstance(pred, dict):
+            pred = pred['metric_depth'] if 'metric_depth' in pred else pred['out']
+        else:
+            raise NotImplementedError(f"Unknown output type {type(pred)}")
+        return pred
+
+    pred1 = model(images, **kwargs)
+    pred1 = get_depth_from_prediction(pred1)
+
+    pred2 = model(torch.flip(images, [3]), **kwargs)
+    pred2 = get_depth_from_prediction(pred2)
+    pred2 = torch.flip(pred2, [3])
+
+    mean_pred = 0.5 * (pred1 + pred2)
+
+    return mean_pred
+
+
+@torch.no_grad()
+def evaluate(model, test_loader, config, round_vals=True, round_precision=3):
+    model.eval()
+    metrics = RunningAverageDict()
+    for i, sample in tqdm(enumerate(test_loader), total=len(test_loader)):
+        if 'has_valid_depth' in sample:
+            if not sample['has_valid_depth']:
+                continue
+        image, depth = sample['image'], sample['depth']
+        image, depth = image.cuda(), depth.cuda()
+        depth = depth.squeeze().unsqueeze(0).unsqueeze(0)
+        focal = sample.get('focal', torch.Tensor(
+            [715.0873]).cuda())  # This magic number (focal) is only used for evaluating BTS model
+        pred = infer(model, image, dataset=sample['dataset'][0], focal=focal)
+
+        # Save image, depth, pred for visualization
+        if "save_images" in config and config.save_images:
+            import os
+            # print("Saving images ...")
+            from PIL import Image
+            import torchvision.transforms as transforms
+            from zoedepth.utils.misc import colorize
+
+            os.makedirs(config.save_images, exist_ok=True)
+            # def save_image(img, path):
+            d = colorize(depth.squeeze().cpu().numpy(), 0, 10)
+            p = colorize(pred.squeeze().cpu().numpy(), 0, 10)
+            im = transforms.ToPILImage()(image.squeeze().cpu())
+            im.save(os.path.join(config.save_images, f"{i}_img.png"))
+            Image.fromarray(d).save(os.path.join(config.save_images, f"{i}_depth.png"))
+            Image.fromarray(p).save(os.path.join(config.save_images, f"{i}_pred.png"))
+
+
+
+        # print(depth.shape, pred.shape)
+        metrics.update(compute_metrics(depth, pred, config=config))
+
+    if round_vals:
+        def r(m): return round(m, round_precision)
+    else:
+        def r(m): return m
+    metrics = {k: r(v) for k, v in metrics.get_value().items()}
+    return metrics
+
+def main(config):
+    model = build_model(config)
+    test_loader = DepthDataLoader(config, 'online_eval').data
+    model = model.cuda()
+    metrics = evaluate(model, test_loader, config)
+    print(f"{colors.fg.green}")
+    print(metrics)
+    print(f"{colors.reset}")
+    metrics['#params'] = f"{round(count_parameters(model, include_all=True)/1e6, 2)}M"
+    return metrics
+
+
+def eval_model(model_name, pretrained_resource, dataset='nyu', **kwargs):
+
+    # Load default pretrained resource defined in config if not set
+    overwrite = {**kwargs, "pretrained_resource": pretrained_resource} if pretrained_resource else kwargs
+    config = get_config(model_name, "eval", dataset, **overwrite)
+    # config = change_dataset(config, dataset)  # change the dataset
+    pprint(config)
+    print(f"Evaluating {model_name} on {dataset}...")
+    metrics = main(config)
+    return metrics
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m", "--model", type=str,
+                        required=True, help="Name of the model to evaluate")
+    parser.add_argument("-p", "--pretrained_resource", type=str,
+                        required=False, default=None, help="Pretrained resource to use for fetching weights. If not set, default resource from model config is used, Refer models.model_io.load_state_from_resource for more details.")
+    parser.add_argument("-d", "--dataset", type=str, required=False,
+                        default='nyu', help="Dataset to evaluate on")
+
+    args, unknown_args = parser.parse_known_args()
+    overwrite_kwargs = parse_unknown(unknown_args)
+
+    if "ALL_INDOOR" in args.dataset:
+        datasets = ALL_INDOOR
+    elif "ALL_OUTDOOR" in args.dataset:
+        datasets = ALL_OUTDOOR
+    elif "ALL" in args.dataset:
+        datasets = ALL_EVAL_DATASETS
+    elif "," in args.dataset:
+        datasets = args.dataset.split(",")
+    else:
+        datasets = [args.dataset]
+
+    for dataset in datasets:
+        eval_model(args.model, pretrained_resource=args.pretrained_resource,
+                   dataset=dataset, **overwrite_kwargs)
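Editor's aside: the `infer` helper in `evaluate.py` above implements test-time flip augmentation: the model is run on the image and on its horizontal mirror, the mirrored prediction is flipped back, and the two depth maps are averaged. A self-contained sketch of the same idea, independent of the ZoeDepth data pipeline (the `model` here is any callable returning an N x 1 x H x W depth tensor), is given below.

```python
# Minimal sketch of flip-averaged depth inference (same idea as infer() above).
import torch

@torch.no_grad()
def flip_averaged_depth(model, images: torch.Tensor) -> torch.Tensor:
    """images: N x C x H x W batch; returns the average of the prediction on the
    original batch and the un-flipped prediction on the mirrored batch."""
    pred = model(images)
    pred_flipped = model(torch.flip(images, dims=[3]))   # mirror along width
    pred_flipped = torch.flip(pred_flipped, dims=[3])    # flip back to align pixels
    return 0.5 * (pred + pred_flipped)

# Stand-in "model" (channel mean) just to show the shapes involved:
dummy = lambda x: x.mean(dim=1, keepdim=True)
print(flip_averaged_depth(dummy, torch.rand(2, 3, 384, 512)).shape)  # torch.Size([2, 1, 384, 512])
```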
ZoeDepth/hubconf.py
ADDED
@@ -0,0 +1,154 @@
+# MIT License
+
+# Copyright (c) 2022 Intelligent Systems Lab Org
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# File author: Shariq Farooq Bhat
+
+dependencies=['torch']
+from zoedepth.utils.config import get_config
+from zoedepth.models.builder import build_model
+import numpy as np
+import torch
+
+
+# ZoeD_N
+def ZoeD_N(pretrained=False, midas_model_type="DPT_BEiT_L_384", config_mode="infer", **kwargs):
+    """Zoe_M12_N model. This is the version of ZoeDepth that has a single metric head
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on NYU-Depth-V2
+        midas_model_type (str): Midas model type. Should be one of the models as listed in torch.hub.list("intel-isl/MiDaS"). Default: DPT_BEiT_L_384
+        config_mode (str): Config mode. Should be one of "infer", "train" or "eval". Default: "infer"
+
+    Keyword Args:
+        **kwargs: Additional arguments to pass to the model
+            The following arguments are supported:
+                train_midas (bool): If True, returns a model that with trainable midas base. Default: False
+                use_pretrained_midas (bool): If True, returns a model that uses pretrained midas base. Default: False
+                n_bins (int): Number of bin centers. Defaults to 64.
+                bin_centers_type (str): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
+                                        For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
+                bin_embedding_dim (int): bin embedding dimension. Defaults to 128.
+                min_depth (float): Lower bound for normed bin centers. Defaults to 1e-3.
+                max_depth (float): Upper bound for normed bin centers. Defaults to 10.
+                n_attractors (List[int]): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
+                attractor_alpha (int): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 1000.
+                attractor_gamma (int): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
+                attractor_kind (str): Attraction aggregation "sum" or "mean". Defaults to 'mean'.
+                attractor_type (str): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'inv'.
+                min_temp (int): Lower bound for temperature of output probability distribution. Defaults to 0.0212.
+                max_temp (int): Upper bound for temperature of output probability distribution. Defaults to 50.
+                force_keep_ar (bool): If True, the model will keep the aspect ratio of the input image. Defaults to True.
+    """
+    if pretrained and midas_model_type != "DPT_BEiT_L_384":
+        raise ValueError(f"Only DPT_BEiT_L_384 MiDaS model is supported for pretrained Zoe_N model, got: {midas_model_type}")
+
+    if not pretrained:
+        pretrained_resource = None
+    else:
+        pretrained_resource = "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt"
+
+    config = get_config("zoedepth", config_mode, pretrained_resource=pretrained_resource, **kwargs)
+    model = build_model(config)
+    return model
+
+# ZoeD_K
+def ZoeD_K(pretrained=False, midas_model_type="DPT_BEiT_L_384", config_mode="infer", **kwargs):
+    """Zoe_M12_K model. This is the version of ZoeDepth that has a single metric head
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on NYU-Depth-V2
+        midas_model_type (str): Midas model type. Should be one of the models as listed in torch.hub.list("intel-isl/MiDaS"). Default: DPT_BEiT_L_384
+        config_mode (str): Config mode. Should be one of "infer", "train" or "eval". Default: "infer"
+
+    Keyword Args:
+        **kwargs: Additional arguments to pass to the model
+            The following arguments are supported:
+                train_midas (bool): If True, returns a model that with trainable midas base. Default: False
+                use_pretrained_midas (bool): If True, returns a model that uses pretrained midas base. Default: False
+                n_bins (int): Number of bin centers. Defaults to 64.
+                bin_centers_type (str): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
+                                        For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
+                bin_embedding_dim (int): bin embedding dimension. Defaults to 128.
+                min_depth (float): Lower bound for normed bin centers. Defaults to 1e-3.
+                max_depth (float): Upper bound for normed bin centers. Defaults to 10.
+                n_attractors (List[int]): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
+                attractor_alpha (int): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 1000.
+                attractor_gamma (int): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
+                attractor_kind (str): Attraction aggregation "sum" or "mean". Defaults to 'mean'.
+                attractor_type (str): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'inv'.
+                min_temp (int): Lower bound for temperature of output probability distribution. Defaults to 0.0212.
+                max_temp (int): Upper bound for temperature of output probability distribution. Defaults to 50.
+                force_keep_ar (bool): If True, the model will keep the aspect ratio of the input image. Defaults to True.
+
+    """
+    if pretrained and midas_model_type != "DPT_BEiT_L_384":
+        raise ValueError(f"Only DPT_BEiT_L_384 MiDaS model is supported for pretrained Zoe_K model, got: {midas_model_type}")
+
+    if not pretrained:
+        pretrained_resource = None
+    else:
+        pretrained_resource = "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
+
+    config = get_config("zoedepth", config_mode, pretrained_resource=pretrained_resource, config_version="kitti", **kwargs)
+    model = build_model(config)
+    return model
+
+# Zoe_NK
+def ZoeD_NK(pretrained=False, midas_model_type="DPT_BEiT_L_384", config_mode="infer", **kwargs):
+    """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on NYU-Depth-V2
+        midas_model_type (str): Midas model type. Should be one of the models as listed in torch.hub.list("intel-isl/MiDaS"). Default: DPT_BEiT_L_384
+
+    Keyword Args:
+        **kwargs: Additional arguments to pass to the model
+            The following arguments are supported:
+                train_midas (bool): If True, returns a model that with trainable midas base. Defaults to True
+                use_pretrained_midas (bool): If True, returns a model that uses pretrained midas base. Defaults to True
+                bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys:
+                                        "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float)
+                                       The length of this list determines the number of metric heads.
+                bin_centers_type (str): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
+                                        For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
+                bin_embedding_dim (int): bin embedding dimension. Defaults to 128.
+
+                n_attractors (List[int]): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
+                attractor_alpha (int): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 1000.
+                attractor_gamma (int): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
+                attractor_kind (str): Attraction aggregation "sum" or "mean". Defaults to 'mean'.
+                attractor_type (str): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'inv'.
+
+                min_temp (int): Lower bound for temperature of output probability distribution. Defaults to 0.0212.
+                max_temp (int): Upper bound for temperature of output probability distribution. Defaults to 50.
+
+                memory_efficient (bool): Whether to use memory efficient version of attractor layers. Memory efficient version is slower but is recommended incase of multiple metric heads in order save GPU memory. Defaults to True.
+
+    """
+    if pretrained and midas_model_type != "DPT_BEiT_L_384":
+        raise ValueError(f"Only DPT_BEiT_L_384 MiDaS model is supported for pretrained Zoe_NK model, got: {midas_model_type}")
+
+    if not pretrained:
+        pretrained_resource = None
+    else:
+        pretrained_resource = "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt"
+
+    config = get_config("zoedepth_nk", config_mode, pretrained_resource=pretrained_resource, **kwargs)
+    model = build_model(config)
+    return model
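Editor's aside: `hubconf.py` above exposes `ZoeD_N`, `ZoeD_K`, and `ZoeD_NK` as torch hub entrypoints, and the docstrings list the keyword arguments that are forwarded into the config. A hedged usage sketch from a local clone of this repository follows; the overridden values are illustrative only, not recommended settings.

```python
# Load the hub entrypoints defined in hubconf.py from a local checkout of the repo.
import torch

# Pretrained single-head model (weights fetched from the URL set in hubconf.py).
zoe_n = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True)

# Untrained variant with documented kwargs overridden (illustrative values only).
zoe_custom = torch.hub.load(".", "ZoeD_N", source="local", pretrained=False,
                            n_bins=32, max_depth=20)
```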
ZoeDepth/sanity.py
ADDED
@@ -0,0 +1,98 @@
+# MIT License
+
+# Copyright (c) 2022 Intelligent Systems Lab Org
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# File author: Shariq Farooq Bhat
+
+import numpy as np
+from torchvision.transforms import ToTensor
+from PIL import Image
+from zoedepth.utils.misc import get_image_from_url, colorize
+import torch
+
+from zoedepth.models.builder import build_model
+from zoedepth.utils.config import get_config
+from pprint import pprint
+
+
+torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+if DEVICE == "cpu":
+    print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.")
+
+print("*" * 20 + " Testing zoedepth " + "*" * 20)
+conf = get_config("zoedepth", "infer")
+
+
+print("Config:")
+pprint(conf)
+
+model = build_model(conf).to(DEVICE)
+model.eval()
+x = torch.rand(1, 3, 384, 512).to(DEVICE)
+
+print("-"*20 + "Testing on a random input" + "-"*20)
+
+with torch.no_grad():
+    out = model(x)
+
+if isinstance(out, dict):
+    # print shapes of all outputs
+    for k, v in out.items():
+        if v is not None:
+            print(k, v.shape)
+else:
+    print([o.shape for o in out if o is not None])
+
+print("\n\n")
+print("-"*20 + " Testing on an indoor scene from url " + "-"*20)
+
+# Test img
+url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU"
+img = get_image_from_url(url)
+orig_size = img.size
+X = ToTensor()(img)
+X = X.unsqueeze(0).to(DEVICE)
+
+print("X.shape", X.shape)
+print("predicting")
+
+with torch.no_grad():
+    out = model.infer(X).cpu()
+
+# or just,
+# out = model.infer_pil(img)
+
+
+print("output.shape", out.shape)
+pred = Image.fromarray(colorize(out))
+# Stack img and pred side by side for comparison and save
+pred = pred.resize(orig_size, Image.ANTIALIAS)
+stacked = Image.new("RGB", (orig_size[0]*2, orig_size[1]))
+stacked.paste(img, (0, 0))
+stacked.paste(pred, (orig_size[0], 0))
+
+stacked.save("pred.png")
+print("saved pred.png")
+
+
+model.infer_pil(img, output_type="pil").save("pred_raw.png")
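Editor's aside: `sanity.py` above resizes the colorized prediction with `Image.ANTIALIAS`, a constant that newer Pillow releases (10.0 and later) removed in favour of `Image.Resampling.LANCZOS`. If the script fails with an `AttributeError` on a newer Pillow, a small compatibility shim such as the one below keeps it running; this is an assumption about your local Pillow version, not part of the commit.

```python
# Compatibility shim for Pillow >= 10, where Image.ANTIALIAS was removed.
from PIL import Image

if not hasattr(Image, "ANTIALIAS"):
    Image.ANTIALIAS = Image.Resampling.LANCZOS  # same filter under its newer name
```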
ZoeDepth/sanity_hub.py
ADDED
@@ -0,0 +1,43 @@
+# MIT License
+
+# Copyright (c) 2022 Intelligent Systems Lab Org
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# File author: Shariq Farooq Bhat
+
+import torch
+import numpy as np
+from torchvision.transforms import ToTensor
+from PIL import Image
+from zoedepth.utils.misc import get_image_from_url, colorize
+
+from zoedepth.models.builder import build_model
+from zoedepth.utils.config import get_config
+from pprint import pprint
+
+
+
+# Trigger reload of MiDaS
+torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)
+
+
+model = torch.hub.load(".", "ZoeD_K", source="local", pretrained=True)
+model = torch.hub.load(".", "ZoeD_NK", source="local", pretrained=True)
+model = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True)
ZoeDepth/train_mix.py
ADDED
@@ -0,0 +1,179 @@
+# MIT License
+
+# Copyright (c) 2022 Intelligent Systems Lab Org
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# File author: Shariq Farooq Bhat
+
+from zoedepth.utils.misc import count_parameters, parallelize
+from zoedepth.utils.config import get_config
+from zoedepth.utils.arg_utils import parse_unknown
+from zoedepth.trainers.builder import get_trainer
+from zoedepth.models.builder import build_model
+from zoedepth.data.data_mono import MixedNYUKITTI
+import torch.utils.data.distributed
+import torch.multiprocessing as mp
+import torch
+import numpy as np
+from pprint import pprint
+import argparse
+import os
+
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+os.environ["WANDB_START_METHOD"] = "thread"
+
+
+def fix_random_seed(seed: int):
+    """
+    Fix random seed for reproducibility
+
+    Args:
+        seed (int): random seed
+    """
+    import random
+
+    import numpy
+    import torch
+
+    random.seed(seed)
+    numpy.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
+    import glob
+    import os
+
+    from zoedepth.models.model_io import load_wts
+
+    if hasattr(config, "checkpoint"):
+        checkpoint = config.checkpoint
+    elif hasattr(config, "ckpt_pattern"):
+        pattern = config.ckpt_pattern
+        matches = glob.glob(os.path.join(
+            checkpoint_dir, f"*{pattern}*{ckpt_type}*"))
+        if not (len(matches) > 0):
+            raise ValueError(f"No matches found for the pattern {pattern}")
+
+        checkpoint = matches[0]
+
+    else:
+        return model
+    model = load_wts(model, checkpoint)
+    print("Loaded weights from {0}".format(checkpoint))
+    return model
+
+
+def main_worker(gpu, ngpus_per_node, config):
+    try:
+        fix_random_seed(43)
+
+        config.gpu = gpu
+
+        model = build_model(config)
+        model = load_ckpt(config, model)
+        model = parallelize(config, model)
+
+        total_params = f"{round(count_parameters(model)/1e6,2)}M"
+        config.total_params = total_params
+        print(f"Total parameters : {total_params}")
+
+        train_loader = MixedNYUKITTI(config, "train").data
+        test_loader = MixedNYUKITTI(config, "online_eval").data
+
+        trainer = get_trainer(config)(
+            config, model, train_loader, test_loader, device=config.gpu)
+
+        trainer.train()
+    finally:
+        import wandb
+        wandb.finish()
+
+
+if __name__ == '__main__':
+    mp.set_start_method('forkserver')
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m", "--model", type=str, default="synunet")
+    parser.add_argument("-d", "--dataset", type=str, default='mix')
+    parser.add_argument("--trainer", type=str, default=None)
+
+    args, unknown_args = parser.parse_known_args()
+    overwrite_kwargs = parse_unknown(unknown_args)
+
+    overwrite_kwargs["model"] = args.model
+    if args.trainer is not None:
+        overwrite_kwargs["trainer"] = args.trainer
+
+    config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
+    # git_commit()
+    if config.use_shared_dict:
+        shared_dict = mp.Manager().dict()
+    else:
+        shared_dict = None
+    config.shared_dict = shared_dict
+
+    config.batch_size = config.bs
+    config.mode = 'train'
+    if config.root != "." and not os.path.isdir(config.root):
+        os.makedirs(config.root)
+
+    try:
+        node_str = os.environ['SLURM_JOB_NODELIST'].replace(
+            '[', '').replace(']', '')
+        nodes = node_str.split(',')
+
+        config.world_size = len(nodes)
+        config.rank = int(os.environ['SLURM_PROCID'])
+        # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"
+
+    except KeyError as e:
+        # We are NOT using SLURM
+        config.world_size = 1
+        config.rank = 0
+        nodes = ["127.0.0.1"]
+
+    if config.distributed:
+
+        print(config.rank)
+        port = np.random.randint(15000, 15025)
+        config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
+        print(config.dist_url)
+        config.dist_backend = 'nccl'
+        config.gpu = None
+
+    ngpus_per_node = torch.cuda.device_count()
+    config.num_workers = config.workers
+    config.ngpus_per_node = ngpus_per_node
+    print("Config:")
+    pprint(config)
+    if config.distributed:
+        config.world_size = ngpus_per_node * config.world_size
+        mp.spawn(main_worker, nprocs=ngpus_per_node,
+                 args=(ngpus_per_node, config))
+    else:
+        if ngpus_per_node == 1:
+            config.gpu = 0
+        main_worker(config.gpu, ngpus_per_node, config)
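Editor's aside: `train_mix.py` above wires up distributed training by hand: it derives `world_size` and `rank` from SLURM variables when present, falls back to a single node otherwise, picks a random TCP port for the NCCL rendezvous, and finally spawns one `main_worker` per GPU with `mp.spawn`. The actual process-group initialization happens inside `parallelize` (in `zoedepth/utils/misc.py`, not shown in this view), so the sketch below is a generic version of that spawn-and-init pattern with illustrative values, not the repository's exact implementation.

```python
# Generic sketch of the spawn-per-GPU DDP launch pattern used by train_mix.py.
# The real setup lives in parallelize() (zoedepth/utils/misc.py), not shown here.
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def worker(gpu, ngpus_per_node, dist_url):
    rank = gpu  # single node: the local GPU index doubles as the global rank
    dist.init_process_group(backend="nccl", init_method=dist_url,
                            world_size=ngpus_per_node, rank=rank)
    torch.cuda.set_device(gpu)
    # ... build the model, wrap it in torch.nn.parallel.DistributedDataParallel, train ...
    dist.destroy_process_group()

if __name__ == "__main__":
    ngpus = torch.cuda.device_count()
    dist_url = "tcp://127.0.0.1:15001"  # illustrative port, mirroring the script's random choice
    mp.spawn(worker, nprocs=ngpus, args=(ngpus, dist_url))
```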
ZoeDepth/train_mono.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

from zoedepth.utils.misc import count_parameters, parallelize
from zoedepth.utils.config import get_config
from zoedepth.utils.arg_utils import parse_unknown
from zoedepth.trainers.builder import get_trainer
from zoedepth.models.builder import build_model
from zoedepth.data.data_mono import DepthDataLoader
import torch.utils.data.distributed
import torch.multiprocessing as mp
import torch
import numpy as np
from pprint import pprint
import argparse
import os

os.environ["PYOPENGL_PLATFORM"] = "egl"
os.environ["WANDB_START_METHOD"] = "thread"


def fix_random_seed(seed: int):
    import random

    import numpy
    import torch

    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
    import glob
    import os

    from zoedepth.models.model_io import load_wts

    if hasattr(config, "checkpoint"):
        checkpoint = config.checkpoint
    elif hasattr(config, "ckpt_pattern"):
        pattern = config.ckpt_pattern
        matches = glob.glob(os.path.join(
            checkpoint_dir, f"*{pattern}*{ckpt_type}*"))
        if not (len(matches) > 0):
            raise ValueError(f"No matches found for the pattern {pattern}")

        checkpoint = matches[0]

    else:
        return model
    model = load_wts(model, checkpoint)
    print("Loaded weights from {0}".format(checkpoint))
    return model


def main_worker(gpu, ngpus_per_node, config):
    try:
        seed = config.seed if 'seed' in config and config.seed else 43
        fix_random_seed(seed)

        config.gpu = gpu

        model = build_model(config)
        model = load_ckpt(config, model)
        model = parallelize(config, model)

        total_params = f"{round(count_parameters(model)/1e6,2)}M"
        config.total_params = total_params
        print(f"Total parameters : {total_params}")

        train_loader = DepthDataLoader(config, "train").data
        test_loader = DepthDataLoader(config, "online_eval").data

        trainer = get_trainer(config)(
            config, model, train_loader, test_loader, device=config.gpu)

        trainer.train()
    finally:
        import wandb
        wandb.finish()


if __name__ == '__main__':
    mp.set_start_method('forkserver')

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default="synunet")
    parser.add_argument("-d", "--dataset", type=str, default='nyu')
    parser.add_argument("--trainer", type=str, default=None)

    args, unknown_args = parser.parse_known_args()
    overwrite_kwargs = parse_unknown(unknown_args)

    overwrite_kwargs["model"] = args.model
    if args.trainer is not None:
        overwrite_kwargs["trainer"] = args.trainer

    config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
    # git_commit()
    if config.use_shared_dict:
        shared_dict = mp.Manager().dict()
    else:
        shared_dict = None
    config.shared_dict = shared_dict

    config.batch_size = config.bs
    config.mode = 'train'
    if config.root != "." and not os.path.isdir(config.root):
        os.makedirs(config.root)

    try:
        node_str = os.environ['SLURM_JOB_NODELIST'].replace(
            '[', '').replace(']', '')
        nodes = node_str.split(',')

        config.world_size = len(nodes)
        config.rank = int(os.environ['SLURM_PROCID'])
        # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"

    except KeyError as e:
        # We are NOT using SLURM
        config.world_size = 1
        config.rank = 0
        nodes = ["127.0.0.1"]

    if config.distributed:

        print(config.rank)
        port = np.random.randint(15000, 15025)
        config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
        print(config.dist_url)
        config.dist_backend = 'nccl'
        config.gpu = None

    ngpus_per_node = torch.cuda.device_count()
    config.num_workers = config.workers
    config.ngpus_per_node = ngpus_per_node
    print("Config:")
    pprint(config)
    if config.distributed:
        config.world_size = ngpus_per_node * config.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, config))
    else:
        if ngpus_per_node == 1:
            config.gpu = 0
        main_worker(config.gpu, ngpus_per_node, config)
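For orientation, the entry points imported by train_mono.py can also be wired up for a quick single-process debug run, skipping the SLURM and distributed logic. The following is only a minimal sketch under stated assumptions (a "zoedepth" model config and an "nyu" dataset config exist under zoedepth/utils and point at local data); it is not part of the committed files:

# Minimal single-GPU sketch using the same entry points as train_mono.py.
# Assumes the zoedepth package is importable and the "zoedepth"/"nyu"
# configs reference a locally available NYU Depth v2 copy.
from zoedepth.utils.config import get_config
from zoedepth.models.builder import build_model
from zoedepth.data.data_mono import DepthDataLoader

config = get_config("zoedepth", "train", "nyu")
config.gpu = 0
config.distributed = False
config.batch_size = config.bs
config.mode = "train"

model = build_model(config).to("cuda")
train_loader = DepthDataLoader(config, "train").data
print(f"{len(train_loader)} training batches")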
ZoeDepth/train_test_inputs/kitti_eigen_test_files_with_gt.txt
ADDED
The diff for this file is too large to render; see the raw diff.
ZoeDepth/train_test_inputs/kitti_eigen_train_files_with_gt.txt
ADDED
The diff for this file is too large to render; see the raw diff.
ZoeDepth/train_test_inputs/nyudepthv2_test_files_with_gt.txt
ADDED
@@ -0,0 +1,654 @@
bathroom/rgb_00045.jpg bathroom/sync_depth_00045.png 518.8579
bathroom/rgb_00046.jpg bathroom/sync_depth_00046.png 518.8579
bathroom/rgb_00507.jpg bathroom/sync_depth_00507.png 518.8579
bathroom/rgb_00508.jpg bathroom/sync_depth_00508.png 518.8579
bathroom/rgb_00509.jpg bathroom/sync_depth_00509.png 518.8579
bathroom/rgb_00510.jpg bathroom/sync_depth_00510.png 518.8579
bathroom/rgb_00511.jpg bathroom/sync_depth_00511.png 518.8579
bathroom/rgb_00512.jpg bathroom/sync_depth_00512.png 518.8579
bathroom/rgb_00649.jpg bathroom/sync_depth_00649.png 518.8579
bathroom/rgb_00650.jpg bathroom/sync_depth_00650.png 518.8579
bathroom/rgb_00655.jpg bathroom/sync_depth_00655.png 518.8579
bathroom/rgb_00656.jpg bathroom/sync_depth_00656.png 518.8579
bathroom/rgb_00657.jpg bathroom/sync_depth_00657.png 518.8579
bathroom/rgb_00662.jpg bathroom/sync_depth_00662.png 518.8579
bathroom/rgb_00663.jpg bathroom/sync_depth_00663.png 518.8579
bathroom/rgb_00667.jpg bathroom/sync_depth_00667.png 518.8579
bathroom/rgb_00668.jpg bathroom/sync_depth_00668.png 518.8579
bathroom/rgb_00670.jpg bathroom/sync_depth_00670.png 518.8579
bathroom/rgb_00671.jpg bathroom/sync_depth_00671.png 518.8579
bathroom/rgb_00672.jpg bathroom/sync_depth_00672.png 518.8579
bathroom/rgb_00675.jpg bathroom/sync_depth_00675.png 518.8579
bathroom/rgb_00676.jpg bathroom/sync_depth_00676.png 518.8579
bathroom/rgb_00677.jpg bathroom/sync_depth_00677.png 518.8579
bathroom/rgb_00678.jpg bathroom/sync_depth_00678.png 518.8579
bathroom/rgb_00679.jpg bathroom/sync_depth_00679.png 518.8579
bathroom/rgb_00680.jpg bathroom/sync_depth_00680.png 518.8579
bathroom/rgb_00685.jpg bathroom/sync_depth_00685.png 518.8579
bathroom/rgb_00686.jpg bathroom/sync_depth_00686.png 518.8579
bathroom/rgb_00687.jpg bathroom/sync_depth_00687.png 518.8579
bathroom/rgb_00688.jpg bathroom/sync_depth_00688.png 518.8579
bathroom/rgb_00689.jpg bathroom/sync_depth_00689.png 518.8579
bathroom/rgb_00692.jpg bathroom/sync_depth_00692.png 518.8579
bathroom/rgb_00693.jpg bathroom/sync_depth_00693.png 518.8579
bathroom/rgb_00696.jpg bathroom/sync_depth_00696.png 518.8579
bathroom/rgb_00669.jpg bathroom/sync_depth_00669.png 518.8579
bathroom/rgb_00697.jpg bathroom/sync_depth_00697.png 518.8579
bathroom/rgb_00698.jpg bathroom/sync_depth_00698.png 518.8579
bathroom/rgb_00705.jpg bathroom/sync_depth_00705.png 518.8579
bathroom/rgb_00706.jpg bathroom/sync_depth_00706.png 518.8579
bathroom/rgb_00707.jpg bathroom/sync_depth_00707.png 518.8579
bathroom/rgb_00708.jpg bathroom/sync_depth_00708.png 518.8579
bathroom/rgb_00709.jpg bathroom/sync_depth_00709.png 518.8579
bathroom/rgb_00710.jpg bathroom/sync_depth_00710.png 518.8579
bathroom/rgb_00711.jpg bathroom/sync_depth_00711.png 518.8579
bathroom/rgb_00712.jpg bathroom/sync_depth_00712.png 518.8579
bathroom/rgb_00716.jpg bathroom/sync_depth_00716.png 518.8579
bathroom/rgb_00717.jpg bathroom/sync_depth_00717.png 518.8579
bathroom/rgb_00723.jpg bathroom/sync_depth_00723.png 518.8579
bathroom/rgb_00724.jpg bathroom/sync_depth_00724.png 518.8579
bathroom/rgb_00725.jpg bathroom/sync_depth_00725.png 518.8579
bathroom/rgb_00726.jpg bathroom/sync_depth_00726.png 518.8579
bathroom/rgb_00727.jpg bathroom/sync_depth_00727.png 518.8579
bathroom/rgb_00730.jpg bathroom/sync_depth_00730.png 518.8579
bathroom/rgb_00731.jpg bathroom/sync_depth_00731.png 518.8579
bathroom/rgb_00732.jpg bathroom/sync_depth_00732.png 518.8579
bathroom/rgb_00733.jpg bathroom/sync_depth_00733.png 518.8579
bathroom/rgb_00742.jpg bathroom/sync_depth_00742.png 518.8579
bathroom/rgb_00743.jpg bathroom/sync_depth_00743.png 518.8579
bedroom/rgb_00055.jpg bedroom/sync_depth_00055.png 518.8579
bedroom/rgb_00056.jpg bedroom/sync_depth_00056.png 518.8579
bedroom/rgb_00058.jpg bedroom/sync_depth_00058.png 518.8579
bedroom/rgb_00059.jpg bedroom/sync_depth_00059.png 518.8579
bedroom/rgb_00060.jpg bedroom/sync_depth_00060.png 518.8579
bedroom/rgb_00061.jpg bedroom/sync_depth_00061.png 518.8579
bedroom/rgb_00062.jpg bedroom/sync_depth_00062.png 518.8579
bedroom/rgb_00075.jpg bedroom/sync_depth_00075.png 518.8579
bedroom/rgb_00076.jpg bedroom/sync_depth_00076.png 518.8579
bedroom/rgb_00077.jpg bedroom/sync_depth_00077.png 518.8579
bedroom/rgb_00078.jpg bedroom/sync_depth_00078.png 518.8579
bedroom/rgb_00170.jpg bedroom/sync_depth_00170.png 518.8579
bedroom/rgb_00171.jpg bedroom/sync_depth_00171.png 518.8579
bedroom/rgb_00172.jpg bedroom/sync_depth_00172.png 518.8579
bedroom/rgb_00173.jpg bedroom/sync_depth_00173.png 518.8579
bedroom/rgb_00174.jpg bedroom/sync_depth_00174.png 518.8579
bedroom/rgb_00175.jpg bedroom/sync_depth_00175.png 518.8579
bedroom/rgb_00180.jpg bedroom/sync_depth_00180.png 518.8579
bedroom/rgb_00181.jpg bedroom/sync_depth_00181.png 518.8579
bedroom/rgb_00182.jpg bedroom/sync_depth_00182.png 518.8579
bedroom/rgb_00183.jpg bedroom/sync_depth_00183.png 518.8579
bedroom/rgb_00184.jpg bedroom/sync_depth_00184.png 518.8579
bedroom/rgb_00185.jpg bedroom/sync_depth_00185.png 518.8579
bedroom/rgb_00186.jpg bedroom/sync_depth_00186.png 518.8579
bedroom/rgb_00187.jpg bedroom/sync_depth_00187.png 518.8579
bedroom/rgb_00188.jpg bedroom/sync_depth_00188.png 518.8579
bedroom/rgb_00189.jpg bedroom/sync_depth_00189.png 518.8579
bedroom/rgb_00190.jpg bedroom/sync_depth_00190.png 518.8579
bedroom/rgb_00191.jpg bedroom/sync_depth_00191.png 518.8579
bedroom/rgb_00192.jpg bedroom/sync_depth_00192.png 518.8579
bedroom/rgb_00219.jpg bedroom/sync_depth_00219.png 518.8579
bedroom/rgb_00220.jpg bedroom/sync_depth_00220.png 518.8579
bedroom/rgb_00221.jpg bedroom/sync_depth_00221.png 518.8579
bedroom/rgb_00279.jpg bedroom/sync_depth_00279.png 518.8579
bedroom/rgb_00179.jpg bedroom/sync_depth_00179.png 518.8579
bedroom/rgb_00280.jpg bedroom/sync_depth_00280.png 518.8579
bedroom/rgb_00536.jpg bedroom/sync_depth_00536.png 518.8579
bedroom/rgb_00960.jpg bedroom/sync_depth_00960.png 518.8579
bedroom/rgb_01000.jpg bedroom/sync_depth_01000.png 518.8579
bedroom/rgb_01052.jpg bedroom/sync_depth_01052.png 518.8579
bedroom/rgb_01092.jpg bedroom/sync_depth_01092.png 518.8579
bedroom/rgb_01122.jpg bedroom/sync_depth_01122.png 518.8579
bedroom/rgb_01150.jpg bedroom/sync_depth_01150.png 518.8579
bedroom/rgb_00281.jpg bedroom/sync_depth_00281.png 518.8579
bedroom/rgb_00282.jpg bedroom/sync_depth_00282.png 518.8579
bedroom/rgb_00514.jpg bedroom/sync_depth_00514.png 518.8579
bedroom/rgb_00515.jpg bedroom/sync_depth_00515.png 518.8579
bedroom/rgb_00516.jpg bedroom/sync_depth_00516.png 518.8579
bedroom/rgb_00517.jpg bedroom/sync_depth_00517.png 518.8579
bedroom/rgb_00518.jpg bedroom/sync_depth_00518.png 518.8579
bedroom/rgb_00519.jpg bedroom/sync_depth_00519.png 518.8579
bedroom/rgb_00520.jpg bedroom/sync_depth_00520.png 518.8579
bedroom/rgb_00521.jpg bedroom/sync_depth_00521.png 518.8579
bedroom/rgb_00522.jpg bedroom/sync_depth_00522.png 518.8579
bedroom/rgb_00523.jpg bedroom/sync_depth_00523.png 518.8579
bedroom/rgb_00524.jpg bedroom/sync_depth_00524.png 518.8579
bedroom/rgb_00525.jpg bedroom/sync_depth_00525.png 518.8579
bedroom/rgb_00530.jpg bedroom/sync_depth_00530.png 518.8579
bedroom/rgb_00531.jpg bedroom/sync_depth_00531.png 518.8579
bedroom/rgb_00532.jpg bedroom/sync_depth_00532.png 518.8579
bedroom/rgb_00537.jpg bedroom/sync_depth_00537.png 518.8579
bedroom/rgb_00538.jpg bedroom/sync_depth_00538.png 518.8579
bedroom/rgb_00916.jpg bedroom/sync_depth_00916.png 518.8579
bedroom/rgb_00917.jpg bedroom/sync_depth_00917.png 518.8579
bedroom/rgb_00918.jpg bedroom/sync_depth_00918.png 518.8579
bedroom/rgb_00925.jpg bedroom/sync_depth_00925.png 518.8579
bedroom/rgb_00926.jpg bedroom/sync_depth_00926.png 518.8579
bedroom/rgb_00927.jpg bedroom/sync_depth_00927.png 518.8579
bedroom/rgb_00931.jpg bedroom/sync_depth_00931.png 518.8579
bedroom/rgb_00932.jpg bedroom/sync_depth_00932.png 518.8579
bedroom/rgb_00933.jpg bedroom/sync_depth_00933.png 518.8579
bedroom/rgb_00934.jpg bedroom/sync_depth_00934.png 518.8579
bedroom/rgb_00944.jpg bedroom/sync_depth_00944.png 518.8579
bedroom/rgb_00945.jpg bedroom/sync_depth_00945.png 518.8579
bedroom/rgb_00946.jpg bedroom/sync_depth_00946.png 518.8579
bedroom/rgb_00958.jpg bedroom/sync_depth_00958.png 518.8579
bedroom/rgb_00959.jpg bedroom/sync_depth_00959.png 518.8579
bedroom/rgb_00961.jpg bedroom/sync_depth_00961.png 518.8579
bedroom/rgb_00964.jpg bedroom/sync_depth_00964.png 518.8579
bedroom/rgb_00965.jpg bedroom/sync_depth_00965.png 518.8579
bedroom/rgb_00966.jpg bedroom/sync_depth_00966.png 518.8579
bedroom/rgb_00969.jpg bedroom/sync_depth_00969.png 518.8579
bedroom/rgb_00970.jpg bedroom/sync_depth_00970.png 518.8579
bedroom/rgb_00971.jpg bedroom/sync_depth_00971.png 518.8579
bedroom/rgb_00972.jpg bedroom/sync_depth_00972.png 518.8579
bedroom/rgb_00973.jpg bedroom/sync_depth_00973.png 518.8579
bedroom/rgb_00974.jpg bedroom/sync_depth_00974.png 518.8579
bedroom/rgb_00975.jpg bedroom/sync_depth_00975.png 518.8579
bedroom/rgb_00976.jpg bedroom/sync_depth_00976.png 518.8579
bedroom/rgb_00990.jpg bedroom/sync_depth_00990.png 518.8579
bedroom/rgb_00991.jpg bedroom/sync_depth_00991.png 518.8579
bedroom/rgb_00992.jpg bedroom/sync_depth_00992.png 518.8579
bedroom/rgb_00993.jpg bedroom/sync_depth_00993.png 518.8579
bedroom/rgb_00994.jpg bedroom/sync_depth_00994.png 518.8579
bedroom/rgb_01001.jpg bedroom/sync_depth_01001.png 518.8579
bedroom/rgb_01002.jpg bedroom/sync_depth_01002.png 518.8579
bedroom/rgb_01003.jpg bedroom/sync_depth_01003.png 518.8579
bedroom/rgb_01009.jpg bedroom/sync_depth_01009.png 518.8579
bedroom/rgb_01010.jpg bedroom/sync_depth_01010.png 518.8579
bedroom/rgb_01011.jpg bedroom/sync_depth_01011.png 518.8579
bedroom/rgb_01020.jpg bedroom/sync_depth_01020.png 518.8579
bedroom/rgb_01021.jpg bedroom/sync_depth_01021.png 518.8579
bedroom/rgb_01022.jpg bedroom/sync_depth_01022.png 518.8579
bedroom/rgb_01031.jpg bedroom/sync_depth_01031.png 518.8579
bedroom/rgb_01032.jpg bedroom/sync_depth_01032.png 518.8579
bedroom/rgb_01033.jpg bedroom/sync_depth_01033.png 518.8579
bedroom/rgb_01037.jpg bedroom/sync_depth_01037.png 518.8579
bedroom/rgb_01038.jpg bedroom/sync_depth_01038.png 518.8579
bedroom/rgb_01047.jpg bedroom/sync_depth_01047.png 518.8579
bedroom/rgb_01048.jpg bedroom/sync_depth_01048.png 518.8579
bedroom/rgb_01051.jpg bedroom/sync_depth_01051.png 518.8579
bedroom/rgb_01056.jpg bedroom/sync_depth_01056.png 518.8579
bedroom/rgb_01057.jpg bedroom/sync_depth_01057.png 518.8579
bedroom/rgb_01074.jpg bedroom/sync_depth_01074.png 518.8579
bedroom/rgb_01075.jpg bedroom/sync_depth_01075.png 518.8579
bedroom/rgb_01076.jpg bedroom/sync_depth_01076.png 518.8579
bedroom/rgb_01077.jpg bedroom/sync_depth_01077.png 518.8579
bedroom/rgb_01078.jpg bedroom/sync_depth_01078.png 518.8579
bedroom/rgb_01079.jpg bedroom/sync_depth_01079.png 518.8579
bedroom/rgb_01080.jpg bedroom/sync_depth_01080.png 518.8579
bedroom/rgb_01081.jpg bedroom/sync_depth_01081.png 518.8579
bedroom/rgb_01082.jpg bedroom/sync_depth_01082.png 518.8579
bedroom/rgb_01083.jpg bedroom/sync_depth_01083.png 518.8579
bedroom/rgb_01087.jpg bedroom/sync_depth_01087.png 518.8579
bedroom/rgb_01088.jpg bedroom/sync_depth_01088.png 518.8579
bedroom/rgb_01089.jpg bedroom/sync_depth_01089.png 518.8579
bedroom/rgb_01090.jpg bedroom/sync_depth_01090.png 518.8579
bedroom/rgb_01091.jpg bedroom/sync_depth_01091.png 518.8579
bedroom/rgb_01093.jpg bedroom/sync_depth_01093.png 518.8579
bedroom/rgb_01094.jpg bedroom/sync_depth_01094.png 518.8579
bedroom/rgb_01095.jpg bedroom/sync_depth_01095.png 518.8579
bedroom/rgb_01097.jpg bedroom/sync_depth_01097.png 518.8579
bedroom/rgb_01098.jpg bedroom/sync_depth_01098.png 518.8579
bedroom/rgb_01099.jpg bedroom/sync_depth_01099.png 518.8579
bedroom/rgb_01100.jpg bedroom/sync_depth_01100.png 518.8579
bedroom/rgb_01101.jpg bedroom/sync_depth_01101.png 518.8579
bedroom/rgb_01102.jpg bedroom/sync_depth_01102.png 518.8579
bedroom/rgb_01103.jpg bedroom/sync_depth_01103.png 518.8579
bedroom/rgb_01105.jpg bedroom/sync_depth_01105.png 518.8579
bedroom/rgb_01106.jpg bedroom/sync_depth_01106.png 518.8579
bedroom/rgb_01107.jpg bedroom/sync_depth_01107.png 518.8579
bedroom/rgb_01108.jpg bedroom/sync_depth_01108.png 518.8579
bedroom/rgb_01116.jpg bedroom/sync_depth_01116.png 518.8579
bedroom/rgb_01117.jpg bedroom/sync_depth_01117.png 518.8579
bedroom/rgb_01118.jpg bedroom/sync_depth_01118.png 518.8579
bedroom/rgb_01123.jpg bedroom/sync_depth_01123.png 518.8579
bedroom/rgb_01124.jpg bedroom/sync_depth_01124.png 518.8579
bedroom/rgb_01125.jpg bedroom/sync_depth_01125.png 518.8579
bedroom/rgb_01126.jpg bedroom/sync_depth_01126.png 518.8579
bedroom/rgb_01127.jpg bedroom/sync_depth_01127.png 518.8579
bedroom/rgb_01128.jpg bedroom/sync_depth_01128.png 518.8579
bedroom/rgb_01129.jpg bedroom/sync_depth_01129.png 518.8579
bedroom/rgb_01130.jpg bedroom/sync_depth_01130.png 518.8579
bedroom/rgb_01134.jpg bedroom/sync_depth_01134.png 518.8579
bedroom/rgb_01135.jpg bedroom/sync_depth_01135.png 518.8579
bedroom/rgb_01143.jpg bedroom/sync_depth_01143.png 518.8579
bedroom/rgb_01144.jpg bedroom/sync_depth_01144.png 518.8579
bedroom/rgb_01145.jpg bedroom/sync_depth_01145.png 518.8579
bedroom/rgb_01146.jpg bedroom/sync_depth_01146.png 518.8579
bedroom/rgb_01147.jpg bedroom/sync_depth_01147.png 518.8579
bedroom/rgb_01148.jpg bedroom/sync_depth_01148.png 518.8579
bedroom/rgb_01149.jpg bedroom/sync_depth_01149.png 518.8579
bedroom/rgb_01151.jpg bedroom/sync_depth_01151.png 518.8579
bedroom/rgb_01152.jpg bedroom/sync_depth_01152.png 518.8579
bedroom/rgb_01153.jpg bedroom/sync_depth_01153.png 518.8579
bedroom/rgb_01154.jpg bedroom/sync_depth_01154.png 518.8579
bedroom/rgb_01155.jpg bedroom/sync_depth_01155.png 518.8579
bedroom/rgb_01156.jpg bedroom/sync_depth_01156.png 518.8579
bedroom/rgb_01157.jpg bedroom/sync_depth_01157.png 518.8579
bedroom/rgb_01161.jpg bedroom/sync_depth_01161.png 518.8579
bedroom/rgb_01162.jpg bedroom/sync_depth_01162.png 518.8579
bedroom/rgb_01163.jpg bedroom/sync_depth_01163.png 518.8579
bedroom/rgb_01164.jpg bedroom/sync_depth_01164.png 518.8579
bedroom/rgb_01165.jpg bedroom/sync_depth_01165.png 518.8579
bedroom/rgb_01166.jpg bedroom/sync_depth_01166.png 518.8579
bedroom/rgb_01169.jpg bedroom/sync_depth_01169.png 518.8579
bedroom/rgb_01170.jpg bedroom/sync_depth_01170.png 518.8579
bedroom/rgb_01173.jpg bedroom/sync_depth_01173.png 518.8579
bedroom/rgb_01174.jpg bedroom/sync_depth_01174.png 518.8579
bedroom/rgb_01175.jpg bedroom/sync_depth_01175.png 518.8579
bedroom/rgb_01178.jpg bedroom/sync_depth_01178.png 518.8579
bedroom/rgb_01179.jpg bedroom/sync_depth_01179.png 518.8579
bedroom/rgb_01180.jpg bedroom/sync_depth_01180.png 518.8579
bedroom/rgb_01181.jpg bedroom/sync_depth_01181.png 518.8579
bedroom/rgb_01182.jpg bedroom/sync_depth_01182.png 518.8579
bedroom/rgb_01183.jpg bedroom/sync_depth_01183.png 518.8579
bedroom/rgb_01191.jpg bedroom/sync_depth_01191.png 518.8579
bedroom/rgb_01192.jpg bedroom/sync_depth_01192.png 518.8579
bedroom/rgb_01193.jpg bedroom/sync_depth_01193.png 518.8579
bedroom/rgb_01194.jpg bedroom/sync_depth_01194.png 518.8579
bedroom/rgb_01195.jpg bedroom/sync_depth_01195.png 518.8579
bookstore/rgb_00083.jpg bookstore/sync_depth_00083.png 518.8579
bookstore/rgb_00084.jpg bookstore/sync_depth_00084.png 518.8579
bookstore/rgb_00085.jpg bookstore/sync_depth_00085.png 518.8579
bookstore/rgb_00086.jpg bookstore/sync_depth_00086.png 518.8579
bookstore/rgb_00087.jpg bookstore/sync_depth_00087.png 518.8579
bookstore/rgb_00088.jpg bookstore/sync_depth_00088.png 518.8579
bookstore/rgb_00089.jpg bookstore/sync_depth_00089.png 518.8579
bookstore/rgb_00090.jpg bookstore/sync_depth_00090.png 518.8579
bookstore/rgb_00116.jpg bookstore/sync_depth_00116.png 518.8579
bookstore/rgb_00117.jpg bookstore/sync_depth_00117.png 518.8579
bookstore/rgb_00118.jpg bookstore/sync_depth_00118.png 518.8579
classroom/rgb_00283.jpg classroom/sync_depth_00283.png 518.8579
classroom/rgb_00284.jpg classroom/sync_depth_00284.png 518.8579
classroom/rgb_00295.jpg classroom/sync_depth_00295.png 518.8579
classroom/rgb_00296.jpg classroom/sync_depth_00296.png 518.8579
classroom/rgb_00297.jpg classroom/sync_depth_00297.png 518.8579
classroom/rgb_00298.jpg classroom/sync_depth_00298.png 518.8579
classroom/rgb_00299.jpg classroom/sync_depth_00299.png 518.8579
classroom/rgb_00300.jpg classroom/sync_depth_00300.png 518.8579
classroom/rgb_00301.jpg classroom/sync_depth_00301.png 518.8579
classroom/rgb_00309.jpg classroom/sync_depth_00309.png 518.8579
classroom/rgb_00310.jpg classroom/sync_depth_00310.png 518.8579
classroom/rgb_00311.jpg classroom/sync_depth_00311.png 518.8579
classroom/rgb_00314.jpg classroom/sync_depth_00314.png 518.8579
classroom/rgb_00315.jpg classroom/sync_depth_00315.png 518.8579
classroom/rgb_00316.jpg classroom/sync_depth_00316.png 518.8579
classroom/rgb_00324.jpg classroom/sync_depth_00324.png 518.8579
classroom/rgb_00325.jpg classroom/sync_depth_00325.png 518.8579
classroom/rgb_00326.jpg classroom/sync_depth_00326.png 518.8579
classroom/rgb_00327.jpg classroom/sync_depth_00327.png 518.8579
classroom/rgb_00328.jpg classroom/sync_depth_00328.png 518.8579
classroom/rgb_00329.jpg classroom/sync_depth_00329.png 518.8579
classroom/rgb_00330.jpg classroom/sync_depth_00330.png 518.8579
classroom/rgb_00331.jpg classroom/sync_depth_00331.png 518.8579
computer_lab/rgb_00332.jpg computer_lab/sync_depth_00332.png 518.8579
computer_lab/rgb_00333.jpg computer_lab/sync_depth_00333.png 518.8579
computer_lab/rgb_00334.jpg computer_lab/sync_depth_00334.png 518.8579
dining_room/rgb_00548.jpg dining_room/sync_depth_00548.png 518.8579
dining_room/rgb_00549.jpg dining_room/sync_depth_00549.png 518.8579
dining_room/rgb_00550.jpg dining_room/sync_depth_00550.png 518.8579
dining_room/rgb_01346.jpg dining_room/sync_depth_01346.png 518.8579
dining_room/rgb_01347.jpg dining_room/sync_depth_01347.png 518.8579
dining_room/rgb_01348.jpg dining_room/sync_depth_01348.png 518.8579
dining_room/rgb_01352.jpg dining_room/sync_depth_01352.png 518.8579
dining_room/rgb_01353.jpg dining_room/sync_depth_01353.png 518.8579
dining_room/rgb_01354.jpg dining_room/sync_depth_01354.png 518.8579
dining_room/rgb_01355.jpg dining_room/sync_depth_01355.png 518.8579
dining_room/rgb_01363.jpg dining_room/sync_depth_01363.png 518.8579
dining_room/rgb_01364.jpg dining_room/sync_depth_01364.png 518.8579
dining_room/rgb_01367.jpg dining_room/sync_depth_01367.png 518.8579
dining_room/rgb_01368.jpg dining_room/sync_depth_01368.png 518.8579
dining_room/rgb_01383.jpg dining_room/sync_depth_01383.png 518.8579
dining_room/rgb_01384.jpg dining_room/sync_depth_01384.png 518.8579
dining_room/rgb_01385.jpg dining_room/sync_depth_01385.png 518.8579
dining_room/rgb_01387.jpg dining_room/sync_depth_01387.png 518.8579
dining_room/rgb_01388.jpg dining_room/sync_depth_01388.png 518.8579
dining_room/rgb_01389.jpg dining_room/sync_depth_01389.png 518.8579
dining_room/rgb_01390.jpg dining_room/sync_depth_01390.png 518.8579
dining_room/rgb_01393.jpg dining_room/sync_depth_01393.png 518.8579
dining_room/rgb_01394.jpg dining_room/sync_depth_01394.png 518.8579
dining_room/rgb_01395.jpg dining_room/sync_depth_01395.png 518.8579
dining_room/rgb_01396.jpg dining_room/sync_depth_01396.png 518.8579
dining_room/rgb_01397.jpg dining_room/sync_depth_01397.png 518.8579
dining_room/rgb_01398.jpg dining_room/sync_depth_01398.png 518.8579
dining_room/rgb_01399.jpg dining_room/sync_depth_01399.png 518.8579
dining_room/rgb_01400.jpg dining_room/sync_depth_01400.png 518.8579
dining_room/rgb_01406.jpg dining_room/sync_depth_01406.png 518.8579
dining_room/rgb_01407.jpg dining_room/sync_depth_01407.png 518.8579
dining_room/rgb_01408.jpg dining_room/sync_depth_01408.png 518.8579
dining_room/rgb_01409.jpg dining_room/sync_depth_01409.png 518.8579
dining_room/rgb_01410.jpg dining_room/sync_depth_01410.png 518.8579
dining_room/rgb_01386.jpg dining_room/sync_depth_01386.png 518.8579
dining_room/rgb_01411.jpg dining_room/sync_depth_01411.png 518.8579
dining_room/rgb_01412.jpg dining_room/sync_depth_01412.png 518.8579
dining_room/rgb_01413.jpg dining_room/sync_depth_01413.png 518.8579
dining_room/rgb_01420.jpg dining_room/sync_depth_01420.png 518.8579
dining_room/rgb_01421.jpg dining_room/sync_depth_01421.png 518.8579
dining_room/rgb_01422.jpg dining_room/sync_depth_01422.png 518.8579
dining_room/rgb_01423.jpg dining_room/sync_depth_01423.png 518.8579
dining_room/rgb_01429.jpg dining_room/sync_depth_01429.png 518.8579
dining_room/rgb_01430.jpg dining_room/sync_depth_01430.png 518.8579
dining_room/rgb_01431.jpg dining_room/sync_depth_01431.png 518.8579
dining_room/rgb_01432.jpg dining_room/sync_depth_01432.png 518.8579
dining_room/rgb_01440.jpg dining_room/sync_depth_01440.png 518.8579
dining_room/rgb_01441.jpg dining_room/sync_depth_01441.png 518.8579
dining_room/rgb_01442.jpg dining_room/sync_depth_01442.png 518.8579
dining_room/rgb_01443.jpg dining_room/sync_depth_01443.png 518.8579
dining_room/rgb_01444.jpg dining_room/sync_depth_01444.png 518.8579
dining_room/rgb_01445.jpg dining_room/sync_depth_01445.png 518.8579
dining_room/rgb_01446.jpg dining_room/sync_depth_01446.png 518.8579
dining_room/rgb_01447.jpg dining_room/sync_depth_01447.png 518.8579
dining_room/rgb_01448.jpg dining_room/sync_depth_01448.png 518.8579
foyer/rgb_00350.jpg foyer/sync_depth_00350.png 518.8579
foyer/rgb_00351.jpg foyer/sync_depth_00351.png 518.8579
home_office/rgb_00354.jpg home_office/sync_depth_00354.png 518.8579
home_office/rgb_00355.jpg home_office/sync_depth_00355.png 518.8579
home_office/rgb_00356.jpg home_office/sync_depth_00356.png 518.8579
home_office/rgb_00357.jpg home_office/sync_depth_00357.png 518.8579
home_office/rgb_00358.jpg home_office/sync_depth_00358.png 518.8579
home_office/rgb_00359.jpg home_office/sync_depth_00359.png 518.8579
home_office/rgb_00360.jpg home_office/sync_depth_00360.png 518.8579
home_office/rgb_00361.jpg home_office/sync_depth_00361.png 518.8579
home_office/rgb_00362.jpg home_office/sync_depth_00362.png 518.8579
home_office/rgb_00363.jpg home_office/sync_depth_00363.png 518.8579
home_office/rgb_00383.jpg home_office/sync_depth_00383.png 518.8579
home_office/rgb_00384.jpg home_office/sync_depth_00384.png 518.8579
home_office/rgb_00385.jpg home_office/sync_depth_00385.png 518.8579
home_office/rgb_00386.jpg home_office/sync_depth_00386.png 518.8579
home_office/rgb_00387.jpg home_office/sync_depth_00387.png 518.8579
home_office/rgb_00388.jpg home_office/sync_depth_00388.png 518.8579
home_office/rgb_00389.jpg home_office/sync_depth_00389.png 518.8579
home_office/rgb_00394.jpg home_office/sync_depth_00394.png 518.8579
home_office/rgb_00395.jpg home_office/sync_depth_00395.png 518.8579
home_office/rgb_00396.jpg home_office/sync_depth_00396.png 518.8579
home_office/rgb_00554.jpg home_office/sync_depth_00554.png 518.8579
home_office/rgb_00555.jpg home_office/sync_depth_00555.png 518.8579
home_office/rgb_00556.jpg home_office/sync_depth_00556.png 518.8579
home_office/rgb_00557.jpg home_office/sync_depth_00557.png 518.8579
kitchen/rgb_00000.jpg kitchen/sync_depth_00000.png 518.8579
kitchen/rgb_00001.jpg kitchen/sync_depth_00001.png 518.8579
kitchen/rgb_00124.jpg kitchen/sync_depth_00124.png 518.8579
kitchen/rgb_00125.jpg kitchen/sync_depth_00125.png 518.8579
kitchen/rgb_00126.jpg kitchen/sync_depth_00126.png 518.8579
kitchen/rgb_00127.jpg kitchen/sync_depth_00127.png 518.8579
kitchen/rgb_00128.jpg kitchen/sync_depth_00128.png 518.8579
kitchen/rgb_00130.jpg kitchen/sync_depth_00130.png 518.8579
kitchen/rgb_00131.jpg kitchen/sync_depth_00131.png 518.8579
kitchen/rgb_00132.jpg kitchen/sync_depth_00132.png 518.8579
kitchen/rgb_00133.jpg kitchen/sync_depth_00133.png 518.8579
kitchen/rgb_00136.jpg kitchen/sync_depth_00136.png 518.8579
kitchen/rgb_00193.jpg kitchen/sync_depth_00193.png 518.8579
kitchen/rgb_00194.jpg kitchen/sync_depth_00194.png 518.8579
kitchen/rgb_00195.jpg kitchen/sync_depth_00195.png 518.8579
kitchen/rgb_00196.jpg kitchen/sync_depth_00196.png 518.8579
kitchen/rgb_00197.jpg kitchen/sync_depth_00197.png 518.8579
kitchen/rgb_00199.jpg kitchen/sync_depth_00199.png 518.8579
kitchen/rgb_00200.jpg kitchen/sync_depth_00200.png 518.8579
kitchen/rgb_00201.jpg kitchen/sync_depth_00201.png 518.8579
kitchen/rgb_00249.jpg kitchen/sync_depth_00249.png 518.8579
kitchen/rgb_00558.jpg kitchen/sync_depth_00558.png 518.8579
kitchen/rgb_00559.jpg kitchen/sync_depth_00559.png 518.8579
kitchen/rgb_00560.jpg kitchen/sync_depth_00560.png 518.8579
kitchen/rgb_00561.jpg kitchen/sync_depth_00561.png 518.8579
kitchen/rgb_00562.jpg kitchen/sync_depth_00562.png 518.8579
kitchen/rgb_00563.jpg kitchen/sync_depth_00563.png 518.8579
kitchen/rgb_00564.jpg kitchen/sync_depth_00564.png 518.8579
kitchen/rgb_00565.jpg kitchen/sync_depth_00565.png 518.8579
kitchen/rgb_00566.jpg kitchen/sync_depth_00566.png 518.8579
kitchen/rgb_00567.jpg kitchen/sync_depth_00567.png 518.8579
kitchen/rgb_00568.jpg kitchen/sync_depth_00568.png 518.8579
kitchen/rgb_00569.jpg kitchen/sync_depth_00569.png 518.8579
kitchen/rgb_00570.jpg kitchen/sync_depth_00570.png 518.8579
kitchen/rgb_00198.jpg kitchen/sync_depth_00198.png 518.8579
kitchen/rgb_00758.jpg kitchen/sync_depth_00758.png 518.8579
kitchen/rgb_00776.jpg kitchen/sync_depth_00776.png 518.8579
kitchen/rgb_00811.jpg kitchen/sync_depth_00811.png 518.8579
kitchen/rgb_00844.jpg kitchen/sync_depth_00844.png 518.8579
kitchen/rgb_00759.jpg kitchen/sync_depth_00759.png 518.8579
kitchen/rgb_00760.jpg kitchen/sync_depth_00760.png 518.8579
kitchen/rgb_00761.jpg kitchen/sync_depth_00761.png 518.8579
kitchen/rgb_00762.jpg kitchen/sync_depth_00762.png 518.8579
kitchen/rgb_00763.jpg kitchen/sync_depth_00763.png 518.8579
kitchen/rgb_00764.jpg kitchen/sync_depth_00764.png 518.8579
kitchen/rgb_00765.jpg kitchen/sync_depth_00765.png 518.8579
kitchen/rgb_00766.jpg kitchen/sync_depth_00766.png 518.8579
kitchen/rgb_00767.jpg kitchen/sync_depth_00767.png 518.8579
kitchen/rgb_00768.jpg kitchen/sync_depth_00768.png 518.8579
kitchen/rgb_00769.jpg kitchen/sync_depth_00769.png 518.8579
kitchen/rgb_00770.jpg kitchen/sync_depth_00770.png 518.8579
kitchen/rgb_00771.jpg kitchen/sync_depth_00771.png 518.8579
kitchen/rgb_00772.jpg kitchen/sync_depth_00772.png 518.8579
kitchen/rgb_00773.jpg kitchen/sync_depth_00773.png 518.8579
kitchen/rgb_00774.jpg kitchen/sync_depth_00774.png 518.8579
kitchen/rgb_00775.jpg kitchen/sync_depth_00775.png 518.8579
kitchen/rgb_00777.jpg kitchen/sync_depth_00777.png 518.8579
kitchen/rgb_00778.jpg kitchen/sync_depth_00778.png 518.8579
kitchen/rgb_00779.jpg kitchen/sync_depth_00779.png 518.8579
kitchen/rgb_00780.jpg kitchen/sync_depth_00780.png 518.8579
kitchen/rgb_00781.jpg kitchen/sync_depth_00781.png 518.8579
kitchen/rgb_00782.jpg kitchen/sync_depth_00782.png 518.8579
kitchen/rgb_00783.jpg kitchen/sync_depth_00783.png 518.8579
kitchen/rgb_00784.jpg kitchen/sync_depth_00784.png 518.8579
kitchen/rgb_00785.jpg kitchen/sync_depth_00785.png 518.8579
kitchen/rgb_00786.jpg kitchen/sync_depth_00786.png 518.8579
kitchen/rgb_00799.jpg kitchen/sync_depth_00799.png 518.8579
kitchen/rgb_00800.jpg kitchen/sync_depth_00800.png 518.8579
kitchen/rgb_00801.jpg kitchen/sync_depth_00801.png 518.8579
kitchen/rgb_00802.jpg kitchen/sync_depth_00802.png 518.8579
kitchen/rgb_00803.jpg kitchen/sync_depth_00803.png 518.8579
kitchen/rgb_00809.jpg kitchen/sync_depth_00809.png 518.8579
kitchen/rgb_00810.jpg kitchen/sync_depth_00810.png 518.8579
kitchen/rgb_00812.jpg kitchen/sync_depth_00812.png 518.8579
kitchen/rgb_00813.jpg kitchen/sync_depth_00813.png 518.8579
kitchen/rgb_00820.jpg kitchen/sync_depth_00820.png 518.8579
kitchen/rgb_00821.jpg kitchen/sync_depth_00821.png 518.8579
kitchen/rgb_00822.jpg kitchen/sync_depth_00822.png 518.8579
kitchen/rgb_00832.jpg kitchen/sync_depth_00832.png 518.8579
kitchen/rgb_00833.jpg kitchen/sync_depth_00833.png 518.8579
kitchen/rgb_00834.jpg kitchen/sync_depth_00834.png 518.8579
kitchen/rgb_00835.jpg kitchen/sync_depth_00835.png 518.8579
kitchen/rgb_00836.jpg kitchen/sync_depth_00836.png 518.8579
kitchen/rgb_00837.jpg kitchen/sync_depth_00837.png 518.8579
kitchen/rgb_00838.jpg kitchen/sync_depth_00838.png 518.8579
kitchen/rgb_00839.jpg kitchen/sync_depth_00839.png 518.8579
kitchen/rgb_00840.jpg kitchen/sync_depth_00840.png 518.8579
kitchen/rgb_00841.jpg kitchen/sync_depth_00841.png 518.8579
kitchen/rgb_00842.jpg kitchen/sync_depth_00842.png 518.8579
kitchen/rgb_00843.jpg kitchen/sync_depth_00843.png 518.8579
kitchen/rgb_00845.jpg kitchen/sync_depth_00845.png 518.8579
kitchen/rgb_00849.jpg kitchen/sync_depth_00849.png 518.8579
kitchen/rgb_00850.jpg kitchen/sync_depth_00850.png 518.8579
kitchen/rgb_00851.jpg kitchen/sync_depth_00851.png 518.8579
kitchen/rgb_00856.jpg kitchen/sync_depth_00856.png 518.8579
kitchen/rgb_00857.jpg kitchen/sync_depth_00857.png 518.8579
kitchen/rgb_00858.jpg kitchen/sync_depth_00858.png 518.8579
kitchen/rgb_00859.jpg kitchen/sync_depth_00859.png 518.8579
kitchen/rgb_00860.jpg kitchen/sync_depth_00860.png 518.8579
kitchen/rgb_00861.jpg kitchen/sync_depth_00861.png 518.8579
kitchen/rgb_00868.jpg kitchen/sync_depth_00868.png 518.8579
kitchen/rgb_00869.jpg kitchen/sync_depth_00869.png 518.8579
kitchen/rgb_00870.jpg kitchen/sync_depth_00870.png 518.8579
kitchen/rgb_00905.jpg kitchen/sync_depth_00905.png 518.8579
kitchen/rgb_00906.jpg kitchen/sync_depth_00906.png 518.8579
kitchen/rgb_00907.jpg kitchen/sync_depth_00907.png 518.8579
living_room/rgb_00152.jpg living_room/sync_depth_00152.png 518.8579
living_room/rgb_00153.jpg living_room/sync_depth_00153.png 518.8579
living_room/rgb_00154.jpg living_room/sync_depth_00154.png 518.8579
living_room/rgb_00166.jpg living_room/sync_depth_00166.png 518.8579
living_room/rgb_00167.jpg living_room/sync_depth_00167.png 518.8579
living_room/rgb_00168.jpg living_room/sync_depth_00168.png 518.8579
living_room/rgb_00206.jpg living_room/sync_depth_00206.png 518.8579
living_room/rgb_00207.jpg living_room/sync_depth_00207.png 518.8579
living_room/rgb_00208.jpg living_room/sync_depth_00208.png 518.8579
living_room/rgb_00209.jpg living_room/sync_depth_00209.png 518.8579
living_room/rgb_00210.jpg living_room/sync_depth_00210.png 518.8579
living_room/rgb_00211.jpg living_room/sync_depth_00211.png 518.8579
living_room/rgb_00263.jpg living_room/sync_depth_00263.png 518.8579
living_room/rgb_00578.jpg living_room/sync_depth_00578.png 518.8579
living_room/rgb_00579.jpg living_room/sync_depth_00579.png 518.8579
living_room/rgb_00580.jpg living_room/sync_depth_00580.png 518.8579
living_room/rgb_00581.jpg living_room/sync_depth_00581.png 518.8579
living_room/rgb_00590.jpg living_room/sync_depth_00590.png 518.8579
living_room/rgb_00591.jpg living_room/sync_depth_00591.png 518.8579
living_room/rgb_00592.jpg living_room/sync_depth_00592.png 518.8579
living_room/rgb_00593.jpg living_room/sync_depth_00593.png 518.8579
living_room/rgb_00602.jpg living_room/sync_depth_00602.png 518.8579
living_room/rgb_00603.jpg living_room/sync_depth_00603.png 518.8579
living_room/rgb_00604.jpg living_room/sync_depth_00604.png 518.8579
living_room/rgb_00605.jpg living_room/sync_depth_00605.png 518.8579
living_room/rgb_00606.jpg living_room/sync_depth_00606.png 518.8579
living_room/rgb_01200.jpg living_room/sync_depth_01200.png 518.8579
living_room/rgb_01201.jpg living_room/sync_depth_01201.png 518.8579
living_room/rgb_01202.jpg living_room/sync_depth_01202.png 518.8579
living_room/rgb_01203.jpg living_room/sync_depth_01203.png 518.8579
living_room/rgb_01204.jpg living_room/sync_depth_01204.png 518.8579
living_room/rgb_01205.jpg living_room/sync_depth_01205.png 518.8579
living_room/rgb_01206.jpg living_room/sync_depth_01206.png 518.8579
living_room/rgb_01207.jpg living_room/sync_depth_01207.png 518.8579
living_room/rgb_00582.jpg living_room/sync_depth_00582.png 518.8579
living_room/rgb_01208.jpg living_room/sync_depth_01208.png 518.8579
living_room/rgb_01247.jpg living_room/sync_depth_01247.png 518.8579
living_room/rgb_01277.jpg living_room/sync_depth_01277.png 518.8579
living_room/rgb_01302.jpg living_room/sync_depth_01302.png 518.8579
living_room/rgb_01209.jpg living_room/sync_depth_01209.png 518.8579
living_room/rgb_01210.jpg living_room/sync_depth_01210.png 518.8579
living_room/rgb_01211.jpg living_room/sync_depth_01211.png 518.8579
living_room/rgb_01215.jpg living_room/sync_depth_01215.png 518.8579
living_room/rgb_01216.jpg living_room/sync_depth_01216.png 518.8579
living_room/rgb_01217.jpg living_room/sync_depth_01217.png 518.8579
living_room/rgb_01218.jpg living_room/sync_depth_01218.png 518.8579
living_room/rgb_01219.jpg living_room/sync_depth_01219.png 518.8579
living_room/rgb_01225.jpg living_room/sync_depth_01225.png 518.8579
living_room/rgb_01226.jpg living_room/sync_depth_01226.png 518.8579
living_room/rgb_01227.jpg living_room/sync_depth_01227.png 518.8579
living_room/rgb_01228.jpg living_room/sync_depth_01228.png 518.8579
living_room/rgb_01229.jpg living_room/sync_depth_01229.png 518.8579
living_room/rgb_01232.jpg living_room/sync_depth_01232.png 518.8579
living_room/rgb_01233.jpg living_room/sync_depth_01233.png 518.8579
living_room/rgb_01234.jpg living_room/sync_depth_01234.png 518.8579
living_room/rgb_01246.jpg living_room/sync_depth_01246.png 518.8579
living_room/rgb_01248.jpg living_room/sync_depth_01248.png 518.8579
living_room/rgb_01249.jpg living_room/sync_depth_01249.png 518.8579
living_room/rgb_01253.jpg living_room/sync_depth_01253.png 518.8579
living_room/rgb_01254.jpg living_room/sync_depth_01254.png 518.8579
living_room/rgb_01255.jpg living_room/sync_depth_01255.png 518.8579
living_room/rgb_01256.jpg living_room/sync_depth_01256.png 518.8579
living_room/rgb_01257.jpg living_room/sync_depth_01257.png 518.8579
living_room/rgb_01258.jpg living_room/sync_depth_01258.png 518.8579
living_room/rgb_01259.jpg living_room/sync_depth_01259.png 518.8579
living_room/rgb_01260.jpg living_room/sync_depth_01260.png 518.8579
living_room/rgb_01261.jpg living_room/sync_depth_01261.png 518.8579
living_room/rgb_01262.jpg living_room/sync_depth_01262.png 518.8579
living_room/rgb_01263.jpg living_room/sync_depth_01263.png 518.8579
living_room/rgb_01264.jpg living_room/sync_depth_01264.png 518.8579
living_room/rgb_01274.jpg living_room/sync_depth_01274.png 518.8579
living_room/rgb_01275.jpg living_room/sync_depth_01275.png 518.8579
living_room/rgb_01276.jpg living_room/sync_depth_01276.png 518.8579
living_room/rgb_01278.jpg living_room/sync_depth_01278.png 518.8579
living_room/rgb_01279.jpg living_room/sync_depth_01279.png 518.8579
living_room/rgb_01284.jpg living_room/sync_depth_01284.png 518.8579
living_room/rgb_01285.jpg living_room/sync_depth_01285.png 518.8579
living_room/rgb_01286.jpg living_room/sync_depth_01286.png 518.8579
living_room/rgb_01287.jpg living_room/sync_depth_01287.png 518.8579
living_room/rgb_01288.jpg living_room/sync_depth_01288.png 518.8579
living_room/rgb_01289.jpg living_room/sync_depth_01289.png 518.8579
living_room/rgb_01290.jpg living_room/sync_depth_01290.png 518.8579
living_room/rgb_01291.jpg living_room/sync_depth_01291.png 518.8579
living_room/rgb_01292.jpg living_room/sync_depth_01292.png 518.8579
living_room/rgb_01293.jpg living_room/sync_depth_01293.png 518.8579
living_room/rgb_01294.jpg living_room/sync_depth_01294.png 518.8579
living_room/rgb_01296.jpg living_room/sync_depth_01296.png 518.8579
living_room/rgb_01297.jpg living_room/sync_depth_01297.png 518.8579
living_room/rgb_01298.jpg living_room/sync_depth_01298.png 518.8579
living_room/rgb_01301.jpg living_room/sync_depth_01301.png 518.8579
living_room/rgb_01303.jpg living_room/sync_depth_01303.png 518.8579
living_room/rgb_01304.jpg living_room/sync_depth_01304.png 518.8579
living_room/rgb_01305.jpg living_room/sync_depth_01305.png 518.8579
living_room/rgb_01306.jpg living_room/sync_depth_01306.png 518.8579
living_room/rgb_01307.jpg living_room/sync_depth_01307.png 518.8579
living_room/rgb_01313.jpg living_room/sync_depth_01313.png 518.8579
living_room/rgb_01314.jpg living_room/sync_depth_01314.png 518.8579
living_room/rgb_01328.jpg living_room/sync_depth_01328.png 518.8579
living_room/rgb_01329.jpg living_room/sync_depth_01329.png 518.8579
living_room/rgb_01330.jpg living_room/sync_depth_01330.png 518.8579
living_room/rgb_01331.jpg living_room/sync_depth_01331.png 518.8579
living_room/rgb_01334.jpg living_room/sync_depth_01334.png 518.8579
living_room/rgb_01335.jpg living_room/sync_depth_01335.png 518.8579
living_room/rgb_01336.jpg living_room/sync_depth_01336.png 518.8579
living_room/rgb_01337.jpg living_room/sync_depth_01337.png 518.8579
living_room/rgb_01338.jpg living_room/sync_depth_01338.png 518.8579
living_room/rgb_01339.jpg living_room/sync_depth_01339.png 518.8579
office/rgb_00008.jpg office/sync_depth_00008.png 518.8579
office/rgb_00013.jpg office/sync_depth_00013.png 518.8579
office/rgb_00014.jpg office/sync_depth_00014.png 518.8579
office/rgb_00015.jpg office/sync_depth_00015.png 518.8579
office/rgb_00016.jpg office/sync_depth_00016.png 518.8579
office/rgb_00017.jpg office/sync_depth_00017.png 518.8579
office/rgb_00020.jpg office/sync_depth_00020.png 518.8579
office/rgb_00027.jpg office/sync_depth_00027.png 518.8579
office/rgb_00028.jpg office/sync_depth_00028.png 518.8579
office/rgb_00029.jpg office/sync_depth_00029.png 518.8579
office/rgb_00030.jpg office/sync_depth_00030.png 518.8579
office/rgb_00031.jpg office/sync_depth_00031.png 518.8579
office/rgb_00032.jpg office/sync_depth_00032.png 518.8579
office/rgb_00033.jpg office/sync_depth_00033.png 518.8579
office/rgb_00034.jpg office/sync_depth_00034.png 518.8579
office/rgb_00035.jpg office/sync_depth_00035.png 518.8579
office/rgb_00036.jpg office/sync_depth_00036.png 518.8579
office/rgb_00038.jpg office/sync_depth_00038.png 518.8579
office/rgb_00039.jpg office/sync_depth_00039.png 518.8579
office/rgb_00040.jpg office/sync_depth_00040.png 518.8579
office/rgb_00041.jpg office/sync_depth_00041.png 518.8579
office/rgb_00042.jpg office/sync_depth_00042.png 518.8579
office/rgb_00270.jpg office/sync_depth_00270.png 518.8579
office/rgb_00271.jpg office/sync_depth_00271.png 518.8579
office/rgb_00611.jpg office/sync_depth_00611.png 518.8579
office/rgb_00612.jpg office/sync_depth_00612.png 518.8579
office/rgb_00616.jpg office/sync_depth_00616.png 518.8579
office/rgb_00617.jpg office/sync_depth_00617.png 518.8579
office/rgb_00618.jpg office/sync_depth_00618.png 518.8579
office/rgb_00619.jpg office/sync_depth_00619.png 518.8579
office/rgb_00620.jpg office/sync_depth_00620.png 518.8579
office/rgb_00632.jpg office/sync_depth_00632.png 518.8579
office/rgb_00633.jpg office/sync_depth_00633.png 518.8579
office/rgb_00634.jpg office/sync_depth_00634.png 518.8579
office/rgb_00635.jpg office/sync_depth_00635.png 518.8579
office/rgb_00636.jpg office/sync_depth_00636.png 518.8579
office/rgb_00637.jpg office/sync_depth_00637.png 518.8579
office/rgb_00037.jpg office/sync_depth_00037.png 518.8579
office_kitchen/rgb_00410.jpg office_kitchen/sync_depth_00410.png 518.8579
office_kitchen/rgb_00411.jpg office_kitchen/sync_depth_00411.png 518.8579
office_kitchen/rgb_00412.jpg office_kitchen/sync_depth_00412.png 518.8579
office_kitchen/rgb_00413.jpg office_kitchen/sync_depth_00413.png 518.8579
playroom/rgb_00429.jpg playroom/sync_depth_00429.png 518.8579
playroom/rgb_00430.jpg playroom/sync_depth_00430.png 518.8579
playroom/rgb_00431.jpg playroom/sync_depth_00431.png 518.8579
playroom/rgb_00432.jpg playroom/sync_depth_00432.png 518.8579
playroom/rgb_00433.jpg playroom/sync_depth_00433.png 518.8579
playroom/rgb_00434.jpg playroom/sync_depth_00434.png 518.8579
playroom/rgb_00440.jpg playroom/sync_depth_00440.png 518.8579
playroom/rgb_00441.jpg playroom/sync_depth_00441.png 518.8579
playroom/rgb_00442.jpg playroom/sync_depth_00442.png 518.8579
playroom/rgb_00443.jpg playroom/sync_depth_00443.png 518.8579
playroom/rgb_00444.jpg playroom/sync_depth_00444.png 518.8579
playroom/rgb_00445.jpg playroom/sync_depth_00445.png 518.8579
playroom/rgb_00446.jpg playroom/sync_depth_00446.png 518.8579
playroom/rgb_00447.jpg playroom/sync_depth_00447.png 518.8579
reception_room/rgb_00461.jpg reception_room/sync_depth_00461.png 518.8579
reception_room/rgb_00462.jpg reception_room/sync_depth_00462.png 518.8579
reception_room/rgb_00463.jpg reception_room/sync_depth_00463.png 518.8579
reception_room/rgb_00464.jpg reception_room/sync_depth_00464.png 518.8579
reception_room/rgb_00465.jpg reception_room/sync_depth_00465.png 518.8579
study/rgb_00468.jpg study/sync_depth_00468.png 518.8579
study/rgb_00469.jpg study/sync_depth_00469.png 518.8579
study/rgb_00470.jpg study/sync_depth_00470.png 518.8579
study/rgb_00471.jpg study/sync_depth_00471.png 518.8579
study/rgb_00472.jpg study/sync_depth_00472.png 518.8579
study/rgb_00473.jpg study/sync_depth_00473.png 518.8579
study/rgb_00474.jpg study/sync_depth_00474.png 518.8579
study/rgb_00475.jpg study/sync_depth_00475.png 518.8579
study/rgb_00476.jpg study/sync_depth_00476.png 518.8579
study/rgb_00643.jpg study/sync_depth_00643.png 518.8579
study/rgb_00644.jpg study/sync_depth_00644.png 518.8579
study_room/rgb_00272.jpg study_room/sync_depth_00272.png 518.8579
study_room/rgb_00278.jpg study_room/sync_depth_00278.png 518.8579
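Each line in these split files has three whitespace-separated fields: the RGB image path, the synchronized depth map path, and the camera focal length in pixels (518.8579 for NYU Depth v2). A minimal sketch of a parser for this format follows; the helper name and usage are illustrative, and the real loading logic lives in zoedepth/data/data_mono.py:

def read_split_file(path):
    # Parse a ZoeDepth-style split file into (rgb_path, depth_path, focal) tuples.
    samples = []
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) != 3:
                continue  # skip blank or malformed lines
            samples.append((parts[0], parts[1], float(parts[2])))
    return samples

# e.g. samples = read_split_file("train_test_inputs/nyudepthv2_test_files_with_gt.txt")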
ZoeDepth/train_test_inputs/nyudepthv2_train_files_with_gt.txt
ADDED
The diff for this file is too large to render; see the raw diff.
ZoeDepth/ui/app.py
ADDED
@@ -0,0 +1,66 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import gradio as gr
import torch

from .gradio_depth_pred import create_demo as create_depth_pred_demo
from .gradio_im_to_3d import create_demo as create_im_to_3d_demo
from .gradio_pano_to_3d import create_demo as create_pano_to_3d_demo


css = """
#img-display-container {
    max-height: 50vh;
}
#img-display-input {
    max-height: 40vh;
}
#img-display-output {
    max-height: 40vh;
}

"""
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).to(DEVICE).eval()

title = "# ZoeDepth"
description = """Official demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**.

ZoeDepth is a deep learning model for metric depth estimation from a single image.

Please refer to our [paper](https://arxiv.org/abs/2302.12288) or [github](https://github.com/isl-org/ZoeDepth) for more details."""

with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Tab("Depth Prediction"):
        create_depth_pred_demo(model)
    with gr.Tab("Image to 3D"):
        create_im_to_3d_demo(model)
    with gr.Tab("360 Panorama to 3D"):
        create_pano_to_3d_demo(model)

if __name__ == '__main__':
    demo.queue().launch()
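
For reference, the model load and single-image inference wired into this demo can also be exercised outside Gradio. A minimal sketch, assuming a local image file (the path below is a placeholder):

import torch
from PIL import Image

model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).eval()  # same hub entry as app.py
img = Image.open("example.jpg").convert("RGB")  # placeholder input image
depth = model.infer_pil(img)                    # 2D depth map, as used by the demo tabs above
print(depth.shape)
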
ZoeDepth/ui/gradio_depth_pred.py
ADDED
@@ -0,0 +1,52 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import gradio as gr
from zoedepth.utils.misc import colorize
from PIL import Image
import tempfile

def predict_depth(model, image):
    depth = model.infer_pil(image)
    return depth

def create_demo(model):
    gr.Markdown("### Depth Prediction demo")
    with gr.Row():
        input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto")
        depth_image = gr.Image(label="Depth Map", elem_id='img-display-output')
        raw_file = gr.File(label="16-bit raw depth, multiplier:256")
    submit = gr.Button("Submit")

    def on_submit(image):
        depth = predict_depth(model, image)
        colored_depth = colorize(depth, cmap='gray_r')
        tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
        raw_depth = Image.fromarray((depth*256).astype('uint16'))
        raw_depth.save(tmp.name)
        return [colored_depth, tmp.name]

    submit.click(on_submit, inputs=[input_image], outputs=[depth_image, raw_file])
    # examples = gr.Examples(examples=["examples/person_1.jpeg", "examples/person_2.jpeg", "examples/person-leaves.png", "examples/living-room.jpeg"],
    #                        inputs=[input_image])
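
The raw_file output above is a 16-bit PNG holding depth scaled by 256, as its label "16-bit raw depth, multiplier:256" states. A minimal sketch of reading it back into metric depth (the file name is a placeholder):

import numpy as np
from PIL import Image

raw = np.asarray(Image.open("raw_depth.png"), dtype=np.uint16)  # placeholder path
depth = raw.astype(np.float32) / 256.0                          # undo the x256 multiplier used when saving
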
ZoeDepth/ui/gradio_im_to_3d.py
ADDED
@@ -0,0 +1,93 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import gradio as gr
import numpy as np
import trimesh
from zoedepth.utils.geometry import depth_to_points, create_triangles
from functools import partial
import tempfile


def depth_edges_mask(depth):
    """Returns a mask of edges in the depth map.
    Args:
    depth: 2D numpy array of shape (H, W) with dtype float32.
    Returns:
    mask: 2D numpy array of shape (H, W) with dtype bool.
    """
    # Compute the x and y gradients of the depth map.
    depth_dx, depth_dy = np.gradient(depth)
    # Compute the gradient magnitude.
    depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2)
    # Compute the edge mask.
    mask = depth_grad > 0.05
    return mask


def predict_depth(model, image):
    depth = model.infer_pil(image)
    return depth

def get_mesh(model, image, keep_edges=False):
    image.thumbnail((1024,1024))  # limit the size of the input image
    depth = predict_depth(model, image)
    pts3d = depth_to_points(depth[None])
    pts3d = pts3d.reshape(-1, 3)

    # Create a trimesh mesh from the points
    # Each pixel is connected to its 4 neighbors
    # colors are the RGB values of the image

    verts = pts3d.reshape(-1, 3)
    image = np.array(image)
    if keep_edges:
        triangles = create_triangles(image.shape[0], image.shape[1])
    else:
        triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth))
    colors = image.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # Save as glb
    glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
    glb_path = glb_file.name
    mesh.export(glb_path)
    return glb_path

def create_demo(model):

    gr.Markdown("### Image to 3D mesh")
    gr.Markdown("Convert a single 2D image to a 3D mesh")

    with gr.Row():
        image = gr.Image(label="Input Image", type='pil')
        result = gr.Model3D(label="3d mesh reconstruction", clear_color=[
                            1.0, 1.0, 1.0, 1.0])

    checkbox = gr.Checkbox(label="Keep occlusion edges", value=False)
    submit = gr.Button("Submit")
    submit.click(partial(get_mesh, model), inputs=[image, checkbox], outputs=[result])
    # examples = gr.Examples(examples=["examples/aerial_beach.jpeg", "examples/mountains.jpeg", "examples/person_1.jpeg", "examples/ancient-carved.jpeg"],
    #                        inputs=[image])
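
depth_edges_mask above flags pixels whose depth-gradient magnitude exceeds 0.05, which is how occlusion edges are dropped from the mesh when "Keep occlusion edges" is off. A small self-contained check of that rule on a synthetic step-edge depth map:

import numpy as np

depth = np.ones((4, 6), dtype=np.float32)
depth[:, 3:] = 2.0                      # depth discontinuity between columns 2 and 3
dx, dy = np.gradient(depth)
edge_mask = np.sqrt(dx**2 + dy**2) > 0.05  # same threshold as depth_edges_mask
print(edge_mask)                        # True only around the step, False in the flat regions
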
ZoeDepth/ui/gradio_pano_to_3d.py
ADDED
@@ -0,0 +1,120 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import gradio as gr
import numpy as np
import trimesh
from zoedepth.utils.geometry import create_triangles
from functools import partial
import tempfile

def depth_edges_mask(depth):
    """Returns a mask of edges in the depth map.
    Args:
    depth: 2D numpy array of shape (H, W) with dtype float32.
    Returns:
    mask: 2D numpy array of shape (H, W) with dtype bool.
    """
    # Compute the x and y gradients of the depth map.
    depth_dx, depth_dy = np.gradient(depth)
    # Compute the gradient magnitude.
    depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2)
    # Compute the edge mask.
    mask = depth_grad > 0.05
    return mask


def pano_depth_to_world_points(depth):
    """
    360 depth to world points
    given 2D depth is an equirectangular projection of a spherical image
    Treat depth as radius

    longitude : -pi to pi
    latitude : -pi/2 to pi/2
    """

    # Convert depth to radius
    radius = depth.flatten()

    lon = np.linspace(-np.pi, np.pi, depth.shape[1])
    lat = np.linspace(-np.pi/2, np.pi/2, depth.shape[0])

    lon, lat = np.meshgrid(lon, lat)
    lon = lon.flatten()
    lat = lat.flatten()

    # Convert to cartesian coordinates
    x = radius * np.cos(lat) * np.cos(lon)
    y = radius * np.cos(lat) * np.sin(lon)
    z = radius * np.sin(lat)

    pts3d = np.stack([x, y, z], axis=1)

    return pts3d


def predict_depth(model, image):
    depth = model.infer_pil(image)
    return depth

def get_mesh(model, image, keep_edges=False):
    image.thumbnail((1024,1024))  # limit the size of the image
    depth = predict_depth(model, image)
    pts3d = pano_depth_to_world_points(depth)

    # Create a trimesh mesh from the points
    # Each pixel is connected to its 4 neighbors
    # colors are the RGB values of the image

    verts = pts3d.reshape(-1, 3)
    image = np.array(image)
    if keep_edges:
        triangles = create_triangles(image.shape[0], image.shape[1])
    else:
        triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth))
    colors = image.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # Save as glb
    glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
    glb_path = glb_file.name
    mesh.export(glb_path)
    return glb_path

def create_demo(model):
    gr.Markdown("### Panorama to 3D mesh")
    gr.Markdown("Convert a 360 spherical panorama to a 3D mesh")
    gr.Markdown("ZoeDepth was not trained on panoramic images. It doesn't know anything about panoramas or spherical projection. Here, we just treat the estimated depth as radius and some projection errors are expected. Nonetheless, ZoeDepth still works surprisingly well on 360 reconstruction.")

    with gr.Row():
        input_image = gr.Image(label="Input Image", type='pil')
        result = gr.Model3D(label="3d mesh reconstruction", clear_color=[
                            1.0, 1.0, 1.0, 1.0])

    checkbox = gr.Checkbox(label="Keep occlusion edges", value=True)
    submit = gr.Button("Submit")
    submit.click(partial(get_mesh, model), inputs=[input_image, checkbox], outputs=[result])
    # examples = gr.Examples(examples=["examples/pano_1.jpeg", "examples/pano_2.jpeg", "examples/pano_3.jpeg"],
    #                        inputs=[input_image])
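
pano_depth_to_world_points above treats each equirectangular pixel's depth as a radius r and maps it to x = r*cos(lat)*cos(lon), y = r*cos(lat)*sin(lon), z = r*sin(lat). A quick sanity check with a constant-depth panorama, where every point should land on the unit sphere:

import numpy as np

h, w = 4, 8
lon, lat = np.meshgrid(np.linspace(-np.pi, np.pi, w), np.linspace(-np.pi/2, np.pi/2, h))
r = np.ones(h * w, dtype=np.float32)               # constant depth (radius) of 1
x = r * np.cos(lat.ravel()) * np.cos(lon.ravel())
y = r * np.cos(lat.ravel()) * np.sin(lon.ravel())
z = r * np.sin(lat.ravel())
pts = np.stack([x, y, z], axis=1)
print(np.allclose(np.linalg.norm(pts, axis=1), 1.0))  # True: all points lie on the unit sphere
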
ZoeDepth/ui/ui_requirements.txt
ADDED
@@ -0,0 +1,2 @@
gradio
trimesh==3.9.42
ZoeDepth/zoedepth/data/__init__.py
ADDED
@@ -0,0 +1,24 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat
ZoeDepth/zoedepth/data/data_mono.py
ADDED
@@ -0,0 +1,573 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

# This file is partly inspired from BTS (https://github.com/cleinc/bts/blob/master/pytorch/bts_dataloader.py); author: Jin Han Lee

import itertools
import os
import random

import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.utils.data.distributed
from zoedepth.utils.easydict import EasyDict as edict
from PIL import Image, ImageOps
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

from zoedepth.utils.config import change_dataset

from .ddad import get_ddad_loader
from .diml_indoor_test import get_diml_indoor_loader
from .diml_outdoor_test import get_diml_outdoor_loader
from .diode import get_diode_loader
from .hypersim import get_hypersim_loader
from .ibims import get_ibims_loader
from .sun_rgbd_loader import get_sunrgbd_loader
from .vkitti import get_vkitti_loader
from .vkitti2 import get_vkitti2_loader

from .preprocess import CropParams, get_white_border, get_black_border


def _is_pil_image(img):
    return isinstance(img, Image.Image)


def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def preprocessing_transforms(mode, **kwargs):
    return transforms.Compose([
        ToTensor(mode=mode, **kwargs)
    ])


class DepthDataLoader(object):
    def __init__(self, config, mode, device='cpu', transform=None, **kwargs):
        """
        Data loader for depth datasets

        Args:
            config (dict): Config dictionary. Refer to utils/config.py
            mode (str): "train" or "online_eval"
            device (str, optional): Device to load the data on. Defaults to 'cpu'.
            transform (torchvision.transforms, optional): Transform to apply to the data. Defaults to None.
        """

        self.config = config

        if config.dataset == 'ibims':
            self.data = get_ibims_loader(config, batch_size=1, num_workers=1)
            return

        if config.dataset == 'sunrgbd':
            self.data = get_sunrgbd_loader(
                data_dir_root=config.sunrgbd_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'diml_indoor':
            self.data = get_diml_indoor_loader(
                data_dir_root=config.diml_indoor_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'diml_outdoor':
            self.data = get_diml_outdoor_loader(
                data_dir_root=config.diml_outdoor_root, batch_size=1, num_workers=1)
            return

        if "diode" in config.dataset:
            self.data = get_diode_loader(
                config[config.dataset+"_root"], batch_size=1, num_workers=1)
            return

        if config.dataset == 'hypersim_test':
            self.data = get_hypersim_loader(
                config.hypersim_test_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'vkitti':
            self.data = get_vkitti_loader(
                config.vkitti_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'vkitti2':
            self.data = get_vkitti2_loader(
                config.vkitti2_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'ddad':
            self.data = get_ddad_loader(config.ddad_root, resize_shape=(
                352, 1216), batch_size=1, num_workers=1)
            return

        img_size = self.config.get("img_size", None)
        img_size = img_size if self.config.get(
            "do_input_resize", False) else None

        if transform is None:
            transform = preprocessing_transforms(mode, size=img_size)

        if mode == 'train':

            Dataset = DataLoadPreprocess
            self.training_samples = Dataset(
                config, mode, transform=transform, device=device)

            if config.distributed:
                self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                    self.training_samples)
            else:
                self.train_sampler = None

            self.data = DataLoader(self.training_samples,
                                   batch_size=config.batch_size,
                                   shuffle=(self.train_sampler is None),
                                   num_workers=config.workers,
                                   pin_memory=True,
                                   persistent_workers=True,
                                   # prefetch_factor=2,
                                   sampler=self.train_sampler)

        elif mode == 'online_eval':
            self.testing_samples = DataLoadPreprocess(
                config, mode, transform=transform)
            if config.distributed:  # redundant. here only for readability and to be more explicit
                # Give whole test set to all processes (and report evaluation only on one) regardless
                self.eval_sampler = None
            else:
                self.eval_sampler = None
            self.data = DataLoader(self.testing_samples, 1,
                                   shuffle=kwargs.get("shuffle_test", False),
                                   num_workers=1,
                                   pin_memory=False,
                                   sampler=self.eval_sampler)

        elif mode == 'test':
            self.testing_samples = DataLoadPreprocess(
                config, mode, transform=transform)
            self.data = DataLoader(self.testing_samples,
                                   1, shuffle=False, num_workers=1)

        else:
            print(
                'mode should be one of \'train, test, online_eval\'. Got {}'.format(mode))


def repetitive_roundrobin(*iterables):
    """
    cycles through iterables but sample wise
    first yield first sample from first iterable then first sample from second iterable and so on
    then second sample from first iterable then second sample from second iterable and so on

    If one iterable is shorter than the others, it is repeated until all iterables are exhausted
    repetitive_roundrobin('ABC', 'D', 'EF') --> A D E B D F C D E
    """
    # Repetitive roundrobin
    iterables_ = [iter(it) for it in iterables]
    exhausted = [False] * len(iterables)
    while not all(exhausted):
        for i, it in enumerate(iterables_):
            try:
                yield next(it)
            except StopIteration:
                exhausted[i] = True
                iterables_[i] = itertools.cycle(iterables[i])
                # First elements may get repeated if one iterable is shorter than the others
                yield next(iterables_[i])


class RepetitiveRoundRobinDataLoader(object):
    def __init__(self, *dataloaders):
        self.dataloaders = dataloaders

    def __iter__(self):
        return repetitive_roundrobin(*self.dataloaders)

    def __len__(self):
        # First samples get repeated, thats why the plus one
        return len(self.dataloaders) * (max(len(dl) for dl in self.dataloaders) + 1)


class MixedNYUKITTI(object):
    def __init__(self, config, mode, device='cpu', **kwargs):
        config = edict(config)
        config.workers = config.workers // 2
        self.config = config
        nyu_conf = change_dataset(edict(config), 'nyu')
        kitti_conf = change_dataset(edict(config), 'kitti')

        # make nyu default for testing
        self.config = config = nyu_conf
        img_size = self.config.get("img_size", None)
        img_size = img_size if self.config.get(
            "do_input_resize", False) else None
        if mode == 'train':
            nyu_loader = DepthDataLoader(
                nyu_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data
            kitti_loader = DepthDataLoader(
                kitti_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data
            # It has been changed to repetitive roundrobin
            self.data = RepetitiveRoundRobinDataLoader(
                nyu_loader, kitti_loader)
        else:
            self.data = DepthDataLoader(nyu_conf, mode, device=device).data


def remove_leading_slash(s):
    if s[0] == '/' or s[0] == '\\':
        return s[1:]
    return s


class CachedReader:
    def __init__(self, shared_dict=None):
        if shared_dict:
            self._cache = shared_dict
        else:
            self._cache = {}

    def open(self, fpath):
        im = self._cache.get(fpath, None)
        if im is None:
            im = self._cache[fpath] = Image.open(fpath)
        return im


class ImReader:
    def __init__(self):
        pass

    # @cache
    def open(self, fpath):
        return Image.open(fpath)


class DataLoadPreprocess(Dataset):
    def __init__(self, config, mode, transform=None, is_for_online_eval=False, **kwargs):
        self.config = config
        if mode == 'online_eval':
            with open(config.filenames_file_eval, 'r') as f:
                self.filenames = f.readlines()
        else:
            with open(config.filenames_file, 'r') as f:
                self.filenames = f.readlines()

        self.mode = mode
        self.transform = transform
        self.to_tensor = ToTensor(mode)
        self.is_for_online_eval = is_for_online_eval
        if config.use_shared_dict:
            self.reader = CachedReader(config.shared_dict)
        else:
            self.reader = ImReader()

    def postprocess(self, sample):
        return sample

    def __getitem__(self, idx):
        sample_path = self.filenames[idx]
        focal = float(sample_path.split()[2])
        sample = {}

        if self.mode == 'train':
            if self.config.dataset == 'kitti' and self.config.use_right and random.random() > 0.5:
                image_path = os.path.join(
                    self.config.data_path, remove_leading_slash(sample_path.split()[3]))
                depth_path = os.path.join(
                    self.config.gt_path, remove_leading_slash(sample_path.split()[4]))
            else:
                image_path = os.path.join(
                    self.config.data_path, remove_leading_slash(sample_path.split()[0]))
                depth_path = os.path.join(
                    self.config.gt_path, remove_leading_slash(sample_path.split()[1]))

            image = self.reader.open(image_path)
            depth_gt = self.reader.open(depth_path)
            w, h = image.size

            if self.config.do_kb_crop:
                height = image.height
                width = image.width
                top_margin = int(height - 352)
                left_margin = int((width - 1216) / 2)
                depth_gt = depth_gt.crop(
                    (left_margin, top_margin, left_margin + 1216, top_margin + 352))
                image = image.crop(
                    (left_margin, top_margin, left_margin + 1216, top_margin + 352))

            # Avoid blank boundaries due to pixel registration?
            # Train images have white border. Test images have black border.
            if self.config.dataset == 'nyu' and self.config.avoid_boundary:
                # print("Avoiding Blank Boundaries!")
                # We just crop and pad again with reflect padding to original size
                # original_size = image.size
                crop_params = get_white_border(np.array(image, dtype=np.uint8))
                image = image.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))
                depth_gt = depth_gt.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))

                # Use reflect padding to fill the blank
                image = np.array(image)
                image = np.pad(image, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right), (0, 0)), mode='reflect')
                image = Image.fromarray(image)

                depth_gt = np.array(depth_gt)
                depth_gt = np.pad(depth_gt, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right)), 'constant', constant_values=0)
                depth_gt = Image.fromarray(depth_gt)


            if self.config.do_random_rotate and (self.config.aug):
                random_angle = (random.random() - 0.5) * 2 * self.config.degree
                image = self.rotate_image(image, random_angle)
                depth_gt = self.rotate_image(
                    depth_gt, random_angle, flag=Image.NEAREST)

            image = np.asarray(image, dtype=np.float32) / 255.0
            depth_gt = np.asarray(depth_gt, dtype=np.float32)
            depth_gt = np.expand_dims(depth_gt, axis=2)

            if self.config.dataset == 'nyu':
                depth_gt = depth_gt / 1000.0
            else:
                depth_gt = depth_gt / 256.0

            if self.config.aug and (self.config.random_crop):
                image, depth_gt = self.random_crop(
                    image, depth_gt, self.config.input_height, self.config.input_width)

            if self.config.aug and self.config.random_translate:
                # print("Random Translation!")
                image, depth_gt = self.random_translate(image, depth_gt, self.config.max_translation)

            image, depth_gt = self.train_preprocess(image, depth_gt)
            mask = np.logical_and(depth_gt > self.config.min_depth,
                                  depth_gt < self.config.max_depth).squeeze()[None, ...]
            sample = {'image': image, 'depth': depth_gt, 'focal': focal,
                      'mask': mask, **sample}

        else:
            if self.mode == 'online_eval':
                data_path = self.config.data_path_eval
            else:
                data_path = self.config.data_path

            image_path = os.path.join(
                data_path, remove_leading_slash(sample_path.split()[0]))
            image = np.asarray(self.reader.open(image_path),
                               dtype=np.float32) / 255.0

            if self.mode == 'online_eval':
                gt_path = self.config.gt_path_eval
                depth_path = os.path.join(
                    gt_path, remove_leading_slash(sample_path.split()[1]))
                has_valid_depth = False
                try:
                    depth_gt = self.reader.open(depth_path)
                    has_valid_depth = True
                except IOError:
                    depth_gt = False
                    # print('Missing gt for {}'.format(image_path))

                if has_valid_depth:
                    depth_gt = np.asarray(depth_gt, dtype=np.float32)
                    depth_gt = np.expand_dims(depth_gt, axis=2)
                    if self.config.dataset == 'nyu':
                        depth_gt = depth_gt / 1000.0
                    else:
                        depth_gt = depth_gt / 256.0

                    mask = np.logical_and(
                        depth_gt >= self.config.min_depth, depth_gt <= self.config.max_depth).squeeze()[None, ...]
                else:
                    mask = False

            if self.config.do_kb_crop:
                height = image.shape[0]
                width = image.shape[1]
                top_margin = int(height - 352)
                left_margin = int((width - 1216) / 2)
                image = image[top_margin:top_margin + 352,
                              left_margin:left_margin + 1216, :]
                if self.mode == 'online_eval' and has_valid_depth:
                    depth_gt = depth_gt[top_margin:top_margin +
                                        352, left_margin:left_margin + 1216, :]

            if self.mode == 'online_eval':
                sample = {'image': image, 'depth': depth_gt, 'focal': focal, 'has_valid_depth': has_valid_depth,
                          'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1],
                          'mask': mask}
            else:
                sample = {'image': image, 'focal': focal}

        if (self.mode == 'train') or ('has_valid_depth' in sample and sample['has_valid_depth']):
            mask = np.logical_and(depth_gt > self.config.min_depth,
                                  depth_gt < self.config.max_depth).squeeze()[None, ...]
            sample['mask'] = mask

        if self.transform:
            sample = self.transform(sample)

        sample = self.postprocess(sample)
        sample['dataset'] = self.config.dataset
        sample = {**sample, 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1]}

        return sample

    def rotate_image(self, image, angle, flag=Image.BILINEAR):
        result = image.rotate(angle, resample=flag)
        return result

    def random_crop(self, img, depth, height, width):
        assert img.shape[0] >= height
        assert img.shape[1] >= width
        assert img.shape[0] == depth.shape[0]
        assert img.shape[1] == depth.shape[1]
        x = random.randint(0, img.shape[1] - width)
        y = random.randint(0, img.shape[0] - height)
        img = img[y:y + height, x:x + width, :]
        depth = depth[y:y + height, x:x + width, :]

        return img, depth

    def random_translate(self, img, depth, max_t=20):
        assert img.shape[0] == depth.shape[0]
        assert img.shape[1] == depth.shape[1]
        p = self.config.translate_prob
        do_translate = random.random()
        if do_translate > p:
            return img, depth
        x = random.randint(-max_t, max_t)
        y = random.randint(-max_t, max_t)
        M = np.float32([[1, 0, x], [0, 1, y]])
        # print(img.shape, depth.shape)
        img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
        depth = cv2.warpAffine(depth, M, (depth.shape[1], depth.shape[0]))
        depth = depth.squeeze()[..., None]  # add channel dim back. Affine warp removes it
        # print("after", img.shape, depth.shape)
        return img, depth

    def train_preprocess(self, image, depth_gt):
        if self.config.aug:
            # Random flipping
            do_flip = random.random()
            if do_flip > 0.5:
                image = (image[:, ::-1, :]).copy()
                depth_gt = (depth_gt[:, ::-1, :]).copy()

            # Random gamma, brightness, color augmentation
            do_augment = random.random()
            if do_augment > 0.5:
                image = self.augment_image(image)

        return image, depth_gt

    def augment_image(self, image):
        # gamma augmentation
        gamma = random.uniform(0.9, 1.1)
        image_aug = image ** gamma

        # brightness augmentation
        if self.config.dataset == 'nyu':
            brightness = random.uniform(0.75, 1.25)
        else:
            brightness = random.uniform(0.9, 1.1)
        image_aug = image_aug * brightness

        # color augmentation
        colors = np.random.uniform(0.9, 1.1, size=3)
        white = np.ones((image.shape[0], image.shape[1]))
        color_image = np.stack([white * colors[i] for i in range(3)], axis=2)
        image_aug *= color_image
        image_aug = np.clip(image_aug, 0, 1)

        return image_aug

    def __len__(self):
        return len(self.filenames)


class ToTensor(object):
    def __init__(self, mode, do_normalize=False, size=None):
        self.mode = mode
        self.normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if do_normalize else nn.Identity()
        self.size = size
        if size is not None:
            self.resize = transforms.Resize(size=size)
        else:
            self.resize = nn.Identity()

    def __call__(self, sample):
        image, focal = sample['image'], sample['focal']
        image = self.to_tensor(image)
        image = self.normalize(image)
        image = self.resize(image)

        if self.mode == 'test':
            return {'image': image, 'focal': focal}

        depth = sample['depth']
        if self.mode == 'train':
            depth = self.to_tensor(depth)
            return {**sample, 'image': image, 'depth': depth, 'focal': focal}
        else:
            has_valid_depth = sample['has_valid_depth']
            image = self.resize(image)
            return {**sample, 'image': image, 'depth': depth, 'focal': focal, 'has_valid_depth': has_valid_depth,
                    'image_path': sample['image_path'], 'depth_path': sample['depth_path']}

    def to_tensor(self, pic):
        if not (_is_pil_image(pic) or _is_numpy_image(pic)):
            raise TypeError(
                'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
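
As a usage note for the mixing logic above: repetitive_roundrobin interleaves loaders sample-wise and cycles any loader that runs out early, so a few repeated samples can appear once the shorter iterables are exhausted (which is also why __len__ adds one). A small check using the function defined above:

mixed = list(repetitive_roundrobin('ABC', 'D', 'EF'))
print(mixed)  # begins A, D, E, B, D, F, ... with the short iterables 'D' and 'EF' recycled
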
ZoeDepth/zoedepth/data/ddad.py
ADDED
@@ -0,0 +1,117 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self, resize_shape):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x
        self.resize = transforms.Resize(resize_shape)

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "ddad"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()

        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DDAD(Dataset):
    def __init__(self, data_dir_root, resize_shape):
        import glob

        # image paths are of the form <data_dir_root>/{outleft, depthmap}/*.png
        self.image_files = glob.glob(os.path.join(data_dir_root, '*.png'))
        self.depth_files = [r.replace("_rgb.png", "_depth.npy")
                            for r in self.image_files]
        self.transform = ToTensor(resize_shape)

    def __getitem__(self, idx):

        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.load(depth_path)  # meters

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs):
    dataset = DDAD(data_dir_root, resize_shape)
    return DataLoader(dataset, batch_size, **kwargs)
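
A minimal usage sketch for this loader, mirroring how data_mono.py above constructs it with a (352, 1216) resize (the data directory below is a placeholder):

loader = get_ddad_loader("path/to/ddad_val", resize_shape=(352, 1216), batch_size=1)
batch = next(iter(loader))
print(batch["image"].shape, batch["depth"].shape)  # image resized to 352x1216; depth kept at native resolution
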
ZoeDepth/zoedepth/data/diml_indoor_test.py
ADDED
@@ -0,0 +1,125 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "diml_indoor"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DIML_Indoor(Dataset):
    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/{HR, LR}/<scene>/{color, depth_filled}/*.png
        self.image_files = glob.glob(os.path.join(
            data_dir_root, "LR", '*', 'color', '*.png'))
        self.depth_files = [r.replace("color", "depth_filled").replace(
            "_c.png", "_depth_filled.png") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path),
                           dtype='uint16') / 1000.0  # mm to meters

        # print(np.shape(image))
        # print(np.shape(depth))

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)

        # return sample
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = DIML_Indoor(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)

# get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR")
# get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR")
ZoeDepth/zoedepth/data/diml_outdoor_test.py
ADDED
@@ -0,0 +1,114 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DIML_Outdoor(Dataset):
    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/{outleft, depthmap}/*.png
        self.image_files = glob.glob(os.path.join(
            data_dir_root, "*", 'outleft', '*.png'))
        self.depth_files = [r.replace("outleft", "depthmap")
                            for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path),
                           dtype='uint16') / 1000.0  # mm to meters

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth, dataset="diml_outdoor")

        # return sample
        return self.transform(sample)

    def __len__(self):
        return len(self.image_files)


def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = DIML_Outdoor(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)

# get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR")
# get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR")
ZoeDepth/zoedepth/data/diode.py
ADDED
@@ -0,0 +1,125 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x
        self.resize = transforms.Resize(480)

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "diode"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()

        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DIODE(Dataset):
    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/scene_#/scan_#/*.png
        self.image_files = glob.glob(
            os.path.join(data_dir_root, '*', '*', '*.png'))
        self.depth_files = [r.replace(".png", "_depth.npy")
                            for r in self.image_files]
        self.depth_mask_files = [
            r.replace(".png", "_depth_mask.npy") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]
        depth_mask_path = self.depth_mask_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.load(depth_path)  # in meters
        valid = np.load(depth_mask_path)  # binary

        # depth[depth > 8] = -1
        # depth = depth[..., None]

        sample = dict(image=image, depth=depth, valid=valid)

        # return sample
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_diode_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = DIODE(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)

# get_diode_loader(data_dir_root="datasets/diode/val/outdoor")
ZoeDepth/zoedepth/data/hypersim.py
ADDED
@@ -0,0 +1,138 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import glob
import os

import h5py
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


def hypersim_distance_to_depth(npyDistance):
    intWidth, intHeight, fltFocal = 1024, 768, 886.81

    npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape(
        1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None]
    npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5,
                                 intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None]
    npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32)
    npyImageplane = np.concatenate(
        [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2)

    npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal
    return npyDepth


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "hypersim"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class HyperSim(Dataset):
    def __init__(self, data_dir_root):
        # image paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.tonemap.jpg
        # depth paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.depth_meters.hdf5
        self.image_files = glob.glob(os.path.join(
            data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg'))
        self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace(
            ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0

        # depth from hdf5
        depth_fd = h5py.File(depth_path, "r")
        # in meters (Euclidean distance)
        distance_meters = np.array(depth_fd['dataset'])
        depth = hypersim_distance_to_depth(
            distance_meters)  # in meters (planar depth)

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = HyperSim(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)
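A small self-contained check of hypersim_distance_to_depth (illustrative only): for a constant ray-distance map at the Hypersim resolution of 768x1024, the planar depth can never exceed the Euclidean distance, and the two agree near the optical centre.

# Sketch: sanity-check the distance-to-planar-depth conversion defined above.
import numpy as np
from zoedepth.data.hypersim import hypersim_distance_to_depth

distance = np.full((768, 1024), 5.0, dtype=np.float32)  # constant 5 m ray distance
depth = hypersim_distance_to_depth(distance)

assert depth.shape == (768, 1024)
assert np.all(depth <= 5.0 + 1e-5)      # planar depth <= ray distance everywhere
print(depth[384, 512], depth[0, 0])      # ~5.0 at the centre, smaller at the corners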
ZoeDepth/zoedepth/data/ibims.py
ADDED
@@ -0,0 +1,81 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as T


class iBims(Dataset):
    def __init__(self, config):
        root_folder = config.ibims_root
        with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f:
            imglist = f.read().split()

        samples = []
        for basename in imglist:
            img_path = os.path.join(root_folder, 'rgb', basename + ".png")
            depth_path = os.path.join(root_folder, 'depth', basename + ".png")
            valid_mask_path = os.path.join(
                root_folder, 'mask_invalid', basename + ".png")
            transp_mask_path = os.path.join(
                root_folder, 'mask_transp', basename + ".png")

            samples.append(
                (img_path, depth_path, valid_mask_path, transp_mask_path))

        self.samples = samples
        # self.normalize = T.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x

    def __getitem__(self, idx):
        img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx]

        img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path),
                           dtype=np.uint16).astype('float') * 50.0 / 65535

        mask_valid = np.asarray(Image.open(valid_mask_path))
        mask_transp = np.asarray(Image.open(transp_mask_path))

        # depth = depth * mask_valid * mask_transp
        depth = np.where(mask_valid * mask_transp, depth, -1)

        img = torch.from_numpy(img).permute(2, 0, 1)
        img = self.normalize(img)
        depth = torch.from_numpy(depth).unsqueeze(0)
        return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims')

    def __len__(self):
        return len(self.samples)


def get_ibims_loader(config, batch_size=1, **kwargs):
    dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs)
    return dataloader
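Unlike the other loaders, get_ibims_loader takes a config object exposing an ibims_root attribute rather than a bare path. A minimal sketch (SimpleNamespace only stands in for the project's config object, and the path is a placeholder):

# Sketch: the iBims-1 loader above reads config.ibims_root, so any attribute
# container works here. The dataset path below is hypothetical.
from types import SimpleNamespace
from zoedepth.data.ibims import get_ibims_loader

config = SimpleNamespace(ibims_root="datasets/ibims/ibims1_core_raw")
loader = get_ibims_loader(config, batch_size=1)

sample = next(iter(loader))
print(sample["image"].shape, sample["depth"].shape)  # (1, 3, H, W), (1, 1, H, W)
print(sample["dataset"])  # ['ibims']; invalid or transparent pixels carry depth -1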
ZoeDepth/zoedepth/data/preprocess.py
ADDED
@@ -0,0 +1,154 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import numpy as np
from dataclasses import dataclass
from typing import Tuple, List

# dataclass to store the crop parameters
@dataclass
class CropParams:
    top: int
    bottom: int
    left: int
    right: int


def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams:
    gray_image = np.mean(rgb_image, axis=channel_axis)
    h, w = gray_image.shape

    def num_value_pixels(arr):
        return np.sum(np.abs(arr - value) < level_diff_threshold)

    def is_above_tolerance(arr, total_pixels):
        return (num_value_pixels(arr) / total_pixels) > tolerance

    # Crop top border until number of value pixels become below tolerance
    top = min_border
    while is_above_tolerance(gray_image[top, :], w) and top < h - 1:
        top += 1
        if top > cut_off:
            break

    # Crop bottom border until number of value pixels become below tolerance
    bottom = h - min_border
    while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0:
        bottom -= 1
        if h - bottom > cut_off:
            break

    # Crop left border until number of value pixels become below tolerance
    left = min_border
    while is_above_tolerance(gray_image[:, left], h) and left < w - 1:
        left += 1
        if left > cut_off:
            break

    # Crop right border until number of value pixels become below tolerance
    right = w - min_border
    while is_above_tolerance(gray_image[:, right], h) and right > 0:
        right -= 1
        if w - right > cut_off:
            break

    return CropParams(top, bottom, left, right)


def get_white_border(rgb_image, value=255, **kwargs) -> CropParams:
    """Crops the white border of the RGB.

    Args:
        rgb: RGB image, shape (H, W, 3).
    Returns:
        Crop parameters.
    """
    if value == 255:
        # assert range of values in rgb image is [0, 255]
        assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]."
        assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]."
    elif value == 1:
        # assert range of values in rgb image is [0, 1]
        assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]."

    return get_border_params(rgb_image, value=value, **kwargs)


def get_black_border(rgb_image, **kwargs) -> CropParams:
    """Crops the black border of the RGB.

    Args:
        rgb: RGB image, shape (H, W, 3).

    Returns:
        Crop parameters.
    """
    return get_border_params(rgb_image, value=0, **kwargs)


def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray:
    """Crops the image according to the crop parameters.

    Args:
        image: RGB or depth image, shape (H, W, 3) or (H, W).
        crop_params: Crop parameters.

    Returns:
        Cropped image.
    """
    return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right]


def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]:
    """Crops the images according to the crop parameters.

    Args:
        images: RGB or depth images, shape (H, W, 3) or (H, W).
        crop_params: Crop parameters.

    Returns:
        Cropped images.
    """
    return tuple(crop_image(image, crop_params) for image in images)


def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]:
    """Crops the white and black border of the RGB and depth images.

    Args:
        rgb: RGB image, shape (H, W, 3). This image is used to determine the border.
        other_images: The other images to crop according to the border of the RGB image.
    Returns:
        Cropped RGB and other images.
    """
    # crop black border
    crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
    cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params)

    # crop white border
    crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
    cropped_images = crop_images(*cropped_images, crop_params=crop_params)

    return cropped_images
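A short sketch of the border-cropping helpers above, applied to a synthetic image with a black frame (illustrative only; the 20-pixel border and image size are arbitrary):

# Sketch: crop_black_or_white_border trims a uniform dark or bright frame from
# the RGB and applies the same crop to any aligned maps passed alongside it.
import numpy as np
from zoedepth.data.preprocess import crop_black_or_white_border

rgb = np.zeros((480, 640, 3), dtype=np.uint8)
rgb[20:-20, 20:-20] = 128                    # non-border content
depth = np.random.rand(480, 640).astype(np.float32)

rgb_c, depth_c = crop_black_or_white_border(rgb, depth)
print(rgb.shape, "->", rgb_c.shape)          # border rows/columns are trimmed
print(depth_c.shape)                          # cropped with the same parameters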
ZoeDepth/zoedepth/data/sun_rgbd_loader.py
ADDED
@@ -0,0 +1,106 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "sunrgbd"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class SunRGBD(Dataset):
    def __init__(self, data_dir_root):
        # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze()
        # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs]
        # self.all_test = [os.path.join(data_dir_root, t) for t in all_test]
        import glob
        self.image_files = glob.glob(
            os.path.join(data_dir_root, 'rgb', 'rgb', '*'))
        self.depth_files = [
            r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0
        depth[depth > 8] = -1
        depth = depth[..., None]
        return self.transform(dict(image=image, depth=depth))

    def __len__(self):
        return len(self.image_files)


def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = SunRGBD(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)
ZoeDepth/zoedepth/data/transforms.py
ADDED
@@ -0,0 +1,481 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import math
import random

import cv2
import numpy as np


class RandomFliplr(object):
    """Horizontal flip of the sample with given probability.
    """

    def __init__(self, probability=0.5):
        """Init.

        Args:
            probability (float, optional): Flip probability. Defaults to 0.5.
        """
        self.__probability = probability

    def __call__(self, sample):
        prob = random.random()

        if prob < self.__probability:
            for k, v in sample.items():
                if len(v.shape) >= 2:
                    sample[k] = np.fliplr(v).copy()

        return sample


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    Args:
        sample (dict): sample
        size (tuple): image size

    Returns:
        tuple: new size
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        return sample

    scale = [0, 0]
    scale[0] = size[0] / shape[0]
    scale[1] = size[1] / shape[1]

    scale = max(scale)

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
    )
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        tuple(shape[::-1]),
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class RandomCrop(object):
    """Get a random crop of the sample with the given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_if_needed=False,
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): output width
            height (int): output height
            resize_if_needed (bool, optional): If True, sample might be upsampled to ensure
                that a crop of size (width, height) is possible. Defaults to False.
        """
        self.__size = (height, width)
        self.__resize_if_needed = resize_if_needed
        self.__image_interpolation_method = image_interpolation_method

    def __call__(self, sample):

        shape = sample["disparity"].shape

        if self.__size[0] > shape[0] or self.__size[1] > shape[1]:
            if self.__resize_if_needed:
                shape = apply_min_size(
                    sample, self.__size, self.__image_interpolation_method
                )
            else:
                raise Exception(
                    "Output size {} bigger than input size {}.".format(
                        self.__size, shape
                    )
                )

        offset = (
            np.random.randint(shape[0] - self.__size[0] + 1),
            np.random.randint(shape[1] - self.__size[1] + 1),
        )

        for k, v in sample.items():
            if k == "code" or k == "basis":
                continue

            if len(sample[k].shape) >= 2:
                sample[k] = v[
                    offset[0]: offset[0] + self.__size[0],
                    offset[1]: offset[1] + self.__size[1],
                ]

        return sample


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
        letter_box=False,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method
        self.__letter_box = letter_box

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def make_letter_box(self, sample):
        top = bottom = (self.__height - sample.shape[0]) // 2
        left = right = (self.__width - sample.shape[1]) // 2
        sample = cv2.copyMakeBorder(
            sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
        return sample

    def __call__(self, sample):
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__letter_box:
            sample["image"] = self.make_letter_box(sample["image"])

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample["disparity"] = self.make_letter_box(
                        sample["disparity"])

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
                )

                if self.__letter_box:
                    sample["depth"] = self.make_letter_box(sample["depth"])

            sample["mask"] = cv2.resize(
                sample["mask"].astype(np.float32),
                (width, height),
                interpolation=cv2.INTER_NEAREST,
            )

            if self.__letter_box:
                sample["mask"] = self.make_letter_box(sample["mask"])

            sample["mask"] = sample["mask"].astype(bool)

        return sample


class ResizeFixed(object):
    def __init__(self, size):
        self.__size = size

    def __call__(self, sample):
        sample["image"] = cv2.resize(
            sample["image"], self.__size[::-1], interpolation=cv2.INTER_LINEAR
        )

        sample["disparity"] = cv2.resize(
            sample["disparity"], self.__size[::-1], interpolation=cv2.INTER_NEAREST
        )

        sample["mask"] = cv2.resize(
            sample["mask"].astype(np.float32),
            self.__size[::-1],
            interpolation=cv2.INTER_NEAREST,
        )
        sample["mask"] = sample["mask"].astype(bool)

        return sample


class Rescale(object):
    """Rescale target values to the interval [0, max_val].
    If input is constant, values are set to max_val / 2.
    """

    def __init__(self, max_val=1.0, use_mask=True):
        """Init.

        Args:
            max_val (float, optional): Max output value. Defaults to 1.0.
            use_mask (bool, optional): Only operate on valid pixels (mask == True). Defaults to True.
        """
        self.__max_val = max_val
        self.__use_mask = use_mask

    def __call__(self, sample):
        disp = sample["disparity"]

        if self.__use_mask:
            mask = sample["mask"]
        else:
            # note: dtype=bool (np.bool was removed in recent NumPy releases)
            mask = np.ones_like(disp, dtype=bool)

        if np.sum(mask) == 0:
            return sample

        min_val = np.min(disp[mask])
        max_val = np.max(disp[mask])

        if max_val > min_val:
            sample["disparity"][mask] = (
                (disp[mask] - min_val) / (max_val - min_val) * self.__max_val
            )
        else:
            sample["disparity"][mask] = np.ones_like(
                disp[mask]) * self.__max_val / 2.0

        return sample


# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
class NormalizeImage(object):
    """Normalize image by given mean and std.
    """

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class DepthToDisparity(object):
    """Convert depth to disparity. Removes depth from sample.
    """

    def __init__(self, eps=1e-4):
        self.__eps = eps

    def __call__(self, sample):
        assert "depth" in sample

        sample["mask"][sample["depth"] < self.__eps] = False

        sample["disparity"] = np.zeros_like(sample["depth"])
        sample["disparity"][sample["depth"] >= self.__eps] = (
            1.0 / sample["depth"][sample["depth"] >= self.__eps]
        )

        del sample["depth"]

        return sample


class DisparityToDepth(object):
    """Convert disparity to depth. Removes disparity from sample.
    """

    def __init__(self, eps=1e-4):
        self.__eps = eps

    def __call__(self, sample):
        assert "disparity" in sample

        disp = np.abs(sample["disparity"])
        sample["mask"][disp < self.__eps] = False

        # print(sample["disparity"])
        # print(sample["mask"].sum())
        # exit()

        sample["depth"] = np.zeros_like(disp)
        sample["depth"][disp >= self.__eps] = (
            1.0 / disp[disp >= self.__eps]
        )

        del sample["disparity"]

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "disparity" in sample:
            disparity = sample["disparity"].astype(np.float32)
            sample["disparity"] = np.ascontiguousarray(disparity)

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        return sample
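The transforms above all operate on a dict-style sample, so they compose directly. A minimal sketch of a typical chain (the 384x384 target, the multiple-of-32 constraint, and the random sample are illustrative values, not ones prescribed by this commit):

# Sketch: chain the dict-based transforms above into one preprocessing step.
import numpy as np
from torchvision.transforms import Compose
from zoedepth.data.transforms import Resize, NormalizeImage, PrepareForNet

pipeline = Compose([
    Resize(384, 384, resize_target=True, keep_aspect_ratio=True,
           ensure_multiple_of=32, resize_method="lower_bound"),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),
])

sample = {
    "image": np.random.rand(480, 640, 3).astype(np.float32),
    "depth": np.random.rand(480, 640).astype(np.float32),
    "mask": np.ones((480, 640), dtype=bool),
}
out = pipeline(sample)
print(out["image"].shape, out["depth"].shape, out["mask"].dtype)  # CHW image, resized depth, float32 mask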
ZoeDepth/zoedepth/data/vkitti.py
ADDED
@@ -0,0 +1,151 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os

from PIL import Image
import numpy as np
import cv2


class ToTensor(object):
    def __init__(self):
        self.normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        # self.resize = transforms.Resize((375, 1242))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']

        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        # image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "vkitti"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class VKITTI(Dataset):
    def __init__(self, data_dir_root, do_kb_crop=True):
        import glob
        # image paths are of the form <data_dir_root>/{HR, LR}/<scene>/{color, depth_filled}/*.png
        self.image_files = glob.glob(os.path.join(
            data_dir_root, "test_color", '*.png'))
        self.depth_files = [r.replace("test_color", "test_depth")
                            for r in self.image_files]
        self.do_kb_crop = True
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = Image.open(image_path)
        depth = Image.open(depth_path)
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
                           cv2.IMREAD_ANYDEPTH)
        print("depth min max", depth.min(), depth.max())

        # print(np.shape(image))
        # print(np.shape(depth))

        # depth[depth > 8] = -1

        if self.do_kb_crop and False:
            height = image.height
            width = image.width
            top_margin = int(height - 352)
            left_margin = int((width - 1216) / 2)
            depth = depth.crop(
                (left_margin, top_margin, left_margin + 1216, top_margin + 352))
            image = image.crop(
                (left_margin, top_margin, left_margin + 1216, top_margin + 352))
            # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216]

        image = np.asarray(image, dtype=np.float32) / 255.0
        # depth = np.asarray(depth, dtype=np.uint16) /1.
        depth = depth[..., None]
        sample = dict(image=image, depth=depth)

        # return sample
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = VKITTI(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)


if __name__ == "__main__":
    loader = get_vkitti_loader(
        data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test")
    print("Total files", len(loader.dataset))
    for i, sample in enumerate(loader):
        print(sample["image"].shape)
        print(sample["depth"].shape)
        print(sample["dataset"])
        print(sample['depth'].min(), sample['depth'].max())
        if i > 5:
            break
ZoeDepth/zoedepth/data/vkitti2.py
ADDED
@@ -0,0 +1,187 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x
        # self.resize = transforms.Resize((375, 1242))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']

        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        # image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "vkitti"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class VKITTI2(Dataset):
    def __init__(self, data_dir_root, do_kb_crop=True, split="test"):
        import glob

        # image paths are of the form <data_dir_root>/rgb/<scene>/<variant>/frames/<rgb,depth>/Camera<0,1>/rgb_{}.jpg
        self.image_files = glob.glob(os.path.join(
            data_dir_root, "rgb", "**", "frames", "rgb", "Camera_0", '*.jpg'), recursive=True)
        self.depth_files = [r.replace("/rgb/", "/depth/").replace(
            "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
        self.do_kb_crop = True
        self.transform = ToTensor()

        # If train test split is not created, then create one.
        # Split is such that 8% of the frames from each scene are used for testing.
        if not os.path.exists(os.path.join(data_dir_root, "train.txt")):
            import random
            scenes = set([os.path.basename(os.path.dirname(
                os.path.dirname(os.path.dirname(f)))) for f in self.image_files])
            train_files = []
            test_files = []
            for scene in scenes:
                scene_files = [f for f in self.image_files if os.path.basename(
                    os.path.dirname(os.path.dirname(os.path.dirname(f)))) == scene]
                random.shuffle(scene_files)
                train_files.extend(scene_files[:int(len(scene_files) * 0.92)])
                test_files.extend(scene_files[int(len(scene_files) * 0.92):])
            with open(os.path.join(data_dir_root, "train.txt"), "w") as f:
                f.write("\n".join(train_files))
            with open(os.path.join(data_dir_root, "test.txt"), "w") as f:
                f.write("\n".join(test_files))

        if split == "train":
            with open(os.path.join(data_dir_root, "train.txt"), "r") as f:
                self.image_files = f.read().splitlines()
            self.depth_files = [r.replace("/rgb/", "/depth/").replace(
                "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
        elif split == "test":
            with open(os.path.join(data_dir_root, "test.txt"), "r") as f:
                self.image_files = f.read().splitlines()
            self.depth_files = [r.replace("/rgb/", "/depth/").replace(
                "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = Image.open(image_path)
        # depth = Image.open(depth_path)
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
                           cv2.IMREAD_ANYDEPTH) / 100.0  # cm to m
        depth = Image.fromarray(depth)
        # print("depth min max", depth.min(), depth.max())

        # print(np.shape(image))
        # print(np.shape(depth))

        if self.do_kb_crop:
            if idx == 0:
                print("Using KB input crop")
            height = image.height
            width = image.width
            top_margin = int(height - 352)
            left_margin = int((width - 1216) / 2)
            depth = depth.crop(
                (left_margin, top_margin, left_margin + 1216, top_margin + 352))
            image = image.crop(
                (left_margin, top_margin, left_margin + 1216, top_margin + 352))
            # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216]

        image = np.asarray(image, dtype=np.float32) / 255.0
        # depth = np.asarray(depth, dtype=np.uint16) /1.
        depth = np.asarray(depth, dtype=np.float32) / 1.
        depth[depth > 80] = -1

        depth = depth[..., None]
        sample = dict(image=image, depth=depth)

        # return sample
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_vkitti2_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = VKITTI2(data_dir_root)
|
174 |
+
return DataLoader(dataset, batch_size, **kwargs)
|
175 |
+
|
176 |
+
|
177 |
+
if __name__ == "__main__":
|
178 |
+
loader = get_vkitti2_loader(
|
179 |
+
data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti2")
|
180 |
+
print("Total files", len(loader.dataset))
|
181 |
+
for i, sample in enumerate(loader):
|
182 |
+
print(sample["image"].shape)
|
183 |
+
print(sample["depth"].shape)
|
184 |
+
print(sample["dataset"])
|
185 |
+
print(sample['depth'].min(), sample['depth'].max())
|
186 |
+
if i > 5:
|
187 |
+
break
|
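For orientation, a minimal usage sketch of the loader defined above; the dataset path is a placeholder and the printed shapes assume the default KB crop (352x1216).

# Hypothetical usage (path is a placeholder, not part of the repo)
from zoedepth.data.vkitti2 import get_vkitti2_loader

loader = get_vkitti2_loader("/path/to/vkitti2", batch_size=1, num_workers=2)
batch = next(iter(loader))
print(batch["image"].shape)   # torch.Size([1, 3, 352, 1216])
print(batch["depth"].shape)   # torch.Size([1, 1, 352, 1216]), metres; invalid pixels set to -1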
ZoeDepth/zoedepth/models/__init__.py
ADDED
@@ -0,0 +1,24 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat
ZoeDepth/zoedepth/models/base_models/__init__.py
ADDED
@@ -0,0 +1,24 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat
ZoeDepth/zoedepth/models/base_models/midas.py
ADDED
@@ -0,0 +1,377 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn
import numpy as np
from torchvision.transforms import Normalize


def denormalize(x):
    """Reverses the imagenet normalization applied to the input.

    Args:
        x (torch.Tensor - shape(N,3,H,W)): input tensor

    Returns:
        torch.Tensor - shape(N,3,H,W): Denormalized input
    """
    mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
    std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
    return x * std + mean

def get_activation(name, bank):
    def hook(model, input, output):
        bank[name] = output
    return hook


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
    ):
        """Init.
        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        print("Params passed to Resize transform:")
        print("\twidth: ", width)
        print("\theight: ", height)
        print("\tresize_target: ", resize_target)
        print("\tkeep_aspect_ratio: ", keep_aspect_ratio)
        print("\tensure_multiple_of: ", ensure_multiple_of)
        print("\tresize_method: ", resize_method)

        self.__width = width
        self.__height = height

        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, x):
        width, height = self.get_size(*x.shape[-2:][::-1])
        return nn.functional.interpolate(x, (height, width), mode='bilinear', align_corners=True)

class PrepForMidas(object):
    def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True):
        if isinstance(img_size, int):
            img_size = (img_size, img_size)
        net_h, net_w = img_size
        self.normalization = Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=32, resize_method=resize_mode) \
            if do_resize else nn.Identity()

    def __call__(self, x):
        return self.normalization(self.resizer(x))


class MidasCore(nn.Module):
    def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True,
                 img_size=384, **kwargs):
        """Midas Base model used for multi-scale feature extraction.

        Args:
            midas (torch.nn.Module): Midas model.
            trainable (bool, optional): Train midas model. Defaults to False.
            fetch_features (bool, optional): Extract multi-scale features. Defaults to True.
            layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1').
            freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False.
            keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True.
            img_size (int, tuple, optional): Input resolution. Defaults to 384.
        """
        super().__init__()
        self.core = midas
        self.output_channels = None
        self.core_out = {}
        self.trainable = trainable
        self.fetch_features = fetch_features
        # midas.scratch.output_conv = nn.Identity()
        self.handles = []
        # self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1']
        self.layer_names = layer_names

        self.set_trainable(trainable)
        self.set_fetch_features(fetch_features)

        self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio,
                                 img_size=img_size, do_resize=kwargs.get('do_resize', True))

        if freeze_bn:
            self.freeze_bn()

    def set_trainable(self, trainable):
        self.trainable = trainable
        if trainable:
            self.unfreeze()
        else:
            self.freeze()
        return self

    def set_fetch_features(self, fetch_features):
        self.fetch_features = fetch_features
        if fetch_features:
            if len(self.handles) == 0:
                self.attach_hooks(self.core)
        else:
            self.remove_hooks()
        return self

    def freeze(self):
        for p in self.parameters():
            p.requires_grad = False
        self.trainable = False
        return self

    def unfreeze(self):
        for p in self.parameters():
            p.requires_grad = True
        self.trainable = True
        return self

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
        return self

    def forward(self, x, denorm=False, return_rel_depth=False):
        with torch.no_grad():
            if denorm:
                x = denormalize(x)
            x = self.prep(x)
        # print("Shape after prep: ", x.shape)

        with torch.set_grad_enabled(self.trainable):

            # print("Input size to Midascore", x.shape)
            rel_depth = self.core(x)
            # print("Output from midas shape", rel_depth.shape)
            if not self.fetch_features:
                return rel_depth
            out = [self.core_out[k] for k in self.layer_names]

            if return_rel_depth:
                return rel_depth, out
            return out

    def get_rel_pos_params(self):
        for name, p in self.core.pretrained.named_parameters():
            if "relative_position" in name:
                yield p

    def get_enc_params_except_rel_pos(self):
        for name, p in self.core.pretrained.named_parameters():
            if "relative_position" not in name:
                yield p

    def freeze_encoder(self, freeze_rel_pos=False):
        if freeze_rel_pos:
            for p in self.core.pretrained.parameters():
                p.requires_grad = False
        else:
            for p in self.get_enc_params_except_rel_pos():
                p.requires_grad = False
        return self

    def attach_hooks(self, midas):
        if len(self.handles) > 0:
            self.remove_hooks()
        if "out_conv" in self.layer_names:
            self.handles.append(list(midas.scratch.output_conv.children())[
                                3].register_forward_hook(get_activation("out_conv", self.core_out)))
        if "r4" in self.layer_names:
            self.handles.append(midas.scratch.refinenet4.register_forward_hook(
                get_activation("r4", self.core_out)))
        if "r3" in self.layer_names:
            self.handles.append(midas.scratch.refinenet3.register_forward_hook(
                get_activation("r3", self.core_out)))
        if "r2" in self.layer_names:
            self.handles.append(midas.scratch.refinenet2.register_forward_hook(
                get_activation("r2", self.core_out)))
        if "r1" in self.layer_names:
            self.handles.append(midas.scratch.refinenet1.register_forward_hook(
                get_activation("r1", self.core_out)))
        if "l4_rn" in self.layer_names:
            self.handles.append(midas.scratch.layer4_rn.register_forward_hook(
                get_activation("l4_rn", self.core_out)))

        return self

    def remove_hooks(self):
        for h in self.handles:
            h.remove()
        return self

    def __del__(self):
        self.remove_hooks()

    def set_output_channels(self, model_type):
        self.output_channels = MIDAS_SETTINGS[model_type]

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs):
        if midas_model_type not in MIDAS_SETTINGS:
            raise ValueError(
                f"Invalid model type: {midas_model_type}. Must be one of {list(MIDAS_SETTINGS.keys())}")
        if "img_size" in kwargs:
            kwargs = MidasCore.parse_img_size(kwargs)
        img_size = kwargs.pop("img_size", [384, 384])
        print("img_size", img_size)
        midas = torch.hub.load("intel-isl/MiDaS", midas_model_type,
                               pretrained=use_pretrained_midas, force_reload=force_reload)
        kwargs.update({'keep_aspect_ratio': force_keep_ar})
        midas_core = MidasCore(midas, trainable=train_midas, fetch_features=fetch_features,
                               freeze_bn=freeze_bn, img_size=img_size, **kwargs)
        midas_core.set_output_channels(midas_model_type)
        return midas_core

    @staticmethod
    def build_from_config(config):
        return MidasCore.build(**config)

    @staticmethod
    def parse_img_size(config):
        assert 'img_size' in config
        if isinstance(config['img_size'], str):
            assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W"
            config['img_size'] = list(map(int, config['img_size'].split(",")))
            assert len(
                config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W"
        elif isinstance(config['img_size'], int):
            config['img_size'] = [config['img_size'], config['img_size']]
        else:
            assert isinstance(config['img_size'], list) and len(
                config['img_size']) == 2, "img_size should be a list of H,W"
        return config


nchannels2models = {
    tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", "DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"],
    (512, 256, 128, 64, 64): ["MiDaS_small"]
}

# Model name to number of output channels
MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items()
                  for m in v
                  }
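A small sketch of building the MiDaS feature extractor on its own, assuming torch.hub can reach intel-isl/MiDaS; in normal use the ZoeDepth model builders call MidasCore.build for you, so treat this only as an illustration of the API above.

import torch
from zoedepth.models.base_models.midas import MidasCore

core = MidasCore.build(midas_model_type="DPT_BEiT_L_384", use_pretrained_midas=True,
                       train_midas=False, fetch_features=True, img_size=[384, 512])
feats = core(torch.rand(1, 3, 384, 512))      # list of hooked features, order = layer_names
print([f.shape for f in feats], core.output_channels)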
ZoeDepth/zoedepth/models/builder.py
ADDED
@@ -0,0 +1,51 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

from importlib import import_module
from zoedepth.models.depth_model import DepthModel

def build_model(config) -> DepthModel:
    """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface.
    This function should be used to construct models for training and evaluation.

    Args:
        config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.

    Returns:
        torch.nn.Module: Model corresponding to name and version as specified in config
    """
    module_name = f"zoedepth.models.{config.model}"
    try:
        module = import_module(module_name)
    except ModuleNotFoundError as e:
        # print the original error message
        print(e)
        raise ValueError(
            f"Model {config.model} not found. Refer above error for details.") from e
    try:
        get_version = getattr(module, "get_version")
    except AttributeError as e:
        raise ValueError(
            f"Model {config.model} has no get_version function.") from e
    return get_version(config.version_name).build_from_config(config)
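As a rough illustration of the dispatch above: a config whose model field is "zoedepth" resolves to the zoedepth.models.zoedepth package and its get_version factory. The get_config helper is assumed to come from this repo's zoedepth.utils.config; the mode string is illustrative.

from zoedepth.utils.config import get_config   # assumed helper from this repo
from zoedepth.models.builder import build_model

config = get_config("zoedepth", "infer")        # model name + mode; also carries version_name
model = build_model(config).eval()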
ZoeDepth/zoedepth/models/depth_model.py
ADDED
@@ -0,0 +1,152 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import PIL.Image
from PIL import Image
from typing import Union


class DepthModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.device = 'cpu'

    def to(self, device) -> nn.Module:
        self.device = device
        return super().to(device)

    def forward(self, x, *args, **kwargs):
        raise NotImplementedError

    def _infer(self, x: torch.Tensor):
        """
        Inference interface for the model
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        return self(x)['metric_depth']

    def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor:
        """
        Inference interface for the model with padding augmentation
        Padding augmentation fixes the boundary artifacts in the output depth map.
        Boundary artifacts are sometimes caused by the fact that the model is trained on NYU raw dataset which has a black or white border around the image.
        This augmentation pads the input image and crops the prediction back to the original size / view.

        Note: This augmentation is not required for the models trained with 'avoid_boundary'=True.
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
            pad_input (bool, optional): whether to pad the input or not. Defaults to True.
            fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3.
            fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3.
            upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'.
            padding_mode (str, optional): padding mode. Defaults to "reflect".
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        # assert x is nchw and c = 3
        assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim())
        assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1])

        if pad_input:
            assert fh > 0 or fw > 0, "at least one of fh and fw must be greater than 0"
            pad_h = int(np.sqrt(x.shape[2]/2) * fh)
            pad_w = int(np.sqrt(x.shape[3]/2) * fw)
            padding = [pad_w, pad_w]
            if pad_h > 0:
                padding += [pad_h, pad_h]

            x = F.pad(x, padding, mode=padding_mode, **kwargs)
        out = self._infer(x)
        if out.shape[-2:] != x.shape[-2:]:
            out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False)
        if pad_input:
            # crop to the original size, handling the case where pad_h and pad_w is 0
            if pad_h > 0:
                out = out[:, :, pad_h:-pad_h, :]
            if pad_w > 0:
                out = out[:, :, :, pad_w:-pad_w]
        return out

    def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor:
        """
        Inference interface for the model with horizontal flip augmentation
        Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip.
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
            pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        # infer with horizontal flip and average
        out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
        out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs)
        out = (out + torch.flip(out_flip, dims=[3])) / 2
        return out

    def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor:
        """
        Inference interface for the model
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
            pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
            with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        if with_flip_aug:
            return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs)
        else:
            return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)

    @torch.no_grad()
    def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]:
        """
        Inference interface for the model for PIL image
        Args:
            pil_img (PIL.Image.Image): input PIL image
            pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
            with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
            output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy".
        """
        x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device)
        out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs)
        if output_type == "numpy":
            return out_tensor.squeeze().cpu().numpy()
        elif output_type == "pil":
            # uint16 is required for depth pil image
            out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16)
            return Image.fromarray(out_16bit_numpy)
        elif output_type == "tensor":
            return out_tensor.squeeze().cpu()
        else:
            raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
ADDED
@@ -0,0 +1,208 @@
|
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


@torch.jit.script
def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx , where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center

    Args:
        dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
        alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
        gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.

    Returns:
        torch.Tensor : Delta shifts - dc; New bin centers = Old bin centers + dc
    """
    return torch.exp(-alpha*(torch.abs(dx)**gamma)) * (dx)


@torch.jit.script
def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Inverse attractor: dc = dx / (1 + alpha*dx^gamma), where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center
    This is the default one according to the accompanying paper.

    Args:
        dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
        alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
        gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.

    Returns:
        torch.Tensor: Delta shifts - dc; New bin centers = Old bin centers + dc
    """
    return dx.div(1+alpha*dx.pow(gamma))


class AttractorLayer(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth)
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0),  # x2 for linear norm
            nn.ReLU(inplace=True)
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new bin centers normed and scaled; shape - n, nbins, h, w
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        A = self._net(x)
        eps = 1e-3
        A = A + eps
        n, c, h, w = A.shape
        A = A.view(n, self.n_attractors, 2, h, w)
        A_normed = A / A.sum(dim=2, keepdim=True)  # n, a, 2, h, w
        A_normed = A[:, :, 0, ...]  # n, na, h, w

        b_prev = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)
        b_centers = b_prev

        if self.attractor_type == 'exp':
            dist = exp_attractor
        else:
            dist = inv_attractor

        if not self.memory_efficient:
            func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            # .shape N, nbins, h, w
            delta_c = func(dist(A_normed.unsqueeze(
                2) - b_centers.unsqueeze(1)), dim=1)
        else:
            delta_c = torch.zeros_like(b_centers, device=b_centers.device)
            for i in range(self.n_attractors):
                # .shape N, nbins, h, w
                delta_c += dist(A_normed[:, i, ...].unsqueeze(1) - b_centers)

            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors

        b_new_centers = b_centers + delta_c
        B_centers = (self.max_depth - self.min_depth) * \
            b_new_centers + self.min_depth
        B_centers, _ = torch.sort(B_centers, dim=1)
        B_centers = torch.clip(B_centers, self.min_depth, self.max_depth)
        return b_new_centers, B_centers


class AttractorLayerUnnormed(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are unbounded
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
            nn.Softplus()
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w. Two outputs just to keep the API consistent with the normed version
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        A = self._net(x)
        n, c, h, w = A.shape

        b_prev = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)
        b_centers = b_prev

        if self.attractor_type == 'exp':
            dist = exp_attractor
        else:
            dist = inv_attractor

        if not self.memory_efficient:
            func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            # .shape N, nbins, h, w
            delta_c = func(
                dist(A.unsqueeze(2) - b_centers.unsqueeze(1)), dim=1)
        else:
            delta_c = torch.zeros_like(b_centers, device=b_centers.device)
            for i in range(self.n_attractors):
                delta_c += dist(A[:, i, ...].unsqueeze(1) -
                                b_centers)  # .shape N, nbins, h, w

            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors

        b_new_centers = b_centers + delta_c
        B_centers = b_new_centers

        return b_new_centers, B_centers
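To make the attractor update concrete, here is a tiny sketch of inv_attractor on a 1-D toy example (the layers above apply the same formula to NCHW tensors of bin centers); the numbers are arbitrary.

import torch
from zoedepth.models.layers.attractor import inv_attractor

c = torch.linspace(0.1, 0.9, 5)   # toy normalized bin centers
a = torch.tensor(0.5)             # a single attractor point
dc = inv_attractor(a - c)         # dx = a - c; dc = dx / (1 + alpha*dx^gamma)
print(c + dc)                     # centers nudged towards 0.5; pull shaped by alpha, gamma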
ZoeDepth/zoedepth/models/layers/dist_layers.py
ADDED
@@ -0,0 +1,121 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


def log_binom(n, k, eps=1e-7):
    """ log(nCk) using stirling approximation """
    n = n + eps
    k = k + eps
    return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps)


class LogBinomial(nn.Module):
    def __init__(self, n_classes=256, act=torch.softmax):
        """Compute log binomial distribution for n_classes

        Args:
            n_classes (int, optional): number of output classes. Defaults to 256.
        """
        super().__init__()
        self.K = n_classes
        self.act = act
        self.register_buffer('k_idx', torch.arange(
            0, n_classes).view(1, -1, 1, 1))
        self.register_buffer('K_minus_1', torch.Tensor(
            [self.K-1]).view(1, -1, 1, 1))

    def forward(self, x, t=1., eps=1e-4):
        """Compute log binomial distribution for x

        Args:
            x (torch.Tensor - NCHW): probabilities
            t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1..
            eps (float, optional): Small number for numerical stability. Defaults to 1e-4.

        Returns:
            torch.Tensor -NCHW: log binomial distribution logbinomial(p;t)
        """
        if x.ndim == 3:
            x = x.unsqueeze(1)  # make it nchw

        one_minus_x = torch.clamp(1 - x, eps, 1)
        x = torch.clamp(x, eps, 1)
        y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \
            torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x)
        return self.act(y/t, dim=1)


class ConditionalLogBinomial(nn.Module):
    def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
        """Conditional Log Binomial distribution

        Args:
            in_features (int): number of input channels in main feature
            condition_dim (int): number of input channels in condition feature
            n_classes (int, optional): Number of classes. Defaults to 256.
            bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2.
            p_eps (float, optional): small eps value. Defaults to 1e-4.
            max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
            min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
        """
        super().__init__()
        self.p_eps = p_eps
        self.max_temp = max_temp
        self.min_temp = min_temp
        self.log_binomial_transform = LogBinomial(n_classes, act=act)
        bottleneck = (in_features + condition_dim) // bottleneck_factor
        self.mlp = nn.Sequential(
            nn.Conv2d(in_features + condition_dim, bottleneck,
                      kernel_size=1, stride=1, padding=0),
            nn.GELU(),
            # 2 for p linear norm, 2 for t linear norm
            nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0),
            nn.Softplus()
        )

    def forward(self, x, cond):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Main feature
            cond (torch.Tensor - NCHW): condition feature

        Returns:
            torch.Tensor: Output log binomial distribution
        """
        pt = self.mlp(torch.concat((x, cond), dim=1))
        p, t = pt[:, :2, ...], pt[:, 2:, ...]

        p = p + self.p_eps
        p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...])

        t = t + self.p_eps
        t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...])
        t = t.unsqueeze(1)
        t = (self.max_temp - self.min_temp) * t + self.min_temp

        return self.log_binomial_transform(p, t)
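A short sketch of the LogBinomial head in isolation, with made-up tensor sizes: given a per-pixel probability p and a temperature t it yields a distribution over n_classes depth bins.

import torch
from zoedepth.models.layers.dist_layers import LogBinomial

lb = LogBinomial(n_classes=64)
p = torch.full((2, 1, 8, 8), 0.3)     # per-pixel "mode" parameter of the binomial
probs = lb(p, t=1.0)                  # shape (2, 64, 8, 8); sums to 1 over dim=1
print(probs.shape, probs.sum(dim=1).mean())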
ZoeDepth/zoedepth/models/layers/localbins_layers.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


class SeedBinRegressor(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are bounded on the (min_depth, max_depth) interval.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()
        self.version = "1_1"
        self.min_depth = min_depth
        self.max_depth = max_depth

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """
        Returns tensor of bin_width vectors (centers). One vector b for every pixel.
        """
        B = self._net(x)
        eps = 1e-3
        B = B + eps
        B_widths_normed = B / B.sum(dim=1, keepdim=True)
        B_widths = (self.max_depth - self.min_depth) * B_widths_normed  # .shape NCHW
        # pad has the form (left, right, top, bottom, front, back)
        B_widths = nn.functional.pad(
            B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        B_edges = torch.cumsum(B_widths, dim=1)  # .shape NCHW

        B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...])
        return B_widths_normed, B_centers


class SeedBinRegressorUnnormed(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are unbounded.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
            max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
        """
        super().__init__()
        self.version = "1_1"
        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.Softplus()
        )

    def forward(self, x):
        """
        Returns tensor of bin_width vectors (centers). One vector b for every pixel.
        """
        B_centers = self._net(x)
        return B_centers, B_centers


class Projector(nn.Module):
    def __init__(self, in_features, out_features, mlp_dim=128):
        """Projector MLP

        Args:
            in_features (int): input channels
            out_features (int): output channels
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
        """
        super().__init__()

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, out_features, 1, 1, 0),
        )

    def forward(self, x):
        return self._net(x)


class LinearSplitter(nn.Module):
    def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
        super().__init__()

        self.prev_nbins = prev_nbins
        self.split_factor = split_factor
        self.min_depth = min_depth
        self.max_depth = max_depth

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.GELU(),
            nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
            nn.ReLU()
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        x : feature block; shape - n, c, h, w
        b_prev : previous bin widths normed; shape - n, prev_nbins, h, w
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding
        S = self._net(x)
        eps = 1e-3
        S = S + eps
        n, c, h, w = S.shape
        S = S.view(n, self.prev_nbins, self.split_factor, h, w)
        S_normed = S / S.sum(dim=2, keepdim=True)  # fractional splits

        b_prev = nn.functional.interpolate(b_prev, (h, w), mode='bilinear', align_corners=True)

        b_prev = b_prev / b_prev.sum(dim=1, keepdim=True)  # renormalize for guarantees
        # print(b_prev.shape, S_normed.shape)
        # if is_for_query:(1).expand(-1, b_prev.size(0)//n, -1, -1, -1, -1).flatten(0,1) # TODO ? can replace all this with a single torch.repeat?
        b = b_prev.unsqueeze(2) * S_normed
        b = b.flatten(1, 2)  # .shape n, prev_nbins * split_factor, h, w

        # calculate bin centers for loss calculation
        B_widths = (self.max_depth - self.min_depth) * b  # .shape N, nprev * splitfactor, H, W
        # pad has the form (left, right, top, bottom, front, back)
        B_widths = nn.functional.pad(B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        B_edges = torch.cumsum(B_widths, dim=1)  # .shape NCHW

        B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...])
        return b, B_centers
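For orientation, a minimal sketch of exercising the seed regressor on its own, assuming the vendored zoedepth package is importable; the feature sizes below are illustrative assumptions, not values taken from the repo:

import torch
from zoedepth.models.layers.localbins_layers import SeedBinRegressor

feat = torch.randn(2, 256, 12, 16)  # hypothetical bottleneck features: N, C, H, W
regressor = SeedBinRegressor(in_features=256, n_bins=16, min_depth=1e-3, max_depth=10)
widths_normed, centers = regressor(feat)
print(widths_normed.shape)  # torch.Size([2, 16, 12, 16]); widths sum to 1 along the bin dimension
print(centers.min().item(), centers.max().item())  # centers stay inside (min_depth, max_depth)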
ZoeDepth/zoedepth/models/layers/patch_transformer.py
ADDED
@@ -0,0 +1,91 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


class PatchTransformerEncoder(nn.Module):
    def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
        """ViT-like transformer block

        Args:
            in_channels (int): Input channels
            patch_size (int, optional): patch size. Defaults to 10.
            embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
            num_heads (int, optional): number of attention heads. Defaults to 4.
            use_class_token (bool, optional): Whether to use an extra token at the start for global accumulation (the "class token"). Defaults to False.
        """
        super(PatchTransformerEncoder, self).__init__()
        self.use_class_token = use_class_token
        encoder_layers = nn.TransformerEncoderLayer(
            embedding_dim, num_heads, dim_feedforward=1024)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, num_layers=4)  # takes shape S,N,E

        self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
                                           kernel_size=patch_size, stride=patch_size, padding=0)

    def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
        """Generate positional encodings

        Args:
            sequence_length (int): Sequence length
            batch_size (int): Batch size
            embedding_dim (int): Embedding dimension

        Returns:
            torch.Tensor SBE: Positional encodings
        """
        position = torch.arange(
            0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
        index = torch.arange(
            0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
        div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
        pos_encoding = position * div_term
        pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
        pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
        return pos_encoding

    def forward(self, x):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Input feature tensor

        Returns:
            torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
        """
        embeddings = self.embedding_convPxP(x).flatten(
            2)  # .shape = n,c,s = n, embedding_dim, s
        if self.use_class_token:
            # extra special token at start
            embeddings = nn.functional.pad(embeddings, (1, 0))

        # change to S,N,E format required by transformer
        embeddings = embeddings.permute(2, 0, 1)
        S, N, E = embeddings.shape
        embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
        x = self.transformer_encoder(embeddings)  # .shape = S, N, E
        return x
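To make the S,N,E layout concrete, a small usage sketch; the channel count and patch size are illustrative assumptions (ZoeDepthNK itself runs this with patch_size=1 on the bottleneck), and the import assumes the vendored zoedepth package is on the path:

import torch
from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder

feat = torch.randn(2, 256, 24, 32)  # hypothetical bottleneck features: N, C, H, W
encoder = PatchTransformerEncoder(in_channels=256, patch_size=8, embedding_dim=128,
                                  num_heads=4, use_class_token=True)
out = encoder(feat)      # S, N, E with S = (24//8) * (32//8) + 1 class token = 13
print(out.shape)         # torch.Size([13, 2, 128])
global_token = out[0]    # (2, 128); this is the slice ZoeDepthNK feeds to its router MLP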
ZoeDepth/zoedepth/models/model_io.py
ADDED
@@ -0,0 +1,92 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch


def load_state_dict(model, state_dict):
    """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.

    DataParallel prefixes state_dict keys with 'module.' when saving.
    If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
    If the model is a DataParallel model but the state_dict is not, then prefixes are added.
    """
    state_dict = state_dict.get('model', state_dict)
    # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'

    do_prefix = isinstance(
        model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
    state = {}
    for k, v in state_dict.items():
        if k.startswith('module.') and not do_prefix:
            k = k[7:]

        if not k.startswith('module.') and do_prefix:
            k = 'module.' + k

        state[k] = v

    model.load_state_dict(state)
    print("Loaded successfully")
    return model


def load_wts(model, checkpoint_path):
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    return load_state_dict(model, ckpt)


def load_state_dict_from_url(model, url, **kwargs):
    state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
    return load_state_dict(model, state_dict)


def load_state_from_resource(model, resource: str):
    """Loads weights to the model from a given resource. A resource can be of following types:
        1. URL. Prefixed with "url::"
           e.g. url::http(s)://url.resource.com/ckpt.pt

        2. Local path. Prefixed with "local::"
           e.g. local::/path/to/ckpt.pt

    Args:
        model (torch.nn.Module): Model
        resource (str): resource string

    Returns:
        torch.nn.Module: Model with loaded weights
    """
    print(f"Using pretrained resource {resource}")

    if resource.startswith('url::'):
        url = resource.split('url::')[1]
        return load_state_dict_from_url(model, url, progress=True)

    elif resource.startswith('local::'):
        path = resource.split('local::')[1]
        return load_wts(model, path)

    else:
        raise ValueError("Invalid resource type, only url:: and local:: are supported")
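The resource prefixes are plain strings; a hedged sketch of the call pattern, with placeholder paths and URLs rather than real checkpoints (the calls stay commented so the snippet runs without any file present):

import torch.nn as nn
from zoedepth.models.model_io import load_state_from_resource

model = nn.Conv2d(3, 8, 3)  # stand-in module; any nn.Module with matching state_dict keys works
# Hypothetical resources -- substitute a real checkpoint before uncommenting:
# model = load_state_from_resource(model, "local::/path/to/ckpt.pt")
# model = load_state_from_resource(model, "url::https://example.com/ckpt.pt")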
ZoeDepth/zoedepth/models/zoedepth/__init__.py
ADDED
@@ -0,0 +1,31 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

from .zoedepth_v1 import ZoeDepth

all_versions = {
    "v1": ZoeDepth,
}

get_version = lambda v : all_versions[v]
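The version map is the hook the rest of the code uses to resolve a model class by version string; a tiny sketch, assuming the vendored zoedepth package is importable:

from zoedepth.models.zoedepth import get_version

ZoeDepthV1 = get_version("v1")   # resolves to the ZoeDepth class defined in zoedepth_v1.py
print(ZoeDepthV1.__name__)       # "ZoeDepth"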
ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json
ADDED
@@ -0,0 +1,58 @@
{
    "model": {
        "name": "ZoeDepth",
        "version_name": "v1",
        "n_bins": 64,
        "bin_embedding_dim": 128,
        "bin_centers_type": "softplus",
        "n_attractors": [16, 8, 4, 1],
        "attractor_alpha": 1000,
        "attractor_gamma": 2,
        "attractor_kind": "mean",
        "attractor_type": "inv",
        "midas_model_type": "DPT_BEiT_L_384",
        "min_temp": 0.0212,
        "max_temp": 50.0,
        "output_distribution": "logbinomial",
        "memory_efficient": true,
        "inverse_midas": false,
        "img_size": [384, 512]
    },

    "train": {
        "train_midas": true,
        "use_pretrained_midas": true,
        "trainer": "zoedepth",
        "epochs": 5,
        "bs": 16,
        "optim_kwargs": {"lr": 0.000161, "wd": 0.01},
        "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase": false, "cycle_momentum": true},
        "same_lr": false,
        "w_si": 1,
        "w_domain": 0.2,
        "w_reg": 0,
        "w_grad": 0,
        "avoid_boundary": false,
        "random_crop": false,
        "input_width": 640,
        "input_height": 480,
        "midas_lr_factor": 1,
        "encoder_lr_factor": 10,
        "pos_enc_lr_factor": 10,
        "freeze_midas_bn": true
    },

    "infer": {
        "train_midas": false,
        "use_pretrained_midas": false,
        "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt",
        "force_keep_ar": true
    },

    "eval": {
        "train_midas": false,
        "use_pretrained_midas": false,
        "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt"
    }
}
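A quick way to inspect this config directly; the path assumes the snippet is run from the repository root of this Space, and the printed values simply echo the JSON above:

import json

with open("ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json") as f:
    cfg = json.load(f)
print(cfg["model"]["n_bins"], cfg["model"]["attractor_type"])  # 64 inv
print(cfg["infer"]["pretrained_resource"])                     # url:: resource handled by model_io.py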
ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json
ADDED
@@ -0,0 +1,22 @@
{
    "model": {
        "bin_centers_type": "normed",
        "img_size": [384, 768]
    },

    "train": {
    },

    "infer": {
        "train_midas": false,
        "use_pretrained_midas": false,
        "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt",
        "force_keep_ar": true
    },

    "eval": {
        "train_midas": false,
        "use_pretrained_midas": false,
        "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
    }
}
ZoeDepth/zoedepth/models/zoedepth/zoedepth_v1.py
ADDED
@@ -0,0 +1,250 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import itertools

import torch
import torch.nn as nn
from zoedepth.models.depth_model import DepthModel
from zoedepth.models.base_models.midas import MidasCore
from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed
from zoedepth.models.layers.dist_layers import ConditionalLogBinomial
from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor,
                                                     SeedBinRegressorUnnormed)
from zoedepth.models.model_io import load_state_from_resource


class ZoeDepth(DepthModel):
    def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True,
                 midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepth model. This is the version of ZoeDepth that has a single metric head.

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
            n_bins (int, optional): Number of bin centers. Defaults to 64.
            bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, the linear normalization trick is applied, which results in bounded bin centers.
                                              For "softplus", softplus activation is used and the bin centers are thus unbounded. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
            min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3.
            max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10.
            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for the base midas model except its encoder and positional encodings. Defaults to 10.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in the midas model. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
        """
        super().__init__()

        self.core = core
        self.max_depth = max_depth
        self.min_depth = min_depth
        self.min_temp = min_temp
        self.bin_centers_type = bin_centers_type

        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.train_midas = train_midas
        self.inverse_midas = inverse_midas

        if self.encoder_lr_factor <= 0:
            self.core.freeze_encoder(
                freeze_rel_pos=self.pos_enc_lr_factor <= 0)

        N_MIDAS_OUT = 32
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]

        self.conv2 = nn.Conv2d(btlnck_features, btlnck_features,
                               kernel_size=1, stride=1, padding=0)  # btlnck conv

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")

        self.seed_bin_regressor = SeedBinRegressorLayer(
            btlnck_features, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth)
        self.seed_projector = Projector(btlnck_features, bin_embedding_dim)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim)
            for num_out in num_out_features
        ])
        self.attractors = nn.ModuleList([
            Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[i], min_depth=min_depth, max_depth=max_depth,
                      alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type)
            for i in range(len(num_out_features))
        ])

        last_in = N_MIDAS_OUT + 1  # +1 for relative depth

        # use log binomial instead of softmax
        self.conditional_log_binomial = ConditionalLogBinomial(
            last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp)

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """
        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W)
            return_final_centers (bool, optional): Whether to return the final bin centers. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. This reverses ImageNet normalization as midas normalization is different. Defaults to False.
            return_probs (bool, optional): Whether to return the output probability distribution. Defaults to False.

        Returns:
            dict: Dictionary containing the following keys:
                - rel_depth (torch.Tensor): Relative depth map of shape (B, H, W)
                - metric_depth (torch.Tensor): Metric depth map of shape (B, 1, H, W)
                - bin_centers (torch.Tensor): Bin centers of shape (B, n_bins). Present only if return_final_centers is True
                - probs (torch.Tensor): Output probability distribution of shape (B, n_bins, H, W). Present only if return_probs is True
        """
        b, c, h, w = x.shape
        # print("input shape ", x.shape)
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)
        # print("output shapes", rel_depth.shape, out.shape)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        x_d0 = self.conv2(btlnck)
        x = x_d0
        _, seed_b_centers = self.seed_bin_regressor(x)

        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            b_prev = (seed_b_centers - self.min_depth) / \
                (self.max_depth - self.min_depth)
        else:
            b_prev = seed_b_centers

        prev_b_embedding = self.seed_projector(x)

        # unroll this loop for better performance
        for projector, attractor, x in zip(self.projectors, self.attractors, x_blocks):
            b_embedding = projector(x)
            b, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = b.clone()
            prev_b_embedding = b_embedding.clone()

        last = outconv_activation

        if self.inverse_midas:
            # invert depth followed by normalization
            rel_depth = 1.0 / (rel_depth + 1e-6)
            rel_depth = (rel_depth - rel_depth.min()) / \
                (rel_depth.max() - rel_depth.min())
        # concat rel depth with last. First interpolate rel depth to last size
        rel_cond = rel_depth.unsqueeze(1)
        rel_cond = nn.functional.interpolate(
            rel_cond, size=last.shape[2:], mode='bilinear', align_corners=True)
        last = torch.cat([last, rel_cond], dim=1)

        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)
        x = self.conditional_log_binomial(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        # print(x.shape, b_centers.shape)
        b_centers = nn.functional.interpolate(
            b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
        out = torch.sum(x * b_centers, dim=1, keepdim=True)

        # Structure output dict
        output = dict(metric_depth=out)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = x

        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model
        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            if self.encoder_lr_factor > 0:
                param_conf.append({'params': self.core.get_enc_params_except_rel_pos(
                ), 'lr': lr / self.encoder_lr_factor})

            if self.pos_enc_lr_factor > 0:
                param_conf.append(
                    {'params': self.core.get_rel_pos_params(), 'lr': lr / self.pos_enc_lr_factor})

            midas_params = self.core.core.scratch.parameters()
            midas_lr_factor = self.midas_lr_factor
            param_conf.append(
                {'params': midas_params, 'lr': lr / midas_lr_factor})

        remaining_modules = []
        for name, child in self.named_children():
            if name != 'core':
                remaining_modules.append(child)
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])

        param_conf.append({'params': remaining_params, 'lr': lr})

        return param_conf

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepth(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        return ZoeDepth.build(**config)
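A minimal inference sketch for this single-head model, assuming the upstream ZoeDepth helpers bundled elsewhere in this commit (zoedepth.utils.config.get_config and zoedepth.models.builder.build_model) and that the vendored package is importable; the input is a dummy batch, and downloading the pretrained resource happens on first build:

import torch
from zoedepth.utils.config import get_config
from zoedepth.models.builder import build_model

conf = get_config("zoedepth", "infer")   # merges config_zoedepth.json with its "infer" overrides
model = build_model(conf).eval()
x = torch.rand(1, 3, 384, 512)           # dummy RGB batch in [0, 1]
with torch.no_grad():
    pred = model(x)
print(pred["metric_depth"].shape)        # (1, 1, H, W) metric depth in metres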
ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py
ADDED
@@ -0,0 +1,31 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

from .zoedepth_nk_v1 import ZoeDepthNK

all_versions = {
    "v1": ZoeDepthNK,
}

get_version = lambda v : all_versions[v]
ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
ADDED
@@ -0,0 +1,67 @@
{
    "model": {
        "name": "ZoeDepthNK",
        "version_name": "v1",
        "bin_conf": [
            {
                "name": "nyu",
                "n_bins": 64,
                "min_depth": 1e-3,
                "max_depth": 10.0
            },
            {
                "name": "kitti",
                "n_bins": 64,
                "min_depth": 1e-3,
                "max_depth": 80.0
            }
        ],
        "bin_embedding_dim": 128,
        "bin_centers_type": "softplus",
        "n_attractors": [16, 8, 4, 1],
        "attractor_alpha": 1000,
        "attractor_gamma": 2,
        "attractor_kind": "mean",
        "attractor_type": "inv",
        "min_temp": 0.0212,
        "max_temp": 50.0,
        "memory_efficient": true,
        "midas_model_type": "DPT_BEiT_L_384",
        "img_size": [384, 512]
    },

    "train": {
        "train_midas": true,
        "use_pretrained_midas": true,
        "trainer": "zoedepth_nk",
        "epochs": 5,
        "bs": 16,
        "optim_kwargs": {"lr": 0.0002512, "wd": 0.01},
        "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase": false, "cycle_momentum": true},
        "same_lr": false,
        "w_si": 1,
        "w_domain": 100,
        "avoid_boundary": false,
        "random_crop": false,
        "input_width": 640,
        "input_height": 480,
        "w_grad": 0,
        "w_reg": 0,
        "midas_lr_factor": 10,
        "encoder_lr_factor": 10,
        "pos_enc_lr_factor": 10
    },

    "infer": {
        "train_midas": false,
        "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
        "use_pretrained_midas": false,
        "force_keep_ar": true
    },

    "eval": {
        "train_midas": false,
        "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
        "use_pretrained_midas": false
    }
}
ZoeDepth/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py
ADDED
@@ -0,0 +1,333 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import itertools

import torch
import torch.nn as nn

from zoedepth.models.depth_model import DepthModel
from zoedepth.models.base_models.midas import MidasCore
from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed
from zoedepth.models.layers.dist_layers import ConditionalLogBinomial
from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor,
                                                     SeedBinRegressorUnnormed)
from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder
from zoedepth.models.model_io import load_state_from_resource


class ZoeDepthNK(DepthModel):
    def __init__(self, core, bin_conf, bin_centers_type="softplus", bin_embedding_dim=128,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp',
                 min_temp=5, max_temp=50,
                 memory_efficient=False, train_midas=True,
                 is_midas_pretrained=True, midas_lr_factor=1, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features

            bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys:
                                   "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float).
                                   The length of this list determines the number of metric heads.
            bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, the linear normalization trick is applied, which results in bounded bin centers.
                                              For "softplus", softplus activation is used and the bin centers are thus unbounded. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.

            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.

            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.

            memory_efficient (bool, optional): Whether to use the memory efficient version of the attractor layers. The memory efficient version is slower but is recommended in case of multiple metric heads in order to save GPU memory. Defaults to False.

            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            is_midas_pretrained (bool, optional): Is "core" pretrained? Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for the base midas model except its encoder and positional encodings. Defaults to 1.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in the midas model. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.

        """

        super().__init__()

        self.core = core
        self.bin_conf = bin_conf
        self.min_temp = min_temp
        self.max_temp = max_temp
        self.memory_efficient = memory_efficient
        self.train_midas = train_midas
        self.is_midas_pretrained = is_midas_pretrained
        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.inverse_midas = inverse_midas

        N_MIDAS_OUT = 32
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]
        # self.scales = [16, 8, 4, 2]  # spatial scale factors

        self.conv2 = nn.Conv2d(
            btlnck_features, btlnck_features, kernel_size=1, stride=1, padding=0)

        # Transformer classifier on the bottleneck
        self.patch_transformer = PatchTransformerEncoder(
            btlnck_features, 1, 128, use_class_token=True)
        self.mlp_classifier = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
        self.bin_centers_type = bin_centers_type
        # We have bins for each bin conf.
        # Create a map (ModuleDict) of 'name' -> seed_bin_regressor
        self.seed_bin_regressors = nn.ModuleDict(
            {conf['name']: SeedBinRegressorLayer(btlnck_features, conf["n_bins"], mlp_dim=bin_embedding_dim//2, min_depth=conf["min_depth"], max_depth=conf["max_depth"])
             for conf in bin_conf}
        )

        self.seed_projector = Projector(
            btlnck_features, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
            for num_out in num_out_features
        ])

        # Create a map (ModuleDict) of 'name' -> attractors (ModuleList)
        self.attractors = nn.ModuleDict(
            {conf['name']: nn.ModuleList([
                Attractor(bin_embedding_dim, n_attractors[i],
                          mlp_dim=bin_embedding_dim, alpha=attractor_alpha,
                          gamma=attractor_gamma, kind=attractor_kind,
                          attractor_type=attractor_type, memory_efficient=memory_efficient,
                          min_depth=conf["min_depth"], max_depth=conf["max_depth"])
                for i in range(len(n_attractors))
            ])
                for conf in bin_conf}
        )

        last_in = N_MIDAS_OUT
        # conditional log binomial for each bin conf
        self.conditional_log_binomial = nn.ModuleDict(
            {conf['name']: ConditionalLogBinomial(last_in, bin_embedding_dim, conf['n_bins'], bottleneck_factor=4, min_temp=self.min_temp, max_temp=self.max_temp)
             for conf in bin_conf}
        )

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """
        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W). Assumes all images are from the same domain.
            return_final_centers (bool, optional): Whether to return the final centers of the attractors. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. Defaults to False.
            return_probs (bool, optional): Whether to return the probabilities of the bins. Defaults to False.

        Returns:
            dict: Dictionary of outputs with keys:
                - "rel_depth": Relative depth map of shape (B, 1, H, W)
                - "metric_depth": Metric depth map of shape (B, 1, H, W)
                - "domain_logits": Domain logits of shape (B, 2)
                - "bin_centers": Bin centers of shape (B, N, H, W). Present only if return_final_centers is True
                - "probs": Bin probabilities of shape (B, N, H, W). Present only if return_probs is True
        """
        b, c, h, w = x.shape
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        x_d0 = self.conv2(btlnck)
        x = x_d0

        # Predict which path to take
        embedding = self.patch_transformer(x)[0]  # N, E
        domain_logits = self.mlp_classifier(embedding)  # N, 2
        domain_vote = torch.softmax(domain_logits.sum(
            dim=0, keepdim=True), dim=-1)  # 1, 2

        # Get the path
        bin_conf_name = ["nyu", "kitti"][torch.argmax(
            domain_vote, dim=-1).squeeze().item()]

        try:
            conf = [c for c in self.bin_conf if c.name == bin_conf_name][0]
        except IndexError:
            raise ValueError(
                f"bin_conf_name {bin_conf_name} not found in bin_confs")

        min_depth = conf['min_depth']
        max_depth = conf['max_depth']

        seed_bin_regressor = self.seed_bin_regressors[bin_conf_name]
        _, seed_b_centers = seed_bin_regressor(x)
        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            b_prev = (seed_b_centers - min_depth)/(max_depth - min_depth)
        else:
            b_prev = seed_b_centers
        prev_b_embedding = self.seed_projector(x)

        attractors = self.attractors[bin_conf_name]
        for projector, attractor, x in zip(self.projectors, attractors, x_blocks):
            b_embedding = projector(x)
            b, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = b
            prev_b_embedding = b_embedding

        last = outconv_activation

        b_centers = nn.functional.interpolate(
            b_centers, last.shape[-2:], mode='bilinear', align_corners=True)
        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)

        clb = self.conditional_log_binomial[bin_conf_name]
        x = clb(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        # print(x.shape, b_centers.shape)
        # b_centers = nn.functional.interpolate(b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
        out = torch.sum(x * b_centers, dim=1, keepdim=True)

        output = dict(domain_logits=domain_logits, metric_depth=out)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = x
        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model

        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            def get_rel_pos_params():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" in name:
                        yield p

            def get_enc_params_except_rel_pos():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" not in name:
                        yield p

            encoder_params = get_enc_params_except_rel_pos()
            rel_pos_params = get_rel_pos_params()
            midas_params = self.core.core.scratch.parameters()
            midas_lr_factor = self.midas_lr_factor if self.is_midas_pretrained else 1.0
            param_conf.extend([
                {'params': encoder_params, 'lr': lr / self.encoder_lr_factor},
                {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor},
                {'params': midas_params, 'lr': lr / midas_lr_factor}
            ])

        remaining_modules = []
        for name, child in self.named_children():
            if name != 'core':
                remaining_modules.append(child)
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])
        param_conf.append({'params': remaining_params, 'lr': lr})
        return param_conf

    def get_conf_parameters(self, conf_name):
        """
        Returns parameters of all the ModuleDict children that are exclusively used for the given bin configuration
        """
        params = []
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    if bin_conf_name == conf_name:
                        params += list(module.parameters())
        return params

    def freeze_conf(self, conf_name):
        """
        Freezes all the parameters of all the ModuleDict children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = False

    def unfreeze_conf(self, conf_name):
        """
        Unfreezes all the parameters of all the ModuleDict children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = True

    def freeze_all_confs(self):
        """
        Freezes all the parameters of all the ModuleDict children
        """
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    for p in module.parameters():
                        p.requires_grad = False

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepthNK(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        return ZoeDepthNK.build(**config)
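The same flow for the two-head NK variant, again assuming the upstream config/builder helpers (zoedepth.utils.config.get_config, zoedepth.models.builder.build_model) and an importable vendored package; domain_logits exposes the router's NYU-vs-KITTI decision for the dummy batch:

import torch
from zoedepth.utils.config import get_config
from zoedepth.models.builder import build_model

conf = get_config("zoedepth_nk", "infer")   # uses config_zoedepth_nk.json above
model = build_model(conf).eval()
x = torch.rand(1, 3, 384, 512)              # dummy RGB batch in [0, 1]
with torch.no_grad():
    pred = model(x)
print(pred["domain_logits"])                # router scores over the ("nyu", "kitti") heads
print(pred["metric_depth"].shape)           # (1, 1, H, W)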