shrutin97
committed on
Commit
•
af42517
1
Parent(s):
dd288d2
cloning multi-view-diffusion repository
Browse files
- .gitignore +6 -0
- README.md +29 -0
- feature_extractor/preprocessor_config.json +29 -0
- image_encoder/config.json +23 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.pt
|
2 |
+
*.yaml
|
3 |
+
**/__pycache__
|
4 |
+
*.pyc
|
5 |
+
|
6 |
+
venv/
|
README.md
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: openrail
|
3 |
+
pipeline_tag: image-to-3d
|
4 |
+
---
|
5 |
+
|
6 |
+
This is a copy of [ashawkey/imagedream-ipmv-diffusers](https://huggingface.co/ashawkey/imagedream-ipmv-diffusers).
|
7 |
+
|
8 |
+
It is hosted here for persistence throughout the ML for 3D course.
|
9 |
+
|
10 |
+
# MVDream-diffusers Model Card
|
11 |
+
|
12 |
+
This is a port of https://huggingface.co/Peng-Wang/ImageDream into diffusers.
|
13 |
+
|
14 |
+
For usage, please check: https://github.com/ashawkey/mvdream_diffusers
|
15 |
+
|
16 |
+
## Citation
|
17 |
+
|
18 |
+
```
|
19 |
+
@article{wang2023imagedream,
|
20 |
+
title={ImageDream: Image-Prompt Multi-view Diffusion for 3D Generation},
|
21 |
+
author={Wang, Peng and Shi, Yichun},
|
22 |
+
journal={arXiv preprint arXiv:2312.02201},
|
23 |
+
year={2023}
|
24 |
+
}
|
25 |
+
```
|
26 |
+
|
27 |
+
## Misuse, Malicious Use, and Out-of-Scope Use
|
28 |
+
|
29 |
+
The model should not be used to intentionally create or disseminate images that create hostile or alienating environments for people. This includes generating images that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes.
|
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 224,
|
4 |
+
"width": 224
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"feature_extractor_type": "CLIPFeatureExtractor",
|
12 |
+
"image_mean": [
|
13 |
+
0.48145466,
|
14 |
+
0.4578275,
|
15 |
+
0.40821073
|
16 |
+
],
|
17 |
+
"image_processor_type": "CLIPImageProcessor",
|
18 |
+
"image_std": [
|
19 |
+
0.26862954,
|
20 |
+
0.26130258,
|
21 |
+
0.27577711
|
22 |
+
],
|
23 |
+
"resample": 3,
|
24 |
+
"rescale_factor": 0.00392156862745098,
|
25 |
+
"size": {
|
26 |
+
"shortest_edge": 224
|
27 |
+
},
|
28 |
+
"use_square_size": false
|
29 |
+
}
|
image_encoder/config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
|
3 |
+
"architectures": [
|
4 |
+
"CLIPVisionModel"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"dropout": 0.0,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_size": 1280,
|
10 |
+
"image_size": 224,
|
11 |
+
"initializer_factor": 1.0,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 5120,
|
14 |
+
"layer_norm_eps": 1e-05,
|
15 |
+
"model_type": "clip_vision_model",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_channels": 3,
|
18 |
+
"num_hidden_layers": 32,
|
19 |
+
"patch_size": 14,
|
20 |
+
"projection_dim": 1024,
|
21 |
+
"torch_dtype": "float16",
|
22 |
+
"transformers_version": "4.35.2"
|
23 |
+
}
|