BoyuanJiang committed
Commit 10321dd · 1 Parent(s): b5a9c6d

upload model

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,119 @@
  ---
  license: cc-by-nc-sa-4.0
+
+ extra_gated_prompt: "You agree that this model will only be used for non-commercial purposes."
+ extra_gated_fields:
+   Name: text
+   Email: text
+   Country: country
+   Organization or Affiliation: text
+   I agree to use this model for non-commercial use ONLY: checkbox
  ---
+
+ # FitDiT: Advancing the Authentic Garment Details for High-fidelity Virtual Try-on
+
+ <div style="display: flex; justify-content: center; align-items: center;">
+ <a href="https://arxiv.org/abs/2411.10499" style="margin: 0 2px;">
+ <img src='https://img.shields.io/badge/arXiv-2411.10499-red?style=flat&logo=arXiv&logoColor=red' alt='arxiv'>
+ </a>
+ <a href="https://github.com/BoyuanJiang/FitDiT" style="margin: 0 2px;">
+ <img src='https://img.shields.io/badge/GitHub-Repo-blue?style=flat&logo=GitHub' alt='GitHub'>
+ </a>
+ <a href="http://demo.fitdit.byjiang.com/" style="margin: 0 2px;">
+ <img src='https://img.shields.io/badge/Demo-Gradio-gold?style=flat&logo=Gradio&logoColor=red' alt='Demo'>
+ </a>
+ <a href='https://huggingface.co/BoyuanJiang/FitDiT' style="margin: 0 2px;">
+ <img src='https://img.shields.io/badge/Hugging Face-ckpts-orange?style=flat&logo=HuggingFace&logoColor=orange' alt='huggingface'>
+ </a>
+ <a href='https://byjiang.com/FitDiT/' style="margin: 0 2px;">
+ <img src='https://img.shields.io/badge/Webpage-Project-silver?style=flat&logo=&logoColor=orange' alt='webpage'>
+ </a>
+ <a href="https://raw.githubusercontent.com/BoyuanJiang/FitDiT/refs/heads/main/LICENSE" style="margin: 0 2px;">
+ <img src='https://img.shields.io/badge/License-CC BY--NC--SA--4.0-lightgreen?style=flat&logo=Lisence' alt='License'>
+ </a>
+ </div>
+
+ **FitDiT** is designed for high-fidelity virtual try-on using Diffusion Transformers (DiT).
+ <div align="center">
+ <img src="resource/img/teaser.jpg" width="100%" height="100%"/>
+ </div>
+
+ ## Updates
+ - **`2024/12/20`**: The FitDiT [**model weights**](https://huggingface.co/BoyuanJiang/FitDiT) are available.
+ - **`2024/12/17`**: Inference code is released.
+ - **`2024/12/4`**: Our [**Online Demo**](http://demo.fitdit.byjiang.com/) is released.
+ - **`2024/11/25`**: Our [**Complex Virtual Dressing Dataset (CVDD)**](https://huggingface.co/datasets/BoyuanJiang/CVDD) is released.
+ - **`2024/11/15`**: Our [**FitDiT paper**](https://arxiv.org/abs/2411.10499) is available.
+
+ ## Gradio Demo
+ Our algorithm runs in two steps: the first step generates a mask of the try-on area, and the second step performs the try-on within the masked area.
+
+ ### Step1: Run Mask
+ You can simply generate the try-on mask by clicking **Step1: Run Mask** on the right side of the Gradio demo. If the automatically generated mask does not fully cover the area you want to try on, you can adjust it in either of two ways:
+
+ 1. Drag the *mask offset top*, *mask offset bottom*, *mask offset left*, or *mask offset right* slider, then click the **Step1: Run Mask** button to regenerate the mask (see the sketch after this list).
+
+ ![mask_offset](resource/img/mask_offset.jpg)
+
+ 2. Use the brush or eraser tool to edit the automatically generated mask.
+
+ ![manually_adjust](resource/img/manually_adjust.jpg)
+
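+ As a rough illustration of what the offset sliders do, here is a minimal NumPy sketch of per-side mask expansion. The demo's actual offset logic is not published, so the `offset_mask` helper below is hypothetical:
+
+ ```python
+ import numpy as np
+
+ def offset_mask(mask: np.ndarray, top: int = 0, bottom: int = 0,
+                 left: int = 0, right: int = 0) -> np.ndarray:
+     """Grow a binary try-on mask by a pixel offset on each side.
+     Hypothetical helper; only illustrates the idea of per-side expansion."""
+     src = mask.astype(bool)
+     out = src.copy()
+     for k in range(1, top + 1):      # extend the mask upward
+         out[:-k, :] |= src[k:, :]
+     for k in range(1, bottom + 1):   # extend downward
+         out[k:, :] |= src[:-k, :]
+     for k in range(1, left + 1):     # extend to the left
+         out[:, :-k] |= src[:, k:]
+     for k in range(1, right + 1):    # extend to the right
+         out[:, k:] |= src[:, :-k]
+     return out.astype(mask.dtype)
+ ```
+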
+ ### Step2: Run Try-on
+ After generating a suitable mask, you can get the try-on result by clicking **Step2: Run Try-on**. In the Try-on resolution drop-down box, you can select a suitable processing resolution. In our online demo, the default resolution is 1152x1536, which means the input model image and garment image are padded and resized to this resolution before being fed into the model.
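+
+ The pad-and-resize step might look like the following minimal Pillow sketch; the exact padding color and strategy used by the demo are assumptions:
+
+ ```python
+ from PIL import Image
+
+ def pad_and_resize(img: Image.Image, width: int = 1152, height: int = 1536) -> Image.Image:
+     """Letterbox an image to the target aspect ratio, then resize (illustrative)."""
+     target_ratio = width / height
+     w, h = img.size
+     if w / h > target_ratio:   # too wide: pad top and bottom
+         canvas = Image.new("RGB", (w, round(w / target_ratio)), (255, 255, 255))
+         canvas.paste(img, (0, (canvas.height - h) // 2))
+     else:                      # too tall (or exact): pad left and right
+         canvas = Image.new("RGB", (round(h * target_ratio), h), (255, 255, 255))
+         canvas.paste(img, ((canvas.width - w) // 2, 0))
+     return canvas.resize((width, height), Image.LANCZOS)
+ ```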
+
+ ## Local Demo
+ First apply for access to the FitDiT [model weights](https://huggingface.co/BoyuanJiang/FitDiT), then clone the model to *local_model_dir*.
+
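+ One way to fetch the weights once access has been granted is via the `huggingface_hub` client (assuming you are logged in with `huggingface-cli login`):
+
+ ```python
+ from huggingface_hub import snapshot_download
+
+ # Downloads the gated repo into local_model_dir (requires granted access).
+ snapshot_download(repo_id="BoyuanJiang/FitDiT", local_dir="local_model_dir")
+ ```
+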
+ ### Environment
+ We tested our model with the following environment:
+ ```
+ torch==2.3.0
+ torchvision==0.18.0
+ diffusers==0.31.0
+ transformers==4.39.3
+ gradio==5.8.0
+ onnxruntime-gpu==1.20.1
+ ```
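+
+ To reproduce this environment, one option is to pin the same versions with pip:
+ ```
+ pip install torch==2.3.0 torchvision==0.18.0 diffusers==0.31.0 transformers==4.39.3 gradio==5.8.0 onnxruntime-gpu==1.20.1
+ ```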
+
+ ### Run Gradio locally
+ ```
+ # Run the model in bf16 without any offload: fastest inference, highest memory usage
+ python gradio_sd3.py --model_path local_model_dir
+
+ # Run the model in fp16
+ python gradio_sd3.py --model_path local_model_dir --fp16
+
+ # Run the model in fp16 with CPU offload: moderate inference speed and memory usage
+ python gradio_sd3.py --model_path local_model_dir --fp16 --offload
+
+ # Run the model in fp16 with aggressive CPU offload: slowest inference, lowest memory usage
+ python gradio_sd3.py --model_path local_model_dir --fp16 --aggressive_offload
+ ```
+
+ ## Star History
+
+ [![Star History Chart](https://api.star-history.com/svg?repos=BoyuanJiang/FitDiT&type=Date)](https://star-history.com/#BoyuanJiang/FitDiT&Date)
+
+ ## Contact
+ This model may be used **for non-commercial purposes only**. If you want to use it commercially or expect better results, please contact me at byronjiang@tencent.com.
+
+ ## Citation
+ If you find our work helpful for your research, please consider citing it:
+ ```
+ @misc{jiang2024fitditadvancingauthenticgarment,
+       title={FitDiT: Advancing the Authentic Garment Details for High-fidelity Virtual Try-on},
+       author={Boyuan Jiang and Xiaobin Hu and Donghao Luo and Qingdong He and Chengming Xu and Jinlong Peng and Jiangning Zhang and Chengjie Wang and Yunsheng Wu and Yanwei Fu},
+       year={2024},
+       eprint={2411.10499},
+       archivePrefix={arXiv},
+       primaryClass={cs.CV},
+       url={https://arxiv.org/abs/2411.10499},
+ }
+ ```
dwpose/dw-ll_ucoco_384.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
+ size 134399116
dwpose/yolox_l.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
+ size 216746733
humanparsing/parsing_atr.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04c7d1d070d0e0ae943d86b18cb5aaaea9e278d97462e9cfb270cbbe4cd977f4
+ size 266859305
humanparsing/parsing_lip.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8436e1dae96e2601c373d1ace29c8f0978b16357d9038c17a8ba756cca376dbc
+ size 266863411
model_index.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_class_name": "StableDiffusion3TryOnPipeline",
+   "_diffusers_version": "0.29.0.dev0",
+   "scheduler": [
+     "diffusers",
+     "FlowMatchEulerDiscreteScheduler"
+   ],
+   "image_encoder_large": [
+     "transformers",
+     "CLIPVisionModelWithProjection"
+   ],
+   "image_encoder_bigG": [
+     "transformers",
+     "CLIPVisionModelWithProjection"
+   ],
+   "transformer_garm": [
+     "diffusers",
+     "SD3Transformer2DModel"
+   ],
+   "transformer_vton": [
+     "diffusers",
+     "SD3Transformer2DModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
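As a sketch of how the components listed in model_index.json map onto libraries: the standard parts can be loaded individually with diffusers (the top-level StableDiffusion3TryOnPipeline class is custom and ships with the FitDiT code, and the two CLIP image encoders load analogously via transformers' CLIPVisionModelWithProjection). The snippet below assumes the repository has been cloned to local_model_dir:

```python
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel

# Each subfolder name matches a key in model_index.json.
transformer_garm = SD3Transformer2DModel.from_pretrained("local_model_dir", subfolder="transformer_garm")
transformer_vton = SD3Transformer2DModel.from_pretrained("local_model_dir", subfolder="transformer_vton")
vae = AutoencoderKL.from_pretrained("local_model_dir", subfolder="vae")
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained("local_model_dir", subfolder="scheduler")
```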
pose_guider/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1b5ae40cdc9ccf32a157cc5f150e43812c5bba79cd10b7614ba199407e6d6f6
+ size 10267174
resource/img/manually_adjust.jpg ADDED

Git LFS Details

  • SHA256: 3a3c0a42a9c5fdc423d2a6c04d45e34e71e49a785f0f0cb1786ae0bba73ef1fb
  • Pointer size: 131 Bytes
  • Size of remote file: 559 kB
resource/img/mask_offset.jpg ADDED

Git LFS Details

  • SHA256: 41c112c0093150c3eafcd825129a33c84888f2cf1fb59a94af05f8ad7b422852
  • Pointer size: 131 Bytes
  • Size of remote file: 321 kB
resource/img/teaser.jpg ADDED

Git LFS Details

  • SHA256: 8c625d1ea090054b5144fe14caa0bd9cbbdee5d52f0e9c8e9d8b001f70a501bf
  • Pointer size: 132 Bytes
  • Size of remote file: 2.03 MB
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_class_name": "FlowMatchEulerDiscreteScheduler",
+   "_diffusers_version": "0.29.0.dev0",
+   "num_train_timesteps": 1000,
+   "shift": 3.0
+ }
transformer_garm/config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "_class_name": "SD3Transformer2DModel",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "",
+   "attention_head_dim": 64,
+   "caption_projection_dim": 1536,
+   "in_channels": 16,
+   "joint_attention_dim": 4096,
+   "num_attention_heads": 24,
+   "num_layers": 24,
+   "out_channels": 16,
+   "patch_size": 2,
+   "pooled_projection_dim": 2048,
+   "pos_embed_max_size": 192,
+   "sample_size": 128
+ }
transformer_garm/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69c252316f4d8e4717cca392df6b0fbbb534276b2e1f4863163e9a25a5b85d49
+ size 3830005160
transformer_vton/config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "_class_name": "SD3Transformer2DModel",
+   "_diffusers_version": "0.31.0",
+   "_name_or_path": "",
+   "attention_head_dim": 64,
+   "caption_projection_dim": 1536,
+   "in_channels": 33,
+   "joint_attention_dim": 4096,
+   "num_attention_heads": 24,
+   "num_layers": 24,
+   "out_channels": 16,
+   "patch_size": 2,
+   "pooled_projection_dim": 2048,
+   "pos_embed_max_size": 192,
+   "sample_size": 128
+ }
transformer_vton/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45c1a95de40e1d9039308a4fd479fa73f1a0b6092728169a81e4c2a8c32ed6b4
+ size 3830214056
vae/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.29.0.dev0",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 16,
+   "latents_mean": null,
+   "latents_std": null,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 1024,
+   "scaling_factor": 1.5305,
+   "shift_factor": 0.0609,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ],
+   "use_post_quant_conv": false,
+   "use_quant_conv": false
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9b67a279283625caee39d61eacb5324243848477b4eb535355eaaa8423d4e09
+ size 167666654