xiaozaa committed
Commit a6d7aa6
1 Parent(s): 4ae4b3e

add spaces

Files changed (4):
  1. README.md +1 -1
  2. app.py +158 -96
  3. example/garment/00396_00.jpg +0 -0
  4. requirements.txt +1 -14
README.md CHANGED
@@ -8,7 +8,7 @@ Also inspired by [In-Context LoRA](https://arxiv.org/abs/2410.23775) for prompt
 ---
 **Latest Achievement**
 (2024/11/25):
-- Released lora weights.
+- Released lora weights. FID: 6.0675811767578125 on VITON-HD dataset. Test configuration: scale 30, step 30.
 
 (2024/11/24):
 - Released FID score and gradio demo
app.py CHANGED
@@ -1,17 +1,63 @@
+import spaces
+
 import gradio as gr
 from tryon_inference import run_inference
 import os
 import numpy as np
 from PIL import Image
 import tempfile
+import torch
+from diffusers import FluxTransformer2DModel, FluxFillPipeline
+
+import shutil
+
+def find_cuda():
+    # Check if CUDA_HOME or CUDA_PATH environment variables are set
+    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
+
+    if cuda_home and os.path.exists(cuda_home):
+        return cuda_home
+
+    # Search for the nvcc executable in the system's PATH
+    nvcc_path = shutil.which('nvcc')
+
+    if nvcc_path:
+        # Remove the 'bin/nvcc' part to get the CUDA installation path
+        cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
+        return cuda_path
+
+    return None
+
+cuda_path = find_cuda()
+
+if cuda_path:
+    print(f"CUDA installation found at: {cuda_path}")
+else:
+    print("CUDA installation not found")
 
+device = torch.device('cuda')
+
+print('Loading diffusion model ...')
+transformer = FluxTransformer2DModel.from_pretrained(
+    "xiaozaa/catvton-flux-alpha",
+    torch_dtype=torch.bfloat16
+)
+pipe = FluxFillPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-dev",
+    transformer=transformer,
+    torch_dtype=torch.bfloat16
+).to(device)
+print('Loading Finished!')
+
+@spaces.GPU
 def gradio_inference(
     image_data,
     garment,
     num_steps=50,
     guidance_scale=30.0,
     seed=-1,
-    size=(768,1024)
+    width=768,
+    height=1024
 ):
     """Wrapper function for Gradio interface"""
     # Use temporary directory
@@ -38,116 +84,132 @@ def gradio_inference(
     try:
         # Run inference
         _, tryon_result = run_inference(
+            pipe=pipe,
             image_path=temp_image,
             mask_path=temp_mask,
             garment_path=temp_garment,
             num_steps=num_steps,
             guidance_scale=guidance_scale,
             seed=seed,
-            size=size
+            size=(width, height)
         )
         return tryon_result
     except Exception as e:
         raise gr.Error(f"Error during inference: {str(e)}")
 
-def create_demo():
-    with gr.Blocks() as demo:
-        gr.Markdown("""
-        # CATVTON FLUX Virtual Try-On Demo
-        Upload a model image, an agnostic mask, and a garment image to generate virtual try-on results.
-
-        [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/xiaozaa/catvton-flux-alpha)
-        [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/nftblackmagic/catvton-flux)
-        """)
-
-        with gr.Column():
-            with gr.Row():
-                with gr.Column():
-                    image_input = gr.ImageMask(
-                        label="Model Image (Draw mask where garment should go)",
-                        type="pil",
-                        height=600,
-                    )
-                    gr.Examples(
-                        examples=[
-                            ["./example/person/00008_00.jpg"],
-                            ["./example/person/00055_00.jpg"],
-                            ["./example/person/00057_00.jpg"],
-                            ["./example/person/00067_00.jpg"],
-                            ["./example/person/00069_00.jpg"],
-                        ],
-                        inputs=[image_input],
-                        label="Person Images",
-                    )
-                with gr.Column():
-                    garment_input = gr.Image(label="Garment Image", type="pil", height=600)
-                    gr.Examples(
-                        examples=[
-                            ["./example/garment/04564_00.jpg"],
-                            ["./example/garment/00055_00.jpg"],
-                            ["./example/garment/00057_00.jpg"],
-                            ["./example/garment/00067_00.jpg"],
-                            ["./example/garment/00069_00.jpg"],
-                        ],
-                        inputs=[garment_input],
-                        label="Garment Images",
-                    )
-
-            with gr.Row():
-                num_steps = gr.Slider(
-                    minimum=1,
-                    maximum=100,
-                    value=50,
-                    step=1,
-                    label="Number of Steps"
-                )
-                guidance_scale = gr.Slider(
-                    minimum=1.0,
-                    maximum=50.0,
-                    value=30.0,
-                    step=0.5,
-                    label="Guidance Scale"
-                )
-                seed = gr.Slider(
-                    minimum=-1,
-                    maximum=2147483647,
-                    step=1,
-                    value=-1,
-                    label="Seed (-1 for random)"
-                )
-
-            submit_btn = gr.Button("Generate Try-On", variant="primary")
-
-            with gr.Column():
-                tryon_output = gr.Image(label="Try-On Result")
-
-            with gr.Row():
-                gr.Markdown("""
-                ### Notes:
-                - The model image should be a full-body photo
-                - The mask should indicate the region where the garment will be placed
-                - The garment image should be on a clean background
-                """)
-
-        submit_btn.click(
-            fn=gradio_inference,
-            inputs=[
-                image_input,
-                garment_input,
-                num_steps,
-                guidance_scale,
-                seed
-            ],
-            outputs=[tryon_output],
-            api_name="try-on"
-        )
-
-    return demo
-
-if __name__ == "__main__":
-    demo = create_demo()
-    demo.queue()  # Enable queuing for multiple users
-    demo.launch(
-        share=True,
-        server_name="0.0.0.0"  # Makes the server accessible from other machines
-    )
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    # CATVTON FLUX Virtual Try-On Demo
+    Upload a model image, draw a mask over the clothing area, and upload a garment image to generate virtual try-on results.
+
+    [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/xiaozaa/catvton-flux-alpha)
+    [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/nftblackmagic/catvton-flux)
+    """)
+
+    gr.Video("example/github.mp4", label="Demo Video: How to use the tool")
+
+    with gr.Column():
+        with gr.Row():
+            with gr.Column():
+                image_input = gr.ImageMask(
+                    label="Model Image (Click 'Edit' and draw mask over the clothing area)",
+                    type="pil",
+                    height=600,
+                    width=300
+                )
+                gr.Examples(
+                    examples=[
+                        ["./example/person/00008_00.jpg"],
+                        ["./example/person/00055_00.jpg"],
+                        ["./example/person/00057_00.jpg"],
+                        ["./example/person/00067_00.jpg"],
+                        ["./example/person/00069_00.jpg"],
+                    ],
+                    inputs=[image_input],
+                    label="Person Images",
+                )
+            with gr.Column():
+                garment_input = gr.Image(label="Garment Image", type="pil", height=600, width=300)
+                gr.Examples(
+                    examples=[
+                        ["./example/garment/04564_00.jpg"],
+                        ["./example/garment/00055_00.jpg"],
+                        ["./example/garment/00396_00.jpg"],
+                        ["./example/garment/00067_00.jpg"],
+                        ["./example/garment/00069_00.jpg"],
+                    ],
+                    inputs=[garment_input],
+                    label="Garment Images",
+                )
+            with gr.Column():
+                tryon_output = gr.Image(label="Try-On Result", height=600, width=300)
+
+        with gr.Row():
+            num_steps = gr.Slider(
+                minimum=1,
+                maximum=100,
+                value=30,
+                step=1,
+                label="Number of Steps"
+            )
+            guidance_scale = gr.Slider(
+                minimum=1.0,
+                maximum=50.0,
+                value=30.0,
+                step=0.5,
+                label="Guidance Scale"
+            )
+            seed = gr.Slider(
+                minimum=-1,
+                maximum=2147483647,
+                step=1,
+                value=-1,
+                label="Seed (-1 for random)"
+            )
+            width = gr.Slider(
+                minimum=256,
+                maximum=1024,
+                step=64,
+                value=768,
+                label="Width"
+            )
+            height = gr.Slider(
+                minimum=256,
+                maximum=1024,
+                step=64,
+                value=1024,
+                label="Height"
+            )
+
+        submit_btn = gr.Button("Generate Try-On", variant="primary")
+
+        with gr.Row():
+            gr.Markdown("""
+            ### Notes:
+            - The model is trained on the VITON-HD dataset and focuses on women's upper-body try-on.
+            - The mask should indicate the region where the garment will be placed.
+            - The garment image should be on a clean background.
+            - The model is not perfect; it may generate artifacts.
+            - Inference is slow; please be patient.
+            - This demo is for research purposes only.
+            """)
+
+    submit_btn.click(
+        fn=gradio_inference,
+        inputs=[
+            image_input,
+            garment_input,
+            num_steps,
+            guidance_scale,
+            seed,
+            width,
+            height
+        ],
+        outputs=[tryon_output],
+        api_name="try-on"
+    )
+
+demo.launch()
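For orientation, the app.py change moves model loading to import time: the FluxFillPipeline is built once and threaded into run_inference through the new pipe argument. Below is a minimal sketch of that flow outside the Gradio UI, assuming a CUDA device and the repo's tryon_inference module; the mask path is a hypothetical placeholder (the Space derives masks from the user's drawing at runtime), and the final save assumes run_inference returns a PIL image, as its use with gr.Image suggests.

    import torch
    from diffusers import FluxTransformer2DModel, FluxFillPipeline
    from tryon_inference import run_inference

    # Build the pipeline once, exactly as the new app.py does at import time
    transformer = FluxTransformer2DModel.from_pretrained(
        "xiaozaa/catvton-flux-alpha", torch_dtype=torch.bfloat16
    )
    pipe = FluxFillPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        transformer=transformer,
        torch_dtype=torch.bfloat16,
    ).to(torch.device("cuda"))

    # Reuse the shared pipeline for each request; only the inputs change
    _, tryon_result = run_inference(
        pipe=pipe,
        image_path="./example/person/00008_00.jpg",
        mask_path="./mask.png",  # hypothetical pre-made binary mask
        garment_path="./example/garment/00396_00.jpg",
        num_steps=30,            # matches the demo's new default
        guidance_scale=30.0,
        seed=-1,
        size=(768, 1024),
    )
    tryon_result.save("tryon_result.png")  # assuming a PIL image is returned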
example/garment/00396_00.jpg ADDED
requirements.txt CHANGED
@@ -37,19 +37,6 @@ multiprocess==0.70.16
 networkx==3.3
 ninja==1.11.1.1
 numpy==1.26.4
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==9.1.0.70
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-ml-py==12.555.43
-nvidia-nccl-cu12==2.20.5
-nvidia-nvjitlink-cu12==12.6.20
-nvidia-nvtx-cu12==12.1.105
 omegaconf==2.3.0
 onnxruntime-gpu==1.18.1
 opencv-python==4.10.0.84
@@ -59,7 +46,6 @@ pandas==2.2.2
 pillow==10.4.0
 platformdirs==4.2.2
 protobuf==5.27.3
-psutil==6.0.0
 py-cpuinfo==9.0.0
 pyarrow==17.0.0
 pydantic==2.8.2
@@ -97,4 +83,5 @@ gradio==5.6.0
 gradio_client==1.4.3
 prodigyopt
 huggingface-hub
+spaces
 git+https://github.com/huggingface/diffusers.git
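The spaces dependency added here is what backs the new @spaces.GPU decorator in app.py: on a ZeroGPU Space, a GPU is attached only while a decorated function executes, which is also the likely reason the pinned nvidia-*-cu12 wheels could be dropped (the hosted runtime supplies the CUDA stack). A minimal sketch of the pattern, with a made-up heavy_step function and an illustrative duration:

    import spaces  # import before CUDA-touching libraries, as app.py does
    import torch

    @spaces.GPU(duration=120)  # hold a GPU for up to ~120 s per call (illustrative)
    def heavy_step(x: torch.Tensor) -> torch.Tensor:
        # CUDA is only guaranteed to be available inside the decorated call
        return (x.to("cuda") * 2).cpu()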