Spaces:

tonyassi
/

fashion-try-on

Running on Zero

App Files Files Community

tonyassi committed on Sep 23

Commit

9cb2064

verified ·

1 Parent(s): fdc2668

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -67

app.py CHANGED Viewed

@@ -1,22 +1,24 @@
 # app.py
 import os
 import spaces
 import gradio as gr
-from PIL import Image
 import torch
 from diffusers import AutoPipelineForInpainting, AutoencoderKL
-# -----------------------------
-# Pure-CPU helpers (no CUDA here)
-# -----------------------------
-from PIL import Image, ImageChops
-import math
-def _round_up(x, m=8):
     return int(math.ceil(x / m) * m)
-def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol=12) -> Image.Image:
     if img.mode in ("RGBA", "LA"):
         alpha = img.split()[-1]
         bbox = alpha.getbbox()
@@ -28,42 +30,46 @@ def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol=12) -> Imag
     bbox = mask.getbbox()
     return img.crop(bbox) if bbox else img
-def squarify_image(img: Image.Image, color="white") -> Image.Image:
-    # 1) trim margins
-    img = autocrop_content(img, bg_color=(255, 255, 255), tol=12)
-    w, h = img.size
-    # 2) target square side rounded **up** to /8
-    side = _round_up(max(w, h), 8)
     bg = Image.new("RGB", (side, side), color=color)
-    bg.paste(img, ((side - w) // 2, (side - h) // 2))
-    return bg
-def divisible_by_8(image: Image.Image) -> Image.Image:
     w, h = image.size
-    # round **up** so we never shrink (keeps content + avoids 1012-style errors)
-    nw = _round_up(w, 8)
-    nh = _round_up(h, 8)
     if (nw, nh) == (w, h):
         return image
     return image.resize((nw, nh), Image.LANCZOS)
-# -----------------------------
 # Lazy singletons (created inside GPU context)
-# -----------------------------
 PIPELINE = None
-IP_LOADED = False
 def _get_pipeline(device: str):
     """
     Create & cache the diffusers pipeline once we actually have a GPU (ZeroGPU).
     No CUDA calls should happen before this is executed.
     """
-    global PIPELINE, IP_LOADED
     if PIPELINE is not None:
-        # ensure it's on the current device (ZeroGPU gives you a device per call)
         PIPELINE.to(device)
         return PIPELINE
@@ -75,11 +81,10 @@ def _get_pipeline(device: str):
     if not ip_adapter_repo:
         raise RuntimeError("Missing env var IP_ADAPTER (e.g. 'h94/IP-Adapter').")
-    # Build VAE & pipeline WITHOUT sending to CUDA yet
-    # (dtype is fine; just don't .to('cuda') at import time)
     vae = AutoencoderKL.from_pretrained(
         "madebyollin/sdxl-vae-fp16-fix",
-        torch_dtype=torch.float16
     )
     pipe = AutoPipelineForInpainting.from_pretrained(
@@ -90,65 +95,56 @@ def _get_pipeline(device: str):
         use_safetensors=True,
     )
-    # Load IP-Adapter weights
-    # (this only attaches modules; not a CUDA op)
     pipe.load_ip_adapter(
         ip_adapter_repo,
         subfolder="sdxl_models",
         weight_name="ip-adapter_sdxl.bin",
     )
-    # NOW move the whole pipeline to the GPU that ZeroGPU just handed us
     pipe.to(device)
     PIPELINE = pipe
-    IP_LOADED = True
     return PIPELINE
-# -----------------------------
 # Main generate (GPU section)
-# -----------------------------
-# Increase duration if you need >60s (100 steps on SDXL often does).
 @spaces.GPU(duration=180)
 def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
     """
     This function is called *after* ZeroGPU allocates a CUDA device.
     All CUDA/ONNXRuntime initializations must happen here (or deeper).
     """
-    # Import segmentation modules *inside* the GPU function so any CUDA/ORT provider
-    # decisions happen after the GPU exists. If these libs choose ORT providers,
-    # do it based on torch.cuda.is_available().
     from SegBody import segment_body
     from SegCloth import segment_clothing
     try:
-        import onnxruntime as ort  # some seg libs use ORT under the hood
-        # If ZeroGPU gave us a CUDA device, ORT can try CUDA; else fallback to CPU.
-        # (If the seg modules create sessions themselves, they should use similar logic.)
-        if torch.cuda.is_available():
-            _ = ort.get_device()  # just to ensure ORT is importable
-        else:
-            # As a defensive fallback, you can force CPU by env (only if needed)
             os.environ.setdefault("ORT_DISABLE_CUDA", "1")
     except Exception:
-        # If onnxruntime isn't used, that's fine.
         pass
     device = "cuda" if torch.cuda.is_available() else "cpu"
     pipe = _get_pipeline(device)
-    # --- Preprocess on CPU (cheap ops)
     person = person.copy()
     clothing = clothing.copy()
     person.thumbnail((1024, 1024))
-    person = divisible_by_8(person)
     clothing.thumbnail((1024, 1024))
-    clothing = divisible_by_8(clothing)
-    image = squarify_image(person)
-    # --- Segmentation (runs after GPU allocation; modules can use GPU if they want)
     seg_image, mask_image = segment_body(image, face=False)
     seg_cloth = segment_clothing(
         clothing,
@@ -158,9 +154,7 @@ def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
     # --- Diffusion
     pipe.set_ip_adapter_scale(1.0)
     result = pipe(
-        prompt=(
-            "photorealistic, perfect body, beautiful skin, realistic skin, natural skin"
-        ),
         negative_prompt=(
             "ugly, bad quality, bad anatomy, deformed body, deformed hands, "
             "deformed feet, deformed face, deformed clothing, deformed skin, "
@@ -176,19 +170,16 @@ def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
         num_inference_steps=100,
     ).images[0]
-    # Crop back to original (pre-squared) person dims
-    final = result.crop((0, 0, person.width, person.height))
     return final
-# -----------------------------
 # Gradio UI
-# -----------------------------
 iface = gr.Interface(
     fn=generate,
-    inputs=[
-        gr.Image(label="Person", type="pil"),
-        gr.Image(label="Clothing", type="pil"),
-    ],
     outputs=[gr.Image(label="Result")],
     title="Fashion Try-On",
     description="""

 # app.py
 import os
+import math
 import spaces
 import gradio as gr
 import torch
+from PIL import Image, ImageChops
 from diffusers import AutoPipelineForInpainting, AutoencoderKL
+# =============================
+# Helpers (CPU-only; no CUDA)
+# =============================
+def _round_up(x: int, m: int = 8) -> int:
     return int(math.ceil(x / m) * m)
+def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol: int = 12) -> Image.Image:
+    """
+    Trim uniform white (or near-white) margins before centering/padding.
+    Handles RGBA via alpha bbox; for RGB compares to a solid background.
+    """
     if img.mode in ("RGBA", "LA"):
         alpha = img.split()[-1]
         bbox = alpha.getbbox()
     bbox = mask.getbbox()
     return img.crop(bbox) if bbox else img
+def square_pad_meta(
+    img: Image.Image, color: str = "white", multiple: int = 8
+) -> tuple[Image.Image, int, int, int, int, int]:
+    """
+    Autocrop -> center-pad to a square whose side is rounded UP to `multiple`.
+    Returns (square_img, left, top, orig_w, orig_h, side), where orig_w/orig_h
+    are the image size AFTER autocropping (not the size of the input image).
+    """
+    img = autocrop_content(img, (255, 255, 255), tol=12)
+    orig_w, orig_h = img.size
+    side = _round_up(max(orig_w, orig_h), multiple)
     bg = Image.new("RGB", (side, side), color=color)
+    left = (side - orig_w) // 2
+    top = (side - orig_h) // 2
+    bg.paste(img, (left, top))
+    return bg, left, top, orig_w, orig_h, side
+def resize_to_multiple(image: Image.Image, m: int = 8) -> Image.Image:
+    """
+    Resize **up** so width/height are multiples of m (avoids 1012x1012 errors).
+    """
     w, h = image.size
+    nw = _round_up(w, m)
+    nh = _round_up(h, m)
     if (nw, nh) == (w, h):
         return image
     return image.resize((nw, nh), Image.LANCZOS)
+# =============================
 # Lazy singletons (created inside GPU context)
+# =============================
 PIPELINE = None
 def _get_pipeline(device: str):
     """
     Create & cache the diffusers pipeline once we actually have a GPU (ZeroGPU).
     No CUDA calls should happen before this is executed.
     """
+    global PIPELINE
     if PIPELINE is not None:
         PIPELINE.to(device)
         return PIPELINE
     if not ip_adapter_repo:
         raise RuntimeError("Missing env var IP_ADAPTER (e.g. 'h94/IP-Adapter').")
+    # Build VAE & pipeline WITHOUT touching CUDA yet.
     vae = AutoencoderKL.from_pretrained(
         "madebyollin/sdxl-vae-fp16-fix",
+        torch_dtype=torch.float16,
     )
     pipe = AutoPipelineForInpainting.from_pretrained(
         use_safetensors=True,
     )
+    # Attach IP-Adapter weights (no CUDA op yet)
     pipe.load_ip_adapter(
         ip_adapter_repo,
         subfolder="sdxl_models",
         weight_name="ip-adapter_sdxl.bin",
     )
+    # NOW move the whole pipeline to the device ZeroGPU assigned
     pipe.to(device)
     PIPELINE = pipe
     return PIPELINE
+# =============================
 # Main generate (GPU section)
+# =============================
 @spaces.GPU(duration=180)
 def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
     """
     This function is called *after* ZeroGPU allocates a CUDA device.
     All CUDA/ONNXRuntime initializations must happen here (or deeper).
     """
+    # Import segmentation modules here so they initialize after GPU exists.
     from SegBody import segment_body
     from SegCloth import segment_clothing
+    # If onnxruntime is used under the hood, ensure it doesn't try CUDA without a GPU.
     try:
+        import onnxruntime as ort  # noqa: F401
+        if not torch.cuda.is_available():
             os.environ.setdefault("ORT_DISABLE_CUDA", "1")
     except Exception:
         pass
     device = "cuda" if torch.cuda.is_available() else "cpu"
     pipe = _get_pipeline(device)
+    # --- Preprocess (CPU)
     person = person.copy()
     clothing = clothing.copy()
+    # Keep person within 1024, then square-pad to /8 and remember offsets.
     person.thumbnail((1024, 1024))
+    square_img, left, top, ow, oh, side = square_pad_meta(person, color="white", multiple=8)
+    image = square_img  # feed this square to seg & pipeline (already /8-compliant)
+    # Clothing can be smaller; make dimensions /8 to be safe.
     clothing.thumbnail((1024, 1024))
+    clothing = resize_to_multiple(clothing, 8)
+    # --- Segmentation (after GPU allocation; modules can use GPU if they choose)
     seg_image, mask_image = segment_body(image, face=False)
     seg_cloth = segment_clothing(
         clothing,
     # --- Diffusion
     pipe.set_ip_adapter_scale(1.0)
     result = pipe(
+        prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin",
         negative_prompt=(
             "ugly, bad quality, bad anatomy, deformed body, deformed hands, "
             "deformed feet, deformed face, deformed clothing, deformed skin, "
         num_inference_steps=100,
     ).images[0]
+    # Crop back to the autocropped (post-thumbnail) person region using the paste offsets;
+    # margins trimmed by autocrop_content are not restored.
+    final = result.crop((left, top, left + ow, top + oh))
     return final
+# =============================
 # Gradio UI
+# =============================
 iface = gr.Interface(
     fn=generate,
+    inputs=[gr.Image(label="Person", type="pil"), gr.Image(label="Clothing", type="pil")],
     outputs=[gr.Image(label="Result")],
     title="Fashion Try-On",
     description="""