Ashoka74 committed
Commit 146a097
1 Parent(s): ec4577d

Update app_3.py

Files changed (1):
  1. app_3.py +50 -15

app_3.py CHANGED
@@ -75,11 +75,11 @@ transform_image = transforms.Compose(
 # Model paths
 model_path = './models/iclight_sd15_fc.safetensors'
 model_path2 = './checkpoints/depth_anything_v2_vits.pth'
-#model_path3 = './checkpoints/sam2_hiera_large.pt'
+model_path3 = './checkpoints/sam2_hiera_large.pt'
 model_path4 = './checkpoints/config.json'
 model_path5 = './checkpoints/preprocessor_config.json'
-#model_path6 = './configs/sam2_hiera_l.yaml'
-#model_path7 = './mvadapter_i2mv_sdxl.safetensors'
+model_path6 = './configs/sam2_hiera_l.yaml'
+model_path7 = './mvadapter_i2mv_sdxl.safetensors'
 
 # Base URL for the repository
 BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
@@ -88,11 +88,11 @@ BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
 model_urls = {
     model_path: 'iclight_sd15_fc.safetensors',
     model_path2: 'depth_anything_v2_vits.pth',
-    #model_path3: 'sam2_hiera_large.pt',
+    model_path3: 'sam2_hiera_large.pt',
     model_path4: 'config.json',
     model_path5: 'preprocessor_config.json',
-    #model_path6: 'sam2_hiera_l.yaml',
-    #model_path7: 'mvadapter_i2mv_sdxl.safetensors'
+    model_path6: 'sam2_hiera_l.yaml',
+    model_path7: 'mvadapter_i2mv_sdxl.safetensors'
 }
 
 # Ensure directories exist
@@ -249,7 +249,9 @@ del sd_offset, sd_origin, sd_merged, keys
 
 # Device and dtype setup
 device = torch.device('cuda')
-dtype = torch.float16 # RTX 2070 works well with float16
+#dtype = torch.float16 # RTX 2070 works well with float16
+dtype = torch.bfloat16
+
 
 pipe = prepare_pipeline(
     base_model="stabilityai/stable-diffusion-xl-base-1.0",
@@ -264,19 +266,19 @@ pipe = prepare_pipeline(
 )
 
 # Memory optimizations for RTX 2070
-torch.backends.cudnn.benchmark = True
-if torch.cuda.is_available():
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.allow_tf32 = True
-    # Set a very small attention slice size for RTX 2070 to avoid OOM
-    torch.backends.cuda.max_split_size_mb = 128
+# torch.backends.cudnn.benchmark = True
+# if torch.cuda.is_available():
+#     torch.backends.cuda.matmul.allow_tf32 = True
+#     torch.backends.cudnn.allow_tf32 = True
+#     # Set a very small attention slice size for RTX 2070 to avoid OOM
+#     torch.backends.cuda.max_split_size_mb = 128
 
 # Move models to device with consistent dtype
 text_encoder = text_encoder.to(device=device, dtype=dtype)
 vae = vae.to(device=device, dtype=dtype) # Changed from bfloat16 to float16
 unet = unet.to(device=device, dtype=dtype)
-rmbg = rmbg.to(device=device, dtype=torch.float32) # Keep this as float32
-
+#rmbg = rmbg.to(device=device, dtype=torch.float32) # Keep this as float32
+rmbg = rmbg.to(device)
 
 ddim_scheduler = DDIMScheduler(
     num_train_timesteps=1000,
@@ -510,6 +512,38 @@ def run_rmbg(image):
     mask = pred_pil.resize(image_size)
     image.putalpha(mask)
     return image
+
+
+
+def preprocess_image(image: Image.Image, height=768, width=768):
+    image = np.array(image)
+    alpha = image[..., 3] > 0
+    H, W = alpha.shape
+    # get the bounding box of alpha
+    y, x = np.where(alpha)
+    y0, y1 = max(y.min() - 1, 0), min(y.max() + 1, H)
+    x0, x1 = max(x.min() - 1, 0), min(x.max() + 1, W)
+    image_center = image[y0:y1, x0:x1]
+    # resize the longer side to H * 0.9
+    H, W, _ = image_center.shape
+    if H > W:
+        W = int(W * (height * 0.9) / H)
+        H = int(height * 0.9)
+    else:
+        H = int(H * (width * 0.9) / W)
+        W = int(width * 0.9)
+    image_center = np.array(Image.fromarray(image_center).resize((W, H)))
+    # pad to H, W
+    start_h = (height - H) // 2
+    start_w = (width - W) // 2
+    image = np.zeros((height, width, 4), dtype=np.uint8)
+    image[start_h : start_h + H, start_w : start_w + W] = image_center
+    image = image.astype(np.float32) / 255.0
+    image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
+    image = (image * 255).clip(0, 255).astype(np.uint8)
+    image = Image.fromarray(image)
+
+    return image
 
 
 @spaces.GPU(duration=60)
@@ -638,6 +672,7 @@ def extract_foreground(image):
     #logging.info(f"Input image shape: {image.shape}, dtype: {image.dtype}")
    #result, rgba = run_rmbg(image)
     result = run_rmbg(image)
+    result = process_image(result)
     #logging.info(f"Result shape: {result.shape}, dtype: {result.dtype}")
     #logging.info(f"RGBA shape: {rgba.shape}, dtype: {rgba.dtype}")
     return result, gr.update(visible=True), gr.update(visible=True)
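A few notes on the changes follow.

The restored model_path3, model_path6, and model_path7 entries extend the download map: model_urls pairs each local checkpoint path with its filename under BASE_URL, and the "# Ensure directories exist" section after that hunk presumably walks this dict. A minimal sketch of such a consumer, assuming a plain-HTTP fetch; download_missing and the use of requests are illustrative, not code from this commit:

import os
import requests  # assumption: the app may equally use huggingface_hub

def download_missing(model_urls, base_url):
    # Hypothetical helper: fetch each mapped checkpoint not yet on disk.
    for local_path, filename in model_urls.items():
        if os.path.exists(local_path):
            continue
        # Guard against bare filenames, where dirname() is empty.
        os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
        # Stream to disk so multi-GB safetensors don't sit in memory.
        with requests.get(base_url + filename, stream=True, timeout=600) as resp:
            resp.raise_for_status()
            with open(local_path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

download_missing(model_urls, BASE_URL)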
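The dtype change deserves a caveat: the surrounding comments still talk about an RTX 2070, but Turing GPUs have no native bfloat16 support, so dtype = torch.bfloat16 can fail or fall back to slow paths there. A guarded alternative, offered as a sketch rather than part of the commit:

import torch

# Hypothetical guard: use bfloat16 only where the GPU supports it
# natively (Ampere or newer); otherwise keep the old float16 choice.
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float16

Two smaller observations on the same hunk: rmbg = rmbg.to(device) now leaves the background-removal model in whatever dtype it was loaded with instead of pinning float32, and the "# Changed from bfloat16 to float16" comment on the vae line no longer matches the code. Little is lost by commenting out the tuning block, since as far as I can tell max_split_size_mb is an allocator option read from the PYTORCH_CUDA_ALLOC_CONF environment variable, not a torch.backends attribute.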
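The new preprocess_image helper crops an RGBA cutout to the bounding box of its alpha channel, resizes the longer side to 90% of the target size, centers the result on a height x width canvas, and composites it over mid-gray (0.5). Two properties of the input matter: it must actually have an alpha channel (the function indexes image[..., 3]), and at least one pixel must be non-transparent, since y.min() on an empty np.where result would raise. A usage sketch; the file name is hypothetical:

from PIL import Image

# An RGBA cutout such as the one produced by run_rmbg above.
rgba = Image.open('cutout.png').convert('RGBA')  # hypothetical input
ready = preprocess_image(rgba)  # 768x768 RGB, object centered on gray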
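One thing to double-check in the last hunk: extract_foreground now calls process_image, while the helper this commit adds is named preprocess_image. Unless a separate process_image is defined elsewhere in app_3.py, that call would raise a NameError at runtime; if the new helper is the intended target, the body would read (an assumption, not what the commit says):

result = run_rmbg(image)
result = preprocess_image(result)  # assuming the new helper is meant here
return result, gr.update(visible=True), gr.update(visible=True)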