Ashoka74 committed
Commit 146a097
1 Parent(s): ec4577d

Update app_3.py

Files changed (1):
  1. app_3.py +50 -15

app_3.py CHANGED
@@ -75,11 +75,11 @@ transform_image = transforms.Compose(
 # Model paths
 model_path = './models/iclight_sd15_fc.safetensors'
 model_path2 = './checkpoints/depth_anything_v2_vits.pth'
-#model_path3 = './checkpoints/sam2_hiera_large.pt'
+model_path3 = './checkpoints/sam2_hiera_large.pt'
 model_path4 = './checkpoints/config.json'
 model_path5 = './checkpoints/preprocessor_config.json'
-#model_path6 = './configs/sam2_hiera_l.yaml'
-#model_path7 = './mvadapter_i2mv_sdxl.safetensors'
+model_path6 = './configs/sam2_hiera_l.yaml'
+model_path7 = './mvadapter_i2mv_sdxl.safetensors'
 
 # Base URL for the repository
 BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
@@ -88,11 +88,11 @@ BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
 model_urls = {
     model_path: 'iclight_sd15_fc.safetensors',
     model_path2: 'depth_anything_v2_vits.pth',
-    #model_path3: 'sam2_hiera_large.pt',
+    model_path3: 'sam2_hiera_large.pt',
     model_path4: 'config.json',
     model_path5: 'preprocessor_config.json',
-    #model_path6: 'sam2_hiera_l.yaml',
-    #model_path7: 'mvadapter_i2mv_sdxl.safetensors'
+    model_path6: 'sam2_hiera_l.yaml',
+    model_path7: 'mvadapter_i2mv_sdxl.safetensors'
 }
 
 # Ensure directories exist
@@ -249,7 +249,9 @@ del sd_offset, sd_origin, sd_merged, keys
 
 # Device and dtype setup
 device = torch.device('cuda')
-dtype = torch.float16 # RTX 2070 works well with float16
+#dtype = torch.float16 # RTX 2070 works well with float16
+dtype = torch.bfloat16
+
 
 pipe = prepare_pipeline(
     base_model="stabilityai/stable-diffusion-xl-base-1.0",
@@ -264,19 +266,19 @@ pipe = prepare_pipeline(
 )
 
 # Memory optimizations for RTX 2070
-torch.backends.cudnn.benchmark = True
-if torch.cuda.is_available():
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.allow_tf32 = True
-    # Set a very small attention slice size for RTX 2070 to avoid OOM
-    torch.backends.cuda.max_split_size_mb = 128
+# torch.backends.cudnn.benchmark = True
+# if torch.cuda.is_available():
+#     torch.backends.cuda.matmul.allow_tf32 = True
+#     torch.backends.cudnn.allow_tf32 = True
+#     # Set a very small attention slice size for RTX 2070 to avoid OOM
+#     torch.backends.cuda.max_split_size_mb = 128
 
 # Move models to device with consistent dtype
 text_encoder = text_encoder.to(device=device, dtype=dtype)
 vae = vae.to(device=device, dtype=dtype) # Changed from bfloat16 to float16
 unet = unet.to(device=device, dtype=dtype)
-rmbg = rmbg.to(device=device, dtype=torch.float32) # Keep this as float32
-
+#rmbg = rmbg.to(device=device, dtype=torch.float32) # Keep this as float32
+rmbg = rmbg.to(device)
 
 ddim_scheduler = DDIMScheduler(
     num_train_timesteps=1000,
@@ -510,6 +512,38 @@ def run_rmbg(image):
     mask = pred_pil.resize(image_size)
     image.putalpha(mask)
     return image
+
+
+
+def preprocess_image(image: Image.Image, height=768, width=768):
+    image = np.array(image)
+    alpha = image[..., 3] > 0
+    H, W = alpha.shape
+    # get the bounding box of alpha
+    y, x = np.where(alpha)
+    y0, y1 = max(y.min() - 1, 0), min(y.max() + 1, H)
+    x0, x1 = max(x.min() - 1, 0), min(x.max() + 1, W)
+    image_center = image[y0:y1, x0:x1]
+    # resize the longer side to H * 0.9
+    H, W, _ = image_center.shape
+    if H > W:
+        W = int(W * (height * 0.9) / H)
+        H = int(height * 0.9)
+    else:
+        H = int(H * (width * 0.9) / W)
+        W = int(width * 0.9)
+    image_center = np.array(Image.fromarray(image_center).resize((W, H)))
+    # pad to H, W
+    start_h = (height - H) // 2
+    start_w = (width - W) // 2
+    image = np.zeros((height, width, 4), dtype=np.uint8)
+    image[start_h : start_h + H, start_w : start_w + W] = image_center
+    image = image.astype(np.float32) / 255.0
+    image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
+    image = (image * 255).clip(0, 255).astype(np.uint8)
+    image = Image.fromarray(image)
+
+    return image
 
 
 @spaces.GPU(duration=60)
@@ -638,6 +672,7 @@ def extract_foreground(image):
     #logging.info(f"Input image shape: {image.shape}, dtype: {image.dtype}")
    #result, rgba = run_rmbg(image)
     result = run_rmbg(image)
+    result = process_image(result)
     #logging.info(f"Result shape: {result.shape}, dtype: {result.dtype}")
     #logging.info(f"RGBA shape: {rgba.shape}, dtype: {rgba.dtype}")
     return result, gr.update(visible=True), gr.update(visible=True)
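A few notes on the changes follow.

The restored model_path3, model_path6, and model_path7 entries extend the download map: model_urls pairs each local checkpoint path with its filename under BASE_URL, and the "# Ensure directories exist" section after that hunk presumably walks this dict. A minimal sketch of such a consumer, assuming a plain-HTTP fetch; download_missing and the use of requests are illustrative, not code from this commit:

import os
import requests  # assumption: the app may equally use huggingface_hub

def download_missing(model_urls, base_url):
    # Hypothetical helper: fetch each mapped checkpoint not yet on disk.
    for local_path, filename in model_urls.items():
        if os.path.exists(local_path):
            continue
        # Guard against bare filenames, where dirname() is empty.
        os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
        # Stream to disk so multi-GB safetensors don't sit in memory.
        with requests.get(base_url + filename, stream=True, timeout=600) as resp:
            resp.raise_for_status()
            with open(local_path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

download_missing(model_urls, BASE_URL)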
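The dtype change deserves a caveat: the surrounding comments still talk about an RTX 2070, but Turing GPUs have no native bfloat16 support, so dtype = torch.bfloat16 can fail or fall back to slow paths there. A guarded alternative, offered as a sketch rather than part of the commit:

import torch

# Hypothetical guard: use bfloat16 only where the GPU supports it
# natively (Ampere or newer); otherwise keep the old float16 choice.
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float16

Two smaller observations on the same hunk: rmbg = rmbg.to(device) now leaves the background-removal model in whatever dtype it was loaded with instead of pinning float32, and the "# Changed from bfloat16 to float16" comment on the vae line no longer matches the code. Little is lost by commenting out the tuning block, since as far as I can tell max_split_size_mb is an allocator option read from the PYTORCH_CUDA_ALLOC_CONF environment variable, not a torch.backends attribute.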
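The new preprocess_image helper crops an RGBA cutout to the bounding box of its alpha channel, resizes the longer side to 90% of the target size, centers the result on a height x width canvas, and composites it over mid-gray (0.5). Two properties of the input matter: it must actually have an alpha channel (the function indexes image[..., 3]), and at least one pixel must be non-transparent, since y.min() on an empty np.where result would raise. A usage sketch; the file name is hypothetical:

from PIL import Image

# An RGBA cutout such as the one produced by run_rmbg above.
rgba = Image.open('cutout.png').convert('RGBA')  # hypothetical input
ready = preprocess_image(rgba)  # 768x768 RGB, object centered on gray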
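One thing to double-check in the last hunk: extract_foreground now calls process_image, while the helper this commit adds is named preprocess_image. Unless a separate process_image is defined elsewhere in app_3.py, that call would raise a NameError at runtime; if the new helper is the intended target, the body would read (an assumption, not what the commit says):

result = run_rmbg(image)
result = preprocess_image(result)  # assuming the new helper is meant here
return result, gr.update(visible=True), gr.update(visible=True)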