Spaces:
Running
on
Zero
Running
on
Zero
Update app_3.py
Browse files
app_3.py
CHANGED
@@ -75,11 +75,11 @@ transform_image = transforms.Compose(
|
|
75 |
# Model paths
|
76 |
model_path = './models/iclight_sd15_fc.safetensors'
|
77 |
model_path2 = './checkpoints/depth_anything_v2_vits.pth'
|
78 |
-
|
79 |
model_path4 = './checkpoints/config.json'
|
80 |
model_path5 = './checkpoints/preprocessor_config.json'
|
81 |
-
|
82 |
-
|
83 |
|
84 |
# Base URL for the repository
|
85 |
BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
|
@@ -88,11 +88,11 @@ BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
|
|
88 |
model_urls = {
|
89 |
model_path: 'iclight_sd15_fc.safetensors',
|
90 |
model_path2: 'depth_anything_v2_vits.pth',
|
91 |
-
|
92 |
model_path4: 'config.json',
|
93 |
model_path5: 'preprocessor_config.json',
|
94 |
-
|
95 |
-
|
96 |
}
|
97 |
|
98 |
# Ensure directories exist
|
@@ -249,7 +249,9 @@ del sd_offset, sd_origin, sd_merged, keys
|
|
249 |
|
250 |
# Device and dtype setup
|
251 |
device = torch.device('cuda')
|
252 |
-
dtype = torch.float16 # RTX 2070 works well with float16
|
|
|
|
|
253 |
|
254 |
pipe = prepare_pipeline(
|
255 |
base_model="stabilityai/stable-diffusion-xl-base-1.0",
|
@@ -264,19 +266,19 @@ pipe = prepare_pipeline(
|
|
264 |
)
|
265 |
|
266 |
# Memory optimizations for RTX 2070
|
267 |
-
torch.backends.cudnn.benchmark = True
|
268 |
-
if torch.cuda.is_available():
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
|
274 |
# Move models to device with consistent dtype
|
275 |
text_encoder = text_encoder.to(device=device, dtype=dtype)
|
276 |
vae = vae.to(device=device, dtype=dtype) # Changed from bfloat16 to float16
|
277 |
unet = unet.to(device=device, dtype=dtype)
|
278 |
-
rmbg = rmbg.to(device=device, dtype=torch.float32) # Keep this as float32
|
279 |
-
|
280 |
|
281 |
ddim_scheduler = DDIMScheduler(
|
282 |
num_train_timesteps=1000,
|
@@ -510,6 +512,38 @@ def run_rmbg(image):
|
|
510 |
mask = pred_pil.resize(image_size)
|
511 |
image.putalpha(mask)
|
512 |
return image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
513 |
|
514 |
|
515 |
@spaces.GPU(duration=60)
|
@@ -638,6 +672,7 @@ def extract_foreground(image):
|
|
638 |
#logging.info(f"Input image shape: {image.shape}, dtype: {image.dtype}")
|
639 |
#result, rgba = run_rmbg(image)
|
640 |
result = run_rmbg(image)
|
|
|
641 |
#logging.info(f"Result shape: {result.shape}, dtype: {result.dtype}")
|
642 |
#logging.info(f"RGBA shape: {rgba.shape}, dtype: {rgba.dtype}")
|
643 |
return result, gr.update(visible=True), gr.update(visible=True)
|
|
|
75 |
# Model paths
|
76 |
model_path = './models/iclight_sd15_fc.safetensors'
|
77 |
model_path2 = './checkpoints/depth_anything_v2_vits.pth'
|
78 |
+
model_path3 = './checkpoints/sam2_hiera_large.pt'
|
79 |
model_path4 = './checkpoints/config.json'
|
80 |
model_path5 = './checkpoints/preprocessor_config.json'
|
81 |
+
model_path6 = './configs/sam2_hiera_l.yaml'
|
82 |
+
model_path7 = './mvadapter_i2mv_sdxl.safetensors'
|
83 |
|
84 |
# Base URL for the repository
|
85 |
BASE_URL = 'https://huggingface.co/Ashoka74/Placement/resolve/main/'
|
|
|
88 |
model_urls = {
|
89 |
model_path: 'iclight_sd15_fc.safetensors',
|
90 |
model_path2: 'depth_anything_v2_vits.pth',
|
91 |
+
model_path3: 'sam2_hiera_large.pt',
|
92 |
model_path4: 'config.json',
|
93 |
model_path5: 'preprocessor_config.json',
|
94 |
+
model_path6: 'sam2_hiera_l.yaml',
|
95 |
+
model_path7: 'mvadapter_i2mv_sdxl.safetensors'
|
96 |
}
|
97 |
|
98 |
# Ensure directories exist
|
|
|
249 |
|
250 |
# Device and dtype setup
|
251 |
device = torch.device('cuda')
|
252 |
+
#dtype = torch.float16 # RTX 2070 works well with float16
|
253 |
+
dtype = torch.bfloat16
|
254 |
+
|
255 |
|
256 |
pipe = prepare_pipeline(
|
257 |
base_model="stabilityai/stable-diffusion-xl-base-1.0",
|
|
|
266 |
)
|
267 |
|
268 |
# Memory optimizations for RTX 2070 (currently disabled; the settings below are commented out)
|
269 |
+
# torch.backends.cudnn.benchmark = True
|
270 |
+
# if torch.cuda.is_available():
|
271 |
+
# torch.backends.cuda.matmul.allow_tf32 = True
|
272 |
+
# torch.backends.cudnn.allow_tf32 = True
|
273 |
+
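# # NOTE: max_split_size_mb is a CUDA caching-allocator option (normally set via PYTORCH_CUDA_ALLOC_CONF), not an attention slice size; it was meant to reduce OOM on the RTX 2070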
|
274 |
+
# torch.backends.cuda.max_split_size_mb = 128
|
275 |
|
276 |
# Move models to device with consistent dtype
|
277 |
text_encoder = text_encoder.to(device=device, dtype=dtype)
|
278 |
vae = vae.to(device=device, dtype=dtype)  # Uses the shared `dtype` set above (torch.bfloat16 in this revision, previously float16)
|
279 |
unet = unet.to(device=device, dtype=dtype)
|
280 |
+
#rmbg = rmbg.to(device=device, dtype=torch.float32) # Keep this as float32
|
281 |
+
rmbg = rmbg.to(device)
|
282 |
|
283 |
ddim_scheduler = DDIMScheduler(
|
284 |
num_train_timesteps=1000,
|
|
|
512 |
mask = pred_pil.resize(image_size)
|
513 |
image.putalpha(mask)
|
514 |
return image
|
515 |
+
|
516 |
+
|
517 |
+
|
518 |
+
def preprocess_image(image: Image.Image, height=768, width=768):
    """Centre the visible foreground of an image on a mid-gray canvas.

    The bounding box of all pixels with non-zero alpha is cropped, resized so
    that it occupies 90% of the target canvas along its longer side, pasted in
    the middle of a ``height`` x ``width`` canvas and alpha-composited over a
    0.5 (mid-gray) background.

    Args:
        image: Source picture. Images that are not already in ``RGBA`` mode
            are converted, so an image without an alpha channel is treated as
            fully opaque.
        height: Height of the returned image in pixels.
        width: Width of the returned image in pixels.

    Returns:
        An RGB ``PIL.Image.Image`` of size ``(width, height)``. When the input
        has no visible pixel at all, the result is a plain mid-gray canvas.
    """
    # Guarantee a 4-channel layout; indexing channel 3 below would otherwise
    # fail for RGB / grayscale / palette inputs.
    if isinstance(image, Image.Image) and image.mode != "RGBA":
        image = image.convert("RGBA")
    pixels = np.array(image)

    # Transparent canvas the resized foreground is pasted onto.
    canvas = np.zeros((height, width, 4), dtype=np.uint8)

    rows, cols = np.where(pixels[..., 3] > 0)
    if rows.size:  # skip the crop entirely when nothing is visible
        src_h, src_w = pixels.shape[:2]
        # Bounding box of the visible pixels, clamped to the image.
        y0, y1 = max(rows.min() - 1, 0), min(rows.max() + 1, src_h)
        x0, x1 = max(cols.min() - 1, 0), min(cols.max() + 1, src_w)
        crop = pixels[y0:y1, x0:x1]
        crop_h, crop_w = crop.shape[:2]

        def _fit(by_height):
            """Return (new_h, new_w) filling 90% of the chosen canvas side."""
            if by_height:
                return int(height * 0.9), int(crop_w * (height * 0.9) / crop_h)
            return int(crop_h * (width * 0.9) / crop_w), int(width * 0.9)

        # Scale along the longer side of the crop; on a non-square canvas that
        # choice can overflow the other dimension, in which case fit the other
        # side instead so the paste below always lands inside the canvas.
        new_h, new_w = _fit(crop_h > crop_w)
        if new_h > height or new_w > width:
            new_h, new_w = _fit(crop_h <= crop_w)
        # Extremely thin crops can round down to 0, which resize() rejects.
        new_h, new_w = max(new_h, 1), max(new_w, 1)

        resized = np.array(Image.fromarray(crop).resize((new_w, new_h)))

        # Centre the resized foreground on the canvas.
        top = (height - new_h) // 2
        left = (width - new_w) // 2
        canvas[top : top + new_h, left : left + new_w] = resized

    # Alpha-composite over a 0.5 gray background and drop the alpha channel.
    rgba = canvas.astype(np.float32) / 255.0
    alpha = rgba[:, :, 3:4]
    blended = rgba[:, :, :3] * alpha + (1 - alpha) * 0.5
    blended = (blended * 255).clip(0, 255).astype(np.uint8)

    return Image.fromarray(blended)
|
547 |
|
548 |
|
549 |
@spaces.GPU(duration=60)
|
|
|
672 |
#logging.info(f"Input image shape: {image.shape}, dtype: {image.dtype}")
|
673 |
#result, rgba = run_rmbg(image)
|
674 |
result = run_rmbg(image)
|
675 |
+
result = process_image(result)
|
676 |
#logging.info(f"Result shape: {result.shape}, dtype: {result.dtype}")
|
677 |
#logging.info(f"RGBA shape: {rgba.shape}, dtype: {rgba.dtype}")
|
678 |
return result, gr.update(visible=True), gr.update(visible=True)
|