import sys sys.path.append("./") import gradio as gr import spaces import torch from ip_adapter.utils import BLOCKS as BLOCKS import numpy as np import random from diffusers import ( AutoencoderKL, StableDiffusionXLPipeline, ) from ip_adapter import StyleStudio_Adapter device = "cuda" if torch.cuda.is_available() else "cpu" dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32 base_model_path = "/mnt/agilab/models/sdxl" image_encoder_path = "/mnt/agilab/models/ipadapter_sdxl/image_encoder" csgo_ckpt = "/mnt/agilab/models/CSGO/csgo_4_32.bin" pretrained_vae_name_or_path = '/mnt/agilab/models/madebyollin_sdxl-vae-fp16-fix' weight_dtype = torch.float16 vae = AutoencoderKL.from_pretrained(pretrained_vae_name_or_path,torch_dtype=torch.float16) pipe = StableDiffusionXLPipeline.from_pretrained( base_model_path, torch_dtype=torch.float16, add_watermarker=False, vae=vae ) pipe.enable_vae_tiling() target_style_blocks = BLOCKS['style'] csgo = StyleStudio_Adapter( pipe, image_encoder_path, csgo_ckpt, device, num_style_tokens=32, target_style_blocks=target_style_blocks, controlnet_adapter=False, style_model_resampler=True, fuSAttn=True, end_fusion=20, adainIP=True, ) MAX_SEED = np.iinfo(np.int32).max def get_example(): case = [ [ './assets/style1.jpg', "A red apple", 7.0, 42, 10, ], [ './assets/style2.jpg', "A black car", 7.0, 42, 10, ], [ './assets/style3.jpg', "A orange bus", 7.0, 42, 10, ], ] return case def run_for_examples(style_image_pil, prompt, guidance_scale, seed, end_fusion): return create_image( style_image_pil=style_image_pil, prompt=prompt, neg_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry", guidance_scale=guidance_scale, num_inference_steps=50, seed=seed, end_fusion=end_fusion, use_SAttn=True, crossModalAdaIN=True, ) def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: if randomize_seed: seed = random.randint(0, MAX_SEED) return seed def create_image(style_image_pil, prompt, neg_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry", guidance_scale=7, num_inference_steps=50, end_fusion=20, crossModalAdaIN=True, use_SAttn=True, seed=42, ): style_image = style_image_pil generator = torch.Generator(device).manual_seed(seed) init_latents = torch.randn((1, 4, 128, 128), generator=generator, device="cuda", dtype=torch.float16) num_sample=1 if use_SAttn: num_sample=2 init_latents = init_latents.repeat(num_sample, 1, 1, 1) with torch.no_grad(): images = csgo.generate(pil_style_image=style_image, prompt=prompt, negative_prompt=neg_prompt, height=1024, width=1024, guidance_scale=guidance_scale, num_images_per_prompt=1, num_samples=num_sample, num_inference_steps=num_inference_steps, end_fusion=end_fusion, cross_modal_adain=crossModalAdaIN, use_SAttn=use_SAttn, generator=generator, latents=init_latents, ) if use_SAttn: return [images[1]] else: return [images[0]] # Description title = r"""