File size: 2,898 Bytes
bb2108c 118a337 bb2108c f2d9a72 bb2108c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from huggingface_hub import hf_hub_download
import torch
import PIL
class CosXLEdit():
    """
    Instruction-based image editing with Cos Stable Diffusion XL 1.0.

    Cos Stable Diffusion XL 1.0 Base is tuned to use a Cosine-Continuous EDM
    VPred schedule, and then upgraded to perform instructed image editing.
    Reference: https://huggingface.co/stabilityai/cosxl
    """
    def __init__(self, device="cuda"):
        """
        Download (or locate) the CosXL edit checkpoint and build the pipeline.

        Attributes:
            pipe (CosStableDiffusionXLInstructPix2PixPipeline): The InstructPix2Pix pipeline for image transformation.

        Args:
            device (str, optional): Device on which the pipeline runs. Defaults to "cuda".
        """
        from diffusers import EDMEulerScheduler
        from .cosxl.custom_pipeline import CosStableDiffusionXLInstructPix2PixPipeline
        from .cosxl.utils import set_timesteps_patched

        # Patch the scheduler so timesteps follow the cosine-continuous schedule.
        EDMEulerScheduler.set_timesteps = set_timesteps_patched
        try:
            # Preferred path: fetch the checkpoint from the HF Hub (cached locally).
            edit_file = hf_hub_download(repo_id="TIGER-Lab/cosxl", filename="cosxl_edit.safetensors")
        except Exception:
            # Fallback for offline environments: use a pre-downloaded local copy.
            # NOTE(review): was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to Exception. The pipeline
            # construction is hoisted out of the try so a loading failure is no
            # longer masked by a retry with the same-named local file.
            edit_file = "./black_box_image_edit/cosxl/cosxl_edit.safetensors"
        self.pipe = CosStableDiffusionXLInstructPix2PixPipeline.from_single_file(
            edit_file, num_in_channels=8
        )
        self.pipe.scheduler = EDMEulerScheduler(
            sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction"
        )
        self.pipe.to(device)
        self.pipe.enable_vae_tiling()
        # Offload idle submodules to CPU to lower peak GPU memory usage.
        self.pipe.enable_model_cpu_offload()

    def infer_one_image(self, src_image: PIL.Image.Image = None, src_prompt: str = None, target_prompt: str = None, instruct_prompt: str = None, seed: int = 42, negative_prompt=""):
        """
        Modifies the source image based on the provided instruction prompt.

        Args:
            src_image (PIL.Image.Image): Source image; converted to RGB internally.
            src_prompt (str, optional): Unused here; kept for API compatibility.
            target_prompt (str, optional): Unused here; kept for API compatibility.
            instruct_prompt (str): Caption describing the edit to apply.
            seed (int, optional): Seed for random generator. Defaults to 42.
            negative_prompt (str, optional): Negative prompt forwarded to the pipeline.

        Returns:
            PIL.Image.Image: The transformed image, resized back to the source dimensions.
        """
        src_image = src_image.convert('RGB')  # model expects 3-channel RGB input
        generator = torch.manual_seed(seed)  # deterministic sampling for a given seed
        resolution = 1024  # SDXL native resolution
        preprocessed_image = src_image.resize((resolution, resolution))
        image = self.pipe(
            prompt=instruct_prompt,
            image=preprocessed_image,
            height=resolution,
            width=resolution,
            negative_prompt=negative_prompt,
            guidance_scale=7,
            num_inference_steps=20,
            generator=generator,
        ).images[0]
        # Restore the caller's original image size / aspect ratio.
        image = image.resize((src_image.width, src_image.height))
        return image
|