# Captured from a Hugging Face Space (status at capture time: running on A100).
import os

import gradio as gr
import numpy as np
import psutil
import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
from PIL import Image

# Optional Intel accelerator support. The import is attempted only after
# `torch` has been imported and any failure is non-fatal, so machines without
# the package still start normally.
try:
    import intel_extension_for_pytorch as ipex  # noqa: F401
except (Exception, SystemExit):
    # NOTE(review): SystemExit is caught on purpose. The original bare
    # `except:` also tolerated an import-time exit() from the package
    # (presumably a version-mismatch guard; confirm). KeyboardInterrupt is
    # no longer swallowed.
    pass
# --- Runtime configuration, read from optional environment variables --------
SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# --- Accelerator detection ---------------------------------------------------
# MPS is only available on macOS with M1/M2/M3 chips.
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()

# `device` is where inference finally runs. `torch_device` is the device used
# for the first `.to(...)` hop, the one that also applies `torch_dtype`.
if mps_available:
    # On MPS the dtype cast happens on the CPU in float32, then the pipeline
    # is moved to the MPS device.
    device = torch.device("mps")
    torch_device = "cpu"
    torch_dtype = torch.float32
else:
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
    )
    torch_device = device
    # NOTE(review): float16 is also selected on the plain-CPU fallback;
    # confirm that half precision is intended there.
    torch_dtype = torch.float16

# Log after the MPS override so the reported device is the one actually used.
print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
print(f"TORCH_COMPILE: {TORCH_COMPILE}")
print(f"device: {device}")
# --- Pipeline construction ---------------------------------------------------
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# The safety checker is kept only when SAFETY_CHECKER is exactly "True".
if SAFETY_CHECKER == "True":
    pipe = DiffusionPipeline.from_pretrained(model_id)
else:
    pipe = DiffusionPipeline.from_pretrained(model_id, safety_checker=None)

# Replace the default scheduler with an LCM scheduler built from its config.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Two hops: cast to `torch_dtype` on `torch_device`, then move to `device`.
# The two devices can differ (see the MPS branch of the device selection).
pipe.to(device=torch_device, dtype=torch_dtype).to(device)
pipe.unet.to(memory_format=torch.channels_last)

# Enable attention slicing on machines with less than 64 GiB of RAM.
if psutil.virtual_memory().total < 64 * 1024**3:
    pipe.enable_attention_slicing()

# Any non-empty TORCH_COMPILE value enables compilation, except explicit
# false-like strings. A plain truthiness test would turn compilation on for
# TORCH_COMPILE=False.
compile_requested = bool(TORCH_COMPILE) and TORCH_COMPILE.strip().lower() not in {
    "0",
    "false",
    "no",
    "off",
}
if compile_requested:
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
    # One throw-away call right after compiling.
    # NOTE(review): placed inside the `if` on the assumption that the warm-up
    # belongs to the compile step; the original indentation was lost.
    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)

# Load LCM LoRA
# NOTE(review): `use_auth_token` is kept as-is; check whether the pinned
# diffusers version prefers `token=` before renaming it.
pipe.load_lora_weights(
    "lcm-sd/lcm-sdxl-lora",
    weight_name="lcm_sdxl_lora.safetensors",
    #adapter_name="lcm",
    use_auth_token=HF_TOKEN,
)

## Load papercut LoRA
#pipe.load_lora_weights(
#    "TheLastBen/Papercut_SDXL",
#    weight_name="papercut.safetensors",
#    adapter_name="papercut",
#)

# Mix the LoRAs
#pipe.set_adapters(["lcm", "papercut"], adapter_weights=[1.0, 0.8])

# Prompt-embedding helper wired to both of the pipeline's tokenizer and
# text-encoder pairs; pooled embeddings are requested from the second one only.
compel_proc = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)
def predict(
    prompt: str,
    guidance: float,
    steps: int,
    seed: int = 1231231,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one 1024x1024 image for ``prompt`` with the module-level pipeline.

    Args:
        prompt: Text prompt; it is turned into embeddings by ``compel_proc``.
        guidance: Value forwarded as ``guidance_scale``.
        steps: Value forwarded as ``num_inference_steps``.
        seed: Seed passed to ``torch.manual_seed`` for this generation.
        progress: Gradio progress tracker. It is not used directly; it is
            declared so the UI can mirror tqdm progress.

    Returns:
        The first image of the pipeline result (``output_type="pil"``).

    Raises:
        gr.Error: If the result flags its first image as NSFW.
    """
    # UI and API clients may send numbers as floats; coerce so the scheduler
    # always receives an integer step count.
    generator = torch.manual_seed(int(seed))
    prompt_embeds, pooled_prompt_embeds = compel_proc(prompt)

    results = pipe(
        prompt_embeds=prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        generator=generator,
        num_inference_steps=int(steps),
        guidance_scale=float(guidance),
        width=1024,
        height=1024,
        # original_inference_steps=params.lcm_steps,
        output_type="pil",
    )

    # Not every pipeline output carries NSFW flags; a missing, None or empty
    # flag list counts as "not flagged".
    nsfw_flags = getattr(results, "nsfw_content_detected", None)
    if nsfw_flags and nsfw_flags[0]:
        raise gr.Error("NSFW content detected.")
    return results.images[0]
# Page-level stylesheet handed to gr.Blocks: centres the main column
# (#container) and the intro text (#intro).
css = """
#container{
    margin: 0 auto;
    max-width: 50rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""
# Markdown shown in the UI. Kept as column-0 constants so the rendered text
# does not depend on how the layout code below is indented.
INTRO_MD = """# Ultra-Fast SDXL with Latent Consistency LoRA
SDXL is loaded with a LCM-LoRA, giving it the super power of doing inference in as little as 4 steps. [Learn more on our blog](#) or [technical report](#).
"""

USAGE_MD = '''## Using it with `diffusers`
```py
from diffusers import DiffusionPipeline, LCMScheduler
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("lcm-sd/lcm-sdxl-lora")
results = pipe(
    prompt="The spirit of a tamagotchi wandering in the city of Vienna",
    num_inference_steps=4,
    guidance_scale=0.5,
)
results.images[0]
```
'''

# --- UI layout ---------------------------------------------------------------
# NOTE(review): the nesting below is reconstructed from the statement order;
# the original indentation was lost, so confirm it against the deployed layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        gr.Markdown(INTRO_MD, elem_id="intro")

        # Prompt box and trigger button share one row.
        with gr.Row():
            with gr.Row():
                prompt = gr.Textbox(
                    placeholder="Insert your prompt here:",
                    value="papercut style of a cute monster",
                    scale=5,
                    container=False,
                )
                generate_bt = gr.Button("Generate", scale=1)

        image = gr.Image(type="filepath")

        with gr.Accordion("Advanced options", open=False):
            guidance = gr.Slider(
                label="Guidance", minimum=0.0, maximum=5, value=0.3, step=0.001
            )
            steps = gr.Slider(label="Steps", value=4, minimum=2, maximum=10, step=1)
            seed = gr.Slider(
                randomize=True, minimum=0, maximum=12013012031030, label="Seed", step=1
            )

        with gr.Group():
            gr.Markdown(USAGE_MD)

        # The order must match predict(prompt, guidance, steps, seed).
        inputs = [prompt, guidance, steps, seed]
        generate_bt.click(fn=predict, inputs=inputs, outputs=image)

demo.queue()
demo.launch()