"""Text-to-image demo: run the Stable Diffusion pipeline on a single prompt.

Loads the CLIP tokenizer and the v1.5 EMA-only checkpoint, generates one
image with classifier-free guidance on the best available device, and
writes the result to ``output.png``.
"""

import model_loader
import pipeline
from PIL import Image
from pathlib import Path
from transformers import CLIPTokenizer
import torch

DEVICE = "cpu"

# Toggles for which accelerators the script is allowed to select.
ALLOW_CUDA = True
ALLOW_MPS = False

if torch.cuda.is_available() and ALLOW_CUDA:
    DEVICE = "cuda"
# BUGFIX: ALLOW_MPS was declared but never consulted; honor it here so
# Apple-silicon users can opt in to the "mps" backend.
elif torch.backends.mps.is_available() and ALLOW_MPS:
    DEVICE = "mps"

print(f"Using device: {DEVICE}")

# CLIPTokenizer needs both the vocabulary JSON and the BPE merges file.
tokenizer = CLIPTokenizer(
    "../data/tokenizer_vocab.json",
    merges_file="../data/tokenizer_merges.txt",
)
model_file = "../data/v1-5-pruned-emaonly.ckpt"
models = model_loader.preload_models_from_standard_weights(model_file, device=DEVICE)

## TEXT TO IMAGE
prompt = "A cat stretching on the floor, highly detailed, ultra sharp, cinematic, 100mm lens, 8k resolution."
uncond_prompt = ""  # negative prompt (empty = unconditional)
do_cfg = True       # enable classifier-free guidance
cfg_scale = 8       # guidance strength; higher follows the prompt more closely

## IMAGE TO IMAGE
# Left disabled by default; set input_image to a PIL image to condition
# generation on it, e.g.:  input_image = Image.open(image_path)
input_image = None
image_path = "../images/dog.jpg"
# strength: how much noise to add to the input image (only used for
# img2img; higher = less faithful to the input).
strength = 0.9

## SAMPLER
sampler = "ddpm"
# NOTE(review): 2 steps is far too few for a usable sample — presumably a
# smoke-test value; ~50 is typical for DDPM. Raise for real output.
num_inference_steps = 2
seed = 42

output_image = pipeline.generate(
    prompt=prompt,
    uncond_prompt=uncond_prompt,
    input_image=input_image,
    strength=strength,
    do_cfg=do_cfg,
    cfg_scale=cfg_scale,
    sampler_name=sampler,
    n_inference_steps=num_inference_steps,
    seed=seed,
    models=models,
    device=DEVICE,
    idle_device="cpu",  # park idle sub-models on CPU to save accelerator memory
    tokenizer=tokenizer,
)

# BUGFIX: the generated array was previously wrapped in a PIL Image and then
# discarded (bare expression statement); persist it to disk instead.
Image.fromarray(output_image).save("output.png")