|
|
|
|
|
import torch |
|
import models |
|
from models import pipelines |
|
from shared import model_dict, DEFAULT_OVERALL_NEGATIVE_PROMPT |
|
import gc |
|
from io import BytesIO |
|
import base64 |
|
import PIL.Image |
|
|
|
# Pull the shared model components out of ``model_dict`` once at import time
# so the code below can refer to them by short module-level names.
vae = model_dict.vae
tokenizer = model_dict.tokenizer
text_encoder = model_dict.text_encoder
unet = model_dict.unet
scheduler = model_dict.scheduler
dtype = model_dict.dtype

# This module only runs inference, so gradient tracking is turned off globally.
torch.set_grad_enabled(False)

# Generation settings read by ``run``.
height = 512
width = 512
guidance_scale = 7.5
batch_size = 1

# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably consumed by importers of this module -- confirm before removing.
image_scale = (512, 512)

# Negative prompt handed to ``models.encode_prompts`` for every generation.
bg_negative = DEFAULT_OVERALL_NEGATIVE_PROMPT
|
|
|
|
|
def run(prompt, scheduler_key='dpm_scheduler', bg_seed=1, num_inference_steps=20):
    """Generate one image for ``prompt`` and also return it as a base64 PNG.

    Args:
        prompt: Text prompt. It is echoed to stdout and passed to
            ``models.encode_prompts`` as the only entry of the prompt list.
        scheduler_key: Forwarded unchanged to ``pipelines.generate``.
        bg_seed: Seed passed to ``torch.manual_seed``; the generator it
            returns is used to draw the initial latents.
        num_inference_steps: Forwarded unchanged to ``pipelines.generate``.

    Returns:
        A 2-tuple ``(image, base64_png)``. ``image`` is the first entry of
        the images returned by ``pipelines.generate``; ``base64_png`` is that
        same image saved as PNG and base64-encoded into a UTF-8 ``str``.
    """
    print(f"prompt: {prompt}")
    rng = torch.manual_seed(bg_seed)

    # Embeddings for the single prompt, paired with the module-wide negative
    # prompt.
    text_conditioning = models.encode_prompts(
        prompts=[prompt],
        tokenizer=tokenizer,
        text_encoder=text_encoder,
        negative_prompt=bg_negative,
    )

    raw_latents = models.get_unscaled_latents(
        batch_size, unet.config.in_channels, height, width, rng, dtype
    )
    # NOTE(review): the scale factor comes from the module-level default
    # ``scheduler``, not from the scheduler named by ``scheduler_key`` --
    # confirm this is intended when a non-default key is passed.
    scaled_latents = raw_latents * scheduler.init_noise_sigma

    # The fuser is switched off before sampling.
    pipelines.gligen_enable_fuser(model_dict['unet'], enabled=False)
    _, images = pipelines.generate(
        model_dict,
        scaled_latents,
        text_conditioning,
        num_inference_steps,
        guidance_scale=guidance_scale,
        scheduler_key=scheduler_key,
    )
    first_image = images[0]

    # Serialise the image to PNG in memory, then base64-encode the bytes.
    png_stream = BytesIO()
    PIL.Image.fromarray(first_image).save(png_stream, format='PNG')
    encoded_png = base64.b64encode(png_stream.getvalue()).decode('utf-8')

    # Release Python garbage and cached CUDA memory before returning.
    gc.collect()
    torch.cuda.empty_cache()

    return first_image, encoded_png
|
|