Spaces:

cbensimon
/

PASD

Paused

App Files Files Community

PASD / app.py

cbensimon HF staff

Fix samples path

a706bd8 10 months ago

raw

history blame contribute delete

10.6 kB

	import torch
	import types
	torch.cuda.get_device_capability = lambda args, *kwargs: (8, 6)
	torch.cuda.get_device_properties = lambda args, *kwargs: types.SimpleNamespace(name='NVIDIA A10G', major=8, minor=6, total_memory=23836033024, multi_processor_count=80)

	import huggingface_hub
	huggingface_hub.snapshot_download(
	repo_id='camenduru/PASD',
	allow_patterns=[
	'pasd/**',
	'pasd_light/**',
	'pasd_light_rrdb/**',
	'pasd_rrdb/**',
	],
	local_dir='PASD/runs',
	local_dir_use_symlinks=False,
	)
	huggingface_hub.hf_hub_download(
	repo_id='camenduru/PASD',
	filename='majicmixRealistic_v6.safetensors',
	local_dir='PASD/checkpoints/personalized_models',
	local_dir_use_symlinks=False,
	)
	huggingface_hub.hf_hub_download(
	repo_id='akhaliq/RetinaFace-R50',
	filename='RetinaFace-R50.pth',
	local_dir='PASD/annotator/ckpts',
	local_dir_use_symlinks=False,
	)

	import sys; sys.path.append('./PASD')
	import spaces
	import os
	import datetime
	import einops
	import gradio as gr
	from gradio_imageslider import ImageSlider
	import numpy as np
	import torch
	import random
	from PIL import Image
	from pathlib import Path
	from torchvision import transforms
	import torch.nn.functional as F
	from torchvision.models import resnet50, ResNet50_Weights

	from pytorch_lightning import seed_everything
	from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
	from diffusers import AutoencoderKL, DDIMScheduler, PNDMScheduler, DPMSolverMultistepScheduler, UniPCMultistepScheduler

	from pipelines.pipeline_pasd import StableDiffusionControlNetPipeline
	from myutils.misc import load_dreambooth_lora, rand_name
	from myutils.wavelet_color_fix import wavelet_color_fix
	from annotator.retinaface import RetinaFaceDetection

	use_pasd_light = False
	face_detector = RetinaFaceDetection()

	if use_pasd_light:
	from models.pasd_light.unet_2d_condition import UNet2DConditionModel
	from models.pasd_light.controlnet import ControlNetModel
	else:
	from models.pasd.unet_2d_condition import UNet2DConditionModel
	from models.pasd.controlnet import ControlNetModel

	pretrained_model_path = "runwayml/stable-diffusion-v1-5"
	ckpt_path = "PASD/runs/pasd/checkpoint-100000"
	#dreambooth_lora_path = "checkpoints/personalized_models/toonyou_beta3.safetensors"
	dreambooth_lora_path = "PASD/checkpoints/personalized_models/majicmixRealistic_v6.safetensors"
	#dreambooth_lora_path = "checkpoints/personalized_models/Realistic_Vision_V5.1.safetensors"
	weight_dtype = torch.float16
	device = "cuda"

	scheduler = UniPCMultistepScheduler.from_pretrained(pretrained_model_path, subfolder="scheduler")
	text_encoder = CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder")
	tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer")
	vae = AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae")
	feature_extractor = CLIPImageProcessor.from_pretrained(pretrained_model_path, subfolder="feature_extractor")
	unet = UNet2DConditionModel.from_pretrained(ckpt_path, subfolder="unet")
	controlnet = ControlNetModel.from_pretrained(ckpt_path, subfolder="controlnet")
	vae.requires_grad_(False)
	text_encoder.requires_grad_(False)
	unet.requires_grad_(False)
	controlnet.requires_grad_(False)

	unet, vae, text_encoder = load_dreambooth_lora(unet, vae, text_encoder, dreambooth_lora_path)

	text_encoder.to(device, dtype=weight_dtype)
	vae.to(device, dtype=weight_dtype)
	unet.to(device, dtype=weight_dtype)
	controlnet.to(device, dtype=weight_dtype)

	validation_pipeline = StableDiffusionControlNetPipeline(
	vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, feature_extractor=feature_extractor,
	unet=unet, controlnet=controlnet, scheduler=scheduler, safety_checker=None, requires_safety_checker=False,
	)
	#validation_pipeline.enable_vae_tiling()
	validation_pipeline._init_tiled_vae(decoder_tile_size=224)

	weights = ResNet50_Weights.DEFAULT
	preprocess = weights.transforms()
	resnet = resnet50(weights=weights)
	resnet.eval()

	def resize_image(image_path, target_height):
	# Open the image file
	with Image.open(image_path) as img:
	# Calculate the ratio to resize the image to the target height
	ratio = target_height / float(img.size[1])
	# Calculate the new width based on the aspect ratio
	new_width = int(float(img.size[0]) * ratio)
	# Resize the image
	resized_img = img.resize((new_width, target_height), Image.LANCZOS)
	# Save the resized image
	#resized_img.save(output_path)
	return resized_img

	@spaces.GPU(enable_queue=True)
	def inference(input_image, prompt, a_prompt, n_prompt, denoise_steps, upscale, alpha, cfg, seed):

	#tempo fix for seed equals-1
	if seed == -1:
	seed = 0

	input_image = resize_image(input_image, 512)
	process_size = 768
	resize_preproc = transforms.Compose([
	transforms.Resize(process_size, interpolation=transforms.InterpolationMode.BILINEAR),
	])

	# Get the current timestamp
	timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

	with torch.no_grad():
	seed_everything(seed)
	generator = torch.Generator(device=device)

	input_image = input_image.convert('RGB')
	batch = preprocess(input_image).unsqueeze(0)
	prediction = resnet(batch).squeeze(0).softmax(0)
	class_id = prediction.argmax().item()
	score = prediction[class_id].item()
	category_name = weights.meta["categories"][class_id]
	if score >= 0.1:
	prompt += f"{category_name}" if prompt=='' else f", {category_name}"

	prompt = a_prompt if prompt=='' else f"{prompt}, {a_prompt}"

	ori_width, ori_height = input_image.size
	resize_flag = False

	rscale = upscale
	input_image = input_image.resize((input_image.size[0]rscale, input_image.size[1]rscale))

	#if min(validation_image.size) < process_size:
	# validation_image = resize_preproc(validation_image)

	input_image = input_image.resize((input_image.size[0]//88, input_image.size[1]//88))
	width, height = input_image.size
	resize_flag = True #

	try:
	image = validation_pipeline(
	None, prompt, input_image, num_inference_steps=denoise_steps, generator=generator, height=height, width=width, guidance_scale=cfg,
	negative_prompt=n_prompt, conditioning_scale=alpha, eta=0.0,
	).images[0]

	if True: #alpha<1.0:
	image = wavelet_color_fix(image, input_image)

	if resize_flag:
	image = image.resize((ori_widthrscale, ori_heightrscale))
	except Exception as e:
	print(e)
	image = Image.new(mode="RGB", size=(512, 512))

	# Convert and save the image as JPEG
	image.save(f'result_{timestamp}.jpg', 'JPEG')

	# Convert and save the image as JPEG
	input_image.save(f'input_{timestamp}.jpg', 'JPEG')

	return (f"input_{timestamp}.jpg", f"result_{timestamp}.jpg"), f"result_{timestamp}.jpg"

	title = "Pixel-Aware Stable Diffusion for Real-ISR"
	description = "Gradio Demo for PASD Real-ISR. To use it, simply upload your image, or click one of the examples to load them."
	article = "<a href='https://github.com/yangxy/PASD' target='_blank'>Github Repo Pytorch</a>"
	#examples=[['samples/27d38eeb2dbbe7c9.png'],['samples/629e4da70703193b.png']]

	css = """
	#col-container{
	margin: 0 auto;
	max-width: 720px;
	}
	#project-links{
	margin: 0 0 12px !important;
	column-gap: 8px;
	display: flex;
	justify-content: center;
	flex-wrap: nowrap;
	flex-direction: row;
	align-items: center;
	}
	"""

	with gr.Blocks(css=css) as demo:
	with gr.Column(elem_id="col-container"):
	gr.HTML(f"""
	<h2 style="text-align: center;">
	PASD Magnify
	</h2>
	<p style="text-align: center;">
	Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
	</p>
	<p id="project-links" align="center">
	<a href='https://github.com/yangxy/PASD'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://huggingface.co/papers/2308.14469'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
	</p>
	<p style="margin:12px auto;display: flex;justify-content: center;">
	<a href="https://huggingface.co/spaces/fffiloni/PASD?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
	</p>

	""")
	with gr.Row():
	with gr.Column():
	input_image = gr.Image(type="filepath", sources=["upload"], value="PASD/samples/frog.png")
	prompt_in = gr.Textbox(label="Prompt", value="Frog")
	with gr.Accordion(label="Advanced settings", open=False):
	added_prompt = gr.Textbox(label="Added Prompt", value='clean, high-resolution, 8k, best quality, masterpiece')
	neg_prompt = gr.Textbox(label="Negative Prompt",value='dotted, noise, blur, lowres, oversmooth, longbody, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
	denoise_steps = gr.Slider(label="Denoise Steps", minimum=10, maximum=50, value=20, step=1)
	upsample_scale = gr.Slider(label="Upsample Scale", minimum=1, maximum=4, value=2, step=1)
	condition_scale = gr.Slider(label="Conditioning Scale", minimum=0.5, maximum=1.5, value=1.1, step=0.1)
	classifier_free_guidance = gr.Slider(label="Classier-free Guidance", minimum=0.1, maximum=10.0, value=7.5, step=0.1)
	seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
	submit_btn = gr.Button("Submit")
	with gr.Column():
	b_a_slider = ImageSlider(label="B/A result", position=0.5)
	file_output = gr.File(label="Downloadable image result")

	submit_btn.click(
	fn = inference,
	inputs = [
	input_image, prompt_in,
	added_prompt, neg_prompt,
	denoise_steps,
	upsample_scale, condition_scale,
	classifier_free_guidance, seed
	],
	outputs = [
	b_a_slider,
	file_output
	]
	)
	demo.queue(max_size=20).launch()