Spaces:

yashvii
/

Idfy-Avatarifyyy

Paused

App Files Files Community

Idfy-Avatarifyyy / gradio_demo /demo.py

yashvii

Upload folder using huggingface_hub

0791e43 verified 3 months ago

raw

history blame contribute delete

12.6 kB

	import sys
	sys.path.append('./')

	from typing import Tuple

	import os
	import cv2
	import math
	import torch
	import random
	import numpy as np
	import argparse

	import PIL
	from PIL import Image

	import diffusers
	from diffusers.utils import load_image
	from diffusers.models import ControlNetModel
	from diffusers import LCMScheduler

	from huggingface_hub import hf_hub_download

	import insightface
	from insightface.app import FaceAnalysis

	from style_template import styles
	from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
	from model_util import load_models_xl, get_torch_device, torch_gc

	from cv2 import imencode
	import base64

	# def encode_pil_to_base64_new(pil_image):
	# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
	# image_arr = np.asarray(pil_image)[:,:,::-1]
	# _, byte_data = imencode('.png', image_arr)
	# base64_data = base64.b64encode(byte_data)
	# base64_string_opencv = base64_data.decode("utf-8")
	# return "data:image/png;base64," + base64_string_opencv

	import gradio as gr


	# global variables
	# Largest value representable by a signed 32-bit integer; used as the upper
	# bound when drawing a random seed.
	MAX_SEED = np.iinfo(np.int32).max
	device = get_torch_device()
	# Half precision only when the device string mentions CUDA; float32 otherwise.
	dtype = torch.float16 if "cuda" in str(device) else torch.float32
	# Style names in the iteration order of the `styles` mapping.
	STYLE_NAMES = list(styles)
	DEFAULT_STYLE_NAME = "Watercolor"

	# Load face encoder
	app = FaceAnalysis(
	    name='antelopev2',
	    root='./',
	    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
	)
	app.prepare(ctx_id=0, det_size=(320, 320))

	# Path to InstantID models (plain literals: nothing is interpolated here)
	face_adapter = './checkpoints/ip-adapter.bin'
	controlnet_path = './checkpoints/ControlNetModel'

	# Load the ControlNet that is handed to the pipeline
	controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

	# NOTE(review): `logo` is not read anywhere in the visible part of this file
	# (the UI passes the file path to gr.Image instead) - confirm it is still needed.
	logo = Image.open("./gradio_demo/logo.png")

	pretrained_model_name_or_path = "wangqixun/YamerMIX_v8"

	# A single-file checkpoint (.ckpt / .safetensors) is assembled component by
	# component; any other value is loaded through `from_pretrained`.
	if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
	    # Fetch the scheduler configuration file from the Hub repository.
	    scheduler_kwargs = hf_hub_download(
	        repo_id="wangqixun/YamerMIX_v8",
	        subfolder="scheduler",
	        filename="scheduler_config.json",
	    )

	    (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
	        pretrained_model_name_or_path=pretrained_model_name_or_path,
	        scheduler_name=None,
	        weight_dtype=dtype,
	    )

	    scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
	    pipe = StableDiffusionXLInstantIDPipeline(
	        vae=vae,
	        text_encoder=text_encoders[0],
	        text_encoder_2=text_encoders[1],
	        tokenizer=tokenizers[0],
	        tokenizer_2=tokenizers[1],
	        unet=unet,
	        scheduler=scheduler,
	        controlnet=controlnet,
	    ).to(device)
	else:
	    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
	        pretrained_model_name_or_path,
	        controlnet=controlnet,
	        torch_dtype=dtype,
	        safety_checker=None,
	        feature_extractor=None,
	    ).to(device)

	    # Replace the loaded scheduler with an Euler scheduler built from its config.
	    pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

	pipe.load_ip_adapter_instantid(face_adapter)
	# load and disable LCM
	pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
	pipe.disable_lora()
	# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
	def remove_tips():
	    """Print a marker line and return a Gradio update that hides its output component."""
	    print("GG")
	    hidden = gr.update(visible=False)
	    return hidden

	def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
	    """Convert a BGR array into a PIL image.

	    Args:
	        img: Array converted with ``cv2.COLOR_BGR2RGB`` before wrapping.

	    Returns:
	        The PIL image built from the colour-converted array.
	    """
	    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	    return Image.fromarray(rgb)

	def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
	    """Convert a PIL image into a BGR array.

	    Args:
	        img: Image turned into an array and converted with ``cv2.COLOR_RGB2BGR``.

	    Returns:
	        The colour-converted array.
	    """
	    rgb = np.array(img)
	    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

	def _run_for_prompt_index(face_file, style, prompt_index):
	    """Generate one image using the prompt at ``prompt_index`` of the chosen style.

	    Each entry of ``styles`` is unpacked as ``(prompts, negative_prompt)``.
	    A style name that is missing from ``styles`` falls back to the entry
	    named by ``STYLE_NAMES[1]``.
	    """
	    prompts, negative_prompt = styles.get(style, styles.get(STYLE_NAMES[1]))
	    return generate_image(face_file, prompts[prompt_index], negative_prompt)


	def run_for_prompts1(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate an image from the first prompt of ``style``."""
	    return _run_for_prompt_index(face_file, style, 0)


	def run_for_prompts2(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate an image from the second prompt of ``style``."""
	    return _run_for_prompt_index(face_file, style, 1)


	def run_for_prompts3(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate an image from the third prompt of ``style``."""
	    return _run_for_prompt_index(face_file, style, 2)


	def run_for_prompts4(face_file, style, progress=gr.Progress(track_tqdm=True)):
	    """Generate an image from the fourth prompt of ``style``."""
	    return _run_for_prompt_index(face_file, style, 3)


	def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
	    """Render keypoints as coloured limbs and dots on a black canvas.

	    Args:
	        image_pil: PIL image; only its ``size`` is read, to size the canvas.
	        kps: Sequence of ``(x, y)`` keypoints. Indices 0-4 are used by the
	            limb table below.
	        color_list: One colour triple per keypoint index.

	    Returns:
	        A PIL image of the same size as ``image_pil`` containing the drawing.
	    """
	    stickwidth = 4
	    # Every limb joins one keypoint to keypoint 2.
	    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
	    kps = np.array(kps)

	    w, h = image_pil.size
	    out_img = np.zeros([h, w, 3])

	    for index in limb_seq:
	        color = color_list[index[0]]

	        x = kps[index][:, 0]
	        y = kps[index][:, 1]
	        # Euclidean distance between the two endpoints of the limb.
	        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
	        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
	        # Ellipse centred on the limb midpoint, rotated to the limb direction.
	        polygon = cv2.ellipse2Poly(
	            (int(np.mean(x)), int(np.mean(y))),
	            (int(length / 2), stickwidth),
	            int(angle),
	            0,
	            360,
	            1,
	        )
	        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
	    # Dim the limbs; the keypoint dots drawn afterwards keep full intensity.
	    out_img = (out_img * 0.6).astype(np.uint8)

	    for idx_kp, kp in enumerate(kps):
	        color = color_list[idx_kp]
	        x, y = kp
	        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

	    return Image.fromarray(out_img.astype(np.uint8))

	def resize_img(input_image, max_side=640, min_side=640, size=None,
	               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
	    """Resize a PIL image and optionally centre it on a white square canvas.

	    Args:
	        input_image: PIL image to resize.
	        max_side: Target for the longer side; also the edge of the padded canvas.
	        min_side: Target for the shorter side in the first scaling step.
	        size: Optional ``(width, height)``. When given, the image is resized to
	            exactly this size and the ratio-based computation is skipped.
	        pad_to_max_side: When true, paste the result centred on a white
	            ``max_side`` x ``max_side`` canvas.
	        mode: Resampling filter passed to ``Image.resize``.
	        base_pixel_number: The computed width and height are rounded down to
	            a multiple of this value (only when ``size`` is not given).

	    Returns:
	        The resized (and possibly padded) PIL image.
	    """
	    w, h = input_image.size
	    if size is not None:
	        w_resize_new, h_resize_new = size
	    else:
	        # First scale so the shorter side equals min_side ...
	        ratio = min_side / min(h, w)
	        w, h = round(ratio * w), round(ratio * h)
	        # ... then rescale so the longer side equals max_side.
	        ratio = max_side / max(h, w)
	        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
	        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
	        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
	    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

	    if pad_to_max_side:
	        canvas = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
	        offset_x = (max_side - w_resize_new) // 2
	        offset_y = (max_side - h_resize_new) // 2
	        # The canvas has three channels, so force RGB before pasting; this keeps
	        # images in other modes (e.g. with an alpha channel) from failing here.
	        canvas[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(
	            input_image.convert("RGB")
	        )
	        input_image = Image.fromarray(canvas)
	    return input_image


	def _build_control_mask(width, height, bbox):
	    """Return a PIL mask that is 255 inside ``bbox`` and 0 elsewhere.

	    The box is clamped to the image bounds first: a negative coordinate would
	    otherwise be read as a from-the-end index by the slice below.
	    """
	    x1, y1, x2, y2 = (int(v) for v in bbox)
	    x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
	    y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))
	    mask = np.zeros([height, width, 3])
	    mask[y1:y2, x1:x2] = 255
	    return Image.fromarray(mask.astype(np.uint8))


	def generate_image(face_image, prompt, negative_prompt):
	    """Generate one image conditioned on the largest face found in ``face_image``.

	    Args:
	        face_image: PIL image of the face, or ``None`` when nothing was provided.
	        prompt: Positive text prompt passed to the pipeline.
	        negative_prompt: Negative text prompt passed to the pipeline.

	    Returns:
	        The first image produced by the pipeline.

	    Raises:
	        gr.Error: If ``face_image`` is ``None`` or no face is detected.
	    """
	    # Validate the input before touching the shared pipeline state.
	    if face_image is None:
	        raise gr.Error("Cannot find any input face image! Please upload the face image")

	    # Fixed generation settings.
	    pose_image_path = None
	    enable_LCM = False
	    identitynet_strength_ratio = 0.95
	    adapter_strength_ratio = 0.60
	    num_steps = 15
	    guidance_scale = 8.5
	    seed = random.randint(0, MAX_SEED)
	    enhance_face_region = True

	    if enable_LCM:
	        pipe.enable_lora()
	        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
	    else:
	        pipe.disable_lora()
	        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

	    face_image = resize_img(face_image)
	    face_image_cv2 = convert_from_image_to_cv2(face_image)

	    # Extract face features
	    faces = app.get(face_image_cv2)
	    if len(faces) == 0:
	        raise gr.Error("Cannot find any face in the image! Please upload another person image")

	    # Only use the face with the largest bounding-box area.
	    face_info = max(
	        faces,
	        key=lambda f: (f['bbox'][2] - f['bbox'][0]) * (f['bbox'][3] - f['bbox'][1]),
	    )
	    face_emb = face_info['embedding']
	    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

	    if pose_image_path is not None:
	        pose_image = load_image(pose_image_path)
	        pose_image = resize_img(pose_image)
	        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

	        pose_faces = app.get(pose_image_cv2)
	        if len(pose_faces) == 0:
	            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

	        # From here on the keypoints and the mask box come from the pose image.
	        face_info = pose_faces[-1]
	        face_kps = draw_kps(pose_image, face_info['kps'])

	    # Output resolution follows the keypoint image.
	    width, height = face_kps.size

	    if enhance_face_region:
	        control_mask = _build_control_mask(width, height, face_info["bbox"])
	    else:
	        control_mask = None

	    generator = torch.Generator(device=device).manual_seed(seed)

	    print("Start inference...")
	    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

	    pipe.set_ip_adapter_scale(adapter_strength_ratio)
	    images = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        image_embeds=face_emb,
	        image=face_kps,
	        control_mask=control_mask,
	        controlnet_conditioning_scale=float(identitynet_strength_ratio),
	        num_inference_steps=num_steps,
	        guidance_scale=guidance_scale,
	        height=height,
	        width=width,
	        generator=generator,
	    ).images

	    return images[0]

	def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
	    """Build the Gradio Blocks interface and launch it with ``share=True``.

	    Args:
	        pretrained_model_name_or_path: Accepted from the command-line entry
	            point but not read in this function; the pipeline is built at
	            module level.
	        enable_lcm_arg: Accepted for interface compatibility; not read here.
	    """
	    ### Description
	    title = r"""
	<h1 align="center">Choose your AVATAR</h1>
	"""

	    description = r"""
	<h2> Powered by IDfy </h2>"""

	    article = r""""""

	    tips = r""""""

	    js = ''' '''

	    css = '''
	.gradio-container {width: 95% !important; background-color: #E6F3FF;}
	.image-gallery {height: 100vh !important; overflow: auto;}
	.gradio-row .gradio-element { margin: 0 !important; }
	'''

	    # NOTE(review): the nesting of rows and columns below was reconstructed from
	    # a source whose indentation was lost - confirm against the intended layout.
	    with gr.Blocks(css=css, js=js) as demo:

	        # description
	        gr.Markdown(title)
	        with gr.Row():
	            gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
	            gr.Markdown(description)
	        with gr.Row():
	            with gr.Column():
	                style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
	                # `sources` is given as a list of input sources.
	                face_file = gr.Image(label="Upload a photo of your face", type="pil", sources=["webcam"])
	                submit = gr.Button("Submit", variant="primary")
	            with gr.Column():
	                with gr.Row():
	                    gallery1 = gr.Image(label="Generated Images")
	                    gallery2 = gr.Image(label="Generated Images")
	                with gr.Row():
	                    gallery3 = gr.Image(label="Generated Images")
	                    gallery4 = gr.Image(label="Generated Images")
	                # NOTE(review): the value of this textbox is not read by any handler.
	                email = gr.Textbox(label="Email",
	                                   info="Enter your email address",
	                                   value="")

	        usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips, visible=False)

	        face_file.upload(
	            fn=remove_tips,
	            outputs=usage_tips,
	            queue=True,
	            api_name=False,
	            show_progress="full",
	        )

	        # NOTE(review): only gallery1 is filled; gallery2-4 and
	        # run_for_prompts2-4 are never wired to an event - confirm intent.
	        submit.click(
	            fn=remove_tips,
	            outputs=usage_tips,
	            queue=True,
	            api_name=False,
	            show_progress="full",
	        ).then(
	            fn=run_for_prompts1,
	            inputs=[face_file, style],
	            outputs=[gallery1],
	        )

	        gr.Markdown(article)

	    demo.launch(share=True)

	if __name__ == "__main__":
	    # Read the model identifier from the command line and start the demo.
	    cli = argparse.ArgumentParser()
	    cli.add_argument(
	        "--pretrained_model_name_or_path",
	        type=str,
	        default="wangqixun/YamerMIX_v8",
	    )
	    parsed = cli.parse_args()

	    main(parsed.pretrained_model_name_or_path, False)