"""Gradio app: morph two face photos and generate a child portrait.

The two uploaded faces are aligned on their dlib landmarks and blended, the
blend is embedded with InsightFace, and that embedding conditions a Stable
Diffusion pipeline through IP-Adapter-FaceID.
"""

import contextlib
import functools
import os
import random
import tempfile
import urllib.request

import cv2
import dlib
import gradio as gr
import imutils
import numpy as np
import spaces
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from imutils import face_utils
from insightface.app import FaceAnalysis
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from PIL import Image
from skimage import transform as tf
from transformers import CLIPFeatureExtractor

base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
base_cache = "model-cache"
vae_model_path = "stabilityai/sd-vae-ft-mse"
ip_cache = "./ip-cache"
device = "cuda"

IP_ADAPTER_CKPT = "ip-cache/ip-adapter-faceid_sd15.bin"
IP_ADAPTER_URL = (
    "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/"
    "ip-adapter-faceid_sd15.bin"
)
LANDMARK_MODEL_PATH = "faceid/shape_predictor_68_face_landmarks.dat"
LANDMARK_MODEL_URL = (
    "https://github.com/italojs/facial-landmarks-recognition/raw/master/"
    "shape_predictor_68_face_landmarks.dat"
)

# Size (rows, cols) of the canvas both faces are warped onto before blending.
MORPH_OUTPUT_SHAPE = (512, 512)

# Negative-prompt tail shared by every gender choice.
_BASE_NEGATIVE_PROMPT = (
    "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, "
    "cartoon, drawing, anime, mutated hands and fingers:1.4), "
    "(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, "
    "wrong anatomy, extra limb, missing limb, floating limbs, "
    "disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
)
# Gender-specific terms prepended to the shared tail.
_NEGATIVE_PROMPT_PREFIX = {
    "Boy": "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), ",
    "Girl": "(beard, mustache, male features: 1.4), (naked, nude: 1.4), ",
}


def _download_if_missing(url, destination):
    """Download *url* to *destination* unless that file already exists.

    The payload is written to a ``.part`` file and renamed only after the
    transfer succeeds, so an interrupted download can never be mistaken for
    a complete model file on the next start.

    Raises:
        urllib.error.URLError: if the download fails.
    """
    if os.path.exists(destination):
        return
    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    partial_path = destination + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, destination)
    finally:
        # Only present here if the download or rename failed.
        with contextlib.suppress(FileNotFoundError):
            os.remove(partial_path)


# Setup function to load models and other dependencies
def setup():
    """Load the models into memory so repeated predictions are efficient.

    Downloads the IP-Adapter-FaceID checkpoint and the dlib 68-point
    landmark model if they are not on disk, then builds the InsightFace
    analyser and the Stable Diffusion pipeline wrapped by IP-Adapter.

    Returns:
        A tuple ``(app, ip_model)``: the prepared ``FaceAnalysis`` instance
        and the ``IPAdapterFaceID`` wrapper around the pipeline.
    """
    _download_if_missing(IP_ADAPTER_URL, IP_ADAPTER_CKPT)
    _download_if_missing(LANDMARK_MODEL_URL, LANDMARK_MODEL_PATH)

    # Face embedding
    app = FaceAnalysis(
        name="buffalo_l",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained(
            "openai/clip-vit-base-patch32"
        ),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained(
            "CompVis/stable-diffusion-safety-checker"
        ),
        cache_dir=base_cache,
    )
    pipe = pipe.to(device)

    # IP adapter
    ip_model = IPAdapterFaceID(pipe, IP_ADAPTER_CKPT, device)

    return app, ip_model


app, ip_model = setup()


@functools.lru_cache(maxsize=1)
def _dlib_models():
    """Return the dlib ``(detector, predictor)`` pair, loading it only once."""
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)
    return detector, predictor


def get_face_landmarks(image_path):
    """Locate a face in an image file and return its landmarks.

    The image is resized to a width of 512 pixels before detection, so the
    returned landmark coordinates refer to the resized image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A tuple ``(landmarks, image)``: the landmark array produced by
        ``face_utils.shape_to_np`` for the largest detected face, and the
        resized BGR image.

    Raises:
        ValueError: if the image cannot be read or contains no detectable
            face.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")
    image = imutils.resize(image, width=512)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    detector, predictor = _dlib_models()
    rects = detector(gray, 1)
    if len(rects) == 0:
        raise ValueError(f"No face detected in image: {image_path!r}")

    # With several faces in frame, use the most prominent one.
    largest = max(rects, key=lambda rect: rect.width() * rect.height())
    shape = face_utils.shape_to_np(predictor(gray, largest))
    return shape, image


def morph_faces(image1_path, image2_path, alpha=0.5):
    """Blend two face photos into one morphed image written to disk.

    Both faces are aligned by a similarity transform onto a landmark shape
    interpolated between them, then cross-dissolved.

    Args:
        image1_path: Path of the first face image.
        image2_path: Path of the second face image.
        alpha: Weight of the second face in both the target landmark shape
            and the pixel blend; ``0.5`` gives an even mix.

    Returns:
        Path of a newly created temporary PNG holding the morphed image.
        The caller is responsible for deleting it.

    Raises:
        ValueError: if either image is unreadable or has no detectable face.
        OSError: if the morphed image cannot be written.
    """
    landmarks1, image1 = get_face_landmarks(image1_path)
    landmarks2, image2 = get_face_landmarks(image2_path)

    # At alpha == 0.5 this is the plain average of the two landmark sets.
    target_landmarks = (1 - alpha) * landmarks1 + alpha * landmarks2

    tform1 = tf.estimate_transform('similarity', landmarks1, target_landmarks)
    tform2 = tf.estimate_transform('similarity', landmarks2, target_landmarks)

    warped1 = tf.warp(
        image1, inverse_map=tform1.inverse, output_shape=MORPH_OUTPUT_SHAPE
    )
    warped2 = tf.warp(
        image2, inverse_map=tform2.inverse, output_shape=MORPH_OUTPUT_SHAPE
    )

    blended = (1 - alpha) * warped1 + alpha * warped2
    # Clip before scaling so out-of-range values cannot wrap around in uint8.
    blended = (np.clip(blended, 0.0, 1.0) * 255).astype(np.uint8)

    # A unique file per call keeps concurrent requests from clobbering
    # each other's morph.
    fd, output_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    if not cv2.imwrite(output_path, blended):
        with contextlib.suppress(FileNotFoundError):
            os.remove(output_path)
        raise OSError(f"Could not write morphed image to {output_path!r}")
    return output_path


def get_negative_prompt(gender):
    """Return the negative prompt for the given gender choice.

    ``"Boy"`` and ``"Girl"`` get gender-specific terms in front of the
    shared negative prompt; any other value gets the shared prompt alone.
    """
    return _NEGATIVE_PROMPT_PREFIX.get(gender, "") + _BASE_NEGATIVE_PROMPT


@spaces.GPU(duration=40)
def generate_image(face_image_1, face_image_2, prompt, gender, width, height,
                   num_inference_steps, seed):
    """Generate one image conditioned on a morph of the two input faces.

    Args:
        face_image_1: File path of the first face image.
        face_image_2: File path of the second face image.
        prompt: Text prompt for the diffusion model.
        gender: ``"Boy"``, ``"Girl"`` or ``"Random"``; selects the negative
            prompt (``"Random"`` picks one of the other two).
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of denoising steps.
        seed: Random seed, or ``None`` to draw one from ``os.urandom``.

    Returns:
        The first image returned by ``ip_model.generate``.

    Raises:
        gr.Error: if an image is missing or unreadable, or no face can be
            found in an input or in the morphed result.
    """
    if face_image_1 is None or face_image_2 is None:
        raise gr.Error("Please upload both face images.")

    # The UI's number field can deliver a float; the seed is normalised to
    # an int before it is handed to the generator.
    seed = int.from_bytes(os.urandom(4), "big") if seed is None else int(seed)

    if gender == "Random":
        gender = random.choice(["Boy", "Girl"])
    negative_prompt = get_negative_prompt(gender)

    try:
        baby_image_path = morph_faces(face_image_1, face_image_2)
    except ValueError as err:
        raise gr.Error(str(err)) from err

    try:
        morphed_image = cv2.imread(baby_image_path)
    finally:
        # The morph is only an intermediate; don't let temp files pile up.
        with contextlib.suppress(FileNotFoundError):
            os.remove(baby_image_path)

    faces = app.get(morphed_image)
    if not faces:
        raise gr.Error(
            "Could not find a face in the morphed image; "
            "try clearer, front-facing photos."
        )
    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)

    generated_images = ip_model.generate(
        prompt=prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=int(width),
        height=int(height),
        num_inference_steps=int(num_inference_steps),
        seed=seed,
    )
    return generated_images[0]


# Gradio Interface
gr_interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Image(type="filepath", label="First Face Image"),
        gr.Image(type="filepath", label="Second Face Image"),
        gr.Textbox(
            value="portrait of a 6 y.o. child, 8k, HD, happy, in living room, perfect eyes, cute",
            label="Prompt",
        ),
        gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender"),
        gr.Slider(256, 1024, value=768, step=64, label="Width"),
        gr.Slider(256, 1024, value=768, step=64, label="Height"),
        gr.Slider(1, 200, value=30, step=1, label="Number of Inference Steps"),
        gr.Number(value=None, label="Random Seed"),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Face Morphing and Image Generation with Stable Diffusion",
)

gr_interface.launch(share=True)