Spaces:
Build error
Build error
import gradio as gr | |
import spaces | |
import os | |
import cv2 | |
import torch | |
from PIL import Image | |
from insightface.app import FaceAnalysis | |
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID | |
from transformers import CLIPFeatureExtractor | |
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL | |
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker | |
import dlib | |
import imutils | |
from imutils import face_utils | |
import numpy as np | |
from skimage import transform as tf | |
import random | |
# Hugging Face repo id of the base checkpoint loaded by StableDiffusionPipeline.
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
# Directory passed as ``cache_dir`` when the base pipeline is downloaded.
base_cache = "model-cache"
# Hugging Face repo id of the VAE that is plugged into the pipeline.
vae_model_path = "stabilityai/sd-vae-ft-mse"
# Directory that holds the downloaded IP-Adapter FaceID weights.
ip_cache = "./ip-cache"
# Torch device the pipeline and the IP-Adapter are placed on.
device = "cuda"
def _download_if_missing(url, destination):
    """Download *url* to *destination* with ``wget`` unless a non-empty file is already there.

    The payload is written to ``<destination>.part`` first and renamed only
    after ``wget`` exits successfully, so an interrupted or failed download is
    never mistaken for a finished one on the next start.

    Raises:
        OSError: if ``wget`` cannot be started.
        subprocess.CalledProcessError: if ``wget`` exits with a non-zero status.
    """
    import subprocess  # local import: only needed on the download path

    if os.path.exists(destination) and os.path.getsize(destination) > 0:
        return

    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    partial_path = destination + ".part"
    try:
        # Argument list (no shell) and check=True so a failed download raises
        # instead of being silently ignored.
        subprocess.run(["wget", "-O", partial_path, url], check=True)
    except (OSError, subprocess.CalledProcessError):
        # Remove whatever was written so the next run retries from scratch.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    os.replace(partial_path, destination)


# Setup function to load models and other dependencies
def setup():
    """Load the models into memory to make running multiple predictions efficient.

    Downloads the IP-Adapter FaceID weights and the dlib landmark predictor
    file if they are not present yet, prepares the insightface analyser, builds
    the Stable Diffusion pipeline on ``device`` and wraps it in the IP-Adapter.

    Returns:
        tuple: ``(face_app, ip_model)`` - the prepared ``FaceAnalysis``
        instance and the ``IPAdapterFaceID`` wrapper around the pipeline.
    """
    # Get ip-adapter-faceid model
    ip_ckpt = os.path.join(ip_cache, "ip-adapter-faceid_sd15.bin")
    _download_if_missing(
        "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid_sd15.bin",
        ip_ckpt,
    )

    # Download shape_predictor_68_face_landmarks.dat if it doesn't exist
    _download_if_missing(
        "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
        "faceid/shape_predictor_68_face_landmarks.dat",
    )

    # Face embedding
    face_app = FaceAnalysis(
        name="buffalo_l",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    face_app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker"),
        cache_dir=base_cache,
    )
    pipe = pipe.to(device)

    # IP adapter
    ip_model = IPAdapterFaceID(pipe, ip_ckpt, device)
    return face_app, ip_model


# Load everything once at import time so each request reuses the same models.
app, ip_model = setup()
# Lazily filled cache so the dlib detector/predictor are built once, not per call.
_DLIB_MODELS = {}


def _load_dlib_models():
    """Return ``(detector, predictor)``, constructing them on first use only."""
    if not _DLIB_MODELS:
        _DLIB_MODELS["detector"] = dlib.get_frontal_face_detector()
        _DLIB_MODELS["predictor"] = dlib.shape_predictor(
            'faceid/shape_predictor_68_face_landmarks.dat'
        )
    return _DLIB_MODELS["detector"], _DLIB_MODELS["predictor"]


def get_face_landmarks(image_path):
    """Detect a face in the image at *image_path* and return its landmarks.

    The image is read with OpenCV, resized to a width of 512 pixels and
    converted to grayscale for detection. When several faces are found, the
    landmarks of the last one reported by the detector are returned.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        tuple: ``(landmarks, image)`` where ``landmarks`` is the NumPy array
        produced by ``face_utils.shape_to_np`` and ``image`` is the resized
        BGR image the landmark coordinates refer to.

    Raises:
        ValueError: if the image cannot be read or contains no detectable face.
    """
    detector, predictor = _load_dlib_models()

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise ValueError(f"Could not read an image from {image_path!r}")

    image = imutils.resize(image, width=512)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    rects = detector(gray, 1)
    if len(rects) == 0:
        raise ValueError(f"No face detected in {image_path!r}")

    # Only the last detection is used, so run the predictor on that one alone.
    rect = rects[len(rects) - 1]
    shape = face_utils.shape_to_np(predictor(gray, rect))
    return shape, image
def morph_faces(image1_path, image2_path, alpha=0.5):
    """Blend the faces of two images into one morphed image written to disk.

    Both faces are aligned onto a common landmark layout with a similarity
    transform, warped to 512x512 and cross-faded.

    Args:
        image1_path: Path of the first face image.
        image2_path: Path of the second face image.
        alpha: Weight of the second image in the range 0..1. It controls both
            the target landmark layout and the pixel blend; ``0.5`` gives the
            plain average of the two faces.

    Returns:
        str: Path of the written PNG file (``"tmp.png"``).

    Raises:
        OSError: if the morphed image cannot be written.
    """
    landmarks1, image1 = get_face_landmarks(image1_path)
    landmarks2, image2 = get_face_landmarks(image2_path)

    # Interpolate the geometry with the same weight as the pixels; for the
    # default alpha of 0.5 this equals (landmarks1 + landmarks2) / 2.
    target_landmarks = (1 - alpha) * landmarks1 + alpha * landmarks2

    tform1 = tf.estimate_transform('similarity', landmarks1, target_landmarks)
    tform2 = tf.estimate_transform('similarity', landmarks2, target_landmarks)

    # warp() expects the output->input mapping, hence the inverse transforms.
    warped1 = tf.warp(image1, inverse_map=tform1.inverse, output_shape=(512, 512))
    warped2 = tf.warp(image2, inverse_map=tform2.inverse, output_shape=(512, 512))

    blended = (1 - alpha) * warped1 + alpha * warped2
    blended = (blended * 255).astype(np.uint8)  # Convert to [0, 255] range

    # NOTE: fixed path in the working directory; overlapping calls would
    # overwrite each other's result.
    output_path = "tmp.png"
    if not cv2.imwrite(output_path, blended):
        raise OSError(f"Could not write morphed image to {output_path!r}")
    return output_path
def get_negative_prompt(gender):
    """Return the negative prompt to use for the given *gender* choice.

    ``"Boy"`` and ``"Girl"`` each prepend their own exclusion terms to a
    shared list of quality-related terms; any other value yields the shared
    list alone.
    """
    shared_terms = (
        "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, "
        "cartoon, drawing, anime, mutated hands and fingers:1.4), "
        "(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, "
        "wrong anatomy, extra limb, missing limb, floating limbs, "
        "disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    )
    gender_specific_terms = (
        ("Boy", "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), "),
        ("Girl", "(beard, mustache, male features: 1.4), (naked, nude: 1.4), "),
    )
    for label, extra_terms in gender_specific_terms:
        if gender == label:
            return extra_terms + shared_terms
    # Random / anything else
    return shared_terms
@spaces.GPU(duration=40)
def generate_image(face_image_1, face_image_2, prompt, gender, width, height, num_inference_steps, seed):
    """Morph two face photos and generate one image conditioned on the result.

    Args:
        face_image_1: File path of the first face image.
        face_image_2: File path of the second face image.
        prompt: Text prompt passed to the IP-Adapter pipeline.
        gender: ``"Boy"``, ``"Girl"`` or ``"Random"``; selects the negative
            prompt. ``"Random"`` picks one of the other two at random.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of diffusion steps.
        seed: Random seed, or ``None`` to draw one from ``os.urandom``.

    Returns:
        The first image returned by ``ip_model.generate``.

    Raises:
        gr.Error: if an input image is missing or no face is found in the
            morphed image.
    """
    if not face_image_1 or not face_image_2:
        raise gr.Error("Please provide both face images.")

    # UI number widgets may deliver floats; the pipeline is given plain ints.
    if seed is None:
        seed = int.from_bytes(os.urandom(4), "big")
    else:
        seed = int(seed)

    if gender == "Random":
        gender = random.choice(["Boy", "Girl"])
    negative_prompt = get_negative_prompt(gender)

    baby_image_path = morph_faces(face_image_1, face_image_2)

    detected_faces = app.get(cv2.imread(baby_image_path))
    if not detected_faces:
        raise gr.Error("No face could be detected in the morphed image.")

    faceid_embeds = torch.from_numpy(detected_faces[0].normed_embedding).unsqueeze(0)

    generated_images = ip_model.generate(
        prompt=prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=int(width),
        height=int(height),
        num_inference_steps=int(num_inference_steps),
        seed=seed,
    )
    return generated_images[0]
# Gradio Interface
# The order of these components matches generate_image's positional parameters.
_interface_inputs = [
    gr.Image(type="filepath", label="First Face Image"),
    gr.Image(type="filepath", label="Second Face Image"),
    gr.Textbox(
        value="portrait of a 6 y.o. child, 8k, HD, happy, in living room, perfect eyes, cute",
        label="Prompt",
    ),
    gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender"),
    gr.Slider(minimum=256, maximum=1024, value=768, step=64, label="Width"),
    gr.Slider(minimum=256, maximum=1024, value=768, step=64, label="Height"),
    gr.Slider(minimum=1, maximum=200, value=30, step=1, label="Number of Inference Steps"),
    gr.Number(value=None, label="Random Seed"),
]

gr_interface = gr.Interface(
    fn=generate_image,
    inputs=_interface_inputs,
    outputs=gr.Image(label="Generated Image"),
    title="Face Morphing and Image Generation with Stable Diffusion",
)

# Start the web UI; share=True asks Gradio for a public share link.
gr_interface.launch(share=True)