File size: 6,983 Bytes
68f6e22
15b02cd
68f6e22
 
 
 
 
 
 
 
 
 
 
 
 
 
3f0e127
68f6e22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f0e127
 
 
 
 
 
 
 
15b02cd
3f0e127
68f6e22
 
 
3f0e127
 
 
 
 
15b02cd
68f6e22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208a670
 
3f0e127
 
68f6e22
 
 
 
 
 
 
 
 
15b02cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import gradio as gr
import spaces
import os
import cv2
import torch
from PIL import Image
from insightface.app import FaceAnalysis
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from transformers import CLIPFeatureExtractor
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
import dlib
import imutils
from imutils import face_utils
import numpy as np
from skimage import transform as tf
import random

# Hugging Face repo id of the Stable Diffusion checkpoint passed to
# StableDiffusionPipeline.from_pretrained in setup().
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
# Passed as cache_dir when the base pipeline is loaded.
base_cache = "model-cache"
# Hugging Face repo id of the VAE loaded via AutoencoderKL.from_pretrained.
vae_model_path = "stabilityai/sd-vae-ft-mse"
# Directory created by setup() to hold the IP-Adapter FaceID weights.
ip_cache = "./ip-cache"
# Device the pipeline is moved to and that is handed to IPAdapterFaceID.
device = "cuda"

# Setup function to load models and other dependencies
def setup():
    """Download missing weights, then build the face analyser and IP-Adapter model.

    The IP-Adapter FaceID checkpoint and the dlib 68-point landmark model are
    fetched only when they are not already present on disk.

    Returns:
        tuple: ``(app, ip_model)`` -- the prepared insightface ``FaceAnalysis``
        instance and the ``IPAdapterFaceID`` wrapper around the Stable
        Diffusion pipeline.

    Raises:
        Exception: whatever ``torch.hub.download_url_to_file`` raises when a
            download fails (previously a failed ``wget`` was silently ignored).
    """

    def _fetch_if_missing(url, destination):
        """Download *url* to *destination* unless a non-empty file is already there."""
        # A zero-byte file is what a failed `wget -O` leaves behind, so treat
        # it as missing instead of trusting its mere existence.
        if os.path.exists(destination) and os.path.getsize(destination) > 0:
            return
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        # Raises on network/HTTP errors instead of failing silently like
        # os.system("wget ...") did.
        torch.hub.download_url_to_file(url, destination)

    # Get ip-adapter-faceid model
    ip_adapter_path = os.path.join(ip_cache, "ip-adapter-faceid_sd15.bin")
    _fetch_if_missing(
        "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid_sd15.bin",
        ip_adapter_path,
    )

    # Download shape_predictor_68_face_landmarks.dat if it doesn't exist
    _fetch_if_missing(
        "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
        "faceid/shape_predictor_68_face_landmarks.dat",
    )

    # Face embedding
    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker"),
        cache_dir=base_cache,
    )
    pipe = pipe.to(device)

    # IP adapter
    ip_model = IPAdapterFaceID(pipe, ip_adapter_path, device)

    return app, ip_model

# Runs at import time: build the models once and keep them as module-level
# globals so generate_image() can reuse them on every call.
app, ip_model = setup()

# Lazily-loaded dlib landmark model; reading the .dat file from disk on every
# call is wasteful, so it is kept after first use.
_landmark_predictor = None


def get_face_landmarks(image_path):
    """Locate the first detected face in an image and return its 68 landmarks.

    The image is read with OpenCV and resized to a width of 512 pixels before
    detection, so the returned landmark coordinates refer to the resized image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        tuple: ``(landmarks, image)`` where ``landmarks`` is the NumPy array
        produced by ``face_utils.shape_to_np`` and ``image`` is the resized
        BGR image.

    Raises:
        ValueError: If the image cannot be read or no face is detected
            (previously this case returned ``None`` implicitly and made the
            caller fail with an opaque unpacking error).
    """
    global _landmark_predictor

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"Could not read image file: {image_path!r}")
    image = imutils.resize(image, width=512)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    detector = dlib.get_frontal_face_detector()
    rects = detector(gray, 1)
    if len(rects) == 0:
        raise ValueError(f"No face detected in image: {image_path!r}")

    if _landmark_predictor is None:
        _landmark_predictor = dlib.shape_predictor('faceid/shape_predictor_68_face_landmarks.dat')

    # Only the first detection is used, matching the original behaviour.
    shape = _landmark_predictor(gray, rects[0])
    return face_utils.shape_to_np(shape), image

def morph_faces(image1_path, image2_path, alpha=0.5):
    """Blend two face photos into one morphed image and save it to disk.

    Both faces are aligned to a common landmark layout with a similarity
    transform and then cross-dissolved.

    Args:
        image1_path: Path of the first face image.
        image2_path: Path of the second face image.
        alpha: Blend weight of the second face. ``0`` yields the first face,
            ``1`` the second, ``0.5`` (default) an even mix. It now weights
            the landmark geometry as well as the pixel blend; previously the
            geometry was always the 50/50 average regardless of ``alpha``.

    Returns:
        str: Path of the written PNG file (``"tmp.png"``).

    Raises:
        OSError: If the morphed image cannot be written.
    """
    landmarks1, image1 = get_face_landmarks(image1_path)
    landmarks2, image2 = get_face_landmarks(image2_path)

    # Target shape interpolated with the same weight as the pixels below;
    # identical to the plain average when alpha == 0.5.
    target_landmarks = (1 - alpha) * landmarks1 + alpha * landmarks2

    tform1 = tf.estimate_transform('similarity', landmarks1, target_landmarks)
    tform2 = tf.estimate_transform('similarity', landmarks2, target_landmarks)

    # tf.warp wants a mapping from output to input coordinates, hence .inverse.
    warped1 = tf.warp(image1, inverse_map=tform1.inverse, output_shape=(512, 512))
    warped2 = tf.warp(image2, inverse_map=tform2.inverse, output_shape=(512, 512))

    blended = (1 - alpha) * warped1 + alpha * warped2
    blended = (blended * 255).astype(np.uint8)  # Convert to [0, 255] range

    # NOTE(review): fixed file name in the working directory; concurrent calls
    # would overwrite each other's output -- confirm requests are serialised.
    output_path = "tmp.png"
    if not cv2.imwrite(output_path, blended):
        raise OSError(f"Failed to write morphed image to {output_path!r}")
    return output_path

def get_negative_prompt(gender):
    """Return the negative prompt string for the requested gender.

    Every variant ends with the same quality-related terms; "Boy" and "Girl"
    additionally get a gender-specific prefix. Any other value (e.g.
    "Random") receives only the shared terms.
    """
    shared_terms = (
        "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, "
        "cartoon, drawing, anime, mutated hands and fingers:1.4), "
        "(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, "
        "wrong anatomy, extra limb, missing limb, floating limbs, "
        "disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    )

    if gender == "Boy":
        specific_terms = "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), "
    elif gender == "Girl":
        specific_terms = "(beard, mustache, male features: 1.4), (naked, nude: 1.4), "
    else:  # Random or anything unrecognised
        specific_terms = ""

    return specific_terms + shared_terms

# Must be applied as a decorator: as a bare `spaces.GPU(duration=40)` call the
# returned wrapper was discarded and never attached to the function.
@spaces.GPU(duration=40)
def generate_image(face_image_1, face_image_2, prompt, gender, width, height, num_inference_steps, seed):
    """Morph two face photos and generate a portrait conditioned on the result.

    Args:
        face_image_1: File path of the first face image.
        face_image_2: File path of the second face image.
        prompt: Positive text prompt for the generation.
        gender: "Boy", "Girl" or "Random"; selects the negative prompt.
            "Random" picks one of the other two at random.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of diffusion steps.
        seed: Seed for generation; ``None`` draws a random 32-bit seed.

    Returns:
        The first image returned by ``ip_model.generate``.

    Raises:
        gr.Error: If an input image is missing, or no face can be found in an
            input or in the morphed image.
    """
    if face_image_1 is None or face_image_2 is None:
        raise gr.Error("Please provide both face images.")

    if seed is None:
        seed = int.from_bytes(os.urandom(4), "big")
    # UI number components can deliver floats (e.g. 42.0); normalise to ints
    # before handing the values to the pipeline.
    seed = int(seed)
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)

    if gender == "Random":
        gender = random.choice(["Boy", "Girl"])

    negative_prompt = get_negative_prompt(gender)

    try:
        baby_image_path = morph_faces(face_image_1, face_image_2)
    except ValueError as err:
        # Surface the message in the UI instead of a generic failure.
        raise gr.Error(str(err)) from err

    detected_faces = app.get(cv2.imread(baby_image_path))
    if not detected_faces:
        # Indexing [0] on an empty result used to raise a bare IndexError.
        raise gr.Error("No face could be detected in the morphed image; try clearer, front-facing photos.")

    faceid_embeds = torch.from_numpy(detected_faces[0].normed_embedding).unsqueeze(0)

    generated_images = ip_model.generate(
        prompt=prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        seed=seed,
    )
    return generated_images[0]

# Gradio Interface
# The order of `inputs` must match the positional parameters of generate_image.
gr_interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Image(type="filepath", label="First Face Image"),
        gr.Image(type="filepath", label="Second Face Image"),
        gr.Textbox(value="portrait of a 6 y.o. child, 8k, HD, happy, in living room, perfect eyes, cute", label="Prompt"),
        gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender"),
        gr.Slider(256, 1024, value=768, step=64, label="Width"),
        gr.Slider(256, 1024, value=768, step=64, label="Height"),
        gr.Slider(1, 200, value=30, step=1, label="Number of Inference Steps"),
        # precision=0 makes the component deliver an integer seed; leaving the
        # field empty still yields None, which triggers a random seed.
        gr.Number(value=None, precision=0, label="Random Seed"),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Face Morphing and Image Generation with Stable Diffusion",
)

gr_interface.launch(share=True)