File size: 7,521 Bytes
68f6e22
15b02cd
68f6e22
 
 
 
 
 
 
 
 
 
 
 
 
 
3f0e127
68f6e22
 
 
 
 
4eeda8f
68f6e22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eeda8f
68f6e22
 
 
 
4eeda8f
68f6e22
 
 
 
 
 
 
c9970db
 
 
 
 
 
 
 
 
 
 
 
 
68f6e22
 
c9970db
68f6e22
 
c9970db
 
 
 
68f6e22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f0e127
 
 
 
 
 
 
 
ef101b1
 
 
 
 
 
 
 
 
 
 
68f6e22
4eeda8f
ef101b1
 
 
3f0e127
 
15b02cd
68f6e22
 
 
ef101b1
68f6e22
 
 
ef101b1
 
 
 
68f6e22
 
 
 
 
 
 
 
 
6f30bf0
68f6e22
 
 
208a670
 
ef101b1
 
68f6e22
 
ef101b1
6f30bf0
cc62b3b
6f30bf0
68f6e22
 
15b02cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import gradio as gr
import spaces
import os
import cv2
import torch
from PIL import Image
from insightface.app import FaceAnalysis
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from transformers import CLIPFeatureExtractor
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
import dlib
import imutils
from imutils import face_utils
import numpy as np
from skimage import transform as tf
import random

# Model identifier handed to StableDiffusionPipeline.from_pretrained in setup().
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
# Directory passed as cache_dir when the base pipeline is loaded.
base_cache = "model-cache"
# Model identifier handed to AutoencoderKL.from_pretrained in setup().
vae_model_path = "stabilityai/sd-vae-ft-mse"
# Directory that setup() creates for the IP-Adapter FaceID checkpoint.
ip_cache = "./ip-cache"
# Explicit device selection is currently disabled (see the commented-out
# uses in setup()).
#device = "cuda"

def _download_if_missing(url, destination):
    """Fetch *url* to *destination* with ``wget`` unless the file already exists.

    The download goes to ``<destination>.part`` first and is renamed only after
    wget succeeds, so an interrupted or failed transfer is never mistaken for a
    complete file on the next start.

    Args:
        url: Address of the file to download.
        destination: Local path where the file should end up.

    Raises:
        RuntimeError: If wget cannot be started or exits with a non-zero status.
    """
    import subprocess  # local import keeps this block self-contained

    if os.path.exists(destination):
        return

    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    partial_path = destination + ".part"
    try:
        # Argument list (no shell) and check=True: a failed download raises
        # instead of being silently ignored.
        subprocess.run(["wget", "-O", partial_path, url], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise RuntimeError(f"Failed to download {url}") from err
    os.replace(partial_path, destination)


# Setup function to load models and other dependencies
def setup():
    """Load the model into memory to make running multiple predictions efficient.

    Downloads the IP-Adapter FaceID checkpoint and the dlib 68-point landmark
    model when they are not present locally, then builds the InsightFace
    analyser and the Stable Diffusion pipeline wrapped by IPAdapterFaceID.

    Returns:
        A ``(app, ip_model)`` tuple: the prepared ``FaceAnalysis`` instance and
        the ``IPAdapterFaceID`` wrapper around the diffusion pipeline.

    Raises:
        RuntimeError: If one of the required model files cannot be downloaded.
    """
    # Single source of truth for the checkpoint location (derived from ip_cache).
    ip_ckpt_path = os.path.join(ip_cache, "ip-adapter-faceid_sd15.bin")

    # Get ip-adapter-faceid model
    _download_if_missing(
        "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid_sd15.bin",
        ip_ckpt_path,
    )

    # Download shape_predictor_68_face_landmarks.dat if it doesn't exist
    _download_if_missing(
        "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
        "faceid/shape_predictor_68_face_landmarks.dat",
    )

    # Face embedding
    face_app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    face_app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012
    )
    vae = AutoencoderKL.from_pretrained(
        vae_model_path
    ).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker"),
        cache_dir=base_cache,
    )
    #pipe = pipe.to(device)

    # IP adapter
    # NOTE(review): no device argument is passed here (it was commented out);
    # confirm the local ip_adapter package accepts this call signature.
    ip_model = IPAdapterFaceID(
        pipe,
        ip_ckpt_path #device
    )

    return face_app, ip_model

# Runs at import time: setup() is called once and its two results are kept as
# module globals that generate_image() reads on every request.
app, ip_model = setup()

def get_face_landmarks(image_path):
    """Detect the first face in an image and return its landmark points.

    The image is read with OpenCV, resized to a width of 512 pixels and
    surrounded by a 50-pixel white border before detection, so the returned
    landmark coordinates refer to the returned (resized, padded) image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A ``(landmarks, image)`` tuple: the landmark array produced by
        ``face_utils.shape_to_np`` for the first detected face, and the
        resized, padded BGR image the landmarks were computed on.

    Raises:
        ValueError: If the image cannot be read or no face is detected.
    """
    padding_size = 50

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('faceid/shape_predictor_68_face_landmarks.dat')

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path!r}")
    image = imutils.resize(image, width=512)

    # Add padding to the image
    image = cv2.copyMakeBorder(
        image,
        top=padding_size,
        bottom=padding_size,
        left=padding_size,
        right=padding_size,
        borderType=cv2.BORDER_CONSTANT,
        value=[255, 255, 255]  # White padding
    )

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 1)
    if len(rects) == 0:
        # Previously this fell through and returned None, which made callers
        # fail later with an unrelated unpacking error.
        raise ValueError(f"No face detected in image: {image_path!r}")

    # Only the first detection is used, as before.
    shape = predictor(gray, rects[0])
    return face_utils.shape_to_np(shape), image

def morph_faces(image1_path, image2_path, alpha=0.5):
    """Blend two face photos into one morphed image and save it to disk.

    Landmarks of both faces are aligned (similarity transform) onto a common
    target shape, both images are warped to a 512x512 canvas and then
    cross-faded.

    Args:
        image1_path: Path of the first face image.
        image2_path: Path of the second face image.
        alpha: Weight of the second face, for both the target landmark shape
            and the pixel blend. ``0.5`` (the default) gives an even mix.

    Returns:
        Path of the written PNG file (``"tmp.png"`` in the working directory).

    Raises:
        ValueError: If no face could be found in one of the images.
        OSError: If the morphed image cannot be written.
    """
    detections = []
    for path in (image1_path, image2_path):
        detected = get_face_landmarks(path)
        if detected is None:
            # Guard against a landmark helper that reports "no face" as None.
            raise ValueError(f"No face detected in image: {path!r}")
        detections.append(detected)
    (landmarks1, image1), (landmarks2, image2) = detections

    # Honour alpha for the geometry as well; the old code always used the
    # plain midpoint, which is the same value only when alpha == 0.5.
    target_landmarks = (1 - alpha) * landmarks1 + alpha * landmarks2

    tform1 = tf.estimate_transform('similarity', landmarks1, target_landmarks)
    tform2 = tf.estimate_transform('similarity', landmarks2, target_landmarks)
    warped1 = tf.warp(image1, inverse_map=tform1.inverse, output_shape=(512, 512))
    warped2 = tf.warp(image2, inverse_map=tform2.inverse, output_shape=(512, 512))

    blended = (1 - alpha) * warped1 + alpha * warped2
    # Clip before the cast so an alpha outside [0, 1] cannot wrap around in uint8.
    blended = (np.clip(blended, 0.0, 1.0) * 255).astype(np.uint8)  # Convert to [0, 255] range

    # NOTE(review): fixed file name, so every call overwrites the previous result.
    output_path = "tmp.png"
    if not cv2.imwrite(output_path, blended):
        raise OSError(f"Could not write morphed image to {output_path!r}")
    return output_path

def get_negative_prompt(gender):
    """Return the negative prompt matching the requested gender.

    Every variant ends with the same list of quality and anatomy terms;
    "Boy" and "Girl" each put their own gender-specific terms in front of it.
    Any other value (e.g. "Random") yields only the shared terms.
    """
    # Terms common to all three variants.
    shared_terms = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"

    if gender == "Boy":
        return "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), " + shared_terms
    if gender == "Girl":
        return "(beard, mustache, male features: 1.4), (naked, nude: 1.4), " + shared_terms
    # Random
    return shared_terms

def construct_prompt(base_prompt, additional_prompt, gender):
    """Build the positive prompt from its base, gender and user-supplied parts.

    The gender clause (for "Boy" or "Girl" only) comes first, followed by the
    additional prompt when it is non-empty; each part is joined with ", ".
    """
    gender_clauses = (
        ("Boy", ", male child, boy"),
        ("Girl", ", female child, girl"),
    )

    clauses = []
    for label, clause in gender_clauses:
        if gender == label:
            clauses.append(clause)
            break  # at most one gender clause applies

    if additional_prompt:
        clauses.append(", " + additional_prompt)

    prompt = base_prompt
    for clause in clauses:
        prompt += clause
    return prompt

#@spaces.GPU(duration = 40)
def generate_image(face_image_1, face_image_2, additional_prompt, gender):
    """Generate a child portrait from two parent face photos.

    The two faces are morphed into a single image, a face embedding is
    extracted from that morph, and the embedding conditions the IP-Adapter
    pipeline together with the text prompts.

    Args:
        face_image_1: File path of the first face image.
        face_image_2: File path of the second face image.
        additional_prompt: Optional extra text appended to the base prompt.
        gender: "Boy", "Girl" or any other value for no gender-specific terms.

    Returns:
        The first image returned by ``ip_model.generate``.

    Raises:
        gr.Error: If an input image is missing, or no face embedding can be
            extracted from the morphed image.
    """
    # The image inputs are None/empty when the user submits without uploading.
    if not face_image_1 or not face_image_2:
        raise gr.Error("Please provide both face images.")

    base_prompt = "portrait of a 6 y.o. child, 8k, HD, happy, perfect eyes, cute"
    full_prompt = construct_prompt(base_prompt, additional_prompt, gender)

    negative_prompt = get_negative_prompt(gender)
    baby_image_path = morph_faces(face_image_1, face_image_2)

    morphed_image = cv2.imread(baby_image_path)
    if morphed_image is None:
        raise gr.Error("The morphed face image could not be read.")

    detected_faces = app.get(morphed_image)
    if not detected_faces:
        # Indexing [0] on an empty result used to raise a bare IndexError.
        raise gr.Error(
            "No face could be detected in the morphed image. "
            "Please try clearer, front-facing photos."
        )

    # Add a leading batch dimension to the embedding of the first detected face.
    faceid_embeds = torch.from_numpy(detected_faces[0].normed_embedding).unsqueeze(0)

    generated_images = ip_model.generate(
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=768,
        height=768,
        num_inference_steps=40,
        seed=None
    )
    return generated_images[0]

# Gradio Interface with Examples
# Input widgets, listed in the same order as generate_image's parameters.
interface_inputs = [
    gr.Image(type="filepath", label="First Face Image"),
    gr.Image(type="filepath", label="Second Face Image"),
    gr.Textbox(label="Prompt"),
    gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender"),
]
interface_output = gr.Image(label="Generated Image")

# One example row: both face images, the extra prompt, and the gender.
interface_examples = [
    ["yann-lecun.jpg", "isabelle-guyon.jpg", "playing chess", "Boy"],
]

gr_interface = gr.Interface(
    fn=generate_image,
    inputs=interface_inputs,
    outputs=interface_output,
    title="Face Morphing and Image Generation with Stable Diffusion",
    examples=interface_examples,
)

# Start the web UI with a shareable link.
gr_interface.launch(share=True)