import threading
from typing import Any
import insightface
import roop.globals
from roop.typing import Frame, Face
import cv2
import numpy as np
from skimage import transform as trans
from roop.capturer import get_video_frame
from roop.utilities import resolve_relative_path, conditional_thread_semaphore
#THREAD_LOCK_ANALYSER = threading.Lock()
#THREAD_LOCK_SWAPPER = threading.Lock()
# Process-wide cache for the insightface analyser; built lazily (and rebuilt
# when the requested module list changes) by get_face_analyser().
FACE_ANALYSER = None


def get_face_analyser() -> Any:
    """Return the shared insightface ``FaceAnalysis`` instance, building it on demand.

    The analyser is (re)created when none exists yet or when
    ``roop.globals.g_desired_face_analysis`` differs from
    ``roop.globals.g_current_face_analysis``. Construction happens while
    holding ``conditional_thread_semaphore()``.

    Returns:
        The cached ``insightface.app.FaceAnalysis`` object.
    """
    global FACE_ANALYSER

    with conditional_thread_semaphore():
        # Read the requested configuration once so the check and the build
        # below agree on the same value.
        desired_modules = roop.globals.g_desired_face_analysis
        if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != desired_modules:
            model_path = resolve_relative_path('..')
            # removed genderage
            if roop.globals.CFG.force_cpu:
                print("Forcing CPU for Face Analysis")
                providers = ["CPUExecutionProvider"]
            else:
                providers = roop.globals.execution_providers
            analyser = insightface.app.FaceAnalysis(
                name="buffalo_l",
                root=model_path,
                providers=providers,
                allowed_modules=desired_modules,
            )
            # NOTE(review): ctx_id=0 is assumed here (the call head was missing
            # from the file); confirm against the deployed insightface version.
            analyser.prepare(
                ctx_id=0,
                det_size=(640, 640) if roop.globals.default_det_size else (320, 320),
            )
            # Publish the new analyser and mark the configuration as current
            # only after the build succeeded, so a failed build is retried.
            FACE_ANALYSER = analyser
            roop.globals.g_current_face_analysis = desired_modules
    return FACE_ANALYSER
def get_first_face(frame: Frame) -> Any:
    """Return the left-most face detected in *frame*.

    "Left-most" means the face whose bounding box has the smallest first
    coordinate (``bbox[0]``).

    Args:
        frame: Image to analyse; ``None`` is tolerated.

    Returns:
        The selected face, or ``None`` when *frame* is ``None`` or no face
        was detected.
    """
    if frame is None:
        return None
    faces = get_face_analyser().get(frame)
    # min() raises ValueError on an empty sequence, so bail out explicitly.
    if faces is None or len(faces) == 0:
        return None
    return min(faces, key=lambda face: face.bbox[0])
    # Alternative kept from the original: pick the largest bounding box instead.
    #   return sorted(faces, reverse=True, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[0]
def get_all_faces(frame: Frame) -> Any:
    """Return every face detected in *frame*, ordered left to right.

    Faces are sorted by the first coordinate of their bounding box
    (``bbox[0]``).

    Args:
        frame: Image to analyse; ``None`` is tolerated.

    Returns:
        A list of faces (possibly empty), or ``None`` when *frame* is ``None``
        or the analyser returned no result object. Callers in this module
        treat ``None`` as "nothing usable".
    """
    if frame is None:
        return None
    faces = get_face_analyser().get(frame)
    if faces is None:
        return None
    return sorted(faces, key=lambda face: face.bbox[0])
def extract_face_images(source_filename, video_info, extra_padding=-1.0):
    """Detect faces in an image or video frame and return them with a crop each.

    Args:
        source_filename: Path of the image or video to read.
        video_info: Indexable pair. A truthy ``video_info[0]`` means the source
            is a video, in which case ``video_info[1]`` is forwarded to
            ``get_video_frame`` (presumably the frame number - confirm against
            that helper).
        extra_padding: When greater than 0, each face is re-cut with extra
            room (this fraction of the box height is added above the face),
            resized with ``resize_image_keep_content`` and re-detected; the
            re-detected face and the padded cutout are stored. Otherwise the
            plain bounding-box crop is stored.

    Returns:
        A list of ``[face, image]`` pairs. The list is empty when the source
        cannot be read or no face is found.
    """
    face_data = []

    if video_info[0]:
        source_image = get_video_frame(source_filename, video_info[1])
    else:
        # np.fromfile + imdecode is used instead of cv2.imread; kept as in the original.
        source_image = cv2.imdecode(np.fromfile(source_filename, dtype=np.uint8), cv2.IMREAD_COLOR)
    # Both readers can yield None (missing frame / undecodable file).
    if source_image is None:
        return face_data

    faces = get_all_faces(source_image)
    if faces is None:
        return face_data

    for face in faces:
        if extra_padding > 0.0:
            # A 512x512 source is stored as-is together with the detected face.
            if source_image.shape[:2] == (512, 512):
                face_data.append([face, source_image])
                continue

            # Try a tight side/bottom margin first, then a wider one.
            for side_factor in (0.25, 0.5):
                (startX, startY, endX, endY) = face["bbox"].astype("int")
                startX, endX, startY, endY = clamp_cut_values(startX, endX, startY, endY, source_image)
                box_height = endY - startY
                # top needs extra room for detection
                startY -= int(box_height * extra_padding)
                endY += int(box_height * side_factor)
                side_padding = int((endX - startX) * side_factor)
                startX -= side_padding
                endX += side_padding
                startX, endX, startY, endY = clamp_cut_values(
                    startX, endX, startY, endY, source_image
                )
                face_temp = source_image[startY:endY, startX:endX]
                if face_temp.size < 1:
                    # Degenerate box: nothing to resize or detect in.
                    continue
                face_temp = resize_image_keep_content(face_temp)
                testfaces = get_all_faces(face_temp)
                if testfaces is not None and len(testfaces) > 0:
                    face_data.append([testfaces[0], face_temp])
                    break
            else:
                # Neither margin produced a detectable face.
                print("No face found after resizing, this shouldn't happen!")
            continue

        (startX, startY, endX, endY) = face["bbox"].astype("int")
        startX, endX, startY, endY = clamp_cut_values(startX, endX, startY, endY, source_image)
        face_temp = source_image[startY:endY, startX:endX]
        if face_temp.size < 1:
            # Skip empty crops (box entirely outside the image).
            continue
        face_data.append([face, face_temp])
    return face_data
def clamp_cut_values(startX, endX, startY, endY, image):
    """Clip a crop rectangle so it does not extend past the borders of *image*.

    Only the lower bound of the start coordinates and the upper bound of the
    end coordinates are enforced: starts are raised to 0, ends are lowered to
    the image width (``image.shape[1]``) or height (``image.shape[0]``).

    Returns:
        The tuple ``(startX, endX, startY, endY)`` after clipping.
    """
    img_height = image.shape[0]
    img_width = image.shape[1]
    left = max(startX, 0)
    right = min(endX, img_width)
    top = max(startY, 0)
    bottom = min(endY, img_height)
    return left, right, top, bottom
def face_offset_top(face: Face, offset):
    """Shift *face* vertically by *offset* and return it.

    The two y entries of ``face["bbox"]`` (indices 1 and 3) are modified in
    place, and ``face["landmark_2d_106"]`` is replaced by a copy whose second
    column is moved by the same amount.
    """
    for y_index in (1, 3):
        face["bbox"][y_index] += offset
    landmarks = face.landmark_2d_106
    # Per-point shift: 0 on the first column, `offset` on the second.
    shift = np.full_like(landmarks, [0, offset])
    face["landmark_2d_106"] = landmarks + shift
    return face
def resize_image_keep_content(image, new_width=512, new_height=512):
    """Scale *image* to fit inside ``new_width`` x ``new_height`` and pad with zeros.

    The aspect ratio is preserved. The scaled image is centred on a
    zero-filled canvas of exactly the requested size; when the padding is odd
    the extra pixel goes to the top/left side.

    Args:
        image: Non-empty image array of shape ``(h, w)`` or ``(h, w, c)``.
        new_width: Width of the result.
        new_height: Height of the result.

    Returns:
        An array of shape ``(new_height, new_width)`` plus the channel
        dimension of the resized image, with the dtype of the resized image.
    """
    (h, w) = image.shape[:2]
    # Choose the side that limits the scale. For a square target this is the
    # same test as `h > w`.
    if h * new_width > w * new_height:
        r = new_height / float(h)
        dim = (min(new_width, max(1, int(w * r))), new_height)
    else:
        # Calculate the ratio of the width and construct the dimensions
        r = new_width / float(w)
        dim = (new_width, min(new_height, max(1, int(h * r))))
    image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    (h, w) = image.shape[:2]
    if h == new_height and w == new_width:
        return image

    # Keep whatever channel layout the resized image has instead of assuming 3.
    canvas = np.zeros((new_height, new_width) + image.shape[2:], dtype=image.dtype)
    pad_y = new_height - h
    pad_x = new_width - w
    # ceil(pad / 2) on the leading side, floor(pad / 2) on the trailing side.
    top = pad_y - pad_y // 2
    left = pad_x - pad_x // 2
    canvas[top : top + h, left : left + w] = image
    return canvas
def rotate_image_90(image, rotate=True):
    """Rotate *image* by a quarter turn in its first two axes.

    With ``rotate`` truthy this is ``np.rot90``'s default direction
    (anticlockwise); otherwise the axes are swapped, which turns the image
    the opposite way (clockwise).
    """
    plane = (0, 1) if rotate else (1, 0)
    return np.rot90(image, 1, plane)
def rotate_anticlockwise(frame):
    """Return *frame* turned a quarter turn anticlockwise (via ``rotate_image_90``)."""
    turned = rotate_image_90(frame)
    return turned
def rotate_clockwise(frame):
    """Return *frame* turned a quarter turn clockwise (via ``rotate_image_90``)."""
    turned = rotate_image_90(frame, False)
    return turned
def rotate_image_180(image):
    """Return *image* rotated by 180 degrees in its first two axes.

    The previous implementation used ``np.flip(image, 0)``, which only mirrors
    the image top-to-bottom (left and right stay in place) and is therefore
    not a rotation. Two quarter turns reverse both axes. Like the old
    version, applying the function twice restores the input.
    """
    return np.rot90(image, 2)
# alignment code from insightface
# Reference positions of the five facial key points in a 112x112 aligned
# crop; estimate_norm() scales them to the requested output size.
# NOTE(review): float32 matches the upstream insightface template - confirm.
arcface_dst = np.array(
    [
        [38.2946, 51.6963],
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041],
    ],
    dtype=np.float32,
)
def estimate_norm(lmk, image_size=112):
    """Estimate the similarity transform mapping five landmarks onto the template.

    Args:
        lmk: Array of shape ``(5, 2)`` with the detected key points.
        image_size: Side length of the aligned output. Must be a positive
            multiple of 112 or of 128. Multiples of 128 get a horizontal
            shift of ``8 * ratio`` added to the template.

    Returns:
        The top two rows of the estimated transform matrix, suitable for
        ``cv2.warpAffine``.

    Raises:
        ValueError: If *image_size* is not a positive multiple of 112 or 128.
            Previously such sizes failed with an ``UnboundLocalError``.
    """
    assert lmk.shape == (5, 2)
    if image_size <= 0:
        raise ValueError(f"image_size must be positive, got {image_size}")
    if image_size % 112 == 0:
        ratio = float(image_size) / 112.0
        diff_x = 0
    elif image_size % 128 == 0:
        # This also covers multiples of 512. The former `% 512` branch could
        # never run because every multiple of 512 is a multiple of 128.
        ratio = float(image_size) / 128.0
        diff_x = 8.0 * ratio
    else:
        raise ValueError(
            f"image_size must be a multiple of 112 or 128, got {image_size}"
        )
    # Multiplication creates a new array, so the module-level template is untouched.
    dst = arcface_dst * ratio
    dst[:, 0] += diff_x
    tform = trans.SimilarityTransform()
    tform.estimate(lmk, dst)
    M = tform.params[0:2, :]
    return M
# Usage example (originally written for insightface's norm_crop2):
#   aligned, M = align_crop(f[1], face.kps, 512)
def align_crop(img, landmark, image_size=112, mode="arcface"):
    """Warp *img* so the face given by *landmark* is aligned to the template.

    Args:
        img: Source image.
        landmark: Key points passed to ``estimate_norm``.
        image_size: Side length of the square output.
        mode: Accepted but not used by this function.

    Returns:
        ``(warped, M)``: the aligned crop and the affine matrix used for it.
    """
    matrix = estimate_norm(landmark, image_size)
    output_size = (image_size, image_size)
    aligned = cv2.warpAffine(img, matrix, output_size, borderValue=0.0)
    return aligned, matrix
def square_crop(im, S):
    """Resize *im* so its longer side equals *S* and place it on an S x S canvas.

    The resized image is copied to the top-left corner of a zero-filled
    ``(S, S, 3)`` ``uint8`` array.

    Returns:
        ``(det_im, scale)`` where ``scale`` is ``S`` divided by the longer
        side of the input.
    """
    src_h, src_w = im.shape[0], im.shape[1]
    if src_h > src_w:
        # Portrait: height is the limiting side.
        height = S
        width = int(float(src_w) / src_h * S)
        scale = float(S) / src_h
    else:
        # Landscape or square: width is the limiting side.
        width = S
        height = int(float(src_h) / src_w * S)
        scale = float(S) / src_w
    resized_im = cv2.resize(im, (width, height))
    det_im = np.zeros((S, S, 3), dtype=np.uint8)
    det_im[: resized_im.shape[0], : resized_im.shape[1], :] = resized_im
    return det_im, scale
def transform(data, center, output_size, scale, rotation):
    """Scale, re-centre and rotate *data* into a square crop.

    Four similarity transforms are combined with ``+`` in this order: scale
    by *scale*, translate by the negated scaled *center*, rotate by
    *rotation* (degrees), translate by half of *output_size*.

    Returns:
        ``(cropped, M)``: the warped ``output_size`` x ``output_size`` image
        and the first two rows of the combined transform matrix.
    """
    angle_rad = float(rotation) * np.pi / 180.0
    scaled_cx = center[0] * scale
    scaled_cy = center[1] * scale
    half_size = output_size / 2
    # translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
    steps = (
        trans.SimilarityTransform(scale=scale),
        trans.SimilarityTransform(translation=(-1 * scaled_cx, -1 * scaled_cy)),
        trans.SimilarityTransform(rotation=angle_rad),
        trans.SimilarityTransform(translation=(half_size, half_size)),
    )
    combined = steps[0]
    for step in steps[1:]:
        combined = combined + step
    matrix = combined.params[0:2]
    warped = cv2.warpAffine(data, matrix, (output_size, output_size), borderValue=0.0)
    return warped, matrix
def trans_points2d(pts, M):
    """Apply the affine matrix *M* to an array of 2-D points.

    Args:
        pts: Array-like of shape ``(n, 2)``.
        M: Affine matrix whose first two rows are ``[a, b, tx]`` and
            ``[c, d, ty]``.

    Returns:
        A ``float32`` array of shape ``(n, 2)`` with the transformed points.
    """
    # Points are taken as float32, as in the original per-point loop.
    points = np.asarray(pts, dtype=np.float32)
    matrix = np.asarray(M)
    new_pts = np.zeros(shape=points.shape, dtype=np.float32)
    # Equivalent to np.dot(M, [x, y, 1])[0:2] for every row at once.
    new_pts[:] = points[:, 0:2] @ matrix[0:2, 0:2].T + matrix[0:2, 2]
    return new_pts
def trans_points3d(pts, M):
    """Apply the affine matrix *M* to an array of 3-D points.

    The x/y columns are transformed by *M*. The z column is multiplied by the
    length of the first row of the linear part of *M*,
    ``sqrt(M[0][0]**2 + M[0][1]**2)``.

    Args:
        pts: Array-like of shape ``(n, 3)``.
        M: Affine matrix whose first two rows are ``[a, b, tx]`` and
            ``[c, d, ty]``.

    Returns:
        A ``float32`` array of shape ``(n, 3)`` with the transformed points.
    """
    points = np.asarray(pts)
    matrix = np.asarray(M)
    scale = np.sqrt(matrix[0][0] * matrix[0][0] + matrix[0][1] * matrix[0][1])
    new_pts = np.zeros(shape=points.shape, dtype=np.float32)
    # x/y are taken as float32, as in the original per-point loop.
    xy = points[:, 0:2].astype(np.float32)
    # Equivalent to np.dot(M, [x, y, 1])[0:2] for every row at once.
    new_pts[:, 0:2] = xy @ matrix[0:2, 0:2].T + matrix[0:2, 2]
    new_pts[:, 2] = points[:, 2] * scale
    return new_pts
def trans_points(pts, M):
    """Transform *pts* with *M*, dispatching on the number of columns.

    Arrays with two columns go to ``trans_points2d``; everything else goes to
    ``trans_points3d``.
    """
    mapper = trans_points2d if pts.shape[1] == 2 else trans_points3d
    return mapper(pts, M)
def create_blank_image(width, height):
    """Return a ``height`` x ``width`` 4-channel ``uint8`` image filled with zeros."""
    # np.zeros already yields [0, 0, 0, 0] for every pixel.
    blank_shape = (height, width, 4)
    return np.zeros(blank_shape, dtype=np.uint8)