# File: JGN/e4e/metrics/LEC.py
# Uploaded by cagataydag; duplicated from akhaliq/JoJoGAN (revision 4750bc6, 5.53 kB).
import sys
import argparse
import torch
import numpy as np
from torch.utils.data import DataLoader
sys.path.append(".")
sys.path.append("..")
from configs import data_configs
from datasets.images_dataset import ImagesDataset
from utils.model_utils import setup_model
class LEC:
    """
    Latent Editing Consistency (LEC) metric.

    For every batch the metric encodes the images, applies an edit to the latent codes, generates images from the
    edited codes, re-encodes those images, applies the inverse edit and measures the L2 distance between the
    original codes and the round-tripped codes.
    """

    def __init__(self, net, is_cars=False):
        """
        Latent Editing Consistency metric as proposed in the main paper.
        :param net: e4e model loaded over the pSp framework. The methods below use its `encoder`, `decoder`,
                    `face_pool`, `opts.start_from_latent_avg` and `latent_avg` attributes.
        :param is_cars: An indication as to whether or not to crop the middle of the StyleGAN's output images.
        """
        self.net = net
        self.is_cars = is_cars

    def _encode(self, images):
        """
        Encodes the given images into StyleGAN's latent space.
        :param images: Tensor of shape NxCxHxW representing the images to be encoded.
        :return: Tensor of shape NxKx512 representing the latent space embeddings of the given image (in W(K, *) space).
        """
        codes = self.net.encoder(images)
        assert codes.ndim == 3, f"Invalid latent codes shape, should be NxKx512 but is {codes.shape}"
        # normalize with respect to the center of an average face
        if self.net.opts.start_from_latent_avg:
            codes = codes + self.net.latent_avg.repeat(codes.shape[0], 1, 1)
        return codes

    def _generate(self, codes):
        """
        Generate the StyleGAN2 images of the given codes
        :param codes: Tensor of shape NxKx512 representing the StyleGAN's latent codes (in W(K, *) space).
        :return: Tensor of shape NxCxHxW representing the generated images.
        """
        images, _ = self.net.decoder([codes], input_is_latent=True, randomize_noise=False, return_latents=True)
        images = self.net.face_pool(images)
        if self.is_cars:
            # Keep only rows 32..223 of the pooled output (the middle band of the image).
            images = images[:, :, 32:224, :]
        return images

    @staticmethod
    def _percentile(arr, q, mode):
        """
        Version-tolerant wrapper around `np.percentile`.
        :param arr: Array of values.
        :param q: Percentile to compute, in [0, 100].
        :param mode: Estimation method name, e.g. "lower" or "higher".
        :return: The requested percentile of `arr`.
        """
        try:
            return np.percentile(arr, q, method=mode)
        except TypeError:
            # Older NumPy releases only know the keyword under its previous name.
            return np.percentile(arr, q, interpolation=mode)

    @staticmethod
    def _filter_outliers(arr):
        """
        Drop the values lying outside the [1st, 99th] percentile range.
        :param arr: Sequence of numeric values.
        :return: 1-D numpy array with the values of `arr` that lie between the 1st percentile (rounded down to an
                 existing sample) and the 99th percentile (rounded up to an existing sample), inclusive.
                 An empty input is returned unchanged.
        """
        arr = np.array(arr)
        if arr.size == 0:
            # Percentiles of an empty array are undefined; there is nothing to filter.
            return arr
        lo = LEC._percentile(arr, 1, "lower")
        hi = LEC._percentile(arr, 99, "higher")
        return np.extract(np.logical_and(lo <= arr, arr <= hi), arr)

    def _resolve_device(self, device):
        """
        Pick the device the input batches should be moved to.
        :param device: Explicit device requested by the caller, or None.
        :return: `device` when given; otherwise the device of the first parameter of `self.net`; None when the
                 network exposes no parameters (inputs are then left on whatever device they already live on).
        """
        if device is not None:
            return device
        try:
            return next(self.net.parameters()).device
        except (AttributeError, StopIteration, TypeError):
            return None

    def calculate_metric(self, data_loader, edit_function, inverse_edit_function, device=None):
        """
        Calculate the LEC metric score.
        :param data_loader: An iterable that returns a tuple of (images, _), similar to the training data loader.
        :param edit_function: A function that receives latent codes and performs a semantically meaningful edit in the
                              latent space.
        :param inverse_edit_function: A function that receives latent codes and performs the inverse edit of the
                                      `edit_function` parameter.
        :param device: Optional device on which to run the computation. Defaults to the device holding the
                       parameters of `net`, so the method does not depend on any module-level global.
        :return: The LEC metric score: the mean, after outlier filtering, of the per-batch mean L2 distances.
        :raises ValueError: If `data_loader` yields no batches.
        """
        target_device = self._resolve_device(device)
        distances = []
        with torch.no_grad():
            for batch in data_loader:
                x, _ = batch
                inputs = x.float() if target_device is None else x.to(target_device).float()
                codes = self._encode(inputs)
                # Round trip: edit -> generate -> re-encode -> inverse edit.
                edited_codes = edit_function(codes)
                edited_image = self._generate(edited_codes)
                edited_image_inversion_codes = self._encode(edited_image)
                inverse_edit_codes = inverse_edit_function(edited_image_inversion_codes)
                # L2 norm taken jointly over dims 1 and 2 for each sample, then averaged over the batch.
                dist = (codes - inverse_edit_codes).norm(2, dim=(1, 2)).mean()
                distances.append(dist.to("cpu").numpy())
        if not distances:
            raise ValueError("data_loader yielded no batches; cannot compute the LEC metric")
        distances = self._filter_outliers(distances)
        return distances.mean()
if __name__ == "__main__":
    # Command-line entry point: load a checkpoint, build the test data loader and print the LEC score.
    # `device` is deliberately left as a module-level name (the block is not wrapped in a function) so that
    # any code in this file that looks `device` up globally keeps working.
    # Fall back to the CPU when CUDA is not available instead of failing on the first `.to("cuda")`.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    parser = argparse.ArgumentParser(description="LEC metric calculator")
    parser.add_argument("--batch", type=int, default=8, help="batch size for the models")
    parser.add_argument("--images_dir", type=str, default=None,
                        help="Path to the images directory on which we calculate the LEC score")
    parser.add_argument("ckpt", metavar="CHECKPOINT", help="path to the model checkpoints")
    args = parser.parse_args()
    print(args)

    net, opts = setup_model(args.ckpt, device)
    dataset_args = data_configs.DATASETS[opts.dataset_type]
    transforms_dict = dataset_args['transforms'](opts).get_transforms()

    # An explicit --images_dir overrides the dataset's configured test directory.
    images_directory = dataset_args['test_source_root'] if args.images_dir is None else args.images_dir
    # The same directory serves as both source and target.
    test_dataset = ImagesDataset(source_root=images_directory,
                                 target_root=images_directory,
                                 source_transform=transforms_dict['transform_source'],
                                 target_transform=transforms_dict['transform_test'],
                                 opts=opts)

    # drop_last=True discards a trailing partial batch, so every batch has exactly `args.batch` images.
    data_loader = DataLoader(test_dataset,
                             batch_size=args.batch,
                             shuffle=False,
                             num_workers=2,
                             drop_last=True)

    print(f'dataset length: {len(test_dataset)}')

    # In the following example, we are using an InterfaceGAN based editing to calculate the LEC metric.
    # Change the provided example according to your domain and needs.
    # NOTE: the path is relative to the current working directory.
    # map_location lets a direction tensor that was saved from a GPU be loaded on a CPU-only machine.
    direction = torch.load('../editings/interfacegan_directions/age.pt', map_location=device).to(device)

    def edit_func_example(codes):
        """Shift the latent codes by +3 along the loaded editing direction."""
        return codes + 3 * direction

    def inverse_edit_func_example(codes):
        """Shift the latent codes by -3 along the loaded editing direction (undoes `edit_func_example`)."""
        return codes - 3 * direction

    lec = LEC(net, is_cars='car' in opts.dataset_type)
    result = lec.calculate_metric(data_loader, edit_func_example, inverse_edit_func_example)
    print(f"LEC: {result}")