vidimatch

Runtime error

File size: 12,785 Bytes

# From https://github.com/TRI-ML/KP2D.

# Copyright 2020 Toyota Research Institute.  All rights reserved.

import random
from math import pi

import cv2
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image

from utils import image_grid


def filter_dict(dict, keywords):
    """
    Returns only the keywords that are part of a dictionary

    Parameters
    ----------
    dictionary : dict
        Dictionary for filtering
    keywords : list of str
        Keywords that will be filtered

    Returns
    -------
    keywords : list of str
        List containing the keywords that are keys in dictionary
    """
    return [key for key in keywords if key in dict]


def resize_sample(sample, image_shape, image_interpolation=Image.ANTIALIAS):
    """
    Resizes a sample, which contains an input image.

    Parameters
    ----------
    sample : dict
        Dictionary with sample values (output from a dataset's __getitem__ method)
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    # image
    image_transform = transforms.Resize(image_shape, interpolation=image_interpolation)
    sample["image"] = image_transform(sample["image"])
    return sample


def spatial_augment_sample(sample):
    """Apply spatial augmentation to an image (flipping and random affine transformation)."""
    augment_image = transforms.Compose(
        [
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        ]
    )
    sample["image"] = augment_image(sample["image"])

    return sample


def unnormalize_image(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Counterpart method of torchvision.transforms.Normalize."""
    for t, m, s in zip(tensor, mean, std):
        t.div_(1 / s).sub_(-m)
    return tensor


def sample_homography(
    shape,
    perspective=True,
    scaling=True,
    rotation=True,
    translation=True,
    n_scales=100,
    n_angles=100,
    scaling_amplitude=0.1,
    perspective_amplitude=0.4,
    patch_ratio=0.8,
    max_angle=pi / 4,
):
    """Sample a random homography that includes perspective, scale, translation and rotation operations."""

    width = float(shape[1])
    hw_ratio = float(shape[0]) / float(shape[1])

    pts1 = np.stack([[-1.0, -1.0], [-1.0, 1.0], [1.0, -1.0], [1.0, 1.0]], axis=0)
    pts2 = pts1.copy() * patch_ratio
    pts2[:, 1] *= hw_ratio

    if perspective:

        perspective_amplitude_x = np.random.normal(0.0, perspective_amplitude / 2, (2))
        perspective_amplitude_y = np.random.normal(
            0.0, hw_ratio * perspective_amplitude / 2, (2)
        )

        perspective_amplitude_x = np.clip(
            perspective_amplitude_x,
            -perspective_amplitude / 2,
            perspective_amplitude / 2,
        )
        perspective_amplitude_y = np.clip(
            perspective_amplitude_y,
            hw_ratio * -perspective_amplitude / 2,
            hw_ratio * perspective_amplitude / 2,
        )

        pts2[0, 0] -= perspective_amplitude_x[1]
        pts2[0, 1] -= perspective_amplitude_y[1]

        pts2[1, 0] -= perspective_amplitude_x[0]
        pts2[1, 1] += perspective_amplitude_y[1]

        pts2[2, 0] += perspective_amplitude_x[1]
        pts2[2, 1] -= perspective_amplitude_y[0]

        pts2[3, 0] += perspective_amplitude_x[0]
        pts2[3, 1] += perspective_amplitude_y[0]

    if scaling:

        random_scales = np.random.normal(1, scaling_amplitude / 2, (n_scales))
        random_scales = np.clip(
            random_scales, 1 - scaling_amplitude / 2, 1 + scaling_amplitude / 2
        )

        scales = np.concatenate([[1.0], random_scales], 0)
        center = np.mean(pts2, axis=0, keepdims=True)
        scaled = (
            np.expand_dims(pts2 - center, axis=0)
            * np.expand_dims(np.expand_dims(scales, 1), 1)
            + center
        )
        valid = np.arange(n_scales)  # all scales are valid except scale=1
        idx = valid[np.random.randint(valid.shape[0])]
        pts2 = scaled[idx]

    if translation:
        t_min, t_max = np.min(pts2 - [-1.0, -hw_ratio], axis=0), np.min(
            [1.0, hw_ratio] - pts2, axis=0
        )
        pts2 += np.expand_dims(
            np.stack(
                [
                    np.random.uniform(-t_min[0], t_max[0]),
                    np.random.uniform(-t_min[1], t_max[1]),
                ]
            ),
            axis=0,
        )

    if rotation:
        angles = np.linspace(-max_angle, max_angle, n_angles)
        angles = np.concatenate([[0.0], angles], axis=0)

        center = np.mean(pts2, axis=0, keepdims=True)
        rot_mat = np.reshape(
            np.stack(
                [np.cos(angles), -np.sin(angles), np.sin(angles), np.cos(angles)],
                axis=1,
            ),
            [-1, 2, 2],
        )
        rotated = (
            np.matmul(
                np.tile(np.expand_dims(pts2 - center, axis=0), [n_angles + 1, 1, 1]),
                rot_mat,
            )
            + center
        )

        valid = np.where(
            np.all(
                (rotated >= [-1.0, -hw_ratio]) & (rotated < [1.0, hw_ratio]),
                axis=(1, 2),
            )
        )[0]

        idx = valid[np.random.randint(valid.shape[0])]
        pts2 = rotated[idx]

    pts2[:, 1] /= hw_ratio

    def ax(p, q):
        return [p[0], p[1], 1, 0, 0, 0, -p[0] * q[0], -p[1] * q[0]]

    def ay(p, q):
        return [0, 0, 0, p[0], p[1], 1, -p[0] * q[1], -p[1] * q[1]]

    a_mat = np.stack([f(pts1[i], pts2[i]) for i in range(4) for f in (ax, ay)], axis=0)
    p_mat = np.transpose(
        np.stack([[pts2[i][j] for i in range(4) for j in range(2)]], axis=0)
    )

    homography = np.matmul(np.linalg.pinv(a_mat), p_mat).squeeze()
    homography = np.concatenate([homography, [1.0]]).reshape(3, 3)
    return homography


def warp_homography(sources, homography):
    """Warp features given a homography

    Parameters
    ----------
    sources: torch.tensor (1,H,W,2)
        Keypoint vector.
    homography: torch.Tensor (3,3)
        Homography.

    Returns
    -------
    warped_sources: torch.tensor (1,H,W,2)
        Warped feature vector.
    """
    _, H, W, _ = sources.shape
    warped_sources = sources.clone().squeeze()
    warped_sources = warped_sources.view(-1, 2)
    warped_sources = torch.addmm(
        homography[:, 2], warped_sources, homography[:, :2].t()
    )
    warped_sources.mul_(1 / warped_sources[:, 2].unsqueeze(1))
    warped_sources = warped_sources[:, :2].contiguous().view(1, H, W, 2)
    return warped_sources


def add_noise(img, mode="gaussian", percent=0.02):
    """Add image noise

    Parameters
    ----------
    image : np.array
        Input image
    mode: str
        Type of noise, from ['gaussian','salt','pepper','s&p']
    percent: float
        Percentage image points to add noise to.
    Returns
    -------
    image : np.array
        Image plus noise.
    """
    original_dtype = img.dtype
    if mode == "gaussian":
        mean = 0
        var = 0.1
        sigma = var * 0.5

        if img.ndim == 2:
            h, w = img.shape
            gauss = np.random.normal(mean, sigma, (h, w))
        else:
            h, w, c = img.shape
            gauss = np.random.normal(mean, sigma, (h, w, c))

        if img.dtype not in [np.float32, np.float64]:
            gauss = gauss * np.iinfo(img.dtype).max
            img = np.clip(img.astype(np.float) + gauss, 0, np.iinfo(img.dtype).max)
        else:
            img = np.clip(img.astype(np.float) + gauss, 0, 1)

    elif mode == "salt":
        print(img.dtype)
        s_vs_p = 1
        num_salt = np.ceil(percent * img.size * s_vs_p)
        coords = tuple([np.random.randint(0, i - 1, int(num_salt)) for i in img.shape])

        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max
            print(img.dtype)
    elif mode == "pepper":
        s_vs_p = 0
        num_pepper = np.ceil(percent * img.size * (1.0 - s_vs_p))
        coords = tuple(
            [np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape]
        )
        img[coords] = 0

    elif mode == "s&p":
        s_vs_p = 0.5

        # Salt mode
        num_salt = np.ceil(percent * img.size * s_vs_p)
        coords = tuple([np.random.randint(0, i - 1, int(num_salt)) for i in img.shape])
        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max

        # Pepper mode
        num_pepper = np.ceil(percent * img.size * (1.0 - s_vs_p))
        coords = tuple(
            [np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape]
        )
        img[coords] = 0
    else:
        raise ValueError("not support mode for {}".format(mode))

    noisy = img.astype(original_dtype)
    return noisy


def non_spatial_augmentation(
    img_warp_ori, jitter_paramters, color_order=[0, 1, 2], to_gray=False
):
    """Apply non-spatial augmentation to an image (jittering, color swap, convert to gray scale, Gaussian blur)."""

    brightness, contrast, saturation, hue = jitter_paramters
    color_augmentation = transforms.ColorJitter(brightness, contrast, saturation, hue)
    """
    augment_image = color_augmentation.get_params(brightness=[max(0, 1 - brightness), 1 + brightness],
                                                    contrast=[max(0, 1 - contrast), 1 + contrast],
                                                    saturation=[max(0, 1 - saturation), 1 + saturation],
                                                    hue=[-hue, hue])
    """

    B = img_warp_ori.shape[0]
    img_warp = []
    kernel_sizes = [0, 1, 3, 5]
    for b in range(B):
        img_warp_sub = img_warp_ori[b].cpu()
        img_warp_sub = torchvision.transforms.functional.to_pil_image(img_warp_sub)

        img_warp_sub_np = np.array(img_warp_sub)
        img_warp_sub_np = img_warp_sub_np[:, :, color_order]

        if np.random.rand() > 0.5:
            img_warp_sub_np = add_noise(img_warp_sub_np)

        rand_index = np.random.randint(4)
        kernel_size = kernel_sizes[rand_index]
        if kernel_size > 0:
            img_warp_sub_np = cv2.GaussianBlur(
                img_warp_sub_np, (kernel_size, kernel_size), sigmaX=0
            )

        if to_gray:
            img_warp_sub_np = cv2.cvtColor(img_warp_sub_np, cv2.COLOR_RGB2GRAY)
            img_warp_sub_np = cv2.cvtColor(img_warp_sub_np, cv2.COLOR_GRAY2RGB)

        img_warp_sub = Image.fromarray(img_warp_sub_np)
        img_warp_sub = color_augmentation(img_warp_sub)

        img_warp_sub = torchvision.transforms.functional.to_tensor(img_warp_sub).to(
            img_warp_ori.device
        )

        img_warp.append(img_warp_sub)

    img_warp = torch.stack(img_warp, dim=0)
    return img_warp


def ha_augment_sample(
    data,
    jitter_paramters=[0.5, 0.5, 0.2, 0.05],
    patch_ratio=0.7,
    scaling_amplitude=0.2,
    max_angle=pi / 4,
):
    """Apply Homography Adaptation image augmentation."""
    input_img = data["image"].unsqueeze(0)
    _, _, H, W = input_img.shape
    device = input_img.device

    homography = (
        torch.from_numpy(
            sample_homography(
                [H, W],
                patch_ratio=patch_ratio,
                scaling_amplitude=scaling_amplitude,
                max_angle=max_angle,
            )
        )
        .float()
        .to(device)
    )
    homography_inv = torch.inverse(homography)

    source = (
        image_grid(
            1, H, W, dtype=input_img.dtype, device=device, ones=False, normalized=True
        )
        .clone()
        .permute(0, 2, 3, 1)
    )

    target_warped = warp_homography(source, homography)
    img_warp = torch.nn.functional.grid_sample(input_img, target_warped)

    color_order = [0, 1, 2]
    if np.random.rand() > 0.5:
        random.shuffle(color_order)

    to_gray = False
    if np.random.rand() > 0.5:
        to_gray = True

    input_img = non_spatial_augmentation(
        input_img,
        jitter_paramters=jitter_paramters,
        color_order=color_order,
        to_gray=to_gray,
    )
    img_warp = non_spatial_augmentation(
        img_warp,
        jitter_paramters=jitter_paramters,
        color_order=color_order,
        to_gray=to_gray,
    )

    data["image"] = input_img.squeeze()
    data["image_aug"] = img_warp.squeeze()
    data["homography"] = homography
    data["homography_inv"] = homography_inv
    return data