# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use

import pdb

from PIL import Image
import numpy as np

import torch
import torchvision.transforms as tvf

from tools.transforms import instanciate_transformation
from tools.transforms_tools import persp_apply

RGB_mean = [0.485, 0.456, 0.406]
RGB_std  = [0.229, 0.224, 0.225]

norm_RGB = tvf.Compose([tvf.ToTensor(), tvf.Normalize(mean=RGB_mean, std=RGB_std)])


class PairLoader:
    """ On-the-fly jittering of pairs of images with dense pixel ground-truth
    correspondences.

    crop:    random crop applied to both images
    scale:   random scaling applied to img2
    distort: random distortion applied to img2

    self[idx] returns a dictionary with keys: img1, img2, aflow, mask
     - img1: cropped original
     - img2: distorted cropped original
     - aflow: 'absolute' optical flow = (x,y) position of each pixel from img1 in img2
     - mask: (binary image) valid pixels of img1
    """
    def __init__(self, dataset, crop='', scale='', distort='', norm=norm_RGB,
                 what='aflow mask', idx_as_rng_seed=False):
        assert hasattr(dataset, 'npairs')
        assert hasattr(dataset, 'get_pair')
        self.dataset = dataset
        self.distort = instanciate_transformation(distort)
        self.crop = instanciate_transformation(crop)
        self.norm = instanciate_transformation(norm)
        self.scale = instanciate_transformation(scale)
        self.idx_as_rng_seed = idx_as_rng_seed  # to remove randomness
        self.what = what.split() if isinstance(what, str) else what
        self.n_samples = 5  # number of random trials per image

    def __len__(self):
        assert len(self.dataset) == self.dataset.npairs, pdb.set_trace()  # and not nimg
        return len(self.dataset)

    def __repr__(self):
        fmt_str = 'PairLoader\n'
        fmt_str += repr(self.dataset)
        fmt_str += ' npairs: %d\n' % self.dataset.npairs
        short_repr = lambda s: repr(s).strip().replace('\n', ', ')[14:-1].replace('  ', ' ')
        fmt_str += ' Distort: %s\n' % short_repr(self.distort)
        fmt_str += ' Crop: %s\n' % short_repr(self.crop)
        fmt_str += ' Norm: %s\n' % short_repr(self.norm)
        return fmt_str

    def __getitem__(self, i):
        #from time import time as now; t0 = now()
        if self.idx_as_rng_seed:
            import random
            random.seed(i)
            np.random.seed(i)

        # Retrieve an image pair and their absolute flow
        img_a, img_b, metadata = self.dataset.get_pair(i, self.what)

        # aflow contains pixel coordinates indicating where each
        # pixel from the left image ended up in the right image
        # as (x,y) pairs, but its shape is (H,W,2)
        aflow = np.float32(metadata['aflow'])
        mask = metadata.get('mask', np.ones(aflow.shape[:2], np.uint8))

        # apply transformations to the second image
        img_b = {'img': img_b, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)}
        if self.scale:
            img_b = self.scale(img_b)
        if self.distort:
            img_b = self.distort(img_b)

        # apply the same transformation to the flow
        aflow[:] = persp_apply(img_b['persp'], aflow.reshape(-1, 2)).reshape(aflow.shape)
        corres = None
        if 'corres' in metadata:
            corres = np.float32(metadata['corres'])
            corres[:, 1] = persp_apply(img_b['persp'], corres[:, 1])

        # apply the same transformation to the homography
        homography = None
        if 'homography' in metadata:
            homography = np.float32(metadata['homography'])
            # p_b = homography * p_a
            persp = np.float32(img_b['persp'] + (1,)).reshape(3, 3)
            homography = persp @ homography

        # determine crop size
        img_b = img_b['img']
        crop_size = self.crop({'imsize': (10000, 10000)})['imsize']
        output_size_a = min(img_a.size, crop_size)
        output_size_b = min(img_b.size, crop_size)
        img_a = np.array(img_a)
        img_b = np.array(img_b)

        ah, aw, p1 = img_a.shape
        bh, bw, p2 = img_b.shape
        assert p1 == 3
        assert p2 == 3
        assert aflow.shape == (ah, aw, 2)
        assert mask.shape == (ah, aw)
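
        # What follows searches, over a few random trials, for a pair of crop
        # windows (one per image) such that the flow maps as much of the first
        # window as possible into the second one. Each candidate pair is scored
        # by (a) the fraction of valid flow vectors that land inside the second
        # window and (b) how well they cover it, measured on a coarse 16x16
        # occupancy grid; the best-scoring pair is kept.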

        # Let's start by computing the scale of the optical flow:
        # it is estimated as sqrt(|det(J)|), where J is the 2x2 Jacobian of the
        # (x,y) flow field, computed with finite differences (np.gradient);
        # the clipping guards against degenerate flow regions.
        dx = np.gradient(aflow[:, :, 0])
        dy = np.gradient(aflow[:, :, 1])
        scale = np.sqrt(np.clip(np.abs(dx[1] * dy[0] - dx[0] * dy[1]), 1e-16, 1e16))

        accu2 = np.zeros((16, 16), bool)
        Q = lambda x, w: np.int32(16 * (x - w.start) / (w.stop - w.start))

        def window1(x, size, w):
            l = x - int(0.5 + size / 2)
            r = l + int(0.5 + size)
            if l < 0: l, r = (0, r - l)
            if r > w: l, r = (l + w - r, w)
            if l < 0: l, r = 0, w  # larger than width
            return slice(l, r)

        def window(cx, cy, win_size, scale, img_shape):
            return (window1(cy, win_size[1] * scale, img_shape[0]),
                    window1(cx, win_size[0] * scale, img_shape[1]))

        n_valid_pixel = mask.sum()
        sample_w = mask / (1e-16 + n_valid_pixel)

        def sample_valid_pixel():
            n = np.random.choice(sample_w.size, p=sample_w.ravel())
            y, x = np.unravel_index(n, sample_w.shape)
            return x, y

        # Find suitable left and right windows
        trials = 0  # take the best out of a few trials
        best = -np.inf, None
        for _ in range(50 * self.n_samples):
            if trials >= self.n_samples:
                break  # finished!

            # pick a random valid point from the first image
            if n_valid_pixel == 0:
                break
            c1x, c1y = sample_valid_pixel()

            # Find in which position the center of the left
            # window ended up being placed in the right image
            c2x, c2y = (aflow[c1y, c1x] + 0.5).astype(np.int32)
            if not (0 <= c2x < bw and 0 <= c2y < bh):
                continue

            # Get the flow scale
            sigma = scale[c1y, c1x]

            # Determine sampling windows
            if 0.2 < sigma < 1:
                win1 = window(c1x, c1y, output_size_a, 1 / sigma, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, 1, img_b.shape)
            elif 1 <= sigma < 5:
                win1 = window(c1x, c1y, output_size_a, 1, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, sigma, img_b.shape)
            else:
                continue  # bad scale

            # compute a score based on the flow
            x2, y2 = aflow[win1].reshape(-1, 2).T.astype(np.int32)
            # Check the proportion of valid flow vectors
            valid = (win2[1].start <= x2) & (x2 < win2[1].stop) \
                  & (win2[0].start <= y2) & (y2 < win2[0].stop)
            score1 = (valid * mask[win1].ravel()).mean()
            # check the coverage of the second window
            accu2[:] = False
            accu2[Q(y2[valid], win2[0]), Q(x2[valid], win2[1])] = True
            score2 = accu2.mean()
            # Check how many hits we got
            score = min(score1, score2)

            trials += 1
            if score > best[0]:
                best = score, win1, win2

        if None in best:  # couldn't find a good window
            img_a = np.zeros(output_size_a[::-1] + (3,), dtype=np.uint8)
            img_b = np.zeros(output_size_b[::-1] + (3,), dtype=np.uint8)
            aflow = np.nan * np.ones((2,) + output_size_a[::-1], dtype=np.float32)
            homography = np.nan * np.ones((3, 3), dtype=np.float32)

        else:
            win1, win2 = best[1:]
            img_a = img_a[win1]
            img_b = img_b[win2]
            aflow = aflow[win1] - np.float32([[[win2[1].start, win2[0].start]]])
            mask = mask[win1]
            aflow[~mask.view(bool)] = np.nan  # mask bad pixels!
            aflow = aflow.transpose(2, 0, 1)  # --> (2,H,W)

            if corres is not None:
                corres[:, 0] -= (win1[1].start, win1[0].start)
                corres[:, 1] -= (win2[1].start, win2[0].start)

            if homography is not None:
                trans1 = np.eye(3, dtype=np.float32)
                trans1[:2, 2] = (win1[1].start, win1[0].start)
                trans2 = np.eye(3, dtype=np.float32)
                trans2[:2, 2] = (-win2[1].start, -win2[0].start)
                homography = trans2 @ homography @ trans1
                homography /= homography[2, 2]
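
        # Rescale to the requested output size if necessary. The scale factors
        # below are computed as (out - 1) / (in - 1), i.e. an align-corners
        # convention, and are applied consistently to the flow values, the
        # correspondences and the homography.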
        if img_a.shape[:2][::-1] != output_size_a:
            sx, sy = (np.float32(output_size_a) - 1) / (np.float32(img_a.shape[:2][::-1]) - 1)
            img_a = np.asarray(Image.fromarray(img_a).resize(output_size_a, Image.ANTIALIAS))
            mask = np.asarray(Image.fromarray(mask).resize(output_size_a, Image.NEAREST))
            afx = Image.fromarray(aflow[0]).resize(output_size_a, Image.NEAREST)
            afy = Image.fromarray(aflow[1]).resize(output_size_a, Image.NEAREST)
            aflow = np.stack((np.float32(afx), np.float32(afy)))

            if corres is not None:
                corres[:, 0] *= (sx, sy)

            if homography is not None:
                homography = homography @ np.diag(np.float32([1 / sx, 1 / sy, 1]))
                homography /= homography[2, 2]

        if img_b.shape[:2][::-1] != output_size_b:
            sx, sy = (np.float32(output_size_b) - 1) / (np.float32(img_b.shape[:2][::-1]) - 1)
            img_b = np.asarray(Image.fromarray(img_b).resize(output_size_b, Image.ANTIALIAS))
            aflow *= [[[sx]], [[sy]]]

            if corres is not None:
                corres[:, 1] *= (sx, sy)

            if homography is not None:
                homography = np.diag(np.float32([sx, sy, 1])) @ homography
                homography /= homography[2, 2]

        assert aflow.dtype == np.float32, pdb.set_trace()
        assert homography is None or homography.dtype == np.float32, pdb.set_trace()

        if 'flow' in self.what:
            H, W = img_a.shape[:2]
            mgrid = np.mgrid[0:H, 0:W][::-1].astype(np.float32)
            flow = aflow - mgrid

        # Compose the output dictionary: each name listed in self.what
        # (e.g. aflow, mask, corres, flow, homography) is exported if it
        # exists as a local variable at this point.
        result = dict(img1=self.norm(img_a), img2=self.norm(img_b))
        for what in self.what:
            try:
                result[what] = eval(what)
            except NameError:
                pass
        return result


def threaded_loader(loader, iscuda, threads, batch_size=1, shuffle=True):
    """ Get a data loader, given the dataset and some parameters.

    Parameters
    ----------
    loader : object
        loader[i] returns the i-th training example.
    iscuda : bool
    batch_size : int
    threads : int
    shuffle : bool

    Returns
    -------
    a multi-threaded PyTorch loader.
    """
    return torch.utils.data.DataLoader(
        loader,
        batch_size=batch_size,
        shuffle=shuffle,
        sampler=None,
        num_workers=threads,
        pin_memory=iscuda,
        collate_fn=collate)
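
# Usage sketch: `my_pair_dataset` stands for any object exposing `npairs` and
# `get_pair`; the transform strings are parsed by instanciate_transformation,
# and `RandomCrop` is assumed to be among the transforms available in
# tools.transforms.
#
#   pair_loader = PairLoader(my_pair_dataset,
#                            scale='RandomScale(256,1024,can_upscale=True)',
#                            distort='RandomTilting(0.5), PixelNoise(25)',
#                            crop='RandomCrop(192)')
#   loader = threaded_loader(pair_loader, iscuda=True, threads=4, batch_size=8)
#   for batch in loader:
#       img1, img2, aflow = batch['img1'], batch['img2'], batch['aflow']
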
def collate(batch, _use_shared_memory=True):
    """Puts each data field into a tensor with outer dimension batch size.

    Copied from https://github.com/pytorch in torch/utils/data/_utils/collate.py
    """
    import re
    error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
    elem_type = type(batch[0])
    if isinstance(batch[0], torch.Tensor):
        out = None
        if _use_shared_memory:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = batch[0].storage()._new_shared(numel)
            out = batch[0].new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        assert elem_type.__name__ == 'ndarray'
        # array of string classes and object
        if re.search('[SaUO]', elem.dtype.str) is not None:
            raise TypeError(error_msg.format(elem.dtype))
        batch = [torch.from_numpy(b) for b in batch]
        try:
            return torch.stack(batch, 0)
        except RuntimeError:
            return batch
    elif batch[0] is None:
        return list(batch)
    elif isinstance(batch[0], int):
        return torch.LongTensor(batch)
    elif isinstance(batch[0], float):
        return torch.DoubleTensor(batch)
    elif isinstance(batch[0], str):
        return batch
    elif isinstance(batch[0], dict):
        return {key: collate([d[key] for d in batch]) for key in batch[0]}
    elif isinstance(batch[0], (tuple, list)):
        transposed = zip(*batch)
        return [collate(samples) for samples in transposed]

    raise TypeError(error_msg.format(type(batch[0])))


def tensor2img(tensor, model=None):
    """ Convert a torch/numpy tensor back to a PIL Image
        by undoing the ToTensor() and Normalize() transforms.
    """
    mean = norm_RGB.transforms[1].mean
    std = norm_RGB.transforms[1].std
    if isinstance(tensor, torch.Tensor):
        tensor = tensor.detach().cpu().numpy()

    res = np.uint8(np.clip(255 * ((tensor.transpose(1, 2, 0) * std) + mean), 0, 255))
    return Image.fromarray(res)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser("Tool to debug/visualize the data loader")
    parser.add_argument("dataloader", type=str, help="command to create the data loader")
    args = parser.parse_args()

    from datasets import *
    auto_pairs = lambda db: SyntheticPairDataset(db,
        'RandomScale(256,1024,can_upscale=True)',
        'RandomTilting(0.5), PixelNoise(25)')

    loader = eval(args.dataloader)
    print("Data loader =", loader)

    from tools.viz import show_flow
    for data in loader:
        aflow = data['aflow']
        H, W = aflow.shape[-2:]
        flow = (aflow - np.mgrid[:H, :W][::-1]).transpose(1, 2, 0)
        show_flow(tensor2img(data['img1']), tensor2img(data['img2']), flow)
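
# Example invocation of the debug tool above (a sketch: the positional argument
# is eval()'ed, so it must build a loader from names importable from `datasets`;
# `web_images` below is a hypothetical image database exposed by that module):
#
#   python -m tools.dataloader "PairLoader(auto_pairs(web_images))"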