vidimatch

Runtime error

vidimatch / third_party /r2d2 /nets /sampler.py

Vincentqyw

fix: roma

8b973ee over 1 year ago

15.4 kB

	# Copyright 2019-present NAVER Corp.
	# CC BY-NC-SA 3.0
	# Available only for non-commercial use

	import pdb

	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F


	""" Different samplers, each specifying how to sample pixels for the AP loss.
	"""


	class FullSampler(nn.Module):
	    """Sampler base class in which all pixels are selected.

	    Holds the flow-warping helpers that the concrete samplers build on.

	    - feats: keypoint descriptors
	    - confs: reliability values
	    """

	    def __init__(self):
	        super().__init__()
	        self.mode = "bilinear"  # interpolation used for descriptors/confidences
	        self.padding = "zeros"  # value sampled outside of the image

	    @staticmethod
	    def _aflow_to_grid(aflow):
	        """Turn an absolute flow into a normalized sampling grid.

	        Args:
	            aflow: tensor of shape (B, 2, H, W). Channel 0 is scaled by the
	                width and channel 1 by the height, i.e. (x, y) pixel
	                coordinates.

	        Returns:
	            A new (B, H, W, 2) tensor where pixel 0 maps to -1 and pixel
	            W-1 (resp. H-1) maps to +1. NaN entries are replaced by a huge
	            value so that they fall outside of the image when sampled.
	        """
	        H, W = aflow.shape[2:]
	        grid = aflow.permute(0, 2, 3, 1).clone()
	        grid[:, :, :, 0] *= 2 / (W - 1)
	        grid[:, :, :, 1] *= 2 / (H - 1)
	        grid -= 1
	        grid[torch.isnan(grid)] = 9e9  # invalids
	        return grid

	    def _warp(self, feats, confs, aflow):
	        """Warp the second image's maps into the first image's frame.

	        Args:
	            feats: pair (feat1, feat2) of (B, D, H, W) descriptor maps.
	            confs: pair (conf1, conf2) of (B, 1, H, W) maps, or a falsy
	                value when no confidence is available.
	            aflow: (B, 2, H, W) absolute flow, or a tuple holding an
	                already computed result, which is returned untouched.

	        Returns:
	            (feat2to1, mask2to1, conf2to1): warped descriptors, a uint8
	            mask that is 0 where the flow points outside of the image, and
	            the warped confidence (None when `confs` is falsy).
	        """
	        if isinstance(aflow, tuple):
	            return aflow  # result was precomputed
	        feat1, feat2 = feats
	        conf1, conf2 = confs if confs else (None, None)

	        B, two, H, W = aflow.shape
	        D = feat1.shape[1]
	        assert feat1.shape == feat2.shape == (B, D, H, W)
	        if confs:
	            assert conf1.shape == conf2.shape == (B, 1, H, W)

	        # warp img2 to img1
	        # The grid maps pixels 0 and W-1 to -1 and +1, which is the
	        # `align_corners=True` convention. The default (False) would shift
	        # every sample, so the flag has to be passed explicitly.
	        grid = self._aflow_to_grid(aflow)
	        ones2 = feat2.new_ones(feat2[:, 0:1].shape)
	        feat2to1 = F.grid_sample(
	            feat2,
	            grid,
	            mode=self.mode,
	            padding_mode=self.padding,
	            align_corners=True,
	        )
	        mask2to1 = F.grid_sample(
	            ones2, grid, mode="nearest", padding_mode="zeros", align_corners=True
	        )
	        if confs:
	            conf2to1 = F.grid_sample(
	                conf2,
	                grid,
	                mode=self.mode,
	                padding_mode=self.padding,
	                align_corners=True,
	            )
	        else:
	            conf2to1 = None
	        return feat2to1, mask2to1.byte(), conf2to1

	    def _warp_positions(self, aflow):
	        """Return the pixel coordinates of img1 and their warped version.

	        Args:
	            aflow: (B, 2, H, W) absolute flow.

	        Returns:
	            (XY, XY2): XY is the float (B, 2, H, W) map holding each pixel's
	            own (x, y) coordinates; XY2 is that map sampled through the
	            flow (zeros where the flow points outside of the image).
	        """
	        B, two, H, W = aflow.shape
	        assert two == 2

	        ys = torch.arange(H, device=aflow.device)
	        xs = torch.arange(W, device=aflow.device)
	        # channel 0 = x (varies along the width), channel 1 = y
	        XY = torch.stack(
	            (xs[None, :].expand(H, W), ys[:, None].expand(H, W)), dim=0
	        )
	        XY = XY[None].expand(B, 2, H, W).float()

	        grid = self._aflow_to_grid(aflow)
	        XY2 = F.grid_sample(
	            XY, grid, mode="bilinear", padding_mode="zeros", align_corners=True
	        )
	        return XY, XY2


	class SubSampler(FullSampler):
	    """pixels are selected in an uniformly spaced grid

	    Queries are taken from img1 every `subq` pixels and database entries
	    from the warped img2 every `subd` pixels, both starting `border` pixels
	    away from the image borders.
	    """

	    def __init__(self, border, subq, subd, perimage=False):
	        FullSampler.__init__(self)
	        assert subq % subd == 0, "subq must be multiple of subd"
	        self.sub_q = subq
	        self.sub_d = subd
	        self.border = border
	        self.perimage = perimage

	    def __repr__(self):
	        return "SubSampler(border=%d, subq=%d, subd=%d, perimage=%d)" % (
	            self.border,
	            self.sub_q,
	            self.sub_d,
	            self.perimage,
	        )

	    def __call__(self, feats, confs, aflow):
	        """Score every query pixel against every database pixel.

	        Args:
	            feats: pair of (B, D, H, W) descriptor maps.
	            confs: pair of (B, 1, H, W) reliability maps, or a falsy value.
	            aflow: (B, 2, H, W) absolute flow, or a precomputed warp tuple.

	        Returns:
	            (scores, gt, mask2, qconf):
	            - scores: query/database dot products, B x Nq x Nd when
	              `perimage` is set, (B*Nq) x (B*Nd) otherwise;
	            - gt: binary matrix of the same shape marking true matches;
	            - mask2: warp validity mask, subsampled on the database grid;
	            - qconf: mean of both confidences at the query pixels, or None.
	        """
	        # warp with optical flow in img1 coords
	        feat2_full, mask2_full, conf2_full = self._warp(feats, confs, aflow)

	        stop = -self.border or None  # border == 0 must not yield an empty slice
	        slq = slice(self.border, stop, self.sub_q)  # query grid
	        sld = slice(self.border, stop, self.sub_d)  # database grid

	        # subsample img1 (queries) and img2 (database)
	        feat1 = feats[0][:, :, slq, slq]
	        feat2 = feat2_full[:, :, sld, sld]
	        mask2 = mask2_full[:, :, sld, sld]

	        B, D, Hq, Wq = feat1.shape
	        Hd, Wd = feat2.shape[2:]

	        # compute gt
	        if self.perimage or self.sub_q != self.sub_d:
	            # compute ground-truth by comparing pixel indices
	            ref = feats[0][0:1, 0] if self.perimage else feats[0][:, 0]
	            idxs = torch.arange(
	                ref.numel(), dtype=torch.int64, device=feat1.device
	            ).view(ref.shape)
	            idxs1 = idxs[:, slq, slq].reshape(-1, Hq * Wq)
	            idxs2 = idxs[:, sld, sld].reshape(-1, Hd * Wd)
	            if self.perimage:
	                gt = idxs1[0].view(-1, 1) == idxs2[0].view(1, -1)
	                gt = gt[None, :, :].expand(B, Hq * Wq, Hd * Wd)
	            else:
	                gt = idxs1.view(-1, 1) == idxs2.view(1, -1)
	        else:
	            # same grid on both sides: query k matches database entry k
	            gt = torch.eye(
	                feat1[:, 0].numel(), dtype=torch.uint8, device=feat1.device
	            )  # always binary for AP loss

	        queries = feat1.reshape(B, D, -1).transpose(1, 2)  # B x (Hq x Wq) x D
	        database = feat2.reshape(B, D, -1)  # B x D x (Hd x Wd)
	        if self.perimage:
	            # each image is only compared with itself
	            scores = torch.bmm(queries, database)  # B x (Hq x Wq) x (Hd x Wd)
	        else:
	            # compute all images together
	            queries = queries.reshape(-1, D)  # (B x Hq x Wq) x D
	            database = database.transpose(1, 0).reshape(D, -1)  # D x (B x Hd x Wd)
	            scores = torch.matmul(queries, database)

	        # compute reliability
	        # Both confidences are read on the query grid. Taking the warped one
	        # on the database grid gives mismatching shapes when subq != subd.
	        if confs:
	            qconf = (confs[0][:, :, slq, slq] + conf2_full[:, :, slq, slq]) / 2
	        else:
	            qconf = None

	        assert gt.shape == scores.shape
	        return scores, gt, mask2, qconf


	class NghSampler(FullSampler):
	    """all pixels in a small neighborhood

	    Each query pixel of img1 is compared with the warped img2 at the same
	    location (the positive) and at every offset (i, j) of a `subd`-spaced
	    grid such that ignore**2 < i*i + j*j <= ngh**2 (the negatives).
	    """

	    def __init__(self, ngh, subq=1, subd=1, ignore=1, border=None):
	        FullSampler.__init__(self)
	        assert 0 <= ignore < ngh
	        self.ngh = ngh
	        self.ignore = ignore
	        assert subd <= ngh
	        self.sub_q = subq
	        self.sub_d = subd
	        if border is None:
	            border = ngh
	        assert border >= ngh, "border has to be larger than ngh"
	        self.border = border

	    def __repr__(self):
	        return "NghSampler(ngh=%d, subq=%d, subd=%d, ignore=%d, border=%d)" % (
	            self.ngh,
	            self.sub_q,
	            self.sub_d,
	            self.ignore,
	            self.border,
	        )

	    def trans(self, arr, i, j):
	        """Subsample `arr` on the query grid, shifted by (i, j) pixels.

	        `i` shifts along the last (width) axis and `j` along the height
	        axis. The border is cropped on both sides so that every shift
	        yields the same output size.
	        """

	        def shifted(offset):
	            # `or None` keeps the slice open-ended when the stop would be 0
	            return slice(
	                self.border + offset, offset - self.border or None, self.sub_q
	            )

	        return arr[:, :, shifted(j), shifted(i)]

	    def _neighbor_offsets(self):
	        """List the (i, j) offsets of the negatives, in row-major order."""
	        min_d = self.ignore**2
	        max_d = self.ngh**2
	        # Largest multiple of sub_d that fits in the radius, so that the
	        # offset grid is symmetric around the query pixel.
	        rad = (self.ngh // self.sub_d) * self.sub_d
	        steps = range(-rad, rad + 1, self.sub_d)
	        return [
	            (i, j)
	            for j in steps
	            for i in steps
	            if min_d < i * i + j * j <= max_d
	        ]

	    def __call__(self, feats, confs, aflow):
	        """Score each query against its neighborhood in the warped img2.

	        Args:
	            feats: pair of (B, D, H, W) descriptor maps.
	            confs: pair of (B, 1, H, W) reliability maps, or a falsy value.
	            aflow: (B, 2, H, W) absolute flow, or a precomputed warp tuple.

	        Returns:
	            (scores, gt, mask2, qconf): `scores` has one trailing entry per
	            candidate, the first being the center point; `gt` is a uint8
	            tensor of the same shape with only that first entry set;
	            `mask2` and `qconf` are taken on the query grid (`qconf` is
	            None when `confs` is falsy).
	        """
	        # warp with optical flow in img1 coords
	        feat2, mask2, conf2 = self._warp(feats, confs, aflow)

	        qfeat = self.trans(feats[0], 0, 0)
	        if confs:
	            qconf = (self.trans(confs[0], 0, 0) + self.trans(conf2, 0, 0)) / 2
	        else:
	            qconf = None
	        mask2 = self.trans(mask2, 0, 0)

	        def scores_at(i, j):
	            return (qfeat * self.trans(feat2, i, j)).sum(dim=1)

	        # compute scores for the center point, then for all neighbors
	        columns = [scores_at(0, 0)]
	        columns.extend(scores_at(i, j) for i, j in self._neighbor_offsets())

	        scores = torch.stack(columns, dim=-1)
	        gt = scores.new_zeros(scores.shape, dtype=torch.uint8)
	        gt[..., 0] = 1  # only the center point is positive

	        return scores, gt, mask2, qconf


	class FarNearSampler(FullSampler):
	    """Sample pixels from both a small neighborhood and far-away pixels.

	    How does it work?
	    1) Queries are sampled from img1,
	        - at least `border` pixels from borders and
	        - on a grid with step = `subq`

	    2) Close database pixels
	        - from the corresponding image (img2),
	        - within a `ngh` distance radius
	        - on a grid with step = `subd_ngh`
	        - ignored if distance to query is >0 and <=`ignore`

	    3) Far-away database pixels
	        - from all batch images in `img2`
	        - at least `border` pixels from borders
	        - on a grid with step = `subd_far`
	    """

	    def __init__(
	        self, subq, ngh, subd_ngh, subd_far, border=None, ignore=1, maxpool_ngh=False
	    ):
	        FullSampler.__init__(self)
	        assert ignore < ngh < subd_far, "neighborhood needs to be smaller than far step"
	        if not border:
	            border = ngh
	        # with max-pooling, the whole neighborhood counts as a potential positive
	        near_ignore = not maxpool_ngh
	        self.close_sampler = NghSampler(
	            ngh=ngh, subq=subq, subd=subd_ngh, ignore=near_ignore, border=border
	        )
	        self.faraway_sampler = SubSampler(border=border, subq=subq, subd=subd_far)
	        self.maxpool_ngh = maxpool_ngh

	    def __repr__(self):
	        near, far = self.close_sampler, self.faraway_sampler
	        parts = [
	            "FarNearSampler(subq=%d, ngh=%d" % (near.sub_q, near.ngh),
	            ", subd_ngh=%d, subd_far=%d" % (near.sub_d, far.sub_d),
	            ", border=%d, ign=%d" % (far.border, near.ignore),
	            ", maxpool_ngh=%d" % self.maxpool_ngh,
	            ")",
	        ]
	        return "".join(parts)

	    def __call__(self, feats, confs, aflow):
	        """Concatenate neighborhood scores with far-away scores.

	        Returns (scores, gt, mask, qconf), where the score and label columns
	        of the close sampler come first, followed by those of the far-away
	        sampler. The mask and confidence are those of the close sampler.
	        """
	        # warp once with the optical flow; both sub-samplers reuse the result
	        warped = self._warp(feats, confs, aflow)

	        # sample ngh pixels
	        near_scores, near_gt, near_mask, near_conf = self.close_sampler(
	            feats, confs, warped
	        )
	        near_scores = near_scores.view(-1, near_scores.shape[-1])
	        near_gt = near_gt.view(-1, near_gt.shape[-1])
	        if self.maxpool_ngh:
	            # we consider all scores from ngh as potential positives
	            near_scores, self._cached_maxpool_ngh = near_scores.max(
	                dim=1, keepdim=True
	            )
	            near_gt = near_gt[:, 0:1]

	        # sample far pixels (its mask and confidence are not used)
	        far_scores, far_gt, _, _ = self.faraway_sampler(feats, confs, warped)

	        all_scores = torch.cat((near_scores, far_scores), dim=1)
	        all_gt = torch.cat((near_gt, far_gt), dim=1)
	        qconf = near_conf if confs else None
	        return all_scores, all_gt, near_mask, qconf


	class NghSampler2(nn.Module):
	    """Similar to NghSampler, but doesn't warp the 2nd image.

	    Distance to GT =>  0 ... pos_d ... neg_d ... ngh
	    Pixel label    =>  + + + + + + 0 0 - - - - - - -

	    Subsample on query side: if > 0, regular grid
	                             if < 0, random points
	    In both cases, the number of query points is = W*H/subq**2
	    """

	    def __init__(
	        self,
	        ngh,
	        subq=1,
	        subd=1,
	        pos_d=0,
	        neg_d=2,
	        border=None,
	        maxpool_pos=True,
	        subd_neg=0,
	    ):
	        super().__init__()
	        assert 0 <= pos_d < neg_d <= (ngh if ngh else 99)
	        self.ngh = ngh
	        self.pos_d = pos_d
	        self.neg_d = neg_d
	        assert subd <= ngh or ngh == 0
	        assert subq != 0
	        self.sub_q = subq
	        self.sub_d = subd
	        self.sub_d_neg = subd_neg
	        if border is None:
	            border = ngh
	        assert border >= ngh, "border has to be larger than ngh"
	        self.border = border
	        self.maxpool_pos = maxpool_pos
	        self.precompute_offsets()

	    @staticmethod
	    def _as_offset_tensor(offsets):
	        """Pack a list of (i, j) pairs into a 2 x N int64 tensor (N may be 0)."""
	        # An explicit row count is needed: `view(-1, 2)` is ambiguous, and
	        # therefore rejected, when the list is empty.
	        return torch.tensor(offsets, dtype=torch.long).view(len(offsets), 2).t()

	    def precompute_offsets(self):
	        """Register the positive and negative (x, y) offsets as buffers.

	        Offsets lie on a `sub_d`-spaced grid centered on the ground-truth
	        pixel. Those within `pos_d` are positives, those between `neg_d`
	        and `ngh` (inclusive) are negatives, anything else is ignored.
	        """
	        pos_d2 = self.pos_d**2
	        neg_d2 = self.neg_d**2
	        rad2 = self.ngh**2
	        # Largest multiple of sub_d that fits in the radius, so that the
	        # grid is symmetric and always contains the (0, 0) offset.
	        rad = (self.ngh // self.sub_d) * self.sub_d
	        steps = range(-rad, rad + 1, self.sub_d)
	        pos = []
	        neg = []
	        for j in steps:
	            for i in steps:
	                d2 = i * i + j * j
	                if d2 <= pos_d2:
	                    pos.append((i, j))
	                elif neg_d2 <= d2 <= rad2:
	                    neg.append((i, j))

	        self.register_buffer("pos_offsets", self._as_offset_tensor(pos))
	        self.register_buffer("neg_offsets", self._as_offset_tensor(neg))

	    def gen_grid(self, step, aflow):
	        """Pick pixel positions at least `border` pixels from the borders.

	        Args:
	            step: if > 0, positions lie on a regular grid with that step;
	                otherwise they are drawn at random (the same positions for
	                every image of the batch).
	            aflow: (B, 2, H, W) tensor; only its shape and device are used.

	        Returns:
	            (b, y, x, shape): flattened batch / row / column indices of the
	            selected positions and the shape they can be viewed as, i.e.
	            (B, H1, W1) for a regular grid or (B, n) for random points.
	        """
	        B, two, H, W = aflow.shape
	        dev = aflow.device
	        b1 = torch.arange(B, device=dev)
	        if step > 0:
	            # regular grid
	            x1 = torch.arange(self.border, W - self.border, step, device=dev)
	            y1 = torch.arange(self.border, H - self.border, step, device=dev)
	            H1, W1 = len(y1), len(x1)
	            x1 = x1[None, None, :].expand(B, H1, W1).reshape(-1)
	            y1 = y1[None, :, None].expand(B, H1, W1).reshape(-1)
	            b1 = b1[:, None, None].expand(B, H1, W1).reshape(-1)
	            shape = (B, H1, W1)
	        else:
	            # randomly spread
	            n = (H - 2 * self.border) * (W - 2 * self.border) // step**2
	            x1 = torch.randint(self.border, W - self.border, (n,), device=dev)
	            y1 = torch.randint(self.border, H - self.border, (n,), device=dev)
	            x1 = x1[None, :].expand(B, n).reshape(-1)
	            y1 = y1[None, :].expand(B, n).reshape(-1)
	            b1 = b1[:, None].expand(B, n).reshape(-1)
	            shape = (B, n)
	        return b1, y1, x1, shape

	    def forward(self, feats, confs, aflow, **kw):
	        """Score query pixels of img1 against candidates of img2.

	        Args:
	            feats: pair of (B, D, H, W) descriptor maps.
	            confs: pair of (B, 1, H, W) reliability maps, or a falsy value.
	            aflow: (B, 2, H, W) absolute flow from img1 to img2.
	            **kw: ignored.

	        Returns:
	            (scores, gt, mask, qconf):
	            - scores: one row per query; columns are the positive score(s),
	              then the neighborhood negatives, then the grid distractors
	              (only when `subd_neg` is non-zero);
	            - gt: uint8 tensor of the same shape, 1 on the positive columns;
	            - mask: True where the rounded flow lands inside img2;
	            - qconf: query confidence (None when `confs` is falsy).
	        """
	        B, two, H, W = aflow.shape
	        assert two == 2
	        feat1, conf1 = feats[0], (confs[0] if confs else None)
	        feat2, conf2 = feats[1], (confs[1] if confs else None)

	        # positions in the first image
	        b1, y1, x1, shape = self.gen_grid(self.sub_q, aflow)

	        # sample features from first image
	        feat1 = feat1[b1, :, y1, x1]
	        qconf = conf1[b1, :, y1, x1].view(shape) if confs else None

	        # sample GT from second image
	        b2 = b1
	        xy2 = (aflow[b1, :, y1, x1] + 0.5).long().t()
	        mask = (0 <= xy2[0]) * (0 <= xy2[1]) * (xy2[0] < W) * (xy2[1] < H)
	        mask = mask.view(shape)

	        def clamp(xy):
	            # keep indices inside img2 (in place); invalid queries are
	            # reported through `mask` instead
	            xy[0].clamp_(0, W - 1)
	            xy[1].clamp_(0, H - 1)
	            return xy

	        # compute positive scores
	        xy2p = clamp(xy2[:, None, :] + self.pos_offsets[:, :, None])
	        pscores = (feat1[None, :, :] * feat2[b2, :, xy2p[1], xy2p[0]]).sum(dim=-1).t()
	        if self.maxpool_pos:
	            # keep only the best positive of each query
	            pscores, pos = pscores.max(dim=1, keepdim=True)
	            if confs:
	                # average with the confidence of img2 at that best positive
	                sel = clamp(xy2 + self.pos_offsets[:, pos.view(-1)])
	                qconf = (qconf + conf2[b2, :, sel[1], sel[0]].view(shape)) / 2

	        # compute negative scores
	        xy2n = clamp(xy2[:, None, :] + self.neg_offsets[:, :, None])
	        nscores = (feat1[None, :, :] * feat2[b2, :, xy2n[1], xy2n[0]]).sum(dim=-1).t()

	        if self.sub_d_neg:
	            # add distractors from a grid
	            b3, y3, x3, _ = self.gen_grid(self.sub_d_neg, aflow)
	            distractors = feat2[b3, :, y3, x3]
	            dscores = torch.matmul(feat1, distractors.t())
	            del distractors

	            # remove scores that corresponds to positives or nulls
	            dis2 = (x3 - xy2[0][:, None]) ** 2 + (y3 - xy2[1][:, None]) ** 2
	            # distractors from another image of the batch are never removed
	            dis2 += (b3 != b2[:, None]).long() * self.neg_d**2
	            dscores[dis2 < self.neg_d**2] = 0

	            scores = torch.cat((pscores, nscores, dscores), dim=1)
	        else:
	            # concat everything
	            scores = torch.cat((pscores, nscores), dim=1)

	        gt = scores.new_zeros(scores.shape, dtype=torch.uint8)
	        gt[:, : pscores.shape[1]] = 1

	        return scores, gt, mask, qconf