Spaces:

CVPR
/

regionclip-demo

Runtime error

regionclip-demo / detectron2 /modeling /proposal_generator /proposal_utils.py

jwyang

first commit

4121bec over 2 years ago

8.15 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	import logging
	import math
	from typing import List, Tuple, Union
	import torch

	from detectron2.layers import batched_nms, cat
	from detectron2.structures import Boxes, Instances
	from detectron2.utils.env import TORCH_VERSION

	logger = logging.getLogger(__name__)


	def _is_tracing():
	if torch.jit.is_scripting():
	# https://github.com/pytorch/pytorch/issues/47379
	return False
	else:
	return TORCH_VERSION >= (1, 7) and torch.jit.is_tracing()


	def find_top_rpn_proposals(
	proposals: List[torch.Tensor],
	pred_objectness_logits: List[torch.Tensor],
	image_sizes: List[Tuple[int, int]],
	nms_thresh: float,
	pre_nms_topk: int,
	post_nms_topk: int,
	min_box_size: float,
	training: bool,
	):
	"""
	For each feature map, select the `pre_nms_topk` highest scoring proposals,
	apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
	highest scoring proposals among all the feature maps for each image.

	Args:
	proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, HiWiA, 4).
	All proposal predictions on the feature maps.
	pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, HiWiA).
	image_sizes (list[tuple]): sizes (h, w) for each image
	nms_thresh (float): IoU threshold to use for NMS
	pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
	When RPN is run on multiple feature maps (as in FPN) this number is per
	feature map.
	post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
	When RPN is run on multiple feature maps (as in FPN) this number is total,
	over all feature maps.
	min_box_size (float): minimum proposal box side length in pixels (absolute units
	wrt input images).
	training (bool): True if proposals are to be used in training, otherwise False.
	This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
	comment.

	Returns:
	list[Instances]: list of N Instances. The i-th Instances
	stores post_nms_topk object proposals for image i, sorted by their
	objectness score in descending order.
	"""
	num_images = len(image_sizes)
	device = proposals[0].device

	# 1. Select top-k anchor for every level and every image
	topk_scores = [] # #lvl Tensor, each of shape N x topk
	topk_proposals = []
	level_ids = [] # #lvl Tensor, each of shape (topk,)
	batch_idx = torch.arange(num_images, device=device)
	for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)):
	Hi_Wi_A = logits_i.shape[1]
	if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing
	num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk)
	else:
	num_proposals_i = min(Hi_Wi_A, pre_nms_topk)

	# sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812
	# topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
	logits_i, idx = logits_i.sort(descending=True, dim=1)
	topk_scores_i = logits_i.narrow(1, 0, num_proposals_i)
	topk_idx = idx.narrow(1, 0, num_proposals_i)

	# each is N x topk
	topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4

	topk_proposals.append(topk_proposals_i)
	topk_scores.append(topk_scores_i)
	level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device))

	# 2. Concat all levels together
	topk_scores = cat(topk_scores, dim=1)
	topk_proposals = cat(topk_proposals, dim=1)
	level_ids = cat(level_ids, dim=0)

	# 3. For each image, run a per-level NMS, and choose topk results.
	results: List[Instances] = []
	for n, image_size in enumerate(image_sizes):
	boxes = Boxes(topk_proposals[n])
	scores_per_img = topk_scores[n]
	lvl = level_ids

	valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
	if not valid_mask.all():
	if training:
	raise FloatingPointError(
	"Predicted boxes or scores contain Inf/NaN. Training has diverged."
	)
	boxes = boxes[valid_mask]
	scores_per_img = scores_per_img[valid_mask]
	lvl = lvl[valid_mask]
	boxes.clip(image_size)

	# filter empty boxes
	keep = boxes.nonempty(threshold=min_box_size)
	if _is_tracing() or keep.sum().item() != len(boxes):
	boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]

	keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
	# In Detectron1, there was different behavior during training vs. testing.
	# (https://github.com/facebookresearch/Detectron/issues/459)
	# During training, topk is over the proposals from all images in the training batch.
	# During testing, it is over the proposals for each image separately.
	# As a result, the training behavior becomes batch-dependent,
	# and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
	# This bug is addressed in Detectron2 to make the behavior independent of batch size.
	keep = keep[:post_nms_topk] # keep is already sorted

	res = Instances(image_size)
	res.proposal_boxes = boxes[keep]
	res.objectness_logits = scores_per_img[keep]
	results.append(res)
	return results


	def add_ground_truth_to_proposals(
	gt: Union[List[Instances], List[Boxes]], proposals: List[Instances]
	) -> List[Instances]:
	"""
	Call `add_ground_truth_to_proposals_single_image` for all images.

	Args:
	gt(Union[List[Instances], List[Boxes]): list of N elements. Element i is a Instances
	representing the ground-truth for image i.
	proposals (list[Instances]): list of N elements. Element i is a Instances
	representing the proposals for image i.

	Returns:
	list[Instances]: list of N Instances. Each is the proposals for the image,
	with field "proposal_boxes" and "objectness_logits".
	"""
	assert gt is not None

	if len(proposals) != len(gt):
	raise ValueError("proposals and gt should have the same length as the number of images!")
	if len(proposals) == 0:
	return proposals

	return [
	add_ground_truth_to_proposals_single_image(gt_i, proposals_i)
	for gt_i, proposals_i in zip(gt, proposals)
	]


	def add_ground_truth_to_proposals_single_image(
	gt: Union[Instances, Boxes], proposals: Instances
	) -> Instances:
	"""
	Augment `proposals` with `gt`.

	Args:
	Same as `add_ground_truth_to_proposals`, but with gt and proposals
	per image.

	Returns:
	Same as `add_ground_truth_to_proposals`, but for only one image.
	"""
	if isinstance(gt, Boxes):
	# convert Boxes to Instances
	gt = Instances(proposals.image_size, gt_boxes=gt)

	gt_boxes = gt.gt_boxes
	device = proposals.objectness_logits.device
	# Assign all ground-truth boxes an objectness logit corresponding to
	# P(object) = sigmoid(logit) =~ 1.
	gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10)))
	gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device)

	# Concatenating gt_boxes with proposals requires them to have the same fields
	gt_proposal = Instances(proposals.image_size, **gt.get_fields())
	gt_proposal.proposal_boxes = gt_boxes
	gt_proposal.objectness_logits = gt_logits

	for key in proposals.get_fields().keys():
	assert gt_proposal.has(
	key
	), "The attribute '{}' in `proposals` does not exist in `gt`".format(key)

	# NOTE: Instances.cat only use fields from the first item. Extra fields in latter items
	# will be thrown away.
	new_proposals = Instances.cat([proposals, gt_proposal])

	return new_proposals