File size: 8,128 Bytes
b213d84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import math
from typing import List, Tuple, Union
import torch
from detectron2.layers import batched_nms, cat, move_device_like
from detectron2.structures import Boxes, Instances
logger = logging.getLogger(__name__)
def _is_tracing():
# (fixed in TORCH_VERSION >= 1.9)
if torch.jit.is_scripting():
# https://github.com/pytorch/pytorch/issues/47379
return False
else:
return torch.jit.is_tracing()
def find_top_rpn_proposals(
proposals: List[torch.Tensor],
pred_objectness_logits: List[torch.Tensor],
image_sizes: List[Tuple[int, int]],
nms_thresh: float,
pre_nms_topk: int,
post_nms_topk: int,
min_box_size: float,
training: bool,
):
"""
For each feature map, select the `pre_nms_topk` highest scoring proposals,
apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
highest scoring proposals among all the feature maps for each image.
Args:
proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
All proposal predictions on the feature maps.
pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
image_sizes (list[tuple]): sizes (h, w) for each image
nms_thresh (float): IoU threshold to use for NMS
pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
When RPN is run on multiple feature maps (as in FPN) this number is per
feature map.
post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
When RPN is run on multiple feature maps (as in FPN) this number is total,
over all feature maps.
min_box_size (float): minimum proposal box side length in pixels (absolute units
wrt input images).
training (bool): True if proposals are to be used in training, otherwise False.
This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
comment.
Returns:
list[Instances]: list of N Instances. The i-th Instances
stores post_nms_topk object proposals for image i, sorted by their
objectness score in descending order.
"""
num_images = len(image_sizes)
device = (
proposals[0].device
if torch.jit.is_scripting()
else ("cpu" if torch.jit.is_tracing() else proposals[0].device)
)
# 1. Select top-k anchor for every level and every image
topk_scores = [] # #lvl Tensor, each of shape N x topk
topk_proposals = []
level_ids = [] # #lvl Tensor, each of shape (topk,)
batch_idx = move_device_like(torch.arange(num_images, device=device), proposals[0])
for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)):
Hi_Wi_A = logits_i.shape[1]
if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing
num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk)
else:
num_proposals_i = min(Hi_Wi_A, pre_nms_topk)
topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
# each is N x topk
topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4
topk_proposals.append(topk_proposals_i)
topk_scores.append(topk_scores_i)
level_ids.append(
move_device_like(
torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device),
proposals[0],
)
)
# 2. Concat all levels together
topk_scores = cat(topk_scores, dim=1)
topk_proposals = cat(topk_proposals, dim=1)
level_ids = cat(level_ids, dim=0)
# 3. For each image, run a per-level NMS, and choose topk results.
results: List[Instances] = []
for n, image_size in enumerate(image_sizes):
boxes = Boxes(topk_proposals[n])
scores_per_img = topk_scores[n]
lvl = level_ids
valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
if not valid_mask.all():
if training:
raise FloatingPointError(
"Predicted boxes or scores contain Inf/NaN. Training has diverged."
)
boxes = boxes[valid_mask]
scores_per_img = scores_per_img[valid_mask]
lvl = lvl[valid_mask]
boxes.clip(image_size)
# filter empty boxes
keep = boxes.nonempty(threshold=min_box_size)
if _is_tracing() or keep.sum().item() != len(boxes):
boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]
keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
# In Detectron1, there was different behavior during training vs. testing.
# (https://github.com/facebookresearch/Detectron/issues/459)
# During training, topk is over the proposals from *all* images in the training batch.
# During testing, it is over the proposals for each image separately.
# As a result, the training behavior becomes batch-dependent,
# and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
# This bug is addressed in Detectron2 to make the behavior independent of batch size.
keep = keep[:post_nms_topk] # keep is already sorted
res = Instances(image_size)
res.proposal_boxes = boxes[keep]
res.objectness_logits = scores_per_img[keep]
results.append(res)
return results
def add_ground_truth_to_proposals(
gt: Union[List[Instances], List[Boxes]], proposals: List[Instances]
) -> List[Instances]:
"""
Call `add_ground_truth_to_proposals_single_image` for all images.
Args:
gt(Union[List[Instances], List[Boxes]): list of N elements. Element i is a Instances
representing the ground-truth for image i.
proposals (list[Instances]): list of N elements. Element i is a Instances
representing the proposals for image i.
Returns:
list[Instances]: list of N Instances. Each is the proposals for the image,
with field "proposal_boxes" and "objectness_logits".
"""
assert gt is not None
if len(proposals) != len(gt):
raise ValueError("proposals and gt should have the same length as the number of images!")
if len(proposals) == 0:
return proposals
return [
add_ground_truth_to_proposals_single_image(gt_i, proposals_i)
for gt_i, proposals_i in zip(gt, proposals)
]
def add_ground_truth_to_proposals_single_image(
gt: Union[Instances, Boxes], proposals: Instances
) -> Instances:
"""
Augment `proposals` with `gt`.
Args:
Same as `add_ground_truth_to_proposals`, but with gt and proposals
per image.
Returns:
Same as `add_ground_truth_to_proposals`, but for only one image.
"""
if isinstance(gt, Boxes):
# convert Boxes to Instances
gt = Instances(proposals.image_size, gt_boxes=gt)
gt_boxes = gt.gt_boxes
device = proposals.objectness_logits.device
# Assign all ground-truth boxes an objectness logit corresponding to
# P(object) = sigmoid(logit) =~ 1.
gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10)))
gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device)
# Concatenating gt_boxes with proposals requires them to have the same fields
gt_proposal = Instances(proposals.image_size, **gt.get_fields())
gt_proposal.proposal_boxes = gt_boxes
gt_proposal.objectness_logits = gt_logits
for key in proposals.get_fields().keys():
assert gt_proposal.has(
key
), "The attribute '{}' in `proposals` does not exist in `gt`".format(key)
# NOTE: Instances.cat only use fields from the first item. Extra fields in latter items
# will be thrown away.
new_proposals = Instances.cat([proposals, gt_proposal])
return new_proposals
|