File size: 12,990 Bytes
b213d84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 |
# Copyright (c) Facebook, Inc. and its affiliates.
from typing import List
import torch
from torch import nn
from torch.autograd.function import Function
from detectron2.config import configurable
from detectron2.layers import ShapeSpec
from detectron2.structures import Boxes, Instances, pairwise_iou
from detectron2.utils.events import get_event_storage
from ..box_regression import Box2BoxTransform
from ..matcher import Matcher
from ..poolers import ROIPooler
from .box_head import build_box_head
from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference
from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads
class _ScaleGradient(Function):
@staticmethod
def forward(ctx, input, scale):
ctx.scale = scale
return input
@staticmethod
def backward(ctx, grad_output):
return grad_output * ctx.scale, None
@ROI_HEADS_REGISTRY.register()
class CascadeROIHeads(StandardROIHeads):
"""
The ROI heads that implement :paper:`Cascade R-CNN`.
"""
@configurable
def __init__(
self,
*,
box_in_features: List[str],
box_pooler: ROIPooler,
box_heads: List[nn.Module],
box_predictors: List[nn.Module],
proposal_matchers: List[Matcher],
**kwargs,
):
"""
NOTE: this interface is experimental.
Args:
box_pooler (ROIPooler): pooler that extracts region features from given boxes
box_heads (list[nn.Module]): box head for each cascade stage
box_predictors (list[nn.Module]): box predictor for each cascade stage
proposal_matchers (list[Matcher]): matcher with different IoU thresholds to
match boxes with ground truth for each stage. The first matcher matches
RPN proposals with ground truth, the other matchers use boxes predicted
by the previous stage as proposals and match them with ground truth.
"""
assert "proposal_matcher" not in kwargs, (
"CascadeROIHeads takes 'proposal_matchers=' for each stage instead "
"of one 'proposal_matcher='."
)
# The first matcher matches RPN proposals with ground truth, done in the base class
kwargs["proposal_matcher"] = proposal_matchers[0]
num_stages = self.num_cascade_stages = len(box_heads)
box_heads = nn.ModuleList(box_heads)
box_predictors = nn.ModuleList(box_predictors)
assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!"
assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!"
super().__init__(
box_in_features=box_in_features,
box_pooler=box_pooler,
box_head=box_heads,
box_predictor=box_predictors,
**kwargs,
)
self.proposal_matchers = proposal_matchers
@classmethod
def from_config(cls, cfg, input_shape):
ret = super().from_config(cfg, input_shape)
ret.pop("proposal_matcher")
return ret
@classmethod
def _init_box_head(cls, cfg, input_shape):
# fmt: off
in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS
cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS
assert len(cascade_bbox_reg_weights) == len(cascade_ious)
assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \
"CascadeROIHeads only support class-agnostic regression now!"
assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0]
# fmt: on
in_channels = [input_shape[f].channels for f in in_features]
# Check all channel counts are equal
assert len(set(in_channels)) == 1, in_channels
in_channels = in_channels[0]
box_pooler = ROIPooler(
output_size=pooler_resolution,
scales=pooler_scales,
sampling_ratio=sampling_ratio,
pooler_type=pooler_type,
)
pooled_shape = ShapeSpec(
channels=in_channels, width=pooler_resolution, height=pooler_resolution
)
box_heads, box_predictors, proposal_matchers = [], [], []
for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights):
box_head = build_box_head(cfg, pooled_shape)
box_heads.append(box_head)
box_predictors.append(
FastRCNNOutputLayers(
cfg,
box_head.output_shape,
box2box_transform=Box2BoxTransform(weights=bbox_reg_weights),
)
)
proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False))
return {
"box_in_features": in_features,
"box_pooler": box_pooler,
"box_heads": box_heads,
"box_predictors": box_predictors,
"proposal_matchers": proposal_matchers,
}
def forward(self, images, features, proposals, targets=None):
del images
if self.training:
proposals = self.label_and_sample_proposals(proposals, targets)
if self.training:
# Need targets to box head
losses = self._forward_box(features, proposals, targets)
losses.update(self._forward_mask(features, proposals))
losses.update(self._forward_keypoint(features, proposals))
return proposals, losses
else:
pred_instances = self._forward_box(features, proposals)
pred_instances = self.forward_with_given_boxes(features, pred_instances)
return pred_instances, {}
def _forward_box(self, features, proposals, targets=None):
"""
Args:
features, targets: the same as in
Same as in :meth:`ROIHeads.forward`.
proposals (list[Instances]): the per-image object proposals with
their matching ground truth.
Each has fields "proposal_boxes", and "objectness_logits",
"gt_classes", "gt_boxes".
"""
features = [features[f] for f in self.box_in_features]
head_outputs = [] # (predictor, predictions, proposals)
prev_pred_boxes = None
image_sizes = [x.image_size for x in proposals]
for k in range(self.num_cascade_stages):
if k > 0:
# The output boxes of the previous stage are used to create the input
# proposals of the next stage.
proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes)
if self.training:
proposals = self._match_and_label_boxes(proposals, k, targets)
predictions = self._run_stage(features, proposals, k)
prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals)
head_outputs.append((self.box_predictor[k], predictions, proposals))
if self.training:
losses = {}
storage = get_event_storage()
for stage, (predictor, predictions, proposals) in enumerate(head_outputs):
with storage.name_scope("stage{}".format(stage)):
stage_losses = predictor.losses(predictions, proposals)
losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()})
return losses
else:
# Each is a list[Tensor] of length #image. Each tensor is Ri x (K+1)
scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs]
# Average the scores across heads
scores = [
sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages)
for scores_per_image in zip(*scores_per_stage)
]
# Use the boxes of the last head
predictor, predictions, proposals = head_outputs[-1]
boxes = predictor.predict_boxes(predictions, proposals)
pred_instances, _ = fast_rcnn_inference(
boxes,
scores,
image_sizes,
predictor.test_score_thresh,
predictor.test_nms_thresh,
predictor.test_topk_per_image,
)
return pred_instances
@torch.no_grad()
def _match_and_label_boxes(self, proposals, stage, targets):
"""
Match proposals with groundtruth using the matcher at the given stage.
Label the proposals as foreground or background based on the match.
Args:
proposals (list[Instances]): One Instances for each image, with
the field "proposal_boxes".
stage (int): the current stage
targets (list[Instances]): the ground truth instances
Returns:
list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
"""
num_fg_samples, num_bg_samples = [], []
for proposals_per_image, targets_per_image in zip(proposals, targets):
match_quality_matrix = pairwise_iou(
targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
)
# proposal_labels are 0 or 1
matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
if len(targets_per_image) > 0:
gt_classes = targets_per_image.gt_classes[matched_idxs]
# Label unmatched proposals (0 label from matcher) as background (label=num_classes)
gt_classes[proposal_labels == 0] = self.num_classes
gt_boxes = targets_per_image.gt_boxes[matched_idxs]
else:
gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
gt_boxes = Boxes(
targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4))
)
proposals_per_image.gt_classes = gt_classes
proposals_per_image.gt_boxes = gt_boxes
num_fg_samples.append((proposal_labels == 1).sum().item())
num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])
# Log the number of fg/bg samples in each stage
storage = get_event_storage()
storage.put_scalar(
"stage{}/roi_head/num_fg_samples".format(stage),
sum(num_fg_samples) / len(num_fg_samples),
)
storage.put_scalar(
"stage{}/roi_head/num_bg_samples".format(stage),
sum(num_bg_samples) / len(num_bg_samples),
)
return proposals
def _run_stage(self, features, proposals, stage):
"""
Args:
features (list[Tensor]): #lvl input features to ROIHeads
proposals (list[Instances]): #image Instances, with the field "proposal_boxes"
stage (int): the current stage
Returns:
Same output as `FastRCNNOutputLayers.forward()`.
"""
box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
# The original implementation averages the losses among heads,
# but scale up the parameter gradients of the heads.
# This is equivalent to adding the losses among heads,
# but scale down the gradients on features.
if self.training:
box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages)
box_features = self.box_head[stage](box_features)
return self.box_predictor[stage](box_features)
def _create_proposals_from_boxes(self, boxes, image_sizes):
"""
Args:
boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4
image_sizes (list[tuple]): list of image shapes in (h, w)
Returns:
list[Instances]: per-image proposals with the given boxes.
"""
# Just like RPN, the proposals should not have gradients
boxes = [Boxes(b.detach()) for b in boxes]
proposals = []
for boxes_per_image, image_size in zip(boxes, image_sizes):
boxes_per_image.clip(image_size)
if self.training:
# do not filter empty boxes at inference time,
# because the scores from each stage need to be aligned and added later
boxes_per_image = boxes_per_image[boxes_per_image.nonempty()]
prop = Instances(image_size)
prop.proposal_boxes = boxes_per_image
proposals.append(prop)
return proposals
|