# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
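"""
Tests for detectron2's mask crop (rasterization) and paste ops: crop/paste
round-trip consistency on COCO annotations, PolygonMasks area computation,
TorchScript compatibility of paste_masks_in_image, and an optional paste
benchmark run via __main__.
"""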
import contextlib
import io
import numpy as np
import unittest
from collections import defaultdict
import torch
import tqdm
from fvcore.common.benchmark import benchmark
from pycocotools.coco import COCO
from tabulate import tabulate
from torch.nn import functional as F

from detectron2.data import MetadataCatalog
from detectron2.layers.mask_ops import (
    pad_masks,
    paste_mask_in_image_old,
    paste_masks_in_image,
    scale_boxes,
)
from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks
from detectron2.structures.masks import polygons_to_bitmask
from detectron2.utils.file_io import PathManager
from detectron2.utils.testing import random_boxes


def iou_between_full_image_bit_masks(a, b):
    """IoU between two full-image binary masks of the same shape."""
    intersect = (a & b).sum()
    union = (a | b).sum()
    return intersect / union


def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5):
    x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
    img_h, img_w = full_image_bit_mask.shape

    mask_y = np.arange(0.0, mask_size) + 0.5  # mask y sample coords in [0.5, mask_size - 0.5]
    mask_x = np.arange(0.0, mask_size) + 0.5  # mask x sample coords in [0.5, mask_size - 0.5]
    mask_y = mask_y / mask_size * (y1 - y0) + y0
    mask_x = mask_x / mask_size * (x1 - x0) + x0
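    # F.grid_sample expects sampling locations normalized to [-1, 1]; with
    # align_corners=True, -1/+1 correspond to the centers of the first/last
    # pixels, so a pixel-space coordinate x maps to (x - 0.5) / (W - 1) * 2 - 1.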
    mask_x = (mask_x - 0.5) / (img_w - 1) * 2 - 1
    mask_y = (mask_y - 0.5) / (img_h - 1) * 2 - 1

    gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x))
    ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32)

    full_image_bit_mask = torch.from_numpy(full_image_bit_mask)
    mask = F.grid_sample(
        full_image_bit_mask[None, None, :, :].to(dtype=torch.float32),
        ind[None, :, :, :],
        align_corners=True,
    )
    return mask[0, 0] >= threshold


class TestMaskCropPaste(unittest.TestCase):
    def setUp(self):
        json_file = MetadataCatalog.get("coco_2017_val_100").json_file
        if not PathManager.isfile(json_file):
            raise unittest.SkipTest("{} not found".format(json_file))
        with contextlib.redirect_stdout(io.StringIO()):
            json_file = PathManager.get_local_path(json_file)
            # COCO() prints loading progress to stdout; keep the test output clean
            self.coco = COCO(json_file)

    def test_crop_paste_consistency(self):
        """
        rasterize_polygons_within_box (used in training)
        and
        paste_masks_in_image (used in inference)
        should be inverse operations to each other.

        This function runs several implementations of the above two operations and prints
        the reconstruction error.
        """
        anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False))  # avoid crowd annotations
        selected_anns = anns[:100]

        ious = []
        for ann in tqdm.tqdm(selected_anns):
            results = self.process_annotation(ann)
            ious.append([k[2] for k in results])

        ious = np.array(ious)
        mean_ious = ious.mean(axis=0)

        table = []
        res_dic = defaultdict(dict)
        # the (rasterize, paste) method names are identical for every annotation,
        # so the last `results` is reused here only for the row labels
        for row, iou in zip(results, mean_ious):
            table.append((row[0], row[1], iou))
            res_dic[row[0]][row[1]] = iou
        print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple"))

        # assert that the reconstruction is good:
        self.assertTrue(res_dic["polygon"]["aligned"] > 0.94)
        self.assertTrue(res_dic["roialign"]["aligned"] > 0.95)

    def process_annotation(self, ann, mask_side_len=28):
        # Parse annotation data
        img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
        height, width = img_info["height"], img_info["width"]
        gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
        gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)

        # Run rasterize ..
        torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4)
        box_bitmasks = {
            "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0],
            "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len),
            "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize(
                torch_gt_bbox, mask_side_len
            )[0],
        }

        # Run paste ..
        results = defaultdict(dict)
        for k, box_bitmask in box_bitmasks.items():
            padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
            scaled_boxes = scale_boxes(torch_gt_bbox, scale)

            r = results[k]
            r["old"] = paste_mask_in_image_old(
                padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5
            )
            r["aligned"] = paste_masks_in_image(
                box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width)
            )[0]

        table = []
        for rasterize_method, r in results.items():
            for paste_method, mask in r.items():
                mask = np.asarray(mask)
                iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask)
                table.append((rasterize_method, paste_method, iou))
        return table
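
    # PolygonMasks.area() computes the exact polygon area (shoelace formula), so a
    # d x d axis-aligned square should give d ** 2 and the right triangle covering
    # half of it should give d ** 2 / 2, as checked below.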
    def test_polygon_area(self):
        # Draw polygon boxes
        for d in [5.0, 10.0, 1000.0]:
            polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]])
            area = polygon.area()[0]
            target = d ** 2
            self.assertEqual(area, target)

        # Draw polygon triangles
        for d in [5.0, 10.0, 1000.0]:
            polygon = PolygonMasks([[[0, 0, 0, d, d, d]]])
            area = polygon.area()[0]
            target = d ** 2 / 2
            self.assertEqual(area, target)

    def test_paste_mask_scriptable(self):
        scripted_f = torch.jit.script(paste_masks_in_image)
        N = 10
        masks = torch.rand(N, 28, 28)
        boxes = Boxes(random_boxes(N, 100))
        image_shape = (150, 150)

        out = paste_masks_in_image(masks, boxes, image_shape)
        scripted_out = scripted_f(masks, boxes, image_shape)

        self.assertTrue(torch.equal(out, scripted_out))
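

# Not a unittest; invoked only when this file is run directly (see __main__ below)
# to benchmark paste_masks_in_image on CPU and, if available, CUDA.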
def benchmark_paste():
    S = 800
    H, W = image_shape = (S, S)
    N = 64
    torch.manual_seed(42)
    masks = torch.rand(N, 28, 28)

    center = torch.rand(N, 2) * 600 + 100
    wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50)
    x0y0 = torch.clamp(center - wh * 0.5, min=0.0)
    x1y1 = torch.clamp(center + wh * 0.5, max=S)
    boxes = Boxes(torch.cat([x0y0, x1y1], dim=1))

    def func(device, n=3):
        m = masks.to(device=device)
        b = boxes.to(device=device)

        def bench():
            for _ in range(n):
                paste_masks_in_image(m, b, image_shape)
            if device.type == "cuda":
                torch.cuda.synchronize()

        return bench

    specs = [{"device": torch.device("cpu"), "n": 3}]
    if torch.cuda.is_available():
        specs.append({"device": torch.device("cuda"), "n": 3})

    benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2)


if __name__ == "__main__":
    benchmark_paste()
    unittest.main()