"""
MaskFormer Training Script.

This script is a simplified version of the training script in detectron2/tools.
"""

import copy
import itertools
import json
import logging
import os
import weakref
from collections import OrderedDict
from typing import Any, Dict, List, Set

import numpy as np
import pycocotools.mask as mask_util
import torch
from PIL import Image
from torch.nn.parallel import DistributedDataParallel

import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch
from detectron2.engine.train_loop import AMPTrainer, HookBase, SimpleTrainer
from detectron2.evaluation import (
    CityscapesInstanceEvaluator,
    CityscapesSemSegEvaluator,
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluator,
    DatasetEvaluators,
    SemSegEvaluator,
    verify_results,
)
from detectron2.projects.deeplab import add_deeplab_config, build_lr_scheduler
from detectron2.solver.build import maybe_add_gradient_clipping
from detectron2.utils.comm import all_gather, is_main_process, synchronize
from detectron2.utils.events import EventStorage
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import _log_api_usage, setup_logger


class SemSegGzeroEvaluator(DatasetEvaluator):
    """
    Evaluate semantic segmentation metrics in the generalized zero-shot setting,
    reporting IoU and accuracy separately for seen and unseen (held-out) classes.
    """

    def __init__(
        self, dataset_name, distributed, output_dir=None, *, num_classes=None, ignore_label=None
    ):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
            distributed (bool): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): an output directory to dump results.
            num_classes, ignore_label: deprecated arguments; both are obtained
                from the dataset metadata instead.
        """
        self._logger = logging.getLogger(__name__)
        if num_classes is not None:
            self._logger.warning(
                "SemSegGzeroEvaluator(num_classes) is deprecated! It should be obtained from metadata."
            )
        if ignore_label is not None:
            self._logger.warning(
                "SemSegGzeroEvaluator(ignore_label) is deprecated! It should be obtained from metadata."
            )
        self._dataset_name = dataset_name
        self._distributed = distributed
        self._output_dir = output_dir

        self._cpu_device = torch.device("cpu")

        # Map each input image to its ground-truth semantic segmentation file.
        self.input_file_to_gt_file = {
            dataset_record["file_name"]: dataset_record["sem_seg_file_name"]
            for dataset_record in DatasetCatalog.get(dataset_name)
        }

        meta = MetadataCatalog.get(dataset_name)

        # Dict that maps contiguous training ids back to dataset category ids.
        try:
            c2d = meta.stuff_dataset_id_to_contiguous_id
            self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()}
        except AttributeError:
            self._contiguous_id_to_dataset_id = None
        self._class_names = meta.stuff_classes
        # Classes held out during training; they define the "unseen" split below.
        self._val_extra_classes = meta.val_extra_classes
        self._num_classes = len(meta.stuff_classes)
        if num_classes is not None:
            assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}"
        self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label

    def reset(self):
        self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64)
        self._predictions = []
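
    # The confusion matrix is accumulated with a vectorized trick: each
    # (pred, gt) pixel pair is flattened to a single index
    # pred * (num_classes + 1) + gt and counted with one np.bincount call,
    # then reshaped back to (num_classes + 1) x (num_classes + 1).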
    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a model.
                It is a list of dicts. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name".
            outputs: the outputs of a model. It is either a list of semantic
                segmentation predictions (Tensor [H, W]) or a list of dicts with
                key "sem_seg" that contains the semantic segmentation prediction
                in the same format.
        """
        for input, output in zip(inputs, outputs):
            output = output["sem_seg"].argmax(dim=0).to(self._cpu_device)
            pred = np.array(output, dtype=int)
            with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f:
                gt = np.array(Image.open(f), dtype=int)

            # Ignored pixels are mapped to the extra row/column of the confusion
            # matrix and excluded from the metrics in `evaluate`.
            gt[gt == self._ignore_label] = self._num_classes

            self._conf_matrix += np.bincount(
                (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1),
                minlength=self._conf_matrix.size,
            ).reshape(self._conf_matrix.shape)

            self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"]))

    def evaluate(self):
        """
        Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):

        * Mean intersection-over-union averaged across classes (mIoU)
        * Frequency Weighted IoU (fwIoU)
        * Mean pixel accuracy averaged across classes (mACC)
        * Pixel Accuracy (pACC)

        It additionally reports IoU and accuracy over the seen and unseen class
        splits, and the harmonic mean of seen and unseen IoU.
        """
        if self._distributed:
            synchronize()
            conf_matrix_list = all_gather(self._conf_matrix)
            self._predictions = all_gather(self._predictions)
            self._predictions = list(itertools.chain(*self._predictions))
            if not is_main_process():
                return

            self._conf_matrix = np.zeros_like(self._conf_matrix)
            for conf_matrix in conf_matrix_list:
                self._conf_matrix += conf_matrix

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "sem_seg_predictions.json")
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._predictions))
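
        # Per-class counts from the confusion matrix (the extra row/column for
        # the ignore label is excluded):
        #   tp[i]       = pixels correctly predicted as class i (diagonal)
        #   pos_gt[i]   = ground-truth pixels of class i
        #   pos_pred[i] = pixels predicted as class i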
        acc = np.full(self._num_classes, np.nan, dtype=np.float64)
        iou = np.full(self._num_classes, np.nan, dtype=np.float64)
        tp = self._conf_matrix.diagonal()[:-1].astype(np.float64)
        pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float64)
        class_weights = pos_gt / np.sum(pos_gt)
        pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float64)
        acc_valid = pos_gt > 0
        acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
        iou_valid = (pos_gt + pos_pred) > 0
        union = pos_gt + pos_pred - tp
        iou[iou_valid] = tp[iou_valid] / union[iou_valid]
        macc = np.sum(acc[acc_valid]) / np.sum(acc_valid)
        miou = np.sum(iou[iou_valid]) / np.sum(iou_valid)
        fiou = np.sum(iou[iou_valid] * class_weights[iou_valid])
        pacc = np.sum(tp) / np.sum(pos_gt)
        seen_IoU = 0
        unseen_IoU = 0
        seen_acc = 0
        unseen_acc = 0
        res = {}
        res["mIoU"] = 100 * miou
        res["fwIoU"] = 100 * fiou
        for i, name in enumerate(self._class_names):
            res["IoU-{}".format(name)] = 100 * iou[i]
            if name in self._val_extra_classes:
                unseen_IoU = unseen_IoU + 100 * iou[i]
            else:
                seen_IoU = seen_IoU + 100 * iou[i]
        unseen_IoU = unseen_IoU / len(self._val_extra_classes)
        seen_IoU = seen_IoU / (self._num_classes - len(self._val_extra_classes))
        res["mACC"] = 100 * macc
        res["pACC"] = 100 * pacc
        for i, name in enumerate(self._class_names):
            res["ACC-{}".format(name)] = 100 * acc[i]
            if name in self._val_extra_classes:
                unseen_acc = unseen_acc + 100 * acc[i]
            else:
                seen_acc = seen_acc + 100 * acc[i]
        unseen_acc = unseen_acc / len(self._val_extra_classes)
        seen_acc = seen_acc / (self._num_classes - len(self._val_extra_classes))
        res["seen_IoU"] = seen_IoU
        res["unseen_IoU"] = unseen_IoU
        # Harmonic mean of seen and unseen IoU, the standard gzero summary metric.
        res["harmonic mean"] = 2 * seen_IoU * unseen_IoU / (seen_IoU + unseen_IoU)
        # Also expose the seen/unseen accuracy split computed above.
        res["seen_ACC"] = seen_acc
        res["unseen_ACC"] = unseen_acc

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(res, f)
        results = OrderedDict({"sem_seg": res})
        self._logger.info(results)
        return results
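
    # Each prediction is stored as one COCO-stuff style record per label present
    # in the image, e.g. (sketch):
    #   {"file_name": "...", "category_id": 7,
    #    "segmentation": {"size": [H, W], "counts": "<RLE string>"}}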
    def encode_json_sem_seg(self, sem_seg, input_file_name):
        """
        Convert semantic segmentation to COCO stuff format with segments encoded as RLEs.
        See http://cocodataset.org/#format-results
        """
        json_list = []
        for label in np.unique(sem_seg):
            if self._contiguous_id_to_dataset_id is not None:
                assert (
                    label in self._contiguous_id_to_dataset_id
                ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name)
                dataset_id = self._contiguous_id_to_dataset_id[label]
            else:
                dataset_id = int(label)
            mask = (sem_seg == label).astype(np.uint8)
            mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0]
            mask_rle["counts"] = mask_rle["counts"].decode("utf-8")
            json_list.append(
                {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle}
            )
        return json_list


from cat_seg import (
    DETRPanopticDatasetMapper,
    MaskFormerPanopticDatasetMapper,
    MaskFormerSemanticDatasetMapper,
    SemanticSegmentorWithTTA,
    add_mask_former_config,
)


def create_ddp_model(model, *, fp16_compression=False, **kwargs):
    """
    Create a DistributedDataParallel model if there are >1 processes.

    Args:
        model: a torch.nn.Module
        fp16_compression: add fp16 compression hooks to the ddp object.
            See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook
        kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`.
    """
    if comm.get_world_size() == 1:
        return model
    if "device_ids" not in kwargs:
        kwargs["device_ids"] = [comm.get_local_rank()]
    ddp = DistributedDataParallel(model, **kwargs)
    if fp16_compression:
        from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks

        ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook)
    return ddp
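

# Example usage (mirrors the call in Trainer.__init__ below):
#   model = create_ddp_model(model, broadcast_buffers=False, find_unused_parameters=True)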


class Trainer(DefaultTrainer):
    """
    Extension of the Trainer class adapted to DETR.
    """
    def __init__(self, cfg):
        self._hooks: List[HookBase] = []
        self.iter: int = 0
        self.start_iter: int = 0
        self.max_iter: int
        self.storage: EventStorage
        _log_api_usage("trainer." + self.__class__.__name__)

        logger = logging.getLogger("detectron2")
        if not logger.isEnabledFor(logging.INFO):
            setup_logger()
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())

        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)
        data_loader = self.build_train_loader(cfg)

        model = create_ddp_model(model, broadcast_buffers=False, find_unused_parameters=True)
        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
            model, data_loader, optimizer
        )

        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
        self.checkpointer = DetectionCheckpointer(
            model,
            cfg.OUTPUT_DIR,
            trainer=weakref.proxy(self),
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Create evaluator(s) for a given dataset.
        This uses the special metadata "evaluator_type" associated with each
        builtin dataset. For your own dataset, you can simply create an
        evaluator manually in your script and do not have to worry about the
        hacky if-else logic here.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluator_list = []
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        if evaluator_type in ["sem_seg", "ade20k_panoptic_seg"]:
            evaluator_list.append(
                SemSegEvaluator(
                    dataset_name,
                    distributed=True,
                    output_dir=output_folder,
                )
            )
        if evaluator_type == "sem_seg_gzero":
            evaluator_list.append(
                SemSegGzeroEvaluator(
                    dataset_name,
                    distributed=True,
                    output_dir=output_folder,
                )
            )
        if evaluator_type == "coco":
            evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        if evaluator_type in [
            "coco_panoptic_seg",
            "ade20k_panoptic_seg",
            "cityscapes_panoptic_seg",
        ]:
            evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
        if evaluator_type == "cityscapes_instance":
            assert (
                torch.cuda.device_count() >= comm.get_rank()
            ), "CityscapesEvaluator currently does not work with multiple machines."
            return CityscapesInstanceEvaluator(dataset_name)
        if evaluator_type == "cityscapes_sem_seg":
            assert (
                torch.cuda.device_count() >= comm.get_rank()
            ), "CityscapesEvaluator currently does not work with multiple machines."
            return CityscapesSemSegEvaluator(dataset_name)
        if evaluator_type == "cityscapes_panoptic_seg":
            assert (
                torch.cuda.device_count() >= comm.get_rank()
            ), "CityscapesEvaluator currently does not work with multiple machines."
            evaluator_list.append(CityscapesSemSegEvaluator(dataset_name))
        if len(evaluator_list) == 0:
            raise NotImplementedError(
                "no Evaluator for the dataset {} with the type {}".format(
                    dataset_name, evaluator_type
                )
            )
        elif len(evaluator_list) == 1:
            return evaluator_list[0]
        return DatasetEvaluators(evaluator_list)

    @classmethod
    def build_train_loader(cls, cfg):
        # Semantic segmentation dataset mapper
        if cfg.INPUT.DATASET_MAPPER_NAME == "mask_former_semantic":
            mapper = MaskFormerSemanticDatasetMapper(cfg, True)
        # Panoptic segmentation dataset mapper
        elif cfg.INPUT.DATASET_MAPPER_NAME == "mask_former_panoptic":
            mapper = MaskFormerPanopticDatasetMapper(cfg, True)
        # DETR-style dataset mapper for panoptic segmentation
        elif cfg.INPUT.DATASET_MAPPER_NAME == "detr_panoptic":
            mapper = DETRPanopticDatasetMapper(cfg, True)
        else:
            # Fall back to the default detectron2 dataset mapper.
            mapper = None
        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_lr_scheduler(cls, cfg, optimizer):
        """
        It now calls :func:`detectron2.projects.deeplab.build_lr_scheduler`.
        Overwrite it if you'd like a different scheduler.
        """
        return build_lr_scheduler(cfg, optimizer)
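
    # build_optimizer reads these solver options from the config (names taken
    # from this file; values are set in the yaml configs, not here):
    #   SOLVER.OPTIMIZER            "ADAMW" or "SGD"
    #   SOLVER.BASE_LR              base learning rate for all parameters
    #   SOLVER.BACKBONE_MULTIPLIER  lr multiplier for backbone parameters
    #   SOLVER.WEIGHT_DECAY         default weight decay
    #   SOLVER.WEIGHT_DECAY_NORM    weight decay for normalization layers
    #   SOLVER.WEIGHT_DECAY_EMBED   weight decay for embedding layers
    #   SOLVER.CLIP_GRADIENTS.*     gradient clipping mode and threshold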
    @classmethod
    def build_optimizer(cls, cfg, model):
        weight_decay_norm = cfg.SOLVER.WEIGHT_DECAY_NORM
        weight_decay_embed = cfg.SOLVER.WEIGHT_DECAY_EMBED

        defaults = {}
        defaults["lr"] = cfg.SOLVER.BASE_LR
        defaults["weight_decay"] = cfg.SOLVER.WEIGHT_DECAY

        norm_module_types = (
            torch.nn.BatchNorm1d,
            torch.nn.BatchNorm2d,
            torch.nn.BatchNorm3d,
            torch.nn.SyncBatchNorm,
            torch.nn.GroupNorm,
            torch.nn.InstanceNorm1d,
            torch.nn.InstanceNorm2d,
            torch.nn.InstanceNorm3d,
            torch.nn.LayerNorm,
            torch.nn.LocalResponseNorm,
        )

        # Build one parameter group per parameter so each can carry its own
        # lr / weight decay, depending on the module that owns it.
        params: List[Dict[str, Any]] = []
        memo: Set[torch.nn.parameter.Parameter] = set()
        for module_name, module in model.named_modules():
            for module_param_name, value in module.named_parameters(recurse=False):
                if not value.requires_grad:
                    continue
                # Avoid duplicating parameters shared between modules.
                if value in memo:
                    continue
                memo.add(value)

                hyperparams = copy.copy(defaults)
                if "backbone" in module_name:
                    hyperparams["lr"] = hyperparams["lr"] * cfg.SOLVER.BACKBONE_MULTIPLIER
                if (
                    "relative_position_bias_table" in module_param_name
                    or "absolute_pos_embed" in module_param_name
                ):
                    print(module_param_name)
                    hyperparams["weight_decay"] = 0.0
                if isinstance(module, norm_module_types):
                    hyperparams["weight_decay"] = weight_decay_norm
                if isinstance(module, torch.nn.Embedding):
                    hyperparams["weight_decay"] = weight_decay_embed
                params.append({"params": [value], **hyperparams})

        def maybe_add_full_model_gradient_clipping(optim):
            # detectron2's built-in clipping is applied per parameter; this
            # subclass instead clips the gradient norm over the full model.
            clip_norm_val = cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE
            enable = (
                cfg.SOLVER.CLIP_GRADIENTS.ENABLED
                and cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model"
                and clip_norm_val > 0.0
            )

            class FullModelGradientClippingOptimizer(optim):
                def step(self, closure=None):
                    all_params = itertools.chain(*[x["params"] for x in self.param_groups])
                    torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val)
                    super().step(closure=closure)

            return FullModelGradientClippingOptimizer if enable else optim

        optimizer_type = cfg.SOLVER.OPTIMIZER
        if optimizer_type == "SGD":
            optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)(
                params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM
            )
        elif optimizer_type == "ADAMW":
            optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)(
                params, cfg.SOLVER.BASE_LR
            )
        else:
            raise NotImplementedError(f"no optimizer type {optimizer_type}")
        if cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE != "full_model":
            optimizer = maybe_add_gradient_clipping(cfg, optimizer)
        return optimizer

    @classmethod
    def test_with_TTA(cls, cfg, model):
        logger = logging.getLogger("detectron2.trainer")
        logger.info("Running inference with test-time augmentation ...")
        model = SemanticSegmentorWithTTA(cfg, model)
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res


def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = get_cfg()
    # for poly lr schedule
    add_deeplab_config(cfg)
    add_mask_former_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    # Setup logger for the "mask_former" module
    setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="mask_former")
    return cfg


def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        if cfg.TEST.AUG.ENABLED:
            res.update(Trainer.test_with_TTA(cfg, model))
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()
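

# Typical invocations via detectron2's default argument parser (config path
# and checkpoint path are placeholders):
#   training:   python <this script> --config-file configs/<cfg>.yaml --num-gpus 8
#   evaluation: python <this script> --config-file configs/<cfg>.yaml --eval-only \
#               MODEL.WEIGHTS /path/to/checkpoint.pth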

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )