Leffa

Sleeping

App Files Files Community

Leffa / detectron2 /evaluation /lvis_evaluation.py

franciszzj

init code

b213d84 10 days ago

raw

history blame

15 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	import copy
	import itertools
	import json
	import logging
	import os
	import pickle
	from collections import OrderedDict
	import torch

	import detectron2.utils.comm as comm
	from detectron2.config import CfgNode
	from detectron2.data import MetadataCatalog
	from detectron2.structures import Boxes, BoxMode, pairwise_iou
	from detectron2.utils.file_io import PathManager
	from detectron2.utils.logger import create_small_table

	from .coco_evaluation import instances_to_coco_json
	from .evaluator import DatasetEvaluator


	class LVISEvaluator(DatasetEvaluator):
	    """
	    Evaluate object proposal and instance detection/segmentation outputs using
	    LVIS's metrics and evaluation API.

	    Predictions are accumulated by :meth:`process` and turned into metrics by
	    :meth:`evaluate`; :meth:`reset` clears the accumulated predictions.
	    """

	    def __init__(
	        self,
	        dataset_name,
	        tasks=None,
	        distributed=True,
	        output_dir=None,
	        *,
	        max_dets_per_image=None,
	    ):
	        """
	        Args:
	            dataset_name (str): name of the dataset to be evaluated.
	                It must have the following corresponding metadata:
	                "json_file": the path to the LVIS format annotation
	            tasks (tuple[str]): tasks that can be evaluated under the given
	                configuration. A task is one of "bbox", "segm".
	                By default, will infer this automatically from predictions.
	            distributed (True): if True, will collect results from all ranks for evaluation.
	                Otherwise, will evaluate the results in the current process.
	            output_dir (str): optional, an output directory to dump results.
	            max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
	                This limit, by default of the LVIS dataset, is 300.
	        """
	        from lvis import LVIS

	        self._logger = logging.getLogger(__name__)

	        if tasks is not None and isinstance(tasks, CfgNode):
	            # `Logger.warn` is a deprecated alias; use `warning`.
	            self._logger.warning(
	                "LVIS Evaluator instantiated using config, this is deprecated behavior."
	                " Please pass in explicit arguments instead."
	            )
	            self._tasks = None  # Inferring it from predictions should be better
	        else:
	            self._tasks = tasks

	        self._distributed = distributed
	        self._output_dir = output_dir
	        self._max_dets_per_image = max_dets_per_image

	        self._cpu_device = torch.device("cpu")

	        self._metadata = MetadataCatalog.get(dataset_name)
	        json_file = PathManager.get_local_path(self._metadata.json_file)
	        self._lvis_api = LVIS(json_file)
	        # Test set json files do not contain annotations (evaluation must be
	        # performed using the LVIS evaluation server).
	        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

	        # Start with an empty buffer so that process() also works when the
	        # caller did not call reset() first.
	        self._predictions = []

	    def reset(self):
	        """Discard all predictions accumulated so far."""
	        self._predictions = []

	    def process(self, inputs, outputs):
	        """
	        Args:
	            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
	                It is a list of dict. Each dict corresponds to an image and
	                contains keys like "height", "width", "file_name", "image_id".
	            outputs: the outputs of a LVIS model. It is a list of dicts with key
	                "instances" that contains :class:`Instances`.
	        """
	        for inp, output in zip(inputs, outputs):
	            image_id = inp["image_id"]
	            prediction = {"image_id": image_id}

	            if "instances" in output:
	                instances = output["instances"].to(self._cpu_device)
	                prediction["instances"] = instances_to_coco_json(instances, image_id)
	            if "proposals" in output:
	                prediction["proposals"] = output["proposals"].to(self._cpu_device)
	            self._predictions.append(prediction)

	    def evaluate(self):
	        """
	        Compute the metrics for everything passed to :meth:`process`.

	        Returns:
	            A deep copy of the results dict (task name -> {metric: value}).
	            An empty dict when no predictions were received. In distributed
	            mode, processes other than the main one return None.
	        """
	        if self._distributed:
	            comm.synchronize()
	            gathered = comm.gather(self._predictions, dst=0)
	            predictions = list(itertools.chain.from_iterable(gathered))

	            if not comm.is_main_process():
	                return
	        else:
	            predictions = self._predictions

	        if not predictions:
	            self._logger.warning("[LVISEvaluator] Did not receive valid predictions.")
	            return {}

	        if self._output_dir:
	            PathManager.mkdirs(self._output_dir)
	            file_path = os.path.join(self._output_dir, "instances_predictions.pth")
	            with PathManager.open(file_path, "wb") as f:
	                torch.save(predictions, f)

	        self._results = OrderedDict()
	        # The first prediction decides which kinds of evaluation are run.
	        if "proposals" in predictions[0]:
	            self._eval_box_proposals(predictions)
	        if "instances" in predictions[0]:
	            self._eval_predictions(predictions)
	        # Copy so the caller can do whatever with results
	        return copy.deepcopy(self._results)

	    def _tasks_from_predictions(self, predictions):
	        """Return ("bbox", "segm") if any prediction has a "segmentation" key, else ("bbox",)."""
	        if any("segmentation" in pred for pred in predictions):
	            return ("bbox", "segm")
	        return ("bbox",)

	    def _eval_predictions(self, predictions):
	        """
	        Evaluate predictions. Fill self._results with the metrics of the tasks.

	        Args:
	            predictions (list[dict]): list of outputs from the model
	        """
	        self._logger.info("Preparing results in the LVIS format ...")
	        # Work on shallow copies: the category ids are rewritten below, and doing
	        # that on the dicts stored in self._predictions would make a second call
	        # to evaluate() remap already-remapped ids.
	        lvis_results = [dict(result) for pred in predictions for result in pred["instances"]]
	        tasks = self._tasks or self._tasks_from_predictions(lvis_results)

	        # LVIS evaluator can be used to evaluate results for COCO dataset categories.
	        # In this case `_metadata` variable will have a field with COCO-specific category mapping.
	        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
	            reverse_id_mapping = {
	                v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
	            }
	            for result in lvis_results:
	                result["category_id"] = reverse_id_mapping[result["category_id"]]
	        else:
	            # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
	            for result in lvis_results:
	                result["category_id"] += 1

	        if self._output_dir:
	            file_path = os.path.join(self._output_dir, "lvis_instances_results.json")
	            self._logger.info("Saving results to {}".format(file_path))
	            with PathManager.open(file_path, "w") as f:
	                f.write(json.dumps(lvis_results))
	                f.flush()

	        if not self._do_evaluation:
	            self._logger.info("Annotations are not available for evaluation.")
	            return

	        self._logger.info("Evaluating predictions ...")
	        for task in sorted(tasks):
	            res = _evaluate_predictions_on_lvis(
	                self._lvis_api,
	                lvis_results,
	                task,
	                max_dets_per_image=self._max_dets_per_image,
	                class_names=self._metadata.get("thing_classes"),
	            )
	            self._results[task] = res

	    def _eval_box_proposals(self, predictions):
	        """
	        Evaluate the box proposals in predictions.
	        Fill self._results with the metrics for "box_proposals" task.
	        """
	        if self._output_dir:
	            # Saving generated box proposals to file.
	            # Predicted box_proposals are in XYXY_ABS mode.
	            bbox_mode = BoxMode.XYXY_ABS.value
	            ids, boxes, objectness_logits = [], [], []
	            for prediction in predictions:
	                proposals = prediction["proposals"]
	                ids.append(prediction["image_id"])
	                boxes.append(proposals.proposal_boxes.tensor.numpy())
	                objectness_logits.append(proposals.objectness_logits.numpy())

	            proposal_data = {
	                "boxes": boxes,
	                "objectness_logits": objectness_logits,
	                "ids": ids,
	                "bbox_mode": bbox_mode,
	            }
	            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
	                pickle.dump(proposal_data, f)

	        if not self._do_evaluation:
	            self._logger.info("Annotations are not available for evaluation.")
	            return

	        self._logger.info("Evaluating bbox proposals ...")
	        res = {}
	        # Maps the area name to the suffix used in the metric key, e.g. "ARs@100".
	        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
	        for limit in [100, 1000]:
	            for area, suffix in areas.items():
	                stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit)
	                key = "AR{}@{:d}".format(suffix, limit)
	                res[key] = float(stats["ar"].item() * 100)
	        self._logger.info("Proposal metrics: \n" + create_small_table(res))
	        self._results["box_proposals"] = res


	# Inspired by Detectron:
	# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa
	def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
	    """
	    Evaluate detection proposal recall metrics. This function is a much
	    faster alternative to the official LVIS API recall evaluation code. However,
	    it produces slightly different results.

	    Args:
	        dataset_predictions (list[dict]): one dict per image with the keys
	            "image_id" and "proposals". The proposals object must expose
	            `objectness_logits` and `proposal_boxes` and support indexing
	            and `len()`.
	        lvis_api: object providing `get_ann_ids(img_ids=...)` and
	            `load_anns(ann_ids)`; each annotation has "bbox" (XYWH_ABS) and "area".
	        thresholds (None or Tensor): IoU thresholds at which recall is computed.
	            Defaults to 0.5 to 0.95 in steps of 0.05.
	        area (str): which ground-truth area range to evaluate; one of the keys
	            of `areas` below.
	        limit (None or int): if given, only the `limit` highest-scoring
	            proposals of each image are used.

	    Returns:
	        dict with keys "ar" (mean recall over the thresholds), "recalls",
	        "thresholds", "gt_overlaps" (sorted IoU recorded for each counted
	        ground-truth box) and "num_pos" (number of counted ground-truth boxes).
	        When "num_pos" is 0 the recalls are the result of a division by zero.
	    """
	    # Record max overlap value for each gt box
	    # Return vector of overlap values
	    areas = {
	        "all": 0,
	        "small": 1,
	        "medium": 2,
	        "large": 3,
	        "96-128": 4,
	        "128-256": 5,
	        "256-512": 6,
	        "512-inf": 7,
	    }
	    # [min_area, max_area] per entry of `areas`, expressed as squared side lengths.
	    area_ranges = [
	        [0**2, 1e5**2],  # all
	        [0**2, 32**2],  # small
	        [32**2, 96**2],  # medium
	        [96**2, 1e5**2],  # large
	        [96**2, 128**2],  # 96-128
	        [128**2, 256**2],  # 128-256
	        [256**2, 512**2],  # 256-512
	        [512**2, 1e5**2],  # 512-inf
	    ]
	    assert area in areas, "Unknown area range: {}".format(area)
	    area_range = area_ranges[areas[area]]
	    gt_overlaps = []
	    num_pos = 0

	    for prediction_dict in dataset_predictions:
	        predictions = prediction_dict["proposals"]

	        # sort predictions in descending order
	        # TODO maybe remove this and make it explicit in the documentation
	        inds = predictions.objectness_logits.sort(descending=True)[1]
	        predictions = predictions[inds]

	        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
	        anno = lvis_api.load_anns(ann_ids)
	        gt_boxes = [
	            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
	        ]
	        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
	        gt_boxes = Boxes(gt_boxes)
	        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

	        if len(gt_boxes) == 0 or len(predictions) == 0:
	            continue

	        # Keep only the ground-truth boxes whose area lies in the requested range.
	        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
	        gt_boxes = gt_boxes[valid_gt_inds]

	        num_pos += len(gt_boxes)

	        if len(gt_boxes) == 0:
	            continue

	        if limit is not None and len(predictions) > limit:
	            predictions = predictions[:limit]

	        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

	        # Greedy one-to-one matching: repeatedly take the best remaining
	        # (proposal, gt) pair, record its IoU, then retire both.
	        _gt_overlaps = torch.zeros(len(gt_boxes))
	        for j in range(min(len(predictions), len(gt_boxes))):
	            # find which proposal box maximally covers each gt box
	            # and get the iou amount of coverage for each gt box
	            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

	            # find which gt box is 'best' covered (i.e. 'best' = most iou)
	            gt_ovr, gt_ind = max_overlaps.max(dim=0)
	            assert gt_ovr >= 0
	            # find the proposal box that covers the best covered gt box
	            box_ind = argmax_overlaps[gt_ind]
	            # record the iou coverage of this gt box
	            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
	            assert _gt_overlaps[j] == gt_ovr
	            # mark the proposal box and the gt box as used
	            overlaps[box_ind, :] = -1
	            overlaps[:, gt_ind] = -1

	        # append recorded iou coverage level
	        gt_overlaps.append(_gt_overlaps)
	    gt_overlaps = (
	        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
	    )
	    gt_overlaps, _ = torch.sort(gt_overlaps)

	    if thresholds is None:
	        step = 0.05
	        # The small epsilon makes the 0.95 end point inclusive.
	        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
	    recalls = torch.zeros_like(thresholds)
	    # compute recall for each iou threshold
	    for i, t in enumerate(thresholds):
	        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
	    # ar = 2 * np.trapz(recalls, thresholds)
	    ar = recalls.mean()
	    return {
	        "ar": ar,
	        "recalls": recalls,
	        "thresholds": thresholds,
	        "gt_overlaps": gt_overlaps,
	        "num_pos": num_pos,
	    }


	def _evaluate_predictions_on_lvis(
	    lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
	):
	    """
	    Run the LVIS evaluation API on a list of results and return the standard metrics.

	    Args:
	        lvis_gt: ground-truth object passed to `LVISResults` and `LVISEval`.
	        lvis_results (list[dict]): results in LVIS json format.
	        iou_type (str): "bbox" or "segm".
	        max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
	            This limit, by default of the LVIS dataset, is 300.
	        class_names (None or list[str]): accepted for per-category AP, but not
	            used by the current implementation.

	    Returns:
	        a dict of {metric name: score}. Scores are multiplied by 100; every
	        score is NaN when `lvis_results` is empty.
	    """
	    metrics = {
	        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
	        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
	    }[iou_type]

	    logger = logging.getLogger(__name__)

	    if len(lvis_results) == 0:  # TODO: check if needed
	        # `Logger.warn` is a deprecated alias; use `warning`.
	        logger.warning("No predictions from the model!")
	        return {metric: float("nan") for metric in metrics}

	    if iou_type == "segm":
	        lvis_results = copy.deepcopy(lvis_results)
	        # When evaluating mask AP, if the results contain bbox, LVIS API will
	        # use the box area as the area of the instance, instead of the mask area.
	        # This leads to a different definition of small/medium/large.
	        # We remove the bbox field to let mask AP use mask area.
	        for c in lvis_results:
	            c.pop("bbox", None)

	    if max_dets_per_image is None:
	        max_dets_per_image = 300  # Default for LVIS dataset

	    from lvis import LVISEval, LVISResults

	    logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
	    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
	    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
	    lvis_eval.run()
	    lvis_eval.print_results()

	    # Pull the standard metrics from the LVIS results
	    results = lvis_eval.get_results()
	    results = {metric: float(results[metric] * 100) for metric in metrics}
	    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
	    return results