Federico Galatolo
work in progress
bc679dd
raw
history blame
6 kB
# Author: Alexander Riedel
# License: Unlicensed
# Link: https://github.com/alexriedel1/detectron2-GradCAM
import cv2
import numpy as np
class GradCAM():
    """
    Class to implement the Grad-CAM function with its necessary PyTorch hooks.

    On construction the model is switched to eval mode and a forward hook and
    a backward hook are registered on the module whose name equals
    ``target_layer_name``. The hooks store that module's output and the
    gradient with respect to that output. Use the instance as a context
    manager so the hooks are removed again on exit.

    Attributes
    ----------
    model : detectron2 GeneralizedRCNN Model
        A model using the detectron2 API for inferencing
    target_layer_name : str
        name of the convolutional layer to perform GradCAM with
    activations : tensor or None
        output of the target layer captured during the last forward pass
        (presumably [N,C,H,W]; only element 0 is used)
    gradient : tensor or None
        gradient w.r.t. the target layer's output captured during the last
        backward pass
    activations_grads : list
        handles of the registered hooks, used to remove them again
    """

    def __init__(self, model, target_layer_name):
        self.model = model
        self.target_layer_name = target_layer_name
        self.activations = None
        self.gradient = None
        self.model.eval()
        self.activations_grads = []
        self._register_hook()

    def _get_activations_hook(self, module, input, output):
        """Forward hook: remember the output of the target layer."""
        self.activations = output

    def _get_grads_hook(self, module, input_grad, output_grad):
        """Backward hook: remember the gradient w.r.t. the layer's output."""
        self.gradient = output_grad[0]

    def _register_hook(self):
        """
        Attach the forward and backward hooks to the target layer.

        Returns
        -------
        bool
            True if a module named ``target_layer_name`` was found and hooked,
            False otherwise (a message is printed in that case).
        """
        for (name, module) in self.model.named_modules():
            if name == self.target_layer_name:
                self.activations_grads.append(
                    module.register_forward_hook(self._get_activations_hook))
                # NOTE(review): register_backward_hook is deprecated in recent
                # PyTorch in favour of register_full_backward_hook. It is kept
                # here because the full variant raises when the hooked output
                # is modified in-place -- verify the target layer before
                # switching.
                self.activations_grads.append(
                    module.register_backward_hook(self._get_grads_hook))
                return True
        print(f"Layer {self.target_layer_name} not found in Model!")
        return False

    def _release_activations_grads(self):
        """Remove all registered hooks from the model."""
        for handle in self.activations_grads:
            handle.remove()
        # Forget the handles so a repeated release has nothing left to do.
        self.activations_grads.clear()

    @staticmethod
    def _normalize_cam(raw_cam):
        """
        Collapse a weighted activation stack into a map scaled to [0, 1].

        Parameters
        ----------
        raw_cam : np.ndarray
            Weighted activations of shape [C,H,W]

        Returns
        -------
        np.ndarray
            Map of shape [H,W]. If nothing survives the ReLU the map is all
            zeros (instead of NaN from a division by zero).
        """
        cam = np.sum(raw_cam, axis=0)  # [H,W]
        cam = np.maximum(cam, 0)  # ReLU
        cam = cam - np.min(cam)
        peak = np.max(cam)
        # Only rescale when there is a positive peak; dividing an all-zero
        # map by its maximum would fill it with NaN.
        if peak > 0:
            cam = cam / peak
        return cam

    def _postprocess_cam(self, raw_cam, img_width, img_height):
        """
        Normalise the raw CAM and resize it to the image size.

        Parameters
        ----------
        raw_cam : np.ndarray
            Weighted activations of shape [C,H,W]
        img_width : int
            Width to resize the map to
        img_height : int
            Height to resize the map to

        Returns
        -------
        cam : np.ndarray
            Normalised map resized to (img_height, img_width)
        cam_orig : np.ndarray
            Normalised map at the resolution of the target layer
        """
        cam_orig = self._normalize_cam(raw_cam)
        cam = cv2.resize(cam_orig, (img_width, img_height))
        return cam, cam_orig

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Always detach the hooks, also when the body raised.
        self._release_activations_grads()

    def __call__(self, inputs, target_category=None):
        """
        Calls the GradCAM instance

        Parameters
        ----------
        inputs : dict
            The input in the standard detectron2 model input format
            https://detectron2.readthedocs.io/en/latest/tutorials/models.html#model-input-format
            Must provide the keys "width" and "height".
        target_category : int, optional
            Index into ``output[0]['instances'].scores`` selecting the score
            to back-propagate. If `None` the highest score is selected.

        Returns
        -------
        cam : np.array()
            Gradient weighted class activation map, resized to the input size
        cam_orig : np.array()
            The same map at the resolution of the target layer
        output : list
            list of Instance objects representing the detectron2 model output

        Raises
        ------
        ValueError
            If `target_category` is `None` and the model returned no scores.
        """
        self.model.zero_grad()
        output = self.model.forward([inputs])

        scores = output[0]['instances'].scores
        if target_category is None:
            scores_np = scores.cpu().data.numpy()
            if scores_np.size == 0:
                raise ValueError(
                    "Model returned no instances; cannot pick a target score")
            target_category = np.argmax(scores_np, axis=-1)

        score = scores[target_category]
        # Back-propagating the score triggers the backward hook, which
        # fills self.gradient.
        score.backward()

        gradient = self.gradient[0].detach().cpu().numpy()  # [C,H,W]
        activations = self.activations[0].detach().cpu().numpy()  # [C,H,W]

        # Grad-CAM channel weight: spatial mean of the gradient.
        weight = np.mean(gradient, axis=(1, 2))  # [C]
        cam = activations * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
        cam, cam_orig = self._postprocess_cam(cam, inputs["width"], inputs["height"])
        return cam, cam_orig, output
class GradCamPlusPlus(GradCAM):
    """
    Subclass to implement the GradCam++ function with its necessary PyTorch hooks.

    Hook registration and post-processing are inherited from GradCAM; only
    the computation of the channel weights differs.

    Attributes
    ----------
    model : detectron2 GeneralizedRCNN Model
        A model using the detectron2 API for inferencing
    target_layer_name : str
        name of the convolutional layer to perform GradCAM++ with
    """

    def __init__(self, model, target_layer_name):
        super().__init__(model, target_layer_name)

    def __call__(self, inputs, target_category=None):
        """
        Calls the GradCAM++ instance

        Parameters
        ----------
        inputs : dict
            The input in the standard detectron2 model input format
            https://detectron2.readthedocs.io/en/latest/tutorials/models.html#model-input-format
            Must provide the keys "width" and "height".
        target_category : int, optional
            Index into ``output[0]['instances'].scores`` selecting the score
            to back-propagate. If `None` the highest score is selected.

        Returns
        -------
        cam : np.array()
            Gradient weighted class activation map, resized to the input size
        cam_orig : np.array()
            The same map at the resolution of the target layer
        output : list
            list of Instance objects representing the detectron2 model output

        Raises
        ------
        ValueError
            If `target_category` is `None` and the model returned no scores.
        """
        self.model.zero_grad()
        output = self.model.forward([inputs])

        scores = output[0]['instances'].scores
        if target_category is None:
            scores_np = scores.cpu().data.numpy()
            if scores_np.size == 0:
                raise ValueError(
                    "Model returned no instances; cannot pick a target score")
            target_category = np.argmax(scores_np, axis=-1)

        score = scores[target_category]
        # Back-propagating the score triggers the backward hook, which
        # fills self.gradient.
        score.backward()

        gradient = self.gradient[0].detach().cpu().numpy()  # [C,H,W]
        activations = self.activations[0].detach().cpu().numpy()  # [C,H,W]

        # from https://github.com/jacobgil/pytorch-grad-cam/blob/master/pytorch_grad_cam/grad_cam_plusplus.py
        grads_power_2 = gradient ** 2
        grads_power_3 = grads_power_2 * gradient
        # Equation 19 in https://arxiv.org/abs/1710.11063
        sum_activations = np.sum(activations, axis=(1, 2))  # [C]
        eps = 1e-6  # added to the denominator of aij
        aij = grads_power_2 / (2 * grads_power_2 +
                               sum_activations[:, None, None] * grads_power_3 + eps)
        # Zero out the aij entries at positions where the gradient is 0.
        aij = np.where(gradient != 0, aij, 0)

        # Bring back the ReLU from eq. 7 in the paper: only positive
        # gradients contribute to the channel weights.
        weights = np.maximum(gradient, 0) * aij
        weight = np.sum(weights, axis=(1, 2))  # [C]

        cam = activations * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
        cam, cam_orig = self._postprocess_cam(cam, inputs["width"], inputs["height"])
        return cam, cam_orig, output