# jeasinema/gradcam.py

## gradcam.py
import torch
import numpy as np
import argparse
from torchvision import models
import matplotlib.pyplot as plt

import cv2
import os

# Module-level pretrained ResNet-50. ModelOutputs applies this instance's
# `fc` layer to the flattened features to obtain the class scores.
resnet = models.resnet50(pretrained=True)

class FeatureExtractor():
    """Run a model child-by-child and capture selected intermediate outputs.

    Every direct child of ``model`` is applied in registration order. For each
    child whose name appears in ``target_layers`` the output is kept, and a
    hook is attached so its gradient is appended to ``self.gradients`` during
    a later backward pass.
    """

    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Hook callback: store ``grad`` for later retrieval."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Feed ``x`` through every child module in turn.

        Returns a ``(activations, x)`` pair: the list of outputs captured at
        the target layers, and the output of the last child module.
        """
        # Gradients gathered for a previous input are discarded.
        self.gradients = []
        captured = []
        for layer_name, layer in self.model._modules.items():
            x = layer(x)
            print('name=', layer_name)
            print('x.size()=', x.size())
            if layer_name in self.target_layers:
                x.register_hook(self.save_gradient)
                captured.append(x)
            # NOTE: despite its label, this prints the size of x.
            print('outputs.size()=', x.size())
        return captured, x


class ModelOutputs():
    """Forward-pass wrapper that gives access to three things:
    1. The network output (class scores).
    2. Activations from the targeted intermediate layers.
    3. Gradients from the targeted intermediate layers (after backward).
    """

    def __init__(self, model, target_layers, use_cuda):
        self.model = model
        self.feature_extractor = FeatureExtractor(self.model, target_layers)
        self.cuda = use_cuda

    def get_gradients(self):
        """Return the gradient list gathered by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Return ``(target_activations, scores)`` for the input ``x``."""
        target_activations, features = self.feature_extractor(x)
        # Collapse everything but the leading dimension before the classifier.
        flat = features.view(features.size(0), -1)
        if not self.cuda:
            return target_activations, resnet.fc(flat)
        # The features are moved to the CPU before the module-level
        # ``resnet.fc`` is applied; the scores are then moved back to CUDA.
        scores = resnet.fc(flat.cpu()).cuda()
        return target_activations, scores


def preprocess_image(img):
    """Turn an ``(H, W, 3)`` image array into a normalized input tensor.

    The channel axis is reversed (the caller in this file passes an OpenCV
    image, so this turns BGR into RGB), each channel is normalized with the
    per-channel mean / standard deviation below, and the result is rearranged
    to channel-first layout with a leading batch axis.

    Args:
        img: array-like of shape ``(H, W, 3)``. The caller in this file passes
            float32 values scaled to ``[0, 1]``. Other numeric dtypes are
            converted to float32 first instead of being normalized in their
            own dtype. The argument is never modified.

    Returns:
        A new float32 ``torch.Tensor`` of shape ``(1, 3, H, W)``.
    """
    means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # Convert once to float32 so the arithmetic below never happens in an
    # integer dtype (which would truncate the normalized values).
    rgb = np.asarray(img, dtype=np.float32)[:, :, ::-1]
    # Broadcasting over the last axis normalizes all three channels at once
    # and allocates a fresh array, leaving ``img`` untouched.
    normalized = (rgb - means) / stds
    chw = np.ascontiguousarray(np.transpose(normalized, (2, 0, 1)))
    return torch.from_numpy(chw).unsqueeze(0)


def show_cam_on_image(img, mask):
    """Blend a colour-mapped ``mask`` onto ``img`` and return a uint8 image.

    ``mask`` is scaled by 255, colour-mapped with OpenCV's JET map and mixed
    with ``img`` at a 0.3 / 0.7 ratio. The blend is then divided by its
    maximum and scaled to the 0..255 range.
    """
    jet = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    overlay = (np.float32(jet) / 255) * 0.3 + np.float32(img) * 0.7
    overlay = overlay / np.max(overlay)
    return np.uint8(255 * overlay)


class GradCam:
    """Produce a class-activation heat-map from layer gradients.

    The model is switched to eval mode (and moved to CUDA when ``use_cuda``
    is true) and wrapped in a ``ModelOutputs`` that exposes the activations
    and gradients of the layers named in ``target_layer_names``.
    """

    def __init__(self, model, target_layer_names, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = ModelOutputs(self.model, target_layer_names, use_cuda)

    def forward(self, input):
        """Run the wrapped model directly on ``input``."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return the heat-map for class ``index`` as a 224x224 array.

        Args:
            input: batch tensor fed to the extractor; only the first sample
                contributes to the returned map.
            index: class whose score is back-propagated. ``None`` selects the
                position of the largest output value.

        Returns:
            A float array of shape ``(224, 224)`` scaled to ``[0, 1]``. When
            the map is constant (for example every weighted activation is
            non-positive) it is all zeros rather than NaN.
        """
        if self.cuda:
            input = input.cuda()
        features, output = self.extractor(input)

        if index is None:
            index = np.argmax(output.cpu().data.numpy())

        # Multiplying by a one-hot mask and summing selects the target score
        # while keeping it connected to the autograd graph.
        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][index] = 1
        one_hot = torch.from_numpy(one_hot).to(output.device)
        one_hot = torch.sum(one_hot * output)

        print("my one_hot out:", one_hot)
        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        # The hooks fire during backward; the last stored gradient is paired
        # with the last captured activation.
        grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()
        print('grads_val', grads_val.shape)
        target = features[-1].cpu().data.numpy()[0, :]

        # Channel weights are the spatial mean of the gradients.
        weights = np.mean(grads_val, axis=(2, 3))[0, :]
        print('weights', weights.shape)
        # Weighted sum over the channel axis in one call.
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)
        print('cam', cam.shape)
        print('features', features[-1].shape)
        print('target', target.shape)

        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, (224, 224))
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            # Guard against 0/0 when the map is constant.
            cam = cam / peak
        return cam

def get_args():
    """Parse the command line and return the resulting namespace.

    ``use_cuda`` only stays true when the flag was given and
    ``torch.cuda.is_available()`` reports a usable device.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--use-cuda',
        action='store_true',
        default=False,
        help='Use NVIDIA GPU acceleration',
    )
    cli.add_argument(
        '--image-path',
        type=str,
        default='./examples/',
        help='Input image path',
    )
    parsed = cli.parse_args()
    if parsed.use_cuda:
        # Keep the flag only when CUDA can actually be used.
        parsed.use_cuda = torch.cuda.is_available()
    return parsed


if __name__ == '__main__':
    # The classifier head is removed from this instance so FeatureExtractor,
    # which applies every child module in sequence, does not run it;
    # ModelOutputs applies the module-level ``resnet.fc`` instead.
    model = models.resnet50(pretrained=True)
    del model.fc
    print(model)

    grad_cam = GradCam(model, target_layer_names=["layer4"], use_cuda=False)

    image_path = '/data00/home/kongtao/data00/home/kongtao/workspace/images'
    image_name = 'cat1.jpeg'
    image_file = os.path.join(image_path, image_name)
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread reports an unreadable file by returning None.
        raise FileNotFoundError('Could not read image: ' + image_file)
    image = np.float32(cv2.resize(image, (224, 224))) / 255
    input_tensor = preprocess_image(image)

    target_index = None
    mask = grad_cam(input_tensor, target_index)

    vis_map = show_cam_on_image(image, mask)

    plt.figure()
    # The overlay is built from OpenCV (BGR) data, while matplotlib
    # interprets the last axis as RGB, so the channels are reversed here.
    plt.imshow(vis_map[:, :, ::-1])
    plt.show()
	import torch
	import numpy as np
	import argparse
	from torchvision import models
	import matplotlib.pyplot as plt

	import cv2
	import os

	# Module-level pretrained ResNet-50. ModelOutputs applies this instance's
	# `fc` layer to the flattened features to obtain the class scores.
	resnet = models.resnet50(pretrained=True)

	class FeatureExtractor():
		"""Run a model child-by-child and capture selected intermediate outputs.

		Every direct child of ``model`` is applied in registration order. For
		each child whose name appears in ``target_layers`` the output is kept,
		and a hook is attached so its gradient is appended to
		``self.gradients`` during a later backward pass.
		"""

		def __init__(self, model, target_layers):
			self.model = model
			self.target_layers = target_layers
			self.gradients = []

		def save_gradient(self, grad):
			"""Hook callback: store ``grad`` for later retrieval."""
			self.gradients.append(grad)

		def __call__(self, x):
			"""Feed ``x`` through every child module in turn.

			Returns a ``(activations, x)`` pair: the list of outputs captured
			at the target layers, and the output of the last child module.
			"""
			# Gradients gathered for a previous input are discarded.
			self.gradients = []
			captured = []
			for layer_name, layer in self.model._modules.items():
				x = layer(x)
				print('name=', layer_name)
				print('x.size()=', x.size())
				if layer_name in self.target_layers:
					x.register_hook(self.save_gradient)
					captured.append(x)
				# NOTE: despite its label, this prints the size of x.
				print('outputs.size()=', x.size())
			return captured, x


	class ModelOutputs():
		"""Forward-pass wrapper that gives access to three things:
		1. The network output (class scores).
		2. Activations from the targeted intermediate layers.
		3. Gradients from the targeted intermediate layers (after backward).
		"""

		def __init__(self, model, target_layers, use_cuda):
			self.model = model
			self.feature_extractor = FeatureExtractor(self.model, target_layers)
			self.cuda = use_cuda

		def get_gradients(self):
			"""Return the gradient list gathered by the feature extractor."""
			return self.feature_extractor.gradients

		def __call__(self, x):
			"""Return ``(target_activations, scores)`` for the input ``x``."""
			target_activations, output = self.feature_extractor(x)
			# Collapse everything but the leading dimension before the classifier.
			output = output.view(output.size(0), -1)
			if self.cuda:
				# The features are moved to the CPU before the module-level
				# ``resnet.fc`` is applied; the scores go back to CUDA.
				output = output.cpu()
				output = resnet.fc(output).cuda()
			else:
				output = resnet.fc(output)
			return target_activations, output


	def preprocess_image(img):
		"""Turn an ``(H, W, 3)`` image array into a normalized input tensor.

		The channel axis is reversed (the caller in this file passes an OpenCV
		image, so this turns BGR into RGB), each channel is normalized with
		the per-channel mean / standard deviation below, and the result is
		rearranged to channel-first layout with a leading batch axis.

		Args:
			img: array-like of shape ``(H, W, 3)``. The caller in this file
				passes float32 values scaled to ``[0, 1]``. Other numeric
				dtypes are converted to float32 first. The argument is never
				modified.

		Returns:
			A new float32 ``torch.Tensor`` of shape ``(1, 3, H, W)``.
		"""
		means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
		stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)

		# Convert once to float32 so the arithmetic below never happens in an
		# integer dtype (which would truncate the normalized values).
		rgb = np.asarray(img, dtype=np.float32)[:, :, ::-1]
		# Broadcasting over the last axis normalizes all three channels at
		# once and allocates a fresh array, leaving ``img`` untouched.
		normalized = (rgb - means) / stds
		chw = np.ascontiguousarray(np.transpose(normalized, (2, 0, 1)))
		return torch.from_numpy(chw).unsqueeze(0)


	def show_cam_on_image(img, mask):
		"""Blend a colour-mapped ``mask`` onto ``img`` and return a uint8 image.

		``mask`` is scaled by 255, colour-mapped with OpenCV's JET map and
		mixed with ``img`` at a 0.3 / 0.7 ratio. The blend is then divided by
		its maximum and scaled to the 0..255 range.
		"""
		heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
		heatmap = np.float32(heatmap) / 255
		# The multiplication operators were missing from this line.
		cam = heatmap * 0.3 + np.float32(img) * 0.7
		cam = cam / np.max(cam)
		cam = np.uint8(255 * cam)
		return cam


	class GradCam:
		"""Produce a class-activation heat-map from layer gradients.

		The model is switched to eval mode (and moved to CUDA when
		``use_cuda`` is true) and wrapped in a ``ModelOutputs`` that exposes
		the activations and gradients of the layers named in
		``target_layer_names``.
		"""

		def __init__(self, model, target_layer_names, use_cuda):
			self.model = model
			self.model.eval()
			self.cuda = use_cuda
			if self.cuda:
				self.model = model.cuda()

			self.extractor = ModelOutputs(self.model, target_layer_names, use_cuda)

		def forward(self, input):
			"""Run the wrapped model directly on ``input``."""
			return self.model(input)

		def __call__(self, input, index=None):
			"""Return the heat-map for class ``index`` as a 224x224 array.

			Args:
				input: batch tensor fed to the extractor; only the first
					sample contributes to the returned map.
				index: class whose score is back-propagated. ``None`` selects
					the position of the largest output value.

			Returns:
				A float array of shape ``(224, 224)`` scaled to ``[0, 1]``.
				When the map is constant it is all zeros rather than NaN.
			"""
			if self.cuda:
				input = input.cuda()
			features, output = self.extractor(input)

			if index is None:
				index = np.argmax(output.cpu().data.numpy())

			# Multiplying by a one-hot mask and summing selects the target
			# score while keeping it connected to the autograd graph.
			one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
			one_hot[0][index] = 1
			one_hot = torch.from_numpy(one_hot).to(output.device)
			one_hot = torch.sum(one_hot * output)

			print("my one_hot out:", one_hot)
			self.model.zero_grad()
			one_hot.backward(retain_graph=True)

			# The hooks fire during backward; the last stored gradient is
			# paired with the last captured activation.
			grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()
			print('grads_val', grads_val.shape)
			target = features[-1].cpu().data.numpy()[0, :]

			# Channel weights are the spatial mean of the gradients.
			weights = np.mean(grads_val, axis=(2, 3))[0, :]
			print('weights', weights.shape)
			# Weighted sum over the channel axis in one call.
			cam = np.tensordot(weights, target, axes=1).astype(np.float32)
			print('cam', cam.shape)
			print('features', features[-1].shape)
			print('target', target.shape)

			cam = np.maximum(cam, 0)
			cam = cv2.resize(cam, (224, 224))
			cam = cam - np.min(cam)
			peak = np.max(cam)
			if peak > 0:
				# Guard against 0/0 when the map is constant.
				cam = cam / peak
			return cam

	def get_args():
		"""Parse the command line and return the resulting namespace.

		``use_cuda`` only stays true when the flag was given and
		``torch.cuda.is_available()`` reports a usable device.
		"""
		parser = argparse.ArgumentParser()
		parser.add_argument(
			'--use-cuda',
			action='store_true',
			default=False,
			help='Use NVIDIA GPU acceleration',
		)
		parser.add_argument(
			'--image-path',
			type=str,
			default='./examples/',
			help='Input image path',
		)
		args = parser.parse_args()
		args.use_cuda = args.use_cuda and torch.cuda.is_available()
		return args


	if __name__ == '__main__':
		# The classifier head is removed from this instance so
		# FeatureExtractor, which applies every child module in sequence,
		# does not run it; ModelOutputs applies ``resnet.fc`` instead.
		model = models.resnet50(pretrained=True)
		del model.fc
		print(model)

		grad_cam = GradCam(model, target_layer_names=["layer4"], use_cuda=False)

		image_path = '/data00/home/kongtao/data00/home/kongtao/workspace/images'
		image_name = 'cat1.jpeg'
		image_file = os.path.join(image_path, image_name)
		image = cv2.imread(image_file)
		if image is None:
			# cv2.imread reports an unreadable file by returning None.
			raise FileNotFoundError('Could not read image: ' + image_file)
		image = np.float32(cv2.resize(image, (224, 224))) / 255
		input_tensor = preprocess_image(image)

		target_index = None
		mask = grad_cam(input_tensor, target_index)

		vis_map = show_cam_on_image(image, mask)

		plt.figure()
		# The overlay is built from OpenCV (BGR) data, while matplotlib
		# interprets the last axis as RGB, so the channels are reversed here.
		plt.imshow(vis_map[:, :, ::-1])
		plt.show()