# jediofgever/modified_predictor.py

## modified_predictor.py
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import torch
import numpy as np
from torchvision import transforms as T

from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
from maskrcnn_benchmark import layers as L
from maskrcnn_benchmark.utils import cv2_util


class COCODemo(object):
    # COCO categories for pretty print
    CATEGORIES = [
        "__background",
        "person",
        "bicycle",
        "car",
        "motorcycle",
        "airplane",
        "bus",
        "train",
        "truck",
        "boat",
        "traffic light",
        "fire hydrant",
        "stop sign",
        "parking meter",
        "bench",
        "bird",
        "cat",
        "dog",
        "horse",
        "sheep",
        "cow",
        "elephant",
        "bear",
        "zebra",
        "giraffe",
        "backpack",
        "umbrella",
        "handbag",
        "tie",
        "suitcase",
        "frisbee",
        "skis",
        "snowboard",
        "sports ball",
        "kite",
        "baseball bat",
        "baseball glove",
        "skateboard",
        "surfboard",
        "tennis racket",
        "bottle",
        "wine glass",
        "cup",
        "fork",
        "knife",
        "spoon",
        "bowl",
        "banana",
        "apple",
        "sandwich",
        "orange",
        "broccoli",
        "carrot",
        "hot dog",
        "pizza",
        "donut",
        "cake",
        "chair",
        "couch",
        "potted plant",
        "bed",
        "dining table",
        "toilet",
        "tv",
        "laptop",
        "mouse",
        "remote",
        "keyboard",
        "cell phone",
        "microwave",
        "oven",
        "toaster",
        "sink",
        "refrigerator",
        "book",
        "clock",
        "vase",
        "scissors",
        "teddy bear",
        "hair drier",
        "toothbrush",
    ]

    def __init__(
        self,
        cfg,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    ):
        """
        Builds the detection model described by `cfg`, loads its weights and
        prepares the helpers used for pre- and post-processing.

        Arguments:
            cfg: configuration node. A clone is stored on the instance; this
                method reads `MODEL.DEVICE`, `MODEL.WEIGHT` and `OUTPUT_DIR`.
            confidence_threshold (float): detections must score strictly
                above this value to survive `select_top_predictions`
            show_mask_heatmaps (bool): when True the Masker is created with
                threshold -1 instead of 0.5 and `run_on_opencv_image`
                returns a mask montage
            masks_per_dim (int): number of masks per row and per column of
                the montage built by `create_mask_montage`
            min_image_size (int): size handed to the resize transform
        """
        # plain settings first; build_transform() below relies on
        # self.cfg and self.min_image_size being available
        self.cfg = cfg.clone()
        self.min_image_size = min_image_size
        self.confidence_threshold = confidence_threshold
        self.show_mask_heatmaps = show_mask_heatmaps
        self.masks_per_dim = masks_per_dim
        self.cpu_device = torch.device("cpu")
        self.device = torch.device(cfg.MODEL.DEVICE)

        # model: build, switch to eval mode, move to the target device and
        # only then load the checkpoint into it
        self.model = build_detection_model(cfg)
        self.model.eval()
        self.model.to(self.device)
        weight_loader = DetectronCheckpointer(
            cfg, self.model, save_dir=cfg.OUTPUT_DIR
        )
        weight_loader.load(cfg.MODEL.WEIGHT)

        self.transforms = self.build_transform()

        if show_mask_heatmaps:
            mask_threshold = -1
        else:
            mask_threshold = 0.5
        self.masker = Masker(threshold=mask_threshold, padding=1)

        # multipliers used to derive one fixed color per class label
        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

    def build_transform(self):
        """
        Assembles the pre-processing pipeline applied to every input image.

        The steps are, in order: conversion to a PIL image, resize using
        `self.min_image_size`, conversion to a tensor, the channel / range
        conversion selected by `cfg.INPUT.TO_BGR255`, and normalization with
        `cfg.INPUT.PIXEL_MEAN` and `cfg.INPUT.PIXEL_STD`.

        Returns:
            the composed torchvision transform
        """
        input_cfg = self.cfg.INPUT

        # Images are loaded with OpenCV, so they already are in BGR order.
        # For the BGR255 format the tensor only has to be scaled by 255;
        # otherwise the channel order is reversed instead.
        if input_cfg.TO_BGR255:
            def convert_channels(x):
                return x * 255
        else:
            def convert_channels(x):
                return x[[2, 1, 0]]

        steps = [
            T.ToPILImage(),
            T.Resize(self.min_image_size),
            T.ToTensor(),
            T.Lambda(convert_channels),
            T.Normalize(mean=input_cfg.PIXEL_MEAN, std=input_cfg.PIXEL_STD),
        ]
        return T.Compose(steps)

    def run_on_opencv_image(self, image):
        """
        Runs detection on `image` and renders the detections on a blank
        white canvas of the same height and width (the input pixels are
        not copied into the output).

        Arguments:
            image (np.ndarray): an image as returned by OpenCV; its shape
                is unpacked as (height, width, channels)

        Returns:
            If `self.show_mask_heatmaps` is set: only the montage returned
            by `create_mask_montage`.
            Otherwise: a tuple `(result, top_predictions)` where `result`
            is the canvas with boxes, masks (only if `cfg.MODEL.MASK_ON`)
            and class names drawn on it, and `top_predictions` is the
            BoxList returned by `select_top_predictions`.
        """
        top_predictions = self.select_top_predictions(
            self.compute_prediction(image)
        )

        # start from an all-white 3-channel uint8 canvas
        height, width, _ = image.shape
        canvas = np.full((height, width, 3), 255, dtype=np.uint8)

        if self.show_mask_heatmaps:
            return self.create_mask_montage(canvas, top_predictions)

        canvas = self.overlay_boxes(canvas, top_predictions)
        if self.cfg.MODEL.MASK_ON:
            canvas = self.overlay_mask(canvas, top_predictions)
        canvas = self.overlay_class_names(canvas, top_predictions)
        return canvas, top_predictions

    def compute_prediction(self, original_image):
        """
        Runs the model on a single image and maps the result back to the
        size of that image.

        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects, resized to the
                original image size. If the model produced a `mask` field,
                it is replaced by the output of `self.masker`. Additional
                information can be found via `prediction.fields()`
        """
        # pre-process, then wrap into an ImageList padded so that it is
        # divisible by cfg.DATALOADER.SIZE_DIVISIBILITY
        tensor = self.transforms(original_image)
        batch = to_image_list(tensor, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        batch = batch.to(self.device)

        with torch.no_grad():
            outputs = self.model(batch)
        cpu_outputs = [out.to(self.cpu_device) for out in outputs]

        # a single image is passed at a time, so only the first entry matters
        prediction = cpu_outputs[0]

        # resize the BoxList to the original image size (all dims but the last)
        height, width = original_image.shape[:-1]
        prediction = prediction.resize((width, height))

        if not prediction.has_field("mask"):
            return prediction

        # paste the masks at the position given by the bounding boxes
        raw_masks = prediction.get_field("mask")
        pasted = self.masker([raw_masks], [prediction])[0]
        prediction.add_field("mask", pasted)
        return prediction

    def select_top_predictions(self, predictions):
        """
        Keeps the predictions whose `scores` entry is strictly greater than
        `self.confidence_threshold` and orders them by descending score.

        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.

        Returns:
            prediction (BoxList): the filtered and sorted detections.
                Additional information can be found via `prediction.fields()`
        """
        above_threshold = predictions.get_field("scores") > self.confidence_threshold
        keep = torch.nonzero(above_threshold).squeeze(1)
        kept = predictions[keep]

        # sort() returns (values, indices); only the ordering is needed
        order = kept.get_field("scores").sort(0, descending=True)[1]
        return kept[order]

    def compute_colors_for_labels(self, labels):
        """
        Simple function that adds fixed colors depending on the class
        """
        colors = labels[:, None] * self.palette
        colors = (colors % 255).numpy().astype("uint8")
        return colors

    def overlay_boxes(self, image, predictions):
        """
        Draws every predicted box as a 1-pixel rectangle in the color of
        its label.

        Arguments:
            image (np.ndarray): the image to draw on
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `labels`.

        Returns:
            the image with the rectangles drawn on it
        """
        labels = predictions.get_field("labels")
        box_colors = self.compute_colors_for_labels(labels).tolist()

        for box, color in zip(predictions.bbox, box_colors):
            # integer pixel coordinates: first two values are the top-left
            # corner, the remaining ones the bottom-right corner
            corners = box.to(torch.int64)
            top_left = tuple(corners[:2].tolist())
            bottom_right = tuple(corners[2:].tolist())
            image = cv2.rectangle(image, top_left, bottom_right, tuple(color), 1)

        return image

    def overlay_mask(self, image, predictions):
        """
        Fills the region of every predicted instance mask with the color
        of its label.

        Arguments:
            image (np.ndarray): the image to draw on
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask` and `labels`.

        Returns:
            the image with the filled masks drawn on it
        """
        instance_masks = predictions.get_field("mask").numpy()
        labels = predictions.get_field("labels")
        fill_colors = self.compute_colors_for_labels(labels).tolist()

        for instance_mask, color in zip(instance_masks, fill_colors):
            # first channel of the mask, with a trailing axis added
            binary = instance_mask[0, :, :, None]
            # only the contours are used; the hierarchy is ignored
            contours, _ = cv2_util.findContours(
                binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            image = cv2.fillPoly(image, contours, color)

        return image

    def create_mask_montage(self, image, predictions):
        """
        Builds a `masks_per_dim` x `masks_per_dim` montage of the mask
        heatmaps of the first detections and color-maps it with
        `cv2.COLORMAP_JET`.

        Arguments:
            image (np.ndarray): not used by this method
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask`.

        Returns:
            the color-mapped montage as returned by `cv2.applyColorMap`
        """
        grid = self.masks_per_dim
        capacity = grid ** 2

        # shrink every mask so that `grid` tiles fit along each dimension
        masks = L.interpolate(
            predictions.get_field("mask").float(), scale_factor=1 / grid
        ).byte()
        tile_h, tile_w = masks.shape[-2:]
        masks = masks[:capacity]

        # fewer detections than slots: fill the remaining tiles with zeros
        if len(masks) < capacity:
            padded = torch.zeros(capacity, 1, tile_h, tile_w, dtype=torch.uint8)
            padded[: len(masks)] = masks
            masks = padded

        tiles = masks.reshape(grid, grid, tile_h, tile_w)
        # tile (row, col) occupies rows [row*h, (row+1)*h) and
        # columns [col*w, (col+1)*w) of the montage
        montage = tiles.permute(0, 2, 1, 3).reshape(grid * tile_h, grid * tile_w)
        return cv2.applyColorMap(montage.numpy(), cv2.COLORMAP_JET)

    def overlay_class_names(self, image, predictions):
        """
        Writes "<class name>: <score>" at the top-left corner of every
        predicted bounding box.

        Arguments:
            image (np.ndarray): the image to draw on (modified in place)
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores` and `labels`.

        Returns:
            the image with the captions drawn on it
        """
        scores = predictions.get_field("scores").tolist()
        label_ids = predictions.get_field("labels").tolist()
        names = [self.CATEGORIES[i] for i in label_ids]

        template = "{}: {:.2f}"
        for box, score, name in zip(predictions.bbox, scores, names):
            # cv2.putText expects plain integer pixel coordinates; the box
            # entries are tensor elements, so convert them the same way
            # overlay_boxes does before handing them to OpenCV
            x, y = box[:2].to(torch.int64).tolist()
            cv2.putText(
                image,
                template.format(name, score),
                (x, y),
                cv2.FONT_HERSHEY_SIMPLEX,
                .5,
                (255, 255, 255),
                1,
            )

        return image
	# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
	import cv2
	import torch
	import numpy as np
	from torchvision import transforms as T

	from maskrcnn_benchmark.modeling.detector import build_detection_model
	from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
	from maskrcnn_benchmark.structures.image_list import to_image_list
	from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
	from maskrcnn_benchmark import layers as L
	from maskrcnn_benchmark.utils import cv2_util


	class COCODemo(object):
	    """
	    Wraps a maskrcnn_benchmark detection model for single-image inference
	    on OpenCV images and renders the detections on a white canvas.
	    """

	    # COCO categories for pretty print
	    CATEGORIES = [
	        "__background",
	        "person",
	        "bicycle",
	        "car",
	        "motorcycle",
	        "airplane",
	        "bus",
	        "train",
	        "truck",
	        "boat",
	        "traffic light",
	        "fire hydrant",
	        "stop sign",
	        "parking meter",
	        "bench",
	        "bird",
	        "cat",
	        "dog",
	        "horse",
	        "sheep",
	        "cow",
	        "elephant",
	        "bear",
	        "zebra",
	        "giraffe",
	        "backpack",
	        "umbrella",
	        "handbag",
	        "tie",
	        "suitcase",
	        "frisbee",
	        "skis",
	        "snowboard",
	        "sports ball",
	        "kite",
	        "baseball bat",
	        "baseball glove",
	        "skateboard",
	        "surfboard",
	        "tennis racket",
	        "bottle",
	        "wine glass",
	        "cup",
	        "fork",
	        "knife",
	        "spoon",
	        "bowl",
	        "banana",
	        "apple",
	        "sandwich",
	        "orange",
	        "broccoli",
	        "carrot",
	        "hot dog",
	        "pizza",
	        "donut",
	        "cake",
	        "chair",
	        "couch",
	        "potted plant",
	        "bed",
	        "dining table",
	        "toilet",
	        "tv",
	        "laptop",
	        "mouse",
	        "remote",
	        "keyboard",
	        "cell phone",
	        "microwave",
	        "oven",
	        "toaster",
	        "sink",
	        "refrigerator",
	        "book",
	        "clock",
	        "vase",
	        "scissors",
	        "teddy bear",
	        "hair drier",
	        "toothbrush",
	    ]

	    def __init__(
	        self,
	        cfg,
	        confidence_threshold=0.7,
	        show_mask_heatmaps=False,
	        masks_per_dim=2,
	        min_image_size=224,
	    ):
	        """
	        Builds the model described by `cfg`, loads `cfg.MODEL.WEIGHT` and
	        prepares the transform, the Masker and the color palette.

	        Arguments:
	            cfg: configuration node; a clone is kept on the instance
	            confidence_threshold (float): minimum (exclusive) score of
	                the detections that are kept
	            show_mask_heatmaps (bool): create the Masker with threshold
	                -1 instead of 0.5 and return a mask montage
	            masks_per_dim (int): masks per row / column of the montage
	            min_image_size (int): size handed to the resize transform
	        """
	        self.cfg = cfg.clone()
	        self.model = build_detection_model(cfg)
	        self.model.eval()
	        self.device = torch.device(cfg.MODEL.DEVICE)
	        self.model.to(self.device)
	        self.min_image_size = min_image_size

	        save_dir = cfg.OUTPUT_DIR
	        checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
	        _ = checkpointer.load(cfg.MODEL.WEIGHT)

	        self.transforms = self.build_transform()

	        mask_threshold = -1 if show_mask_heatmaps else 0.5
	        self.masker = Masker(threshold=mask_threshold, padding=1)

	        # used to make colors for each class
	        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

	        self.cpu_device = torch.device("cpu")
	        self.confidence_threshold = confidence_threshold
	        self.show_mask_heatmaps = show_mask_heatmaps
	        self.masks_per_dim = masks_per_dim

	    def build_transform(self):
	        """
	        Creates the pre-processing transform: PIL conversion, resize,
	        tensor conversion, channel / range conversion and normalization.
	        """
	        cfg = self.cfg

	        # images are loaded with OpenCV, so they already are in BGR order:
	        # scale by 255 for the BGR255 format, otherwise flip the channels
	        if cfg.INPUT.TO_BGR255:
	            to_bgr_transform = T.Lambda(lambda x: x * 255)
	        else:
	            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

	        normalize_transform = T.Normalize(
	            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
	        )

	        transform = T.Compose(
	            [
	                T.ToPILImage(),
	                T.Resize(self.min_image_size),
	                T.ToTensor(),
	                to_bgr_transform,
	                normalize_transform,
	            ]
	        )
	        return transform

	    def run_on_opencv_image(self, image):
	        """
	        Runs detection on `image` and draws the result on a white canvas
	        of the same height and width.

	        Arguments:
	            image (np.ndarray): an image as returned by OpenCV

	        Returns:
	            the mask montage alone when `self.show_mask_heatmaps` is set,
	            otherwise the tuple `(result, top_predictions)`
	        """
	        predictions = self.compute_prediction(image)
	        top_predictions = self.select_top_predictions(predictions)

	        height, width, _ = image.shape

	        # blank white canvas; the input pixels are not copied
	        result = np.zeros((height, width, 3), np.uint8)
	        result[:, 0:width] = (255, 255, 255)

	        if self.show_mask_heatmaps:
	            return self.create_mask_montage(result, top_predictions)
	        result = self.overlay_boxes(result, top_predictions)
	        if self.cfg.MODEL.MASK_ON:
	            result = self.overlay_mask(result, top_predictions)
	        result = self.overlay_class_names(result, top_predictions)

	        return result, top_predictions

	    def compute_prediction(self, original_image):
	        """
	        Arguments:
	            original_image (np.ndarray): an image as returned by OpenCV

	        Returns:
	            prediction (BoxList): the detected objects, resized to the
	                original image size. Additional information can be found
	                via `prediction.fields()`
	        """
	        # apply pre-processing to image
	        image = self.transforms(original_image)
	        # convert to an ImageList, padded so that it is divisible by
	        # cfg.DATALOADER.SIZE_DIVISIBILITY
	        image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
	        image_list = image_list.to(self.device)
	        # compute predictions
	        with torch.no_grad():
	            predictions = self.model(image_list)
	        predictions = [o.to(self.cpu_device) for o in predictions]

	        # always single image is passed at a time
	        prediction = predictions[0]

	        # reshape prediction (a BoxList) into the original image size
	        height, width = original_image.shape[:-1]
	        prediction = prediction.resize((width, height))

	        if prediction.has_field("mask"):
	            # if we have masks, paste the masks in the right position
	            # in the image, as defined by the bounding boxes
	            masks = prediction.get_field("mask")
	            # always single image is passed at a time
	            masks = self.masker([masks], [prediction])[0]
	            prediction.add_field("mask", masks)
	        return prediction

	    def select_top_predictions(self, predictions):
	        """
	        Selects the predictions with `score` > self.confidence_threshold
	        and returns them in descending order of score.

	        Arguments:
	            predictions (BoxList): the result of the computation by the
	                model. It should contain the field `scores`.

	        Returns:
	            prediction (BoxList): the filtered and sorted detections
	        """
	        scores = predictions.get_field("scores")
	        keep = torch.nonzero(scores > self.confidence_threshold).squeeze(1)
	        predictions = predictions[keep]
	        scores = predictions.get_field("scores")
	        _, idx = scores.sort(0, descending=True)
	        return predictions[idx]

	    def compute_colors_for_labels(self, labels):
	        """
	        Returns a fixed uint8 color triple for every label: the label times
	        each entry of `self.palette`, modulo 255.
	        """
	        colors = labels[:, None] * self.palette
	        colors = (colors % 255).numpy().astype("uint8")
	        return colors

	    def overlay_boxes(self, image, predictions):
	        """
	        Adds the predicted boxes on top of the image.

	        Arguments:
	            image (np.ndarray): the image to draw on
	            predictions (BoxList): the result of the computation by the
	                model. It should contain the field `labels`.
	        """
	        labels = predictions.get_field("labels")
	        boxes = predictions.bbox

	        colors = self.compute_colors_for_labels(labels).tolist()

	        for box, color in zip(boxes, colors):
	            box = box.to(torch.int64)
	            top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
	            image = cv2.rectangle(
	                image, tuple(top_left), tuple(bottom_right), tuple(color), 1
	            )

	        return image

	    def overlay_mask(self, image, predictions):
	        """
	        Fills the region of each predicted instance mask with the color
	        of its label.

	        Arguments:
	            image (np.ndarray): the image to draw on
	            predictions (BoxList): the result of the computation by the
	                model. It should contain the field `mask` and `labels`.
	        """
	        masks = predictions.get_field("mask").numpy()
	        labels = predictions.get_field("labels")

	        colors = self.compute_colors_for_labels(labels).tolist()

	        for mask, color in zip(masks, colors):
	            thresh = mask[0, :, :, None]
	            contours, _ = cv2_util.findContours(
	                thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
	            )
	            image = cv2.fillPoly(image, contours, color)

	        return image

	    def create_mask_montage(self, image, predictions):
	        """
	        Creates a montage showing the mask heatmaps of the first
	        `masks_per_dim ** 2` detected objects.

	        Arguments:
	            image (np.ndarray): not used by this method
	            predictions (BoxList): the result of the computation by the
	                model. It should contain the field `mask`.
	        """
	        masks = predictions.get_field("mask")
	        masks_per_dim = self.masks_per_dim
	        masks = L.interpolate(
	            masks.float(), scale_factor=1 / masks_per_dim
	        ).byte()
	        height, width = masks.shape[-2:]
	        max_masks = masks_per_dim ** 2
	        masks = masks[:max_masks]
	        # handle case where we have less detections than max_masks
	        if len(masks) < max_masks:
	            masks_padded = torch.zeros(max_masks, 1, height, width, dtype=torch.uint8)
	            masks_padded[: len(masks)] = masks
	            masks = masks_padded
	        masks = masks.reshape(masks_per_dim, masks_per_dim, height, width)
	        result = torch.zeros(
	            (masks_per_dim * height, masks_per_dim * width), dtype=torch.uint8
	        )
	        for y in range(masks_per_dim):
	            start_y = y * height
	            end_y = (y + 1) * height
	            for x in range(masks_per_dim):
	                start_x = x * width
	                end_x = (x + 1) * width
	                result[start_y:end_y, start_x:end_x] = masks[y, x]
	        return cv2.applyColorMap(result.numpy(), cv2.COLORMAP_JET)

	    def overlay_class_names(self, image, predictions):
	        """
	        Adds detected class names and scores at the top-left corner of
	        each predicted bounding box.

	        Arguments:
	            image (np.ndarray): the image to draw on
	            predictions (BoxList): the result of the computation by the
	                model. It should contain the field `scores` and `labels`.
	        """
	        scores = predictions.get_field("scores").tolist()
	        labels = predictions.get_field("labels").tolist()
	        labels = [self.CATEGORIES[i] for i in labels]
	        boxes = predictions.bbox

	        template = "{}: {:.2f}"
	        for box, score, label in zip(boxes, scores, labels):
	            # cv2.putText expects plain integer pixel coordinates, so
	            # convert the tensor entries as overlay_boxes does
	            x, y = box[:2].to(torch.int64).tolist()
	            s = template.format(label, score)
	            cv2.putText(
	                image, s, (x, y), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
	            )

	        return image