@Wheest, created December 13, 2020 20:55
MWE for the TF-ONNX-TensorRT import issue "Unsupported ONNX data type: UINT8".
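Background: TensorFlow detection models are usually exported with a UINT8 image input, which the TensorRT ONNX parser (at least in the 7.x releases this gist targets) rejects with exactly this error; the graphsurgeon script further down rewrites the input to float32. As a quick diagnosis, a minimal sketch (assuming the hypothetical file name ssd_model.onnx used throughout this gist) that prints each graph input's element type with the onnx package:

import onnx

# Hypothetical path; substitute your own exported model.
model = onnx.load("ssd_model.onnx")
for inp in model.graph.input:
    elem_type = inp.type.tensor_type.elem_type
    # onnx.TensorProto.UINT8 == 2, onnx.TensorProto.FLOAT == 1
    print(inp.name, onnx.TensorProto.DataType.Name(elem_type))

coco_labels.txt (the 80 COCO class names, one per line):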
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
common.py:

#
# Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO LICENSEE:
#
# This source code and/or documentation ("Licensed Deliverables") are
# subject to NVIDIA intellectual property rights under U.S. and
# international Copyright laws.
#
# These Licensed Deliverables contained herein is PROPRIETARY and
# CONFIDENTIAL to NVIDIA and is being provided under the terms and
# conditions of a form of NVIDIA software license agreement by and
# between NVIDIA and Licensee ("License Agreement") or electronically
# accepted by Licensee. Notwithstanding any terms or conditions to
# the contrary in the License Agreement, reproduction or disclosure
# of the Licensed Deliverables to any third party without the express
# written consent of NVIDIA is prohibited.
#
# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
# LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
# SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
# PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
# NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
# DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
# NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
# LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
# SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THESE LICENSED DELIVERABLES.
#
# U.S. Government End Users. These Licensed Deliverables are a
# "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
# 1995), consisting of "commercial computer software" and "commercial
# computer software documentation" as such terms are used in 48
# C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
# only as a commercial end item. Consistent with 48 C.F.R.12.212 and
# 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
# U.S. Government End Users acquire the Licensed Deliverables with
# only those rights set forth herein.
#
# Any use of the Licensed Deliverables in individual and commercial
# software must include, in the user documentation and internal
# comments to the code, the above Disclaimer and U.S. Government End
# Users Notice.
#
from itertools import chain
import argparse
import os
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import tensorrt as trt
try:
    # Python 2 has no FileNotFoundError; fall back to IOError there.
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

def GiB(val):
    return val * 1 << 30

def add_help(description):
    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args, _ = parser.parse_known_args()

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''
    Parses sample arguments.

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (List[str]): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        (List[str], List[str]): The resolved data directories, and the absolute paths of the requested files.
    '''
    # Standard command-line arguments for all samples.
    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data")
    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-d", "--datadir", help="Location of the TensorRT sample data directory, and any additional data directories.", action="append", default=[kDEFAULT_DATA_ROOT])
    args, _ = parser.parse_known_args()

    def get_data_path(data_dir):
        # If the subfolder exists, append it to the path; otherwise use the provided path as-is.
        data_path = os.path.join(data_dir, subfolder)
        if not os.path.exists(data_path):
            print("WARNING: " + data_path + " does not exist. Trying " + data_dir + " instead.")
            data_path = data_dir
        # Make sure the data directory exists.
        if not os.path.exists(data_path):
            print("WARNING: {:} does not exist. Please provide the correct data path with the -d option.".format(data_path))
        return data_path

    data_paths = [get_data_path(data_dir) for data_dir in args.datadir]
    return data_paths, locate_files(data_paths, find_files)

def locate_files(data_paths, filenames):
    """
    Locates the specified files in the specified data directories.
    If a file exists in multiple data directories, the first directory is used.

    Args:
        data_paths (List[str]): The data directories.
        filenames (List[str]): The names of the files to find.

    Returns:
        List[str]: The absolute paths of the files.

    Raises:
        FileNotFoundError if a file could not be located.
    """
    found_files = [None] * len(filenames)
    for data_path in data_paths:
        # Find all requested files.
        for index, (found, filename) in enumerate(zip(found_files, filenames)):
            if not found:
                file_path = os.path.abspath(os.path.join(data_path, filename))
                if os.path.exists(file_path):
                    found_files[index] = file_path
    # Check that all files were found.
    for f, filename in zip(found_files, filenames):
        if not f or not os.path.exists(f):
            raise FileNotFoundError("Could not find {:}. Searched in data paths: {:}".format(filename, data_paths))
    return found_files

# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

# This function is generalized for multiple inputs/outputs for full-dimension (explicit-batch) networks.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference_v2(context, bindings, inputs, outputs, stream):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
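As a usage illustration, a minimal sketch of how these helpers fit together, assuming an engine has already been serialized to a hypothetical ssd_model.trt (the main script at the bottom of this gist does the same with real inputs):

import numpy as np
import tensorrt as trt
import common  # the module above

TRT_LOGGER = trt.Logger()
# Deserialize a previously built engine (hypothetical path).
with open("ssd_model.trt", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
with engine, engine.create_execution_context() as context:
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    # Point the host input at dummy data of the right shape/dtype
    # (the sample assigns to .host the same way).
    inputs[0].host = np.random.random(size=inputs[0].host.shape).astype(inputs[0].host.dtype)
    results = common.do_inference_v2(context, bindings=bindings,
                                     inputs=inputs, outputs=outputs, stream=stream)
    print([r.shape for r in results])  # flat arrays, one per output binding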
data_processing.py:

#
# Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
# (Full NVIDIA sample license notice as in common.py above.)
#
import math
from PIL import Image
import numpy as np
import os
# YOLOv3-608 has been trained with these 80 categories from COCO:
# Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context."
# European Conference on Computer Vision. Springer, Cham, 2014.
def load_label_categories(label_file_path):
    categories = [line.rstrip('\n') for line in open(label_file_path)]
    return categories

LABEL_FILE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'coco_labels.txt')
ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH)

# Let's make sure that there are 80 classes, as expected for the COCO data set:
CATEGORY_NUM = len(ALL_CATEGORIES)
assert CATEGORY_NUM == 80

class PreprocessYOLO(object):
    """A simple class for loading images with PIL and reshaping them to the specified
    input resolution for YOLOv3-608.
    """

    def __init__(self, yolo_input_resolution):
        """Initialize with the input resolution for YOLOv3, which will stay fixed in this sample.

        Keyword arguments:
        yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
        input resolution in HW order
        """
        self.yolo_input_resolution = yolo_input_resolution

    def process(self, input_image_path):
        """Load an image from the specified input path,
        and return it together with a pre-processed version required for feeding it into a
        YOLOv3 network.

        Keyword arguments:
        input_image_path -- string path of the image to be loaded
        """
        image_raw, image_resized = self._load_and_resize(input_image_path)
        image_preprocessed = self._shuffle_and_normalize(image_resized)
        return image_raw, image_preprocessed

    def _load_and_resize(self, input_image_path):
        """Load an image from the specified path and resize it to the input resolution.
        Return the input image before resizing as a PIL Image (required for visualization),
        and the resized image as a NumPy float array.

        Keyword arguments:
        input_image_path -- string path of the image to be loaded
        """
        image_raw = Image.open(input_image_path)
        # yolo_input_resolution is in (height, width) order; PIL's resize expects (width, height):
        new_resolution = (
            self.yolo_input_resolution[1],
            self.yolo_input_resolution[0])
        image_resized = image_raw.resize(
            new_resolution, resample=Image.BICUBIC)
        image_resized = np.array(image_resized, dtype=np.float32, order='C')
        # MWE-specific: move the leading (height) axis to the end, so that the
        # transpose in _shuffle_and_normalize yields NHWC (channels-last) rather
        # than NCHW, matching the TensorFlow SSD model's input layout.
        image_resized = np.moveaxis(image_resized, 0, -1)
        print('Resized image shape:', image_resized.shape)
        return image_raw, image_resized

    def _shuffle_and_normalize(self, image):
        """Normalize a NumPy array representing an image to the range [0, 1],
        reorder its axes, and add a leading batch dimension.

        Keyword arguments:
        image -- image as three-dimensional NumPy float array
        """
        image /= 255.0
        # Reorder axes (for a plain HWC input this is HWC -> CHW; see the note in _load_and_resize):
        image = np.transpose(image, [2, 0, 1])
        # Add the leading batch dimension:
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major order, also known as "C order":
        image = np.array(image, dtype=np.float32, order='C')
        return image

class PostprocessYOLO(object):
    """Class for post-processing the three output tensors from YOLOv3-608."""

    def __init__(self,
                 yolo_masks,
                 yolo_anchors,
                 obj_threshold,
                 nms_threshold,
                 yolo_input_resolution):
        """Initialize with all values that will be kept when processing several frames.
        Assuming 3 outputs of the network in the case of (large) YOLOv3.

        Keyword arguments:
        yolo_masks -- a list of 3 three-dimensional tuples for the YOLO masks
        yolo_anchors -- a list of 9 two-dimensional tuples for the YOLO anchors
        obj_threshold -- threshold for object coverage, float value between 0 and 1
        nms_threshold -- threshold for the non-max suppression algorithm,
        float value between 0 and 1
        yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
        input resolution in HW order
        """
        self.masks = yolo_masks
        self.anchors = yolo_anchors
        self.object_threshold = obj_threshold
        self.nms_threshold = nms_threshold
        self.input_resolution_yolo = yolo_input_resolution

    def process(self, outputs, resolution_raw):
        """Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them
        and return a list of bounding boxes for detected objects together with their category
        and their confidence in separate lists.

        Keyword arguments:
        outputs -- outputs from a TensorRT engine in NCHW format
        resolution_raw -- the original spatial resolution of the input PIL image in WH order
        """
        outputs_reshaped = list()
        for output in outputs:
            outputs_reshaped.append(self._reshape_output(output))
        boxes, categories, confidences = self._process_yolo_output(
            outputs_reshaped, resolution_raw)
        return boxes, categories, confidences

    def _reshape_output(self, output):
        """Reshape a TensorRT output from NCHW to NHWC format (with expected C=255),
        and then return it in (height, width, 3, 85) dimensionality after further reshaping.

        Keyword argument:
        output -- an output from a TensorRT engine after inference
        """
        output = np.transpose(output, [0, 2, 3, 1])
        _, height, width, _ = output.shape
        dim1, dim2 = height, width
        dim3 = 3
        # There are CATEGORY_NUM=80 object categories:
        dim4 = (4 + 1 + CATEGORY_NUM)
        return np.reshape(output, (dim1, dim2, dim3, dim4))

    def _process_yolo_output(self, outputs_reshaped, resolution_raw):
        """Take in a list of three reshaped YOLO outputs in (height, width, 3, 85) shape and
        return a list of bounding boxes for detected objects together with their category and
        their confidence in separate lists.

        Keyword arguments:
        outputs_reshaped -- list of three reshaped YOLO outputs as NumPy arrays
        with shape (height, width, 3, 85)
        resolution_raw -- the original spatial resolution of the input PIL image in WH order
        """
        # E.g. in YOLOv3-608, there are three output tensors, which we associate with their
        # respective masks. Then we iterate through all output-mask pairs and generate candidates
        # for bounding boxes, their corresponding category predictions and their confidences:
        boxes, categories, confidences = list(), list(), list()
        for output, mask in zip(outputs_reshaped, self.masks):
            box, category, confidence = self._process_feats(output, mask)
            box, category, confidence = self._filter_boxes(box, category, confidence)
            boxes.append(box)
            categories.append(category)
            confidences.append(confidence)
        boxes = np.concatenate(boxes)
        categories = np.concatenate(categories)
        confidences = np.concatenate(confidences)

        # Scale boxes back to the original image shape:
        width, height = resolution_raw
        image_dims = [width, height, width, height]
        boxes = boxes * image_dims

        # Using the candidates from the previous (loop) step, we apply the non-max suppression
        # algorithm that clusters adjacent bounding boxes to a single bounding box:
        nms_boxes, nms_categories, nscores = list(), list(), list()
        for category in set(categories):
            idxs = np.where(categories == category)
            box = boxes[idxs]
            category = categories[idxs]
            confidence = confidences[idxs]
            keep = self._nms_boxes(box, confidence)
            nms_boxes.append(box[keep])
            nms_categories.append(category[keep])
            nscores.append(confidence[keep])
        if not nms_categories and not nscores:
            return None, None, None
        boxes = np.concatenate(nms_boxes)
        categories = np.concatenate(nms_categories)
        confidences = np.concatenate(nscores)
        return boxes, categories, confidences

    def _process_feats(self, output_reshaped, mask):
        """Take in a reshaped YOLO output in (height, width, 3, 85) format together with its
        corresponding YOLO mask and return the detected bounding boxes, the confidence,
        and the class probability in each cell/pixel.

        Keyword arguments:
        output_reshaped -- reshaped YOLO output as NumPy array with shape (height, width, 3, 85)
        mask -- three-dimensional tuple with the mask specification for this output
        """
        # Two in-line functions required for calculating the bounding-box
        # descriptors:
        def sigmoid(value):
            """Return the sigmoid of the input."""
            return 1.0 / (1.0 + math.exp(-value))

        def exponential(value):
            """Return the exponential of the input."""
            return math.exp(value)

        # Vectorized versions of the above two functions:
        sigmoid_v = np.vectorize(sigmoid)
        exponential_v = np.vectorize(exponential)

        grid_h, grid_w, _, _ = output_reshaped.shape
        anchors = [self.anchors[i] for i in mask]
        # Reshape to N, height, width, num_anchors, box_params:
        anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2])
        box_xy = sigmoid_v(output_reshaped[..., :2])
        box_wh = exponential_v(output_reshaped[..., 2:4]) * anchors_tensor
        box_confidence = sigmoid_v(output_reshaped[..., 4])
        box_confidence = np.expand_dims(box_confidence, axis=-1)
        box_class_probs = sigmoid_v(output_reshaped[..., 5:])

        col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
        row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
        col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
        row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
        grid = np.concatenate((col, row), axis=-1)

        box_xy += grid
        box_xy /= (grid_w, grid_h)
        box_wh /= self.input_resolution_yolo
        box_xy -= (box_wh / 2.)
        boxes = np.concatenate((box_xy, box_wh), axis=-1)

        # boxes: centroids, box_confidence: confidence level, box_class_probs:
        # class confidence
        return boxes, box_confidence, box_class_probs

    def _filter_boxes(self, boxes, box_confidences, box_class_probs):
        """Take in the unfiltered bounding-box descriptors and discard each cell
        whose score is lower than the object threshold set during class initialization.

        Keyword arguments:
        boxes -- bounding-box coordinates with shape (height, width, 3, 4); 4 for
        x, y, height, width coordinates of the boxes
        box_confidences -- bounding-box confidences with shape (height, width, 3, 1); 1 for one
        confidence scalar per element
        box_class_probs -- class probabilities with shape (height, width, 3, CATEGORY_NUM)
        """
        box_scores = box_confidences * box_class_probs
        box_classes = np.argmax(box_scores, axis=-1)
        box_class_scores = np.max(box_scores, axis=-1)
        pos = np.where(box_class_scores >= self.object_threshold)
        boxes = boxes[pos]
        classes = box_classes[pos]
        scores = box_class_scores[pos]
        return boxes, classes, scores

    def _nms_boxes(self, boxes, box_confidences):
        """Apply the Non-Maximum Suppression (NMS) algorithm to the bounding boxes with their
        confidence scores and return an array with the indexes of the bounding boxes we want to
        keep (and display later).

        Keyword arguments:
        boxes -- a NumPy array containing N bounding-box coordinates that survived filtering,
        with shape (N, 4); 4 for x, y, height, width coordinates of the boxes
        box_confidences -- a NumPy array containing the corresponding confidences, with shape (N,)
        """
        x_coord = boxes[:, 0]
        y_coord = boxes[:, 1]
        width = boxes[:, 2]
        height = boxes[:, 3]
        areas = width * height
        ordered = box_confidences.argsort()[::-1]

        keep = list()
        while ordered.size > 0:
            # Index of the current element:
            i = ordered[0]
            keep.append(i)
            xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
            yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
            xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]])
            yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]])
            width1 = np.maximum(0.0, xx2 - xx1 + 1)
            height1 = np.maximum(0.0, yy2 - yy1 + 1)
            intersection = width1 * height1
            union = (areas[i] + areas[ordered[1:]] - intersection)
            # Compute the Intersection over Union (IoU) score:
            iou = intersection / union
            # The goal of the NMS algorithm is to reduce the number of adjacent bounding-box
            # candidates to a minimum. In this step, we keep only those elements whose overlap
            # with the current bounding box is lower than the threshold:
            indexes = np.where(iou <= self.nms_threshold)[0]
            ordered = ordered[indexes + 1]
        keep = np.array(keep)
        return keep
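The extra np.moveaxis in _load_and_resize is easy to misread, so here is a small NumPy-only sketch (no model needed) that traces the shape through the two preprocessing steps for the 640x640 resolution used in the main script, confirming that the engine is fed NHWC (channels-last) data:

import numpy as np

h, w = 640, 640
img = np.zeros((h, w, 3), dtype=np.float32)  # PIL -> array gives HWC
img = np.moveaxis(img, 0, -1)                # as in _load_and_resize
print(img.shape)                             # (640, 3, 640)
img = np.transpose(img, [2, 0, 1])           # as in _shuffle_and_normalize
img = np.expand_dims(img, axis=0)
print(img.shape)                             # (1, 640, 640, 3), i.e. NHWC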
ONNX input-dtype fix script (the gist does not record this file's name):

#!/usr/bin/env python
# Workaround for "Unsupported ONNX data type: UINT8": rewrite the graph
# inputs to float32 with onnx-graphsurgeon before handing the model to TensorRT.
import onnx_graphsurgeon as gs
import onnx
import numpy as np

graph = gs.import_onnx(onnx.load("ssd_model.onnx"))
for inp in graph.inputs:
    inp.dtype = np.float32
onnx.save(gs.export_onnx(graph), "updated_ssd_model.onnx")
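After this rewrite the model no longer accepts UINT8 input, so callers must feed float32 data (the preprocessor above already produces float32). A quick check, assuming the file names used in the script:

import onnx

model = onnx.load("updated_ssd_model.onnx")
elem_type = model.graph.input[0].type.tensor_type.elem_type
assert elem_type == onnx.TensorProto.FLOAT, "input is still not float32"
print("input dtype OK:", onnx.TensorProto.DataType.Name(elem_type))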
Main script (file name not recorded in this capture; based on NVIDIA's onnx_to_tensorrt.py sample):

#!/usr/bin/env python2
#
# Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
# (Full NVIDIA sample license notice as in common.py above.)
#
from __future__ import print_function
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import ImageDraw
import wget
from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES
import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common
TRT_LOGGER = trt.Logger()
def download_file(local_path, link, checksum_reference=None):
    """Checks if a local file is present, and downloads it from the specified link otherwise.
    If checksum_reference is specified, the file's MD5 checksum is compared against the
    expected value.

    Keyword arguments:
    local_path -- path of the file whose checksum shall be generated
    link -- link where the file shall be downloaded from if it is not found locally
    checksum_reference -- expected MD5 checksum of the file
    """
    if not os.path.exists(local_path):
        print('Downloading from %s, this may take a while...' % link)
        wget.download(link, local_path)
        print()
    if checksum_reference is not None:
        # NOTE: generate_md5_checksum is not defined in this MWE; this branch
        # only runs when a checksum_reference is supplied.
        checksum = generate_md5_checksum(local_path)
        if checksum != checksum_reference:
            raise ValueError(
                'The MD5 checksum of local file %s differs from %s, please manually remove \
                the file and try again.' %
                (local_path, checksum_reference))
    return local_path

def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'):
    """Draw the bounding boxes on the original input image and return it.

    Keyword arguments:
    image_raw -- a raw PIL Image
    bboxes -- NumPy array containing the bounding-box coordinates of N objects, with shape (N, 4)
    categories -- NumPy array containing the corresponding category for each object,
    with shape (N,)
    confidences -- NumPy array containing the corresponding confidence for each object,
    with shape (N,)
    all_categories -- a list of all categories in the correct order (required for looking up
    the category name)
    bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue')
    """
    draw = ImageDraw.Draw(image_raw)
    print(bboxes, confidences, categories)
    for box, score, category in zip(bboxes, confidences, categories):
        x_coord, y_coord, width, height = box
        left = max(0, np.floor(x_coord + 0.5).astype(int))
        top = max(0, np.floor(y_coord + 0.5).astype(int))
        right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int))
        bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int))
        draw.rectangle(((left, top), (right, bottom)), outline=bbox_color)
        draw.text((left, top - 12), '{0} {1:.2f}'.format(all_categories[category], score), fill=bbox_color)
    return image_raw

def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with."""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            # Note: setting max_workspace_size/max_batch_size on the builder is the
            # TensorRT 7-era API used by this sample; newer releases set these on an IBuilderConfig.
            builder.max_workspace_size = 1 << 28  # 256 MiB
            builder.max_batch_size = 1
            # Parse the model file.
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # The actual yolov3.onnx is generated with batch size 64. Reshape the input to batch size 1.
            # network.get_input(0).shape = [1, 3, 608, 608]
            network.get_input(0).shape = [1, 3, 224, 224]
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()

import argparse
parser = argparse.ArgumentParser(description='')
parser.add_argument('onnx_file', type=str)
args = parser.parse_args()

def main():
    """Create a TensorRT engine for the ONNX model and run inference."""
    # Try to load a previously generated network graph in ONNX format:
    onnx_file_path = args.onnx_file
    engine_file_path = "ssd_model.trt"
    # Download a dog image and save it to the following file path:
    input_image_path = download_file('dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg', checksum_reference=None)

    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order:
    input_resolution_yolov3_HW = (640, 640)
    # Create a pre-processor object by specifying the required input resolution:
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version:
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format; we will need it later:
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor:
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Do inference with TensorRT.
    trt_outputs = []
    print(onnx_file_path)
    with get_engine(onnx_file_path, engine_file_path) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference.
        print('Running inference on image {}...'.format(input_image_path))
        import time
        start = time.time()
        # Set the host input to the image. common.do_inference_v2 will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        print('Finished inference')

    # Before post-processing, we need to reshape the outputs, as common.do_inference_v2 gives us flat arrays.
    trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

    postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                    # A list of 3 three-dimensional tuples for the YOLO masks
                          "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
                                           (59, 119), (116, 90), (156, 198), (373, 326)],
                          "obj_threshold": 0.6,     # Threshold for object coverage, float value between 0 and 1
                          "nms_threshold": 0.5,     # Threshold for the non-max suppression algorithm, float value between 0 and 1
                          "yolo_input_resolution": input_resolution_yolov3_HW}
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get the bounding-box details of detected objects:
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)
    end = time.time()
    # The timer spans inference plus post-processing; .format keeps this Python 2-compatible.
    print('Inference + post-processing time: {}'.format(end - start))
    # Draw the bounding boxes onto the original input image and save it as a PNG file:
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    output_image_path = 'dog_bboxes.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))

if __name__ == '__main__':
    main()
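Usage note: with the pieces above saved side by side (coco_labels.txt, common.py, data_processing.py, the dtype-fix script, and this main script), the intended flow is to export the TF model to ONNX, run the graphsurgeon fix to produce updated_ssd_model.onnx, and then invoke the main script with the ONNX path as its single positional argument, e.g. python main_script.py updated_ssd_model.onnx (script name illustrative, as the original file name is not recorded here).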