# Previously: builder.fp16_mode = True
config.set_flag(trt.BuilderFlag.FP16)
# Previously: builder.int8_mode = True
config.set_flag(trt.BuilderFlag.INT8)
# Previously: builder.int8_calibrator = MyCustomCalibrator()
config.int8_calibrator = MyCustomCalibrator()
# ...
# Previously: engine = builder.build_cuda_engine(network)
engine = builder.build_engine(network, config)
# Create multiple optimization profiles for different contexts to use
shape0 = (1, 3, 224, 224)
profile0 = builder.create_optimization_profile()
profile0.set_shape("input", min=shape0, opt=shape0, max=shape0)
config.add_optimization_profile(profile0)
shape1 = (1, 3, 448, 448)
profile1 = builder.create_optimization_profile()
profile1.set_shape("input", min=shape1, opt=shape1, max=shape1)
config.add_optimization_profile(profile1)
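The snippets above assume that builder, network, and config already exist. A minimal sketch of that setup for an ONNX model (the variable names here are illustrative, not from the original gist):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# Dynamic shapes require an explicit-batch network definition
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(EXPLICIT_BATCH)
parser = trt.OnnxParser(network, TRT_LOGGER)
config = builder.create_builder_config()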
# Export sample Alexnet model to ONNX with a dynamic batch dimension
wget https://gist.githubusercontent.com/rmccorm4/b72abac18aed6be4c1725db18eba4930/raw/3919c883b97a231877b454dae695fe074a1acdff/alexnet_onnx.py
python3 alexnet_onnx.py
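For reference, a dynamic-batch export can be done with torch.onnx.export along the following lines. This is a sketch, not necessarily the exact contents of alexnet_onnx.py; the input name matches the actual_input_1 binding used below, while the output name and filename are assumptions:

import torch
import torchvision

model = torchvision.models.alexnet(pretrained=True).eval()
dummy_input = torch.randn(1, 3, 224, 224)
# Mark the batch dimension (dim 0) as dynamic on both input and output
torch.onnx.export(
    model,
    dummy_input,
    "alexnet_dynamic.onnx",
    input_names=["actual_input_1"],
    output_names=["output_1"],
    dynamic_axes={"actual_input_1": {0: "batch_size"}, "output_1": {0: "batch_size"}},
)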
# Emulate "maxBatchSize" behavior from implicit batch engines by setting
# an optimization profile with min=(1, *shape), opt=max=(maxBatchSize, *shape)
MAX_BATCH_SIZE=32
INPUT_NAME="actual_input_1"
# Convert dynamic batch ONNX model to TRT Engine with optimization profile defined
# (ONNX filename below is assumed to match the engine name used later)
trtexec --explicitBatch --onnx=alexnet_dynamic.onnx --saveEngine=alexnet_dynamic.engine \
        --minShapes=${INPUT_NAME}:1x3x224x224 \
        --optShapes=${INPUT_NAME}:${MAX_BATCH_SIZE}x3x224x224 \
        --maxShapes=${INPUT_NAME}:${MAX_BATCH_SIZE}x3x224x224
# Load serialized engine file into memory
with open("alexnet_dynamic.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
# Create context; it can be re-used across inferences
context = engine.create_execution_context()
# Profile 0 (first profile) is used by default
context.active_optimization_profile = 0
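To use the second profile instead, switch it on the context. Note that each profile gets its own set of binding indices; a short sketch, assuming the two-profile engine built above:

# Profile 1 expects the (1, 3, 448, 448) shape defined earlier
context.active_optimization_profile = 1
# Each profile has its own copy of the bindings, offset by profile index
bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
context.set_binding_shape(1 * bindings_per_profile, (1, 3, 448, 448))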
# These binding_idxs can change if either the context or the
# active optimization profile changes, so re-fetch them after switching
def setup_binding_shapes(
    engine: trt.ICudaEngine,
    context: trt.IExecutionContext,
    host_inputs: List[np.ndarray],
    input_binding_idxs: List[int],
    output_binding_idxs: List[int],
):
    # Explicitly set the dynamic input shapes, so the dynamic output
    # shapes can be computed internally
    for host_input, binding_index in zip(host_inputs, input_binding_idxs):
        context.set_binding_shape(binding_index, host_input.shape)
    assert context.all_binding_shapes_specified
    # Allocate host/device output buffers now that shapes are known
    # (assumes float32 outputs)
    host_outputs, device_outputs = [], []
    for binding_index in output_binding_idxs:
        output_shape = tuple(context.get_binding_shape(binding_index))
        host_outputs.append(np.empty(output_shape, dtype=np.float32))
        device_outputs.append(cuda.mem_alloc(host_outputs[-1].nbytes))
    return host_outputs, device_outputs
def is_dynamic(shape: Tuple[int, ...]) -> bool:
    # A dimension of None or -1 marks a dynamic axis
    return any(dim is None or dim < 0 for dim in shape)
def get_random_inputs(
    engine: trt.ICudaEngine,
    context: trt.IExecutionContext,
    input_binding_idxs: List[int],
):
    # Input data for inference
    host_inputs = []
    for binding_index in input_binding_idxs:
        input_shape = tuple(context.get_binding_shape(binding_index))
        if is_dynamic(input_shape):
            # Dynamic input: fall back to the active profile's OPT shape
            input_shape = tuple(engine.get_profile_shape(context.active_optimization_profile, binding_index)[1])
        host_inputs.append(np.random.random(input_shape).astype(np.float32))
    return host_inputs
def get_binding_idxs(engine: trt.ICudaEngine, profile_index: int):
    # Calculate start/end binding indices for current context's profile
    num_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
    start_binding = profile_index * num_bindings_per_profile
    end_binding = start_binding + num_bindings_per_profile
    # Separate input and output binding indices for convenience
    input_binding_idxs = []
    output_binding_idxs = []
    for binding_index in range(start_binding, end_binding):
        if engine.binding_is_input(binding_index):
            input_binding_idxs.append(binding_index)
        else:
            output_binding_idxs.append(binding_index)
    return input_binding_idxs, output_binding_idxs
#!/usr/bin/env python3
import argparse
from typing import Tuple, List
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
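Putting the helpers together, an inference pass might look like the following sketch. The buffer-management details are illustrative, assuming the engine, context, and helpers defined above:

# Hypothetical end-to-end driver using the helpers above
input_binding_idxs, output_binding_idxs = get_binding_idxs(
    engine, context.active_optimization_profile
)
host_inputs = get_random_inputs(engine, context, input_binding_idxs)
# Copy inputs to device
device_inputs = [cuda.mem_alloc(h.nbytes) for h in host_inputs]
for h, d in zip(host_inputs, device_inputs):
    cuda.memcpy_htod(d, h)
# Resolve output shapes and allocate output buffers
host_outputs, device_outputs = setup_binding_shapes(
    engine, context, host_inputs, input_binding_idxs, output_binding_idxs
)
# Bindings are device pointers ordered by binding index
bindings = [int(d) for d in device_inputs + device_outputs]
context.execute_v2(bindings)
# Copy results back to host
for h, d in zip(host_outputs, device_outputs):
    cuda.memcpy_dtoh(h, d)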
Example binding metadata for the dynamic-batch engine, grouped by optimization profile:

{
    'Profile 0':
    {
        'actual_input_1':
        {
            'binding_dtype': DataType.FLOAT,      # engine.get_binding_dtype(binding_index)
            'binding_index': 0,
            'binding_name': 'actual_input_1',     # engine.get_binding_name(binding_index)
            'binding_shape': (-1, 3, 224, 224),   # engine.get_binding_shape(binding_index)
            'binding_type': 'INPUT',              # engine.binding_is_input(binding_index) == True
        },
        ...
    },
    ...
}
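A hypothetical helper along these lines could collect that metadata; describe_bindings and its exact output format are illustrative, but every call it makes is part of the ICudaEngine API:

def describe_bindings(engine: trt.ICudaEngine):
    # Group binding metadata by optimization profile, as in the dump above
    metadata = {}
    bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
    for profile_index in range(engine.num_optimization_profiles):
        profile = {}
        start = profile_index * bindings_per_profile
        for binding_index in range(start, start + bindings_per_profile):
            name = engine.get_binding_name(binding_index)
            profile[name] = {
                'binding_dtype': engine.get_binding_dtype(binding_index),
                'binding_index': binding_index,
                'binding_name': name,
                'binding_shape': tuple(engine.get_binding_shape(binding_index)),
                'binding_type': 'INPUT' if engine.binding_is_input(binding_index) else 'OUTPUT',
            }
        metadata['Profile {}'.format(profile_index)] = profile
    return metadata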