Skip to content

Instantly share code, notes, and snippets.

View rmccorm4's full-sized avatar
💭
I may be slow to respond.

Ryan McCormick rmccorm4

💭
I may be slow to respond.
View GitHub Profile
@rmccorm4
rmccorm4 / pokefetch.md
Last active September 4, 2019 19:05
Pokefetch

Alt text

import argparse
import torch
import torchvision
# Command-line interface: a single optional --opset integer flag (default 11).
parser = argparse.ArgumentParser()
parser.add_argument("--opset", type=int, default=11, help="ONNX opset version to generate models with.")
args = parser.parse_args()
# Random input tensor of shape (10, 3, 224, 224), allocated on the CUDA device.
dummy_input = torch.randn(10, 3, 224, 224, device='cuda')
# torchvision AlexNet constructed with pretrained=True, then moved to the GPU.
model = torchvision.models.alexnet(pretrained=True).cuda()
@rmccorm4
rmccorm4 / onnx_parser_explicit_batch_fixed.py
Last active May 11, 2020 19:16
Minimal example of creating a TensorRT engine from an ONNX model with FIXED SHAPE
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
with trt.Builder(TRT_LOGGER) as builder, \
builder.create_network(EXPLICIT_BATCH) as network, \
builder.create_builder_config() as config, \
trt.OnnxParser(network, TRT_LOGGER) as parser:
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
with trt.Builder(TRT_LOGGER) as builder, \
builder.create_network(EXPLICIT_BATCH) as network, \
builder.create_builder_config() as config, \
trt.OnnxParser(network, TRT_LOGGER) as parser:
# Query input names and shapes from parsed TensorRT network
network_inputs = [network.get_input(i) for i in range(network.num_inputs)]
input_names = [_input.name for _input in network_inputs] # ex: ["actual_input1"]
# Note the original model must have dynamic (-1) dimensions for variable min/opt/max values
# in your profile dimensions such as the batch dimension in this example
input_shapes = [_input.shape for _input in network_inputs] # ex: [(-1, 3, 224, 224)]
max_batch_size = 64
# Create optimization profile for dynamic batch dimension
profile0 = builder.create_optimization_profile()
{
'Profile 0':
{
'actual_input_1':
{
'binding_dtype': DataType.FLOAT,
'binding_index': 0,
'binding_name': 'actual_input_1', # context.get_binding_name(binding_index)
'binding_shape': (-1, 3, 224, 224), # context.get_binding_shape(binding_index)
'binding_type': 'INPUT', # engine.binding_is_input(binding_index) == True
{
'Profile 0':
{
'actual_input_1':
{
'binding_dtype': DataType.FLOAT,
'binding_index': 0,
'binding_name': 'actual_input_1', # context.get_binding_name(binding_index)
'binding_shape': (-1, 3, 224, 224), # context.get_binding_shape(binding_index)
'binding_type': 'INPUT', # engine.binding_is_input(binding_index) == True
#!/usr/bin/env python3
import argparse
from typing import Tuple, List
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
def get_binding_idxs(engine: trt.ICudaEngine, profile_index: int):
# Calculate start/end binding indices for current context's profile
num_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
start_binding = profile_index * num_bindings_per_profile
end_binding = start_binding + num_bindings_per_profile
# Separate input and output binding indices for convenience
input_binding_idxs = []
output_binding_idxs = []
for binding_index in range(start_binding, end_binding):
def is_dynamic(shape: Tuple[int, ...]) -> bool:
    """Return True if any dimension of *shape* is unresolved.

    A dimension counts as unresolved when it is ``None`` or negative
    (for example ``-1``). An empty shape is never dynamic.

    Args:
        shape: Sequence of dimension sizes; entries may also be ``None``.

    Returns:
        True if at least one dimension is ``None`` or less than zero,
        otherwise False.
    """
    # The `is None` check must come first: comparing None < 0 raises TypeError.
    return any(dim is None or dim < 0 for dim in shape)
def get_random_inputs(
engine: trt.ICudaEngine,
context: trt.IExecutionContext,
input_binding_idxs: List[int],
):
# Input data for inference
host_inputs = []