import numpy as np
import pycuda.autoinit  # initializes the CUDA context
import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Load serialized engine file into memory
with open("alexnet_dynamic.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
# Create execution context; this can be re-used across inferences
context = engine.create_execution_context()
# Profile 0 (first profile) is used by default
context.active_optimization_profile = 0
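
# get_binding_idxs() is not shown in this snippet; a minimal sketch, assuming
# the pre-TensorRT-8.5 binding API, where each optimization profile owns an
# equally sized, contiguous slice of the engine's bindings:
def get_binding_idxs(engine, profile_index):
    # Each profile gets num_bindings / num_optimization_profiles bindings
    num_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
    start_binding = profile_index * num_bindings_per_profile
    end_binding = start_binding + num_bindings_per_profile
    # Split the profile's slice into input and output binding indices
    input_binding_idxs, output_binding_idxs = [], []
    for binding_index in range(start_binding, end_binding):
        if engine.binding_is_input(binding_index):
            input_binding_idxs.append(binding_index)
        else:
            output_binding_idxs.append(binding_index)
    return input_binding_idxs, output_binding_idxs
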
# These binding_idxs can change if either the context or the
# active_optimization_profile is changed
input_binding_idxs, output_binding_idxs = get_binding_idxs(
    engine, context.active_optimization_profile
)
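
# get_random_inputs() is not shown either; a sketch under the same assumptions
# (float32 inputs, dynamic dimensions reported as -1), picking the profile's
# "opt" shape whenever a binding has a dynamic dimension:
def get_random_inputs(engine, context, input_binding_idxs):
    host_inputs = []
    for binding_index in input_binding_idxs:
        input_shape = tuple(context.get_binding_shape(binding_index))
        if any(dim < 0 for dim in input_shape):
            # get_profile_shape() returns the (min, opt, max) shapes for this binding
            profile_index = context.active_optimization_profile
            input_shape = tuple(engine.get_profile_shape(profile_index, binding_index)[1])
        host_inputs.append(np.random.random(input_shape).astype(np.float32))
    return host_inputs
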
# Generate random inputs based on profile shapes
host_inputs = get_random_inputs(engine, context, input_binding_idxs)
# Allocate device memory for inputs. This can be easily re-used if the
# input shapes don't change
device_inputs = [cuda.mem_alloc(h_input.nbytes) for h_input in host_inputs]
# Copy host inputs to device; this needs to be done for each new input
for h_input, d_input in zip(host_inputs, device_inputs):
    cuda.memcpy_htod(d_input, h_input)
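
# setup_binding_shapes() is not shown either; a sketch, again assuming the
# pre-8.5 binding API and float32 outputs. It registers the concrete input
# shapes so TensorRT can compute the output shapes, then allocates matching
# host and device output buffers:
def setup_binding_shapes(engine, context, host_inputs, input_binding_idxs, output_binding_idxs):
    # Tell the context the concrete shape of each (possibly dynamic) input
    for host_input, binding_index in zip(host_inputs, input_binding_idxs):
        context.set_binding_shape(binding_index, host_input.shape)
    assert context.all_binding_shapes_specified
    # Output shapes are now fully resolved, so buffers can be sized
    host_outputs, device_outputs = [], []
    for binding_index in output_binding_idxs:
        output_shape = tuple(context.get_binding_shape(binding_index))
        host_buffer = np.empty(output_shape, dtype=np.float32)
        host_outputs.append(host_buffer)
        device_outputs.append(cuda.mem_alloc(host_buffer.nbytes))
    return host_outputs, device_outputs
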
# This needs to be called every time your input shapes change.
# If your inputs are always the same shape (same batch size, etc.),
# you can re-use the same output buffers.
host_outputs, device_outputs = setup_binding_shapes(
    engine, context, host_inputs, input_binding_idxs, output_binding_idxs,
)
# Bindings are a list of device pointers for inputs and outputs
bindings = device_inputs + device_outputs
# Inference
context.execute_v2(bindings)
# Copy outputs back to host to view results
for h_output, d_output in zip(host_outputs, device_outputs):
    cuda.memcpy_dtoh(h_output, d_output)
# View outputs
print(host_outputs)