Created
June 30, 2020 08:23
-
-
Save jaemin93/98c196e1c9eca9e1e7386330a996fe97 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import tensorrt as trt | |
import pycuda.driver as cuda | |
import pycuda.autoinit | |
import os | |
import numpy as np | |
from tqdm import tqdm | |
# Module-wide TensorRT logger, constructed with the library's default arguments.
TRT_LOGGER = trt.Logger()
# Bitmask with only the EXPLICIT_BATCH network-creation flag bit set.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
class HostDeviceMem:
    """Pair a host-side buffer with its matching device-side buffer."""

    def __init__(self, host_mem, device_mem):
        """Store the two buffers as the ``host`` and ``device`` attributes."""
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        """Return a four-line description: a label line, then each buffer."""
        pieces = ("Host:", str(self.host), "Device:", str(self.device))
        return "\n".join(pieces)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()
def do_inference_v2(context, bindings, inputs, outputs, stream):
    """Run one asynchronous inference pass and return the host-side outputs.

    Args:
        context: Execution context exposing ``execute_async_v2``.
        bindings: Value forwarded as the ``bindings`` argument of
            ``execute_async_v2``.
        inputs: Iterable of objects with ``host`` and ``device`` attributes;
            each ``host`` buffer is copied to its ``device`` buffer.
        outputs: Iterable of objects with ``host`` and ``device`` attributes;
            each ``device`` buffer is copied back to its ``host`` buffer.
        stream: Stream used for the copies and the execution; must provide
            ``handle`` and ``synchronize()``.

    Returns:
        A list with the ``host`` attribute of every entry in ``outputs``,
        in iteration order.
    """
    # Queue the host -> device copies. Plain loops are used because the
    # calls are made purely for their side effects.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    # Queue the inference itself on the same stream.
    # NOTE(review): the return value of execute_async_v2 is ignored here, as
    # in the original code -- confirm whether a failure should be surfaced.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

    # Queue the device -> host copies of the predictions.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Block until everything queued on the stream has completed, so the
    # host buffers are safe to read.
    stream.synchronize()

    # Hand back only the host-side buffers.
    return [out.host for out in outputs]
def allocate_buffers(engine):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    For each binding yielded by iterating ``engine``, the element count is
    the volume of the binding shape multiplied by ``engine.max_batch_size``.
    A host buffer of that size is created with ``cuda.pagelocked_empty`` and
    a device buffer of the same byte size with ``cuda.mem_alloc``.

    NOTE(review): this assumes every binding shape is fully specified --
    confirm the behavior for engines with dynamic dimensions.

    Args:
        engine: Engine object that is iterable over its bindings and exposes
            ``get_binding_shape``, ``get_binding_dtype``, ``binding_is_input``
            and ``max_batch_size``.

    Returns:
        A tuple ``(inputs, outputs, bindings, stream)``: two lists of
        ``HostDeviceMem`` pairs, the list of device buffers converted with
        ``int()``, and a newly created ``cuda.Stream``.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()

    for name in engine:
        shape = engine.get_binding_shape(name)
        element_count = trt.volume(shape) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(name))

        # Host buffer first; the device buffer is sized from its byte count.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        device_buf = cuda.mem_alloc(host_buf.nbytes)

        # The bindings list holds the device buffer as a plain integer.
        bindings.append(int(device_buf))

        # File the pair under inputs or outputs depending on the binding.
        target = inputs if engine.binding_is_input(name) else outputs
        target.append(HostDeviceMem(host_buf, device_buf))

    return inputs, outputs, bindings, stream
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment