@trevor-m
Created February 5, 2021 17:36
Compare cuDNN and TensorRT with TVM
import tvm
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime

def compile_graph(use_trt=False):
    # Build a small Relay graph: four 3x3 convolutions with bias-add, with ReLU between them.
    x = relay.var("x", shape=(100, 2048, 33, 33), dtype="float32")
    w0 = relay.var("w0", shape=(256, 2048, 3, 3), dtype="float32")
    w1 = relay.var("w1", shape=(256, 256, 3, 3), dtype="float32")
    w2 = relay.var("w2", shape=(256, 256, 3, 3), dtype="float32")
    w3 = relay.var("w3", shape=(90, 256, 3, 3), dtype="float32")
    b0 = relay.var("b0", shape=(1, 256, 1, 1), dtype="float32")
    b1 = relay.var("b1", shape=(1, 256, 1, 1), dtype="float32")
    b2 = relay.var("b2", shape=(1, 256, 1, 1), dtype="float32")
    b3 = relay.var("b3", shape=(1, 90, 1, 1), dtype="float32")
    y = relay.nn.conv2d(x, w0, padding=[1, 1, 1, 1], channels=256, kernel_size=[3, 3])
    y = relay.add(y, b0)
    y = relay.nn.relu(y)
    y = relay.nn.conv2d(y, w1, padding=[1, 1, 1, 1], channels=256, kernel_size=[3, 3])
    y = relay.add(y, b1)
    y = relay.nn.relu(y)
    y = relay.nn.conv2d(y, w2, padding=[1, 1, 1, 1], channels=256, kernel_size=[3, 3])
    y = relay.add(y, b2)
    y = relay.nn.relu(y)
    y = relay.nn.conv2d(y, w3, padding=[1, 1, 1, 1], channels=90, kernel_size=[3, 3])
    y = relay.add(y, b3)
    f = relay.Function([x, w0, b0, w1, b1, w2, b2, w3, b3], y)
    mod = tvm.IRModule()
    mod["main"] = f

    # Random weights and biases matching the variable shapes declared above.
    params = {
        "w0": np.random.uniform(-1, 1, (256, 2048, 3, 3)).astype(np.float32),
        "w1": np.random.uniform(-1, 1, (256, 256, 3, 3)).astype(np.float32),
        "w2": np.random.uniform(-1, 1, (256, 256, 3, 3)).astype(np.float32),
        "w3": np.random.uniform(-1, 1, (90, 256, 3, 3)).astype(np.float32),
        "b0": np.random.uniform(-1, 1, (1, 256, 1, 1)).astype(np.float32),
        "b1": np.random.uniform(-1, 1, (1, 256, 1, 1)).astype(np.float32),
        "b2": np.random.uniform(-1, 1, (1, 256, 1, 1)).astype(np.float32),
        "b3": np.random.uniform(-1, 1, (1, 90, 1, 1)).astype(np.float32),
    }

    if use_trt:
        print("Compiling with tensorrt.")
        # Partition supported subgraphs for the TensorRT BYOC backend, build the rest for CUDA.
        from tvm.relay.op.contrib import tensorrt
        mod, config = tensorrt.partition_for_tensorrt(mod, params)
        with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
            graph, lib, params = relay.build(mod, params=params, target="cuda")
    else:
        print("Compiling with cudnn.")
        # Offload supported ops to cuDNN via the external-library flag on the CUDA target.
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(mod, params=params, target="cuda -libs=cudnn")

    # Create the graph runtime on GPU 0, set inputs, and do one warm-up run.
    mod_ = graph_runtime.create(graph, lib, ctx=tvm.gpu(0))
    mod_.set_input("x", np.random.uniform(-1, 1, (100, 2048, 33, 33)).astype(np.float32))
    mod_.set_input(**params)
    mod_.run()
    # _cudart.cudaProfilerStart()  # optional profiler-capture hook, left disabled
    # Time 10 repeats of 4 runs each and report mean and standard deviation.
    timer = mod_.module.time_evaluator("run", tvm.gpu(0), number=4, repeat=10)
    tcost = timer()
    prof_res = np.array(tcost.results) * 1000  # convert to milliseconds
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))


if __name__ == "__main__":
    compile_graph(use_trt=False)
    compile_graph(use_trt=True)
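
To reproduce the comparison, save the script and run it directly (the file name compare_cudnn_trt.py below is only illustrative). This assumes a TVM build with CUDA, cuDNN, and the TensorRT integration enabled (typically the USE_CUDA, USE_CUDNN, USE_TENSORRT_CODEGEN, and USE_TENSORRT_RUNTIME options in config.cmake), plus a GPU with enough memory for the 100x2048x33x33 input batch.

python3 compare_cudnn_trt.py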