Skip to content

Instantly share code, notes, and snippets.

@tiandiao123
Created January 23, 2022 22:45
Show Gist options
  • Save tiandiao123/bab72351c2b9a1915548076edf36d0cb to your computer and use it in GitHub Desktop.
import tvm
from tvm import relay
from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
from tvm.contrib import graph_executor
import numpy as np

# Benchmark script: load a pre-compiled TVM module (.so), feed it random
# inputs, and report mean inference latency and throughput (QPS) on CUDA.

# Change this to the path where your compiled TVM .so module is stored.
my_lib_saved_path = "/data00/cuiqing.li/xperf_workplace/xperf_tools/xperf_tools/xperf_pipeline/bytetuner/rh2_tvm_lab.vulgar.pipeline_1.4/deploy_batch_id_32.so"
lib = tvm.runtime.load_module(my_lib_saved_path)
target = "cuda"
ctx = tvm.device(target, 0)
module = graph_executor.GraphModule(lib["default"](ctx))

# Change these to match the model's own input signature: input names and the
# corresponding input shapes and dtypes.
input_names = ["data"]
input_shapes = [[32, 3, 224, 224]]
dtypes = ["float32"]
# Batch size is taken from the leading dimension of the first input;
# it is used below to convert latency into QPS.
batch_size = int(input_shapes[0][0])

# Bind random data to every model input (shapes/dtypes must match the model).
for input_name, input_shape, dtype in zip(input_names, input_shapes, dtypes):
    data_tvm = tvm.nd.array(np.random.uniform(size=input_shape).astype(dtype), ctx)
    module.set_input(input_name, data_tvm)

# get output of tvm
module.run()
out = module.get_output(0)

# Warm up so that lazy initialization / CUDA kernel compilation does not
# pollute the timing below.
print("warming up ... ")
for _ in range(10):
    module.run()

print("Evaluate inference time cost...")
# time_evaluator runs "run" repeatedly and returns per-repeat averages in seconds.
ftimer = module.module.time_evaluator("run", ctx, repeat=10, min_repeat_ms=500)
prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res))
print(message)
# QPS = batch_size / mean latency in seconds.
ms_to_s = np.mean(prof_res) * 0.001
print("the QPS of tvm model is {} ".format(str(batch_size/ ms_to_s)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment