Skip to content

Instantly share code, notes, and snippets.

@masahi
Created December 11, 2020 07:48
Show Gist options
  • Save masahi/00d89affc357b7c89f5a2047d6a85893 to your computer and use it in GitHub Desktop.
Save masahi/00d89affc357b7c89f5a2047d6a85893 to your computer and use it in GitHub Desktop.
import logging
import sys
import numpy as np
import tvm
import tvm.topi.testing
from tvm import te, testing
from tvm.topi.utils import get_const_tuple
from tvm import autotvm, topi
# Workload parameters for the single convolution that is tuned and benchmarked
# below. They are read as module-level globals by the conv2d_nchwc template.
batch = 1
in_size = 56
in_channel, num_filter = 64, 64

# Square kernel; the same stride/dilation value is used for both spatial axes.
kernel, stride, padding, dilation = 3, 1, 1, 1

# The input is square, so height and width share one size.
in_height = in_size
in_width = in_size

# Passed to the compute function and used for the randomly generated inputs.
dtype = "float32"

# Channel block size: the innermost dimension(s) of the data and weight shapes.
# NOTE(review): presumably the fp32 lane count of the target's vector unit — confirm.
vec_width = 16
@autotvm.template("conv2d_nchwc")
def conv2d_nchwc():
    """Define the conv2d workload registered as the "conv2d_nchwc" template.

    The data and weight placeholders are declared with their channel
    dimensions already split into blocks of ``vec_width``. All sizes are taken
    from the module-level workload constants.

    Returns:
        A ``(schedule, [A, W, C])`` pair: ``A`` and ``W`` are the input
        placeholders and ``C`` is the tensor returned by the compute function.
    """
    in_chunks = in_channel // vec_width
    out_chunks = num_filter // vec_width

    data = te.placeholder(
        (batch, in_chunks, in_height, in_width, vec_width),
        name="A",
    )
    weight = te.placeholder(
        (out_chunks, in_chunks, kernel, kernel, vec_width, vec_width),
        name="W",
    )

    # The compute/schedule pair is looked up via topi's testing helper for the
    # "llvm" target rather than being written by hand.
    compute, schedule = tvm.topi.testing.get_conv2d_nchw_implement("llvm")
    conv = compute(
        data,
        weight,
        (stride, stride),
        padding,
        (dilation, dilation),
        dtype,
    )
    return schedule([conv]), [data, weight, conv]
# Target string shared by the tuning task and the final build.
target = "llvm -mcpu=icelake-client"

# Instantiate the registered template as a tuning task and show its search space.
task = autotvm.task.create("conv2d_nchwc", args=(), target=target)
print(task.config_space)

# Send AutoTVM's debug-level log records to stdout while tuning runs.
_autotvm_logger = logging.getLogger("autotvm")
_autotvm_logger.setLevel(logging.DEBUG)
_autotvm_logger.addHandler(logging.StreamHandler(sys.stdout))

# Candidates are built and measured locally.
_local_runner = autotvm.LocalRunner(number=5)
measure_option = autotvm.measure_option(builder="local", runner=_local_runner)

tuner = autotvm.tuner.RandomTuner(task)
# Alternative: tuner = autotvm.tuner.XGBTuner(task, loss_type="rank")

# Every measured trial is appended to this log; it is read back after tuning.
log_file = "fp32_conv2d_nchwc.log"
_tuning_callbacks = [autotvm.callback.log_to_file(log_file)]
tuner.tune(
    n_trial=100,
    measure_option=measure_option,
    callbacks=_tuning_callbacks,
)
# Re-create the schedule with the tuning history from log_file applied and the
# target set as the current target context.
with autotvm.apply_history_best(log_file):
    with tvm.target.Target(target):
        s, arg_bufs = conv2d_nchwc()

A, W, B = arg_bufs

# Random inputs matching the placeholder shapes; the output buffer starts zeroed.
a_shape = get_const_tuple(A.shape)
w_shape = get_const_tuple(W.shape)
a_np = np.random.uniform(size=a_shape).astype(dtype)
w_np = np.random.uniform(size=w_shape).astype(dtype)

ctx = tvm.cpu(0)
a = tvm.nd.array(a_np, ctx)
w = tvm.nd.array(w_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)

# Compile exactly once. The script previously also built inside the `with`
# block above, but that module was overwritten here before it was ever used.
func = tvm.build(s, [A, W, B], target)

# One untimed call before the timed runs.
func(a, w, b)

ftimer = func.time_evaluator(func.entry_name, ctx, number=1, repeat=100)
# The results are multiplied by 1000 to convert them to milliseconds.
prof_res = np.array(ftimer(a, w, b).results) * 1000
print(prof_res.mean())

# To inspect the generated assembly, uncomment:
# print(func.get_source("asm"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment