Last active
October 9, 2019 20:45
-
-
Save yaoyaoding/6f846c02a05d9b1d8e01ab77638e7226 to your computer and use it in GitHub Desktop.
Code to reproduce an error when using the auto-tuned schedule; modified from the TVM auto-tuning tutorial.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import tvm | |
from tvm import autotvm | |
from tvm import relay | |
import tvm.relay.testing | |
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner | |
from tvm.contrib.util import tempdir | |
import tvm.contrib.graph_runtime as runtime | |
def get_network(name, batch_size):
    """Build the sample single-conv2d network and its random weights.

    Note: ``name`` and ``batch_size`` are accepted for interface
    compatibility with the tutorial, but the returned network is
    always the fixed conv2d sample constructed below.

    Returns:
        (mod, params, input_shape, output_shape) tuple.
    """
    weight_name = 'weight'
    input_shape = (1, 80, 73, 73)
    output_shape = (1, 192, 71, 71)

    # Uninitialized weight buffer, registered under `weight_name`.
    weight = tvm.nd.empty(shape=(192, 80, 3, 3))
    params = {weight_name: weight}

    data = relay.var('data', shape=input_shape)  # 1 x 80 x 73 x 73
    conv = relay.nn.conv2d(
        data,
        weight=relay.var(weight_name, shape=weight.shape),
        strides=(1, 1),
        padding=(0, 0),
        channels=192,
        kernel_size=(3, 3),
    )  # 1 x 192 x 71 x 71
    func = relay.Function(relay.analysis.free_vars(conv), conv)
    mod = relay.Module.from_expr(func)
    return mod, params, input_shape, output_shape
#### DEVICE CONFIG ####
target = tvm.target.cuda()

#### TUNING OPTION ####
# Index 2 selects the single-conv2d 'sample' network from get_network.
network = ['inception_v3', 'squeezenet', 'sample'][2]
log_file = "%s.log" % network
dtype = 'float32'

# Keyword arguments forwarded to tune_tasks() by tune_and_evaluate().
tuning_option = {
    'log_filename': log_file,
    'tuner': 'xgb',            # XGBoost cost-model tuner
    'n_trial': 100,            # max measurement trials per task
    'early_stopping': 600,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4, min_repeat_ms=150),
        # runner=autotvm.RPCRunner(
        #     '1080ti',  # change the device key to your key
        #     '0.0.0.0', 9190,
        #     number=20, repeat=3, timeout=4, min_repeat_ms=150)
    ),
}
# You can skip the implementation of this function for this tutorial. | |
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    """Tune each autotvm task and record the best configs in `log_filename`.

    Args:
        tasks: list of autotvm tasks to tune (tuned in reverse order).
        measure_option: result of autotvm.measure_option().
        tuner: one of 'xgb', 'xgb-rank', 'ga', 'random', 'gridsearch'.
        n_trial: per-task trial budget (clamped to each task's config space).
        early_stopping: stop a task early after this many fruitless trials.
        log_filename: final log file that receives the best records.
        use_transfer_learning: seed each tuner with history from earlier tasks.
        try_winograd: attempt to swap conv2d tasks for winograd templates.

    Raises:
        ValueError: if `tuner` is not a recognized tuner name.
    """
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template; best-effort, keep original task on failure
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                # winograd only pays off for wide-enough input channels
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # create tmp log file (remove any stale leftover from a previous run)
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        # BUG FIX: clamp per task without overwriting the `n_trial` parameter.
        # Previously `n_trial = min(n_trial, ...)` let one task with a small
        # config space permanently shrink the budget for all later tasks.
        task_trials = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=task_trials,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(task_trials, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])

    # pick best records to a cache file
    # Guarded: with an empty task list no tmp log exists, and pick_best /
    # os.remove would raise on the missing file.
    if os.path.exists(tmp_log_file):
        autotvm.record.pick_best(tmp_log_file, log_filename)
        os.remove(tmp_log_file)
def tune_and_evaluate(tuning_opt):
    """End-to-end driver: extract tuning tasks from the sample network,
    tune them (skipped when a log file already exists), compile with the
    best schedules, and report inference latency on the CUDA device."""
    # extract workloads from relay program
    print("Extract tasks...")
    net_mod, net_params, in_shape, _out_shape = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(net_mod["main"], target=target,
                                              params=net_params,
                                              ops=(relay.op.nn.conv2d,))

    if not os.path.exists(log_file):
        # run tuning tasks
        print("Tuning...")
        tune_tasks(tasks, **tuning_opt)
    else:
        print(f"Found existing log file {log_file} and skip tuning")

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, built_params = relay.build_module.build(
                net_mod, target=target, params=net_params)

        # export library to a temporary directory
        tmp_dir = tempdir()
        lib.export_library(tmp_dir.relpath("net.tar"))

        # load parameters onto the target device
        ctx = tvm.context(str(target), 0)
        graph_mod = runtime.create(graph, lib, ctx)
        input_data = tvm.nd.array(np.random.uniform(size=in_shape).astype(dtype))
        graph_mod.set_input('data', input_data)
        graph_mod.set_input(**built_params)

        # evaluate
        print("Evaluate inference time cost...")
        timer = graph_mod.module.time_evaluator("run", ctx, number=1, repeat=600)
        times_ms = np.array(timer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(times_ms), np.std(times_ms)))
# NOTE: unlike the tutorial this gist is based on, the call below is NOT
# commented out — tuning and evaluation run as soon as the script executes.
# Comment out the line to disable it (e.g. on a web/docs build server).
tune_and_evaluate(tuning_option)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment