-
-
Save Wheest/87ae338e9b127cf6e552792063afcb3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import tvm | |
import time | |
import numpy as np | |
from tvm.contrib import graph_runtime | |
from tvm.relay import data_dep_optimization as ddo | |
import onnx | |
import itertools | |
import scipy.sparse as sp | |
import os | |
from tvm import relay | |
import argparse | |
# Command-line interface for the evaluation script.
parser = argparse.ArgumentParser(description='TVM Evaluation')
parser.add_argument(
    '--test_numpy',
    action='store_true',
    help='Test numpy version',
)
parser.add_argument(
    '--opt_level',
    default=3,
    type=int,
    help='Optimisation level',
)
parser.add_argument(
    '--backend',
    choices=['cpu', 'gpu'],
    default='cpu',
    help='Backend to use',
)
args = parser.parse_args()
# network definition | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from torch.autograd import Variable | |
import torch | |
import torch.nn as nn | |
import torch | |
class WeeNet(nn.Module):
    """Minimal test network: one bias-free 2D convolution followed by ReLU."""

    def __init__(self, in_c, num_filters, kdim,
                 stride=1, padding=0, groups=1):
        """Build the single conv layer.

        Args:
            in_c: number of input channels.
            num_filters: number of output channels (conv filters).
            kdim: kernel size (square), forwarded to nn.Conv2d.
            stride, padding, groups: forwarded to nn.Conv2d.
        """
        super(WeeNet, self).__init__()
        # Keep the attribute name `layer1`: callers address the weights
        # through the state-dict key 'layer1.weight'.
        self.layer1 = nn.Conv2d(
            in_c,
            num_filters,
            kernel_size=kdim,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )

    def forward(self, x):
        """Convolve `x`, then apply ReLU."""
        return F.relu(self.layer1(x))
# Fix the torch RNG so conv-weight initialisation is reproducible across runs.
# NOTE(review): scipy's sparse.rand below uses numpy's global RNG, which this
# does NOT seed — the generated kernels are not reproducible. Confirm intent.
torch.manual_seed(0)
def setup_model(save_name, input_shape, kernel_shape):
    """Build a WeeNet with random sparse integer weights and export it to ONNX.

    Args:
        save_name: path of the ONNX file to write.
        input_shape: (n, in_c, in_h, in_w) of the traced input.
        kernel_shape: (out_c, in_c, kdim_h, kdim_w) of the conv weights.

    Returns:
        The integer-valued numpy input array used to trace the model,
        so callers can feed the same data to the compiled graph.
    """
    n, in_c, in_h, in_w = input_shape
    out_c, _, kdim_h, kdim_w = kernel_shape

    # Deterministic input: 0..N-1 reshaped to the input shape.
    input_data = np.arange(np.prod(input_shape)).reshape(input_shape)
    # torch.autograd.Variable is a deprecated no-op wrapper; a plain tensor
    # behaves identically.
    x = torch.from_numpy(input_data).float()

    stride = 1
    padding = 2
    # Pass both spatial dims so non-square kernel_shapes work too
    # (nn.Conv2d accepts a (h, w) tuple for kernel_size).
    model = WeeNet(in_c, out_c, (kdim_h, kdim_w), stride=stride, padding=padding)

    from scipy import sparse
    # ~20%-dense random kernel, scaled and rounded to small integers.
    # BUG FIX: both the flat width and the reshape previously used kdim_h
    # for BOTH spatial dimensions, silently ignoring kdim_w.
    kernels = sparse.rand(out_c, in_c * kdim_h * kdim_w, density=0.2)
    kernels = np.asarray(kernels.todense())
    kernels = np.round(kernels * 10, 0)
    kernels = kernels.reshape((out_c, in_c, kdim_h, kdim_w))

    # Install the sparse weights into the conv layer.
    state_dict = model.state_dict()
    state_dict['layer1.weight'] = torch.from_numpy(kernels).float()
    model.load_state_dict(state_dict, strict=True)

    torch.onnx.export(model, x, save_name, input_names=['input_1'])
    return input_data
def import_onnx(name, shape_dict):
    """Load the ONNX file `name` and convert it into a Relay module.

    Returns a (mod, params, shape_dict) triple; `shape_dict` is echoed
    back unchanged for the caller's convenience.
    """
    onnx_model = onnx.load(name)
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    return mod, params, shape_dict
def run_relay_graph(mod, params, shape_dict, input_data, target, ctx):
    """Compile `mod` with TVM, execute it once, and benchmark it.

    Args:
        mod, params: Relay module and parameter dict (from import_onnx).
        shape_dict: input-name -> shape mapping (kept for interface
            compatibility; the input is fed positionally below).
        input_data: numpy array fed as graph input 0.
        target: TVM build target ("llvm" or a CUDA target).
        ctx: TVM device context matching `target`.

    Returns:
        The first output tensor of the compiled module.
    """
    # NOTE: reads the module-level `args` for the optimisation level.
    # (Removed a large commented-out disabled_pass/required_pass experiment
    # that was dead code.)
    with relay.build_config(opt_level=args.opt_level):
        lib = relay.build(mod, target=target, params=params)
    print('built lib!')

    m = graph_runtime.GraphModule(lib['default'](ctx))
    m.set_input(0, input_data)
    m.run()
    tvm_output = m.get_output(0)

    # Benchmark and report. Previously the timings were computed and then
    # silently discarded.
    ftimer = m.module.time_evaluator("run", ctx, repeat=5, number=5)
    prof_res = np.array(ftimer().results) * 1000  # seconds -> milliseconds
    print("Mean inference time (std dev): %.2f ms (%.2f ms)"
          % (np.mean(prof_res), np.std(prof_res)))
    return tvm_output
def run_dense(mod, params, shape_dict, input_data, target, ctx):
    """Run and benchmark the dense (non-sparse) model.

    Thin wrapper around run_relay_graph with an identical signature.
    """
    # BUG FIX: `input_data` was previously omitted from this call, so
    # run_relay_graph (which takes six arguments) raised a TypeError.
    return run_relay_graph(mod, params, shape_dict, input_data, target, ctx)
if __name__ == "__main__":
    # Pick the compilation target and device from the CLI choice
    # (argparse restricts --backend to exactly 'cpu' or 'gpu').
    if args.backend == 'gpu':
        os.environ['TVM_GPU'] = "1"
        target = tvm.target.cuda()
        ctx = tvm.gpu(0)
    elif args.backend == 'cpu':
        os.environ['TVM_GPU'] = "0"
        target = "llvm"
        ctx = tvm.cpu()

    # Problem dimensions: input feature map and conv kernel.
    n, in_c, in_h, in_w = 1, 16, 16, 16
    out_c, kdim_h, kdim_w = 16, 4, 4
    input_shape = (n, in_c, in_h, in_w)
    # BUG FIX: kernel_shape previously used the *input* spatial dims
    # (in_h, in_w), producing a 16x16 kernel and leaving kdim_h/kdim_w
    # unused; the kernel dims were clearly intended.
    kernel_shape = (out_c, in_c, kdim_h, kdim_w)

    # Build the PyTorch model with sparse weights and export it to ONNX.
    save_name = 'weenet.onnx'
    input_data = setup_model(
        save_name=save_name, input_shape=input_shape, kernel_shape=kernel_shape
    )

    # Reference run: import to Relay and execute the dense graph.
    mod, params, shape_dict = import_onnx(save_name, {'input_1': input_shape})
    true_outs = run_relay_graph(mod, params, shape_dict, input_data, target, ctx)

    # Sparse run: re-import, then convert conv2d weights to CSR form.
    mod, params, shape_dict = import_onnx(save_name, {'input_1': input_shape})
    modt, paramst = ddo.simplify_fc_transpose.convert(mod["main"], params)
    mods, paramss = ddo.csr_conv2d.convert(modt, paramst, sparsity_threshold=0.7)
    out = run_relay_graph(mods, paramss, shape_dict, input_data, target, ctx)

    # The sparse graph must reproduce the dense graph's output.
    np.testing.assert_allclose(true_outs.asnumpy(), out.asnumpy(), rtol=1e-5, atol=0)
    print("done!, and correct", )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment