Skip to content

Instantly share code, notes, and snippets.

@Wheest
Created January 19, 2021 17:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Wheest/87ae338e9b127cf6e552792063afcb3f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
import tvm
import time
import numpy as np
from tvm.contrib import graph_runtime
from tvm.relay import data_dep_optimization as ddo
import onnx
import itertools
import scipy.sparse as sp
import os
from tvm import relay
import argparse
# Command-line interface for this benchmark script.
parser = argparse.ArgumentParser(description='TVM Evaluation')
# NOTE(review): --test_numpy is never read in the visible code — confirm it is
# used elsewhere or remove it.
parser.add_argument('--test_numpy', action='store_true', help='Test numpy version')
# Relay build optimisation level (0-3); consumed in run_relay_graph.
parser.add_argument('--opt_level', type=int, default=3, help='Optimisation level')
# Selects llvm/CPU or CUDA/GPU target+context in the __main__ block.
parser.add_argument('--backend', default='cpu', choices=['cpu', 'gpu'], help='Backend to use')
args = parser.parse_args()
# network definition
# PyTorch imports for the network definition.  The original repeated
# `import torch` three times and `import torch.nn` twice; each module is
# now imported exactly once (all previously-bound names stay in scope).
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
class WeeNet(nn.Module):
    """Minimal network: one bias-free square convolution followed by ReLU."""

    def __init__(self, in_c, num_filters, kdim,
                 stride=1, padding=0, groups=1):
        super(WeeNet, self).__init__()
        # Single conv layer; square kernel of side `kdim`, no bias term.
        self.layer1 = nn.Conv2d(
            in_c,
            num_filters,
            kernel_size=kdim,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )

    def forward(self, x):
        # conv -> relu, fused into one expression.
        return F.relu(self.layer1(x))
# Fix the PyTorch RNG so module initialisation is reproducible across runs.
# (Does not seed numpy/scipy, so scipy.sparse.rand below remains random.)
torch.manual_seed(0)
def setup_model(save_name, input_shape, kernel_shape):
    """Build a WeeNet with sparse random square-kernel weights and export it to ONNX.

    Parameters
    ----------
    save_name : str
        Path the ONNX model is written to.
    input_shape : tuple
        NCHW input shape ``(n, in_c, in_h, in_w)``.
    kernel_shape : tuple
        Weight shape ``(out_c, in_c, kdim_h, kdim_w)``.  The conv layer is
        built square from ``kdim_h`` only, so ``kdim_w`` is assumed equal to
        ``kdim_h``.

    Returns
    -------
    np.ndarray
        The deterministic int input tensor (values 0..size-1) used as the
        export trace input.
    """
    from scipy import sparse

    _, in_c, _, _ = input_shape
    out_c, _, kdim_h, _ = kernel_shape

    # Deterministic input: 0, 1, 2, ... reshaped to NCHW.
    input_data = np.arange(int(np.prod(input_shape))).reshape(input_shape)
    # Variable() is a deprecated no-op wrapper in modern PyTorch; a plain
    # tensor traces identically.
    x = torch.from_numpy(input_data).float()

    stride = 1
    padding = 2
    model = WeeNet(in_c, out_c, kdim_h, stride=stride, padding=padding)

    # ~80%-sparse random weights, scaled x10 and rounded to small integers.
    # Square kernel (kdim_h x kdim_h) to match the Conv2d built above.
    # NOTE: sparse.rand is not seeded, so weights differ between runs.
    kernels = sparse.rand(out_c, in_c * kdim_h * kdim_h, density=0.2)
    kernels = np.asarray(kernels.todense())
    kernels = np.round(kernels * 10, 0)
    kernels = kernels.reshape((out_c, in_c, kdim_h, kdim_h))

    # Overwrite the randomly-initialised conv weights with our sparse kernel.
    state_dict = model.state_dict()
    state_dict['layer1.weight'] = torch.from_numpy(kernels).float()
    model.load_state_dict(state_dict, strict=True)

    torch.onnx.export(model, x, save_name, input_names=['input_1'])
    return input_data
def import_onnx(name, shape_dict):
    """Load an ONNX file and convert it into a TVM Relay module.

    Returns ``(mod, params, shape_dict)``; the shape dict is passed straight
    through so callers keep the module and its input shapes together.
    """
    onnx_model = onnx.load(name)
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    return mod, params, shape_dict
def run_relay_graph(mod, params, shape_dict, input_data, target, ctx):
    """Compile a Relay module, run it once, benchmark it, and return the output.

    Parameters
    ----------
    mod : Relay IRModule to compile.
    params : dict of weight tensors for the module.
    shape_dict : dict mapping input names to shapes (keyed by "input_1").
    input_data : array bound to the graph's first input.
    target : TVM target ("llvm" or tvm.target.cuda()).
    ctx : TVM device context matching `target`.

    Returns
    -------
    tvm.nd.NDArray
        The first graph output.
    """
    # NOTE(review): relay.build_config is the pre-0.8 TVM API; newer versions
    # use tvm.transform.PassContext.  Kept for compatibility with the TVM
    # version this gist targets.  (The original carried ~30 lines of
    # commented-out disabled_pass/required_pass experiments, deleted here.)
    with relay.build_config(opt_level=args.opt_level):
        lib = relay.build(mod, target=target, params=params)
    print('built lib!')

    m = graph_runtime.GraphModule(lib['default'](ctx))
    m.set_input(0, input_data)
    m.run()
    tvm_output = m.get_output(0)

    # Benchmark: 5 repeats of 5 runs each; results converted to milliseconds.
    # (Previously computed but never reported.)
    ftimer = m.module.time_evaluator("run", ctx, repeat=5, number=5)
    prof_res = np.array(ftimer().results) * 1000
    print("mean inference time: %.3f ms (std %.3f ms)"
          % (np.mean(prof_res), np.std(prof_res)))
    return tvm_output
def run_dense(mod, params, shape_dict, input_data, target, ctx):
    """Compile and run the dense (non-sparse) model via run_relay_graph.

    Bug fix: the forwarded call previously omitted ``input_data``, which
    shifted every later positional argument (``target`` was passed as the
    input, ``ctx`` as the target) and dropped ``ctx`` entirely.
    """
    return run_relay_graph(mod, params, shape_dict, input_data, target, ctx)
if __name__ == "__main__":
    # Pick the TVM target/context from the CLI backend flag.
    if args.backend == 'gpu':
        os.environ['TVM_GPU'] = "1"
        target = tvm.target.cuda()
        ctx = tvm.gpu(0)
    elif args.backend == 'cpu':
        os.environ['TVM_GPU'] = "0"
        target = "llvm"
        ctx = tvm.cpu()

    # Model dimensions: 1x16x16x16 input, 16 output channels, 4x4 kernel.
    n, in_c, in_h, in_w = 1, 16, 16, 16
    out_c, kdim_h, kdim_w = 16, 4, 4
    input_shape = (n, in_c, in_h, in_w)
    # Bug fix: the kernel shape previously used the *input* spatial dims
    # (in_h, in_w), yielding a 16x16 kernel instead of the declared 4x4.
    kernel_shape = (out_c, in_c, kdim_h, kdim_w)

    save_name = 'weenet.onnx'
    input_data = setup_model(
        save_name=save_name, input_shape=input_shape, kernel_shape=kernel_shape
    )

    # Import the exported model and run the dense reference through TVM.
    mod, params, shape_dict = import_onnx(save_name, {'input_1': input_shape})
    true_outs = run_relay_graph(mod, params, shape_dict, input_data, target, ctx)

    # Re-import and convert eligible conv2d weights to CSR sparse form
    # (weights at >= 0.7 sparsity are converted).
    mod, params, shape_dict = import_onnx(save_name, {'input_1': input_shape})
    modt, paramst = ddo.simplify_fc_transpose.convert(mod["main"], params)
    mods, paramss = ddo.csr_conv2d.convert(modt, paramst, sparsity_threshold=0.7)
    out = run_relay_graph(mods, paramss, shape_dict, input_data, target, ctx)

    # The sparse path must reproduce the dense reference output.
    np.testing.assert_allclose(true_outs.asnumpy(), out.asnumpy(), rtol=1e-5, atol=0)
    print("done!, and correct", )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment