snowolfhawk/grouped_onnx_tvm_opencl.ipynb

## grouped_onnx_tvm_opencl.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tvm import rpc\n",
    "import tvm\n",
    "import onnx\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch \n",
    "import torch.nn as nn\n",
    "import torchvision\n",
    "import torchvision.transforms as transforms\n",
    "from torch.autograd import Variable\n",
    "import tvm.relay as relay\n",
    "import numpy as np\n",
    "import torch.onnx\n",
    "from tvm.contrib import util, graph_runtime as runtime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConvNet(nn.Module):\n",
    "    \"\"\"A single bias-free ``nn.Conv2d`` wrapped in ``nn.Sequential``.\n",
    "\n",
    "    Channel counts, kernel size, stride and padding are not arguments:\n",
    "    they are read from the notebook-level names ``in_c``, ``num_filters``,\n",
    "    ``kdim``, ``stride`` and ``padding`` when an instance is created, so\n",
    "    those names must already be defined at that point.\n",
    "\n",
    "    Args:\n",
    "        groups: forwarded to ``nn.Conv2d`` as its ``groups`` argument.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, groups=1):\n",
    "        super().__init__()\n",
    "        conv = nn.Conv2d(\n",
    "            in_c,\n",
    "            num_filters,\n",
    "            kernel_size=kdim,\n",
    "            stride=stride,\n",
    "            padding=padding,\n",
    "            groups=groups,\n",
    "            bias=False\n",
    "        )\n",
    "        self.layer1 = nn.Sequential(conv)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return ``self.layer1`` applied to ``x``.\"\"\"\n",
    "        return self.layer1(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Device:\n",
    "    \"\"\"Container object for tvm device data.\n",
    "\n",
    "    Every constructor argument is stored unchanged on the attribute of\n",
    "    the same name: ``name``, ``session``, ``target``, ``ctx``,\n",
    "    ``target_host`` (default ``None``) and ``opt_level`` (default ``1``).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, name, session, target, ctx, target_host=None,\n",
    "                 opt_level=1):\n",
    "        # Label and session object.\n",
    "        self.name, self.session = name, session\n",
    "        # Build settings.\n",
    "        self.target, self.target_host = target, target_host\n",
    "        self.opt_level = opt_level\n",
    "        # Context object.\n",
    "        self.ctx = ctx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_tvm_model(host, onnx_model, inputs, inputs_name):\n",
    "    \"\"\"Compile an ONNX model with Relay, run it on ``host`` and time it.\n",
    "\n",
    "    Args:\n",
    "        host: object exposing ``target``, ``target_host``, ``opt_level``,\n",
    "            ``session`` and ``ctx`` (e.g. a ``Device``).\n",
    "        onnx_model: loaded ONNX model passed to ``relay.frontend.from_onnx``.\n",
    "        inputs: input array; its ``.shape`` goes into the shape dict and\n",
    "            the array itself is bound to the graph input.\n",
    "        inputs_name: name of the graph input that ``inputs`` is bound to.\n",
    "\n",
    "    Returns:\n",
    "        The ``mean`` attribute of the result of the graph runtime's\n",
    "        ``run`` time evaluator.\n",
    "    \"\"\"\n",
    "    model_name = 'model'\n",
    "    shape_dict = {inputs_name: inputs.shape}\n",
    "    syms, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
    "\n",
    "    # The build returns the graph, the compiled library and the parameters,\n",
    "    # which optimization may have rewritten.\n",
    "    with relay.build_config(opt_level=host.opt_level):\n",
    "        graph, lib, params = relay.build_module.build(\n",
    "            syms, host.target, params=params,\n",
    "            target_host=host.target_host)\n",
    "\n",
    "    # Save the library in a local temporary directory.\n",
    "    tmp = util.tempdir()\n",
    "    tarname = str(model_name) + '.tar'\n",
    "    lib_fname = tmp.relpath(tarname)\n",
    "    lib.export_library(lib_fname)\n",
    "\n",
    "    # Upload the library through the host's session and load it there.\n",
    "    remote = host.session\n",
    "    remote.upload(lib_fname)\n",
    "    rlib = remote.load_module(tarname)\n",
    "\n",
    "    # Create the runtime module on the host's context.\n",
    "    ctx = host.ctx\n",
    "    module = runtime.create(graph, rlib, ctx)\n",
    "\n",
    "    # Set the parameters (this may take a while), then the input data.\n",
    "    module.set_input(**params)\n",
    "    module.set_input(key=inputs_name, value=inputs)\n",
    "\n",
    "    # One untimed run before measuring.\n",
    "    module.run()\n",
    "\n",
    "    timer = module.module.time_evaluator('run', ctx)\n",
    "    return timer().mean"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create PyTorch models, export them to ONNX, and load them into TVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convolution hyperparameters; in_c, num_filters, kdim, stride and padding\n",
    "# are read by ConvNet when an instance is created.\n",
    "in_c = 256\n",
    "num_filters = 256\n",
    "in_h, in_w = 28, 28\n",
    "kdim = 3\n",
    "groups = 1\n",
    "stride = 1\n",
    "padding = 0\n",
    "in_shape = (1, in_c, in_h, in_w)\n",
    "# Group counts for which a model is created.\n",
    "num_groups = [1, 2, 4, 8, 16, 32, 64, 128, 256]\n",
    "\n",
    "# Fixed seed so the random test input is identical on every run.\n",
    "torch.manual_seed(0)\n",
    "test_input = Variable(torch.randn(*in_shape))\n",
    "test_input_numpy = test_input.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one ConvNet per group count and export each of them to ONNX.\n",
    "save_path = '/tmp/'\n",
    "fname = 'grouped_model_'\n",
    "models = []\n",
    "\n",
    "for g in num_groups:\n",
    "    save_name = ''.join([save_path, fname, str(g), '.onnx'])\n",
    "    net = ConvNet(g)\n",
    "    torch.onnx.export(net, test_input, save_name)\n",
    "    models.append(net)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every exported model back from disk, keyed as 'G(<groups>)'.\n",
    "save_path = '/tmp/'\n",
    "fname = 'grouped_model_'\n",
    "onnx_models = dict()\n",
    "\n",
    "for g in num_groups:\n",
    "    save_name = save_path + fname + str(g) + '.onnx'\n",
    "    name = 'G({})'.format(g)\n",
    "    onnx_models[name] = onnx.load(save_name)\n",
    "    # Check that the IR is well formed\n",
    "    onnx.checker.check_model(onnx_models[name])\n",
    "    # Print a human readable representation of the graph\n",
    "    #print(onnx.helper.printable_graph(onnx_models[name].graph))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "## local: 'llvm' target on tvm.cpu(0), with an rpc.LocalSession\n",
    "name = 'local'\n",
    "opt_level = 1\n",
    "session = rpc.LocalSession()\n",
    "target = tvm.target.create('llvm')\n",
    "ctx = tvm.cpu(0)\n",
    "local = Device(name=name, session=session, target=target, ctx=ctx,\n",
    "               opt_level=opt_level)\n",
    "# This cell starts the host list from scratch.\n",
    "hosts = [local]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "## local_ocl: 'opencl' target on session.cl(0), with an rpc.LocalSession\n",
    "name = 'local_ocl'\n",
    "session = rpc.LocalSession()\n",
    "target = 'opencl'\n",
    "ctx = session.cl(0)\n",
    "opt_level = 0\n",
    "local_ocl = Device(name, session, target, ctx, opt_level=opt_level)\n",
    "# Drop any entry a previous run of this cell added, so that re-running\n",
    "# the cell does not register the same host twice.\n",
    "hosts[:] = [h for h in hosts if h.name != name]\n",
    "hosts.append(local_ocl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "local G(64) 0.0015020487\n",
      "local G(2) 0.2356206468\n",
      "local G(128) 0.0011264666\n",
      "local G(16) 0.0254078226\n",
      "local G(8) 0.0531303658\n",
      "local G(4) 0.10466399239999999\n",
      "local G(256) 0.0004619161\n",
      "local G(1) 0.0545677866\n",
      "local G(32) 0.0105879979\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "Direct host side access to device memory is detected in fused_nn_conv2d_9. Did you forget to bind?",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-22-20b41053bf48>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhost\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mhosts\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;32min\u001b[0m \u001b[0monnx_models\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m         \u001b[0mrun_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_tvm_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      7\u001b[0m         \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhost\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_time\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-16-5c190c0a8820>\u001b[0m in \u001b[0;36mrun_tvm_model\u001b[0;34m(host, onnx_model, inputs, inputs_name)\u001b[0m\n\u001b[1;32m      9\u001b[0m             graph, lib, params = relay.build_module.build(\n\u001b[1;32m     10\u001b[0m                 \u001b[0msyms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m                 target_host=target_host)\n\u001b[0m\u001b[1;32m     12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     13\u001b[0m     \u001b[0;31m# After `relay.build`, you will get three return values: graph,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/tools/tvm/python/tvm/relay/build_module.py\u001b[0m in \u001b[0;36mbuild\u001b[0;34m(func, target, target_host, params)\u001b[0m\n\u001b[1;32m    303\u001b[0m         \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mir_pass\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfer_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    304\u001b[0m         \u001b[0mgraph_gen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_graph_gen\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraphRuntimeCodegen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 305\u001b[0;31m         \u001b[0mgraph_json\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlowered_funcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_gen\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcodegen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    306\u001b[0m         mod = _tvm_build_module(\n\u001b[1;32m    307\u001b[0m             lowered_funcs, target=target, target_host=target_host)\n",
      "\u001b[0;32m~/tools/tvm/python/tvm/build_module.py\u001b[0m in \u001b[0;36mbuild\u001b[0;34m(inputs, args, target, target_host, name, binds)\u001b[0m\n\u001b[1;32m    615\u001b[0m     \u001b[0mdevice_modules\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    616\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mtar\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflist\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtarget_flist\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 617\u001b[0;31m         \u001b[0mfhost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmdev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_build_for_device\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtar\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_host\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    618\u001b[0m         \u001b[0;31m# Save the current lowered functions of the host and the device module.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    619\u001b[0m         \u001b[0mfhost_all\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mfhost\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/tools/tvm/python/tvm/build_module.py\u001b[0m in \u001b[0;36m_build_for_device\u001b[0;34m(flist, target, target_host)\u001b[0m\n\u001b[1;32m    444\u001b[0m             raise ValueError(\n\u001b[1;32m    445\u001b[0m                 \u001b[0;34m\"Direct host side access to device memory is detected in %s. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 446\u001b[0;31m                 \"Did you forget to bind?\" % func.name)\n\u001b[0m\u001b[1;32m    447\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mcontainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoweredFunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMixedFunc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    448\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mcurrent_build_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetect_global_barrier\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: Direct host side access to device memory is detected in fused_nn_conv2d_9. Did you forget to bind?"
     ]
    }
   ],
   "source": [
    "inputs_name = '0'\n",
    "inputs = test_input_numpy\n",
    "\n",
    "for host in hosts:\n",
    "    # Walk num_groups rather than the dict so results print in a fixed order.\n",
    "    for g in num_groups:\n",
    "        model_name = 'G({})'.format(g)\n",
    "        model = onnx_models[model_name]\n",
    "        try:\n",
    "            run_time = run_tvm_model(host, model, inputs, inputs_name)\n",
    "        except ValueError as err:\n",
    "            # Report the failing host/model pair and keep going, so one\n",
    "            # failed build does not abort the rest of the sweep.\n",
    "            print(host.name, model_name, 'FAILED:', err)\n",
    "            continue\n",
    "        print(host.name, model_name, run_time)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "meth",
   "language": "python",
   "name": "meth"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  },
  "notify_time": "5"
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"from tvm import rpc\n",
	"import tvm\n",
	"import onnx\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"import torch \n",
	"import torch.nn as nn\n",
	"import torchvision\n",
	"import torchvision.transforms as transforms\n",
	"from torch.autograd import Variable\n",
	"import tvm.relay as relay\n",
	"import numpy as np\n",
	"import torch.onnx\n",
	"from tvm.contrib import util, graph_runtime as runtime"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"class ConvNet(nn.Module):\n",
	"    \"\"\"A single bias-free ``nn.Conv2d`` wrapped in ``nn.Sequential``.\n",
	"\n",
	"    Channel counts, kernel size, stride and padding are not arguments:\n",
	"    they are read from the notebook-level names ``in_c``, ``num_filters``,\n",
	"    ``kdim``, ``stride`` and ``padding`` when an instance is created, so\n",
	"    those names must already be defined at that point.\n",
	"\n",
	"    Args:\n",
	"        groups: forwarded to ``nn.Conv2d`` as its ``groups`` argument.\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, groups=1):\n",
	"        super().__init__()\n",
	"        conv = nn.Conv2d(\n",
	"            in_c,\n",
	"            num_filters,\n",
	"            kernel_size=kdim,\n",
	"            stride=stride,\n",
	"            padding=padding,\n",
	"            groups=groups,\n",
	"            bias=False\n",
	"        )\n",
	"        self.layer1 = nn.Sequential(conv)\n",
	"\n",
	"    def forward(self, x):\n",
	"        \"\"\"Return ``self.layer1`` applied to ``x``.\"\"\"\n",
	"        return self.layer1(x)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"class Device:\n",
	"    \"\"\"Container object for tvm device data.\n",
	"\n",
	"    Every constructor argument is stored unchanged on the attribute of\n",
	"    the same name: ``name``, ``session``, ``target``, ``ctx``,\n",
	"    ``target_host`` (default ``None``) and ``opt_level`` (default ``1``).\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, name, session, target, ctx, target_host=None,\n",
	"                 opt_level=1):\n",
	"        self.name = name\n",
	"        self.session = session\n",
	"        self.target = target\n",
	"        self.ctx = ctx\n",
	"        self.target_host = target_host\n",
	"        self.opt_level = opt_level"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"def run_tvm_model(host, onnx_model, inputs, inputs_name):\n",
	"    \"\"\"Compile an ONNX model with Relay, run it on ``host`` and time it.\n",
	"\n",
	"    Args:\n",
	"        host: object exposing ``target``, ``target_host``, ``opt_level``,\n",
	"            ``session`` and ``ctx`` (e.g. a ``Device``).\n",
	"        onnx_model: loaded ONNX model passed to ``relay.frontend.from_onnx``.\n",
	"        inputs: input array; its ``.shape`` goes into the shape dict and\n",
	"            the array itself is bound to the graph input.\n",
	"        inputs_name: name of the graph input that ``inputs`` is bound to.\n",
	"\n",
	"    Returns:\n",
	"        The ``mean`` attribute of the result of the graph runtime's\n",
	"        ``run`` time evaluator.\n",
	"    \"\"\"\n",
	"    model_name = 'model'\n",
	"    shape_dict = {inputs_name: inputs.shape}\n",
	"    syms, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n",
	"\n",
	"    # The build returns the graph, the compiled library and the parameters,\n",
	"    # which optimization may have rewritten.\n",
	"    with relay.build_config(opt_level=host.opt_level):\n",
	"        graph, lib, params = relay.build_module.build(\n",
	"            syms, host.target, params=params,\n",
	"            target_host=host.target_host)\n",
	"\n",
	"    # Save the library in a local temporary directory.\n",
	"    tmp = util.tempdir()\n",
	"    tarname = str(model_name) + '.tar'\n",
	"    lib_fname = tmp.relpath(tarname)\n",
	"    lib.export_library(lib_fname)\n",
	"\n",
	"    # Upload the library through the host's session and load it there.\n",
	"    remote = host.session\n",
	"    remote.upload(lib_fname)\n",
	"    rlib = remote.load_module(tarname)\n",
	"\n",
	"    # Create the runtime module on the host's context.\n",
	"    ctx = host.ctx\n",
	"    module = runtime.create(graph, rlib, ctx)\n",
	"\n",
	"    # Set the parameters (this may take a while), then the input data.\n",
	"    module.set_input(**params)\n",
	"    module.set_input(key=inputs_name, value=inputs)\n",
	"\n",
	"    # One untimed run before measuring.\n",
	"    module.run()\n",
	"\n",
	"    timer = module.module.time_evaluator('run', ctx)\n",
	"    return timer().mean"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Create PyTorch models, export them to ONNX, and load them into TVM"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Convolution hyperparameters; in_c, num_filters, kdim, stride and padding\n",
	"# are read by ConvNet when an instance is created.\n",
	"in_c = 256\n",
	"num_filters = 256\n",
	"in_h, in_w = 28, 28\n",
	"kdim = 3\n",
	"groups = 1\n",
	"stride = 1\n",
	"padding = 0\n",
	"in_shape = (1, in_c, in_h, in_w)\n",
	"# Group counts for which a model is created.\n",
	"num_groups = [1, 2, 4, 8, 16, 32, 64, 128, 256]\n",
	"\n",
	"# Fixed seed so the random test input is identical on every run.\n",
	"torch.manual_seed(0)\n",
	"test_input = Variable(torch.randn(*in_shape))\n",
	"test_input_numpy = test_input.numpy()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Build one ConvNet per group count and export each of them to ONNX.\n",
	"save_path = '/tmp/'\n",
	"fname = 'grouped_model_'\n",
	"models = []\n",
	"\n",
	"for g in num_groups:\n",
	"    save_name = save_path + fname + str(g) + '.onnx'\n",
	"    net = ConvNet(g)\n",
	"    torch.onnx.export(net, test_input, save_name)\n",
	"    models.append(net)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load every exported model back from disk, keyed as 'G(<groups>)'.\n",
	"save_path = '/tmp/'\n",
	"fname = 'grouped_model_'\n",
	"onnx_models = dict()\n",
	"\n",
	"for g in num_groups:\n",
	"    save_name = save_path + fname + str(g) + '.onnx'\n",
	"    name = 'G({})'.format(g)\n",
	"    onnx_models[name] = onnx.load(save_name)\n",
	"    # Check that the IR is well formed\n",
	"    onnx.checker.check_model(onnx_models[name])\n",
	"    # Print a human readable representation of the graph\n",
	"    #print(onnx.helper.printable_graph(onnx_models[name].graph))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"## local: 'llvm' target on tvm.cpu(0), with an rpc.LocalSession\n",
	"name = 'local'\n",
	"opt_level = 1\n",
	"session = rpc.LocalSession()\n",
	"target = tvm.target.create('llvm')\n",
	"ctx = tvm.cpu(0)\n",
	"local = Device(name=name, session=session, target=target, ctx=ctx,\n",
	"               opt_level=opt_level)\n",
	"# This cell starts the host list from scratch.\n",
	"hosts = [local]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [],
	"source": [
	"## local_ocl: 'opencl' target on session.cl(0), with an rpc.LocalSession\n",
	"name = 'local_ocl'\n",
	"session = rpc.LocalSession()\n",
	"target = 'opencl'\n",
	"ctx = session.cl(0)\n",
	"opt_level = 0\n",
	"local_ocl = Device(name, session, target, ctx, opt_level=opt_level)\n",
	"# Drop any entry a previous run of this cell added, so that re-running\n",
	"# the cell does not register the same host twice.\n",
	"hosts[:] = [h for h in hosts if h.name != name]\n",
	"hosts.append(local_ocl)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"local G(64) 0.0015020487\n",
	"local G(2) 0.2356206468\n",
	"local G(128) 0.0011264666\n",
	"local G(16) 0.0254078226\n",
	"local G(8) 0.0531303658\n",
	"local G(4) 0.10466399239999999\n",
	"local G(256) 0.0004619161\n",
	"local G(1) 0.0545677866\n",
	"local G(32) 0.0105879979\n"
	]
	},
	{
	"ename": "ValueError",
	"evalue": "Direct host side access to device memory is detected in fused_nn_conv2d_9. Did you forget to bind?",
	"output_type": "error",
	"traceback": [
	"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
	"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
	"\u001b[0;32m<ipython-input-22-20b41053bf48>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhost\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mhosts\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;32min\u001b[0m \u001b[0monnx_models\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mrun_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_tvm_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhost\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_time\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0;32m<ipython-input-16-5c190c0a8820>\u001b[0m in \u001b[0;36mrun_tvm_model\u001b[0;34m(host, onnx_model, inputs, inputs_name)\u001b[0m\n\u001b[1;32m 9\u001b[0m graph, lib, params = relay.build_module.build(\n\u001b[1;32m 10\u001b[0m \u001b[0msyms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m target_host=target_host)\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m# After `relay.build`, you will get three return values: graph,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0;32m~/tools/tvm/python/tvm/relay/build_module.py\u001b[0m in \u001b[0;36mbuild\u001b[0;34m(func, target, target_host, params)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mir_pass\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfer_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0mgraph_gen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_graph_gen\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraphRuntimeCodegen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 305\u001b[0;31m \u001b[0mgraph_json\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlowered_funcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_gen\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcodegen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 306\u001b[0m mod = _tvm_build_module(\n\u001b[1;32m 307\u001b[0m lowered_funcs, target=target, target_host=target_host)\n",
	"\u001b[0;32m~/tools/tvm/python/tvm/build_module.py\u001b[0m in \u001b[0;36mbuild\u001b[0;34m(inputs, args, target, target_host, name, binds)\u001b[0m\n\u001b[1;32m 615\u001b[0m \u001b[0mdevice_modules\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtar\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflist\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtarget_flist\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 617\u001b[0;31m \u001b[0mfhost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmdev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_build_for_device\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtar\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_host\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 618\u001b[0m \u001b[0;31m# Save the current lowered functions of the host and the device module.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \u001b[0mfhost_all\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mfhost\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0;32m~/tools/tvm/python/tvm/build_module.py\u001b[0m in \u001b[0;36m_build_for_device\u001b[0;34m(flist, target, target_host)\u001b[0m\n\u001b[1;32m 444\u001b[0m raise ValueError(\n\u001b[1;32m 445\u001b[0m \u001b[0;34m\"Direct host side access to device memory is detected in %s. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 446\u001b[0;31m \"Did you forget to bind?\" % func.name)\n\u001b[0m\u001b[1;32m 447\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mcontainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoweredFunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMixedFunc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcurrent_build_config\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetect_global_barrier\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0;31mValueError\u001b[0m: Direct host side access to device memory is detected in fused_nn_conv2d_9. Did you forget to bind?"
	]
	}
	],
	"source": [
	"inputs_name = '0'\n",
	"inputs = test_input_numpy\n",
	"\n",
	"for host in hosts:\n",
	"    # Walk num_groups rather than the dict so results print in a fixed order.\n",
	"    for g in num_groups:\n",
	"        model_name = 'G({})'.format(g)\n",
	"        model = onnx_models[model_name]\n",
	"        try:\n",
	"            run_time = run_tvm_model(host, model, inputs, inputs_name)\n",
	"        except ValueError as err:\n",
	"            # Report the failing host/model pair and keep going, so one\n",
	"            # failed build does not abort the rest of the sweep.\n",
	"            print(host.name, model_name, 'FAILED:', err)\n",
	"            continue\n",
	"        print(host.name, model_name, run_time)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "meth",
	"language": "python",
	"name": "meth"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.3"
	},
	"notify_time": "5"
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}