Cody Yu (comaniac)

"""BYOC Demo using TensorRT."""
# pylint: disable=invalid-name,redefined-outer-name,missing-function-docstring
# config.cmake
# set(USE_TENSORRT_CODEGEN ON)
# set(USE_TENSORRT_RUNTIME ON)
# Add TensorRT to LD_LIBRARY_PATH if using the tarball release.
# export LD_LIBRARY_PATH=/path/to/tensorrt/lib:$LD_LIBRARY_PATH
import time
import numpy as np
import tvm
from tvm import relay
from tvm.runtime.vm import VirtualMachine
target = "cuda"
data_shape = (relay.Any(), 3, 224, 224)
weight_shape = (32, 3, 3, 3)
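A minimal sketch of how this demo might continue: build a single conv2d, partition it for TensorRT, and run it through the Relay VM (needed because of the relay.Any() batch dimension). The network and variable names below are assumptions, not taken from the gist, and partition_for_tensorrt returned a (mod, config) pair in older TVM releases.
from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt

data = relay.var("data", shape=data_shape, dtype="float32")
weight = relay.var("weight", shape=weight_shape, dtype="float32")
out = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))

# Offload supported operators to TensorRT (newer TVM returns just the module).
mod = partition_for_tensorrt(mod)
with tvm.transform.PassContext(opt_level=3):
    vm_exec = relay.vm.compile(mod, target=target)

# The VM handles the dynamic batch dimension declared via relay.Any().
vm = VirtualMachine(vm_exec, tvm.cuda(0))
data_np = np.random.uniform(size=(1, 3, 224, 224)).astype("float32")
weight_np = np.random.uniform(size=weight_shape).astype("float32")
result = vm.invoke("main", data_np, weight_np)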

Lorien: A Hyper-Automated Tuning System for Tensor Operators

Lorien is a system built on top of TVM to massively explore and benchmark the best schedule configurations of TOPI schedules.

Motivation

Although TVM already has TOPI (TVM Operator Inventory), which provides implementations of algorithms and schedules for commonly used operators such as conv2d and dense, there is a challenge that makes TOPI hard to improve efficiently.

import numpy as np
import tvm
from tvm import auto_scheduler, te, topi
from tvm.te import schedule
# The last layer in resnet
H, W, CO, CI, KH, KW, strides, padding = 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)
@auto_scheduler.register_workload
def conv2d(N, H, W, CO, CI, KH, KW, stride, padding):
    # Body completed following the standard auto-scheduler conv2d workload
    # pattern (assumption; the gist preview truncates here).
    data = te.placeholder((N, H, W, CI), name="data")
    kernel = te.placeholder((KH, KW, CI, CO), name="kernel")
    out = topi.nn.conv2d_nhwc(data, kernel, stride, padding, dilation=1)
    return [data, kernel, out]
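With the workload registered, a search task can be created as in the standard auto-scheduler flow (a sketch; the batch size of 1 is an assumption):
task = auto_scheduler.SearchTask(
    func=conv2d, args=(1, H, W, CO, CI, KH, KW, strides, padding),
    target=tvm.target.Target("cuda")
)
print(task.compute_dag)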
import timeit
import numpy as np
import torch
import tvm
from tvm import auto_scheduler
import mnm
from mnm.testing.utils import ir_fusion, ir_simplify, get_vm_executor, get_vm_profiler
# PyTorch reference:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#train-the-network
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import tvm
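For reference, the data-loading step from the linked tutorial (this is the standard torchvision CIFAR-10 recipe; the rest of the gist body is not shown here):
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root="./data", train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)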
home/ubuntu/meta/src/op/dispatch/tvmjit/unary.cc:55: Error: Failed to JIT mnm_op_erf: RuntimeError:
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
#include <cuda_fp16.h>
__device__ half max(half a, half b)
{
  return __hgt(__half(a), __half(b)) ? a : b;
}
__device__ half min(half a, half b)
{
  return __hlt(__half(a), __half(b)) ? a : b;
}
#endif
import numpy as np
import tvm
from tvm import te, tir, topi
from tvm.topi import utils
dev = tvm.device("gpu", 0)
target = tvm.target.Target("cuda")
### Copied from topi/cuda/injective.py, with the block/thread numbers made configurable
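A sketch of what the configurable variant might look like, assuming the usual fuse/split/bind pattern from topi/cuda/injective.py (the function and parameter names are illustrative, not from the gist):
def schedule_injective_configurable(out, num_thread=1024):
    """Injective schedule with a configurable thread count (sketch)."""
    s = te.create_schedule(out.op)
    # Fuse all output axes into one, then split across blocks and threads.
    fused = s[out].fuse(*s[out].op.axis)
    bx, tx = s[out].split(fused, factor=num_thread)
    s[out].bind(bx, te.thread_axis("blockIdx.x"))
    s[out].bind(tx, te.thread_axis("threadIdx.x"))
    return s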
import numpy as np
import tvm
from tvm import relay, auto_scheduler
import tvm.relay.testing
from tvm.contrib import graph_executor
def get_network(name, batch_size, layout="NHWC", dtype="float32"):
"""Get the symbol definition and random weight of a network"""
comaniac / launch-github-runner.sh
Created February 10, 2022 01:24
Register and launch a GitHub Actions runner for an org, and remove it on exit.
#!/usr/bin/env bash
set -e
RUNNER_VERSION="2.287.1"
# The path to the new runner.
RUNNER_PATH=$1
# The target Github org.
GITHUB_ORG=$2
# Optional runner label.
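# Everything below is a sketch of a plausible continuation; the gist preview
# is truncated here, so the exact body is an assumption based on the
# description above.
RUNNER_LABEL=${3:-""}

# Download and unpack the runner release.
mkdir -p "$RUNNER_PATH" && cd "$RUNNER_PATH"
curl -sL -o runner.tar.gz \
  "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz"
tar xzf runner.tar.gz

# Registration/removal tokens come from the GitHub REST API; GITHUB_TOKEN must
# hold a PAT with admin:org scope (assumption).
REG_TOKEN=$(curl -s -X POST -H "Authorization: token ${GITHUB_TOKEN}" \
  "https://api.github.com/orgs/${GITHUB_ORG}/actions/runners/registration-token" \
  | python3 -c 'import json, sys; print(json.load(sys.stdin)["token"])')

cleanup() {
  # Remove the runner on exit, per the description above.
  RM_TOKEN=$(curl -s -X POST -H "Authorization: token ${GITHUB_TOKEN}" \
    "https://api.github.com/orgs/${GITHUB_ORG}/actions/runners/remove-token" \
    | python3 -c 'import json, sys; print(json.load(sys.stdin)["token"])')
  ./config.sh remove --token "$RM_TOKEN"
}
trap cleanup EXIT

./config.sh --url "https://github.com/${GITHUB_ORG}" --token "$REG_TOKEN" \
  ${RUNNER_LABEL:+--labels "$RUNNER_LABEL"} --unattended
./run.sh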