Cody Yu (comaniac)

"""BYOC Demo using TensorRT."""
# pylint: disable=invalid-name,redefined-outer-name,missing-function-docstring
# config.cmake
# set(USE_TENSORRT_CODEGEN ON)
# set(USE_TENSORRT_RUNTIME ON)
# Add TensorRT to LD_LIBRARY_PATH if using the tarball release.
# export LD_LIBRARY_PATH=/path/to/tensorrt/lib:$LD_LIBRARY_PATH
import time
import numpy as np
import tvm
from tvm import relay
from tvm.runtime.vm import VirtualMachine
target = "cuda"
data_shape = (relay.Any(), 3, 224, 224)
weight_shape = (32, 3, 3, 3)
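A minimal sketch of how this demo might continue: build a single conv2d, partition it for TensorRT, and run it through the Relay VM (needed because of the relay.Any() batch dimension). The network and variable names below are assumptions, not taken from the gist, and partition_for_tensorrt returned a (mod, config) pair in older TVM releases.
from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt

data = relay.var("data", shape=data_shape, dtype="float32")
weight = relay.var("weight", shape=weight_shape, dtype="float32")
out = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))

# Offload supported operators to TensorRT (newer TVM returns just the module).
mod = partition_for_tensorrt(mod)
with tvm.transform.PassContext(opt_level=3):
    vm_exec = relay.vm.compile(mod, target=target)

# The VM handles the dynamic batch dimension declared via relay.Any().
vm = VirtualMachine(vm_exec, tvm.cuda(0))
data_np = np.random.uniform(size=(1, 3, 224, 224)).astype("float32")
weight_np = np.random.uniform(size=weight_shape).astype("float32")
result = vm.invoke("main", data_np, weight_np)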

Lorien: A Hyper-Automated Tuning System for Tensor Operators

Lorien is a system built on top of TVM to massively explore and benchmark the best schedule configurations of TOPI schedules.

Motivation

Although TVM already has TOPI (TVM Operator Inventory), which provides implementations of algorithms and schedules for commonly used operators such as conv2d and dense, there is a challenge that makes TOPI hard to improve efficiently.

import numpy as np
import tvm
from tvm import auto_scheduler, te, topi
from tvm.te import schedule
# The last layer in resnet
H, W, CO, CI, KH, KW, strides, padding = 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)
@auto_scheduler.register_workload
def conv2d(N, H, W, CO, CI, KH, KW, stride, padding):
    # Body completed following the standard auto-scheduler conv2d workload
    # pattern (assumption; the gist preview truncates here).
    data = te.placeholder((N, H, W, CI), name="data")
    kernel = te.placeholder((KH, KW, CI, CO), name="kernel")
    out = topi.nn.conv2d_nhwc(data, kernel, stride, padding, dilation=1)
    return [data, kernel, out]
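With the workload registered, a search task can be created as in the standard auto-scheduler flow (a sketch; the batch size of 1 is an assumption):
task = auto_scheduler.SearchTask(
    func=conv2d, args=(1, H, W, CO, CI, KH, KW, strides, padding),
    target=tvm.target.Target("cuda")
)
print(task.compute_dag)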
import timeit
import numpy as np
import torch
import tvm
from tvm import auto_scheduler
import mnm
from mnm.testing.utils import ir_fusion, ir_simplify, get_vm_executor, get_vm_profiler
# PyTorch reference:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#train-the-network
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import tvm
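For reference, the data-loading step from the linked tutorial (this is the standard torchvision CIFAR-10 recipe; the rest of the gist body is not shown here):
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root="./data", train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)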
home/ubuntu/meta/src/op/dispatch/tvmjit/unary.cc:55: Error: Failed to JIT mnm_op_erf: RuntimeError:
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
#include <cuda_fp16.h>
__device__ half max(half a, half b)
{
  return __hgt(__half(a), __half(b)) ? a : b;
}
__device__ half min(half a, half b)
{
  return __hlt(__half(a), __half(b)) ? a : b;
}
#endif
import numpy as np
import tvm
from tvm import te, tir, topi
from tvm.topi import utils
dev = tvm.device("gpu", 0)
target = tvm.target.Target("cuda")
### Copied from topi/cuda/injective.py, with the block/thread numbers made configurable
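A sketch of what the configurable variant might look like, assuming the usual fuse/split/bind pattern from topi/cuda/injective.py (the function and parameter names are illustrative, not from the gist):
def schedule_injective_configurable(out, num_thread=1024):
    """Injective schedule with a configurable thread count (sketch)."""
    s = te.create_schedule(out.op)
    # Fuse all output axes into one, then split across blocks and threads.
    fused = s[out].fuse(*s[out].op.axis)
    bx, tx = s[out].split(fused, factor=num_thread)
    s[out].bind(bx, te.thread_axis("blockIdx.x"))
    s[out].bind(tx, te.thread_axis("threadIdx.x"))
    return s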
import numpy as np
import tvm
from tvm import relay, auto_scheduler
import tvm.relay.testing
from tvm.contrib import graph_executor
def get_network(name, batch_size, layout="NHWC", dtype="float32"):
"""Get the symbol definition and random weight of a network"""
comaniac / launch-github-runner.sh
Created February 10, 2022 01:24
Register and launch a GitHub Actions runner for an org, and remove it on exit.
#!/usr/bin/env bash
set -e
RUNNER_VERSION="2.287.1"
# The path to the new runner.
RUNNER_PATH=$1
# The target Github org.
GITHUB_ORG=$2
# Optional runner label.
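# Everything below is a sketch of a plausible continuation; the gist preview
# is truncated here, so the exact body is an assumption based on the
# description above.
RUNNER_LABEL=${3:-""}

# Download and unpack the runner release.
mkdir -p "$RUNNER_PATH" && cd "$RUNNER_PATH"
curl -sL -o runner.tar.gz \
  "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz"
tar xzf runner.tar.gz

# Registration/removal tokens come from the GitHub REST API; GITHUB_TOKEN must
# hold a PAT with admin:org scope (assumption).
REG_TOKEN=$(curl -s -X POST -H "Authorization: token ${GITHUB_TOKEN}" \
  "https://api.github.com/orgs/${GITHUB_ORG}/actions/runners/registration-token" \
  | python3 -c 'import json, sys; print(json.load(sys.stdin)["token"])')

cleanup() {
  # Remove the runner on exit, per the description above.
  RM_TOKEN=$(curl -s -X POST -H "Authorization: token ${GITHUB_TOKEN}" \
    "https://api.github.com/orgs/${GITHUB_ORG}/actions/runners/remove-token" \
    | python3 -c 'import json, sys; print(json.load(sys.stdin)["token"])')
  ./config.sh remove --token "$RM_TOKEN"
}
trap cleanup EXIT

./config.sh --url "https://github.com/${GITHUB_ORG}" --token "$REG_TOKEN" \
  ${RUNNER_LABEL:+--labels "$RUNNER_LABEL"} --unattended
./run.sh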