Skip to content

Instantly share code, notes, and snippets.

View yzhliu's full-sized avatar

Yizhi Liu yzhliu

  • Boson AI
  • Bay Area, the United States
View GitHub Profile
import argparse
import torch
pt_dtype_mappings = {
"float": torch.float,
"half": torch.half,
"float16": torch.float16,
"bfloat16": torch.bfloat16,
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
import numpy as np
import tvm
from tvm.contrib import graph_runtime
from tvm.contrib.util import tempdir
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
#import mxnet
extern "C" __global__ void tvmop_kernel0( float* __restrict__ buffer, float* __restrict__ buffer1, float* __restrict__ buffer2, int tindex, int tindex1, int tindex2, int stride, int stride1, int stride2, int stride3, int stride4, int stride5, int stride6, int stride7, int stride8) {
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) / tindex1) < tindex2) {
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) % tindex1) < tindex1) {
if ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex) < tindex) {
if ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) < (tindex2 * tindex1)) {
if (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) < ((tindex2 * tindex1) * tindex)) {
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) / tindex1) < tindex2) {
if (0 <= ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) % tindex1)) {
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) % ti
extern "C" __global__ void tvmop_kernel0( float* __restrict__ buffer, float* __restrict__ buffer1, float* __restrict__ buffer2, int tindex, int tindex1, int tindex2, int stride, int stride1, int stride2, int stride3, int stride4, int stride5, int stride6, int stride7, int stride8) {
if (((int)blockIdx.x) < (((tindex * tindex1) * tindex2) >> 6)) {
if (((((tindex1 >= 0) && ((((((tindex2 >= 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) >= 0)) || ((tindex2 < 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) <= 0))) ? (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex2) : ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex2) - 1)) % tindex1) >= 0)) || ((tindex1 < 0) && ((((((tindex2 >= 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) >= 0)) || ((tindex2 < 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) <= 0))) ? (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex2) : ((((((int)blockIdx.x) * 64) + ((int)threadIdx.
import tvm
import topi
from topi.util import get_const_tuple
import numpy as np
from topi.nn.pad import pad
# on a2: python3 -m tvm.exec.rpc_server --port=8499
# target = 'llvm -mcpu=core-avx2'
# target = 'llvm -device=arm_cpu -target=aarch64-linux-gnu -mattr=+v8.4a,+fp16fml,+fullfp16'
diff --git a/src/arithmetic/const_fold.h b/src/arithmetic/const_fold.h
index fbf8fe7e..1c397f40 100644
--- a/src/arithmetic/const_fold.h
+++ b/src/arithmetic/const_fold.h
@@ -101,33 +101,28 @@ inline bool IsIndexType(const Type& type) {
// specialization of constant folders.
template<>
inline Expr TryConstFold<ir::Add>(Expr a, Expr b) {
- TVM_ARITH_CONST_PROPAGATION({
+ TVM_INDEX_CONST_PROPAGATION({
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], "NCHW", "float32"], {"i": 417, "c": null, "e": [["tile_ic", "sp", [1, 1024]], ["tile_oc", "sp", [64, 16]], ["tile_ow", "sp", [1, 7]], ["tile_oh", "ot", 2]], "t": "direct"}], "r": [[0.00010386519659715739], 0, 1.2961008548736572, 1541133775.742406], "v": 0.1}
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], "float32"], {"i": 777271, "c": null, "e": [["tile_co", "sp", [1024, 1]], ["tile_oh", "sp", [1, 7]], ["tile_ow", "sp", [7, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 8, 6, 7]], ["reorder_1", "re", [0, 1, 2, 3, 4, 5, 6]],
Extract tasks...
Tuning...
[Task 1/19] Current/Best: 0.00/ 79.93 GFLOPS | Progress: (484/1000) | 2325.08 s Done.
[Task 2/19] Current/Best: 7.72/ 13.22 GFLOPS | Progress: (756/1000) | 1381.61 s Done.
[Task 3/19] Current/Best: 10.29/ 80.49 GFLOPS | Progress: (440/1000) | 1910.56 s Done.
[Task 4/19] Current/Best: 2.18/ 6.38 GFLOPS | Progress: (1000/1000) | 1627.55 s Done.
[Task 5/19] Current/Best: 13.69/ 116.10 GFLOPS | Progress: (504/1000) | 1762.37 s Done.
[Task 6/19] Current/Best: 8.73/ 8.73 GFLOPS | Progress: (36/1000) | 66.31 sLLVM ERROR: Cannot select: 0x56385e2c8368: i32 = X86ISD::CMP 0x56385e2ca1e8, 0x56385e2c9550
0x56385e2ca1e8: v16i1 = and 0x56385e202548, 0x56385e213948
0x56385e202548: v16i1 = bitcast 0x56385e202a28
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], "NCHW", "float32"], {"i": 416, "c": null, "e": [["tile_ic", "sp", [2, 512]], ["tile_oc", "sp", [64, 16]], ["tile_ow", "sp", [1, 7]], ["tile_oh", "ot", 2]], "t": "direct"}], "r": [[0.0012855558738853504], 0, 1.3173747062683105, 1541097220.10334], "v": 0.1}
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], "float32"], {"i": 354673, "c": null, "e": [["tile_co", "sp", [1024, 1]], ["tile_oh", "sp", [1, 7]], ["tile_ow", "sp", [1, 7]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8]], ["reorder_1", "re", [0, 1, 2, 3, 6, 4, 5]], ["a