This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import math | |
def sort_by_z_order(a,b): | |
return a.z_value - b.z_value | |
def push_zeros(a, n): | |
if len(a) < n: | |
for i in xrange(n-len(a)): | |
a = '0' + a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import math | |
def sort_by_z_order(a,b): | |
return a.z_value - b.z_value | |
def push_zeros(a, n): | |
if len(a) < n: | |
for i in xrange(n-len(a)): | |
a = '0' + a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[23:36:46] src/codegen/llvm/codegen_amdgpu.cc:177: ; ModuleID = 'myadd__kernel0' | |
source_filename = "myadd__kernel0" | |
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" | |
target triple = "amdgcn-amd-amdhsa-hcc" | |
; Function Attrs: nounwind | |
define amdgpu_kernel void @myadd__kernel0(float addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 { | |
entry: | |
%4 = tail call i32 @llvm.amdgcn.workitem.id.x() | |
%5 = tail call i32 @llvm.amdgcn.workgroup.id.x() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rocm_kernel.co: file format ELF64-amdgpu-hsacobj | |
Disassembly of section .text: | |
myadd__kernel0: | |
s_load_dword s0, s[4:5], 0x18 // 000000001100: C0020002 00000018 | |
v_lshlrev_b32_e32 v0, 7, v0 // 000000001108: 24000087 | |
s_waitcnt lgkmcnt(0) // 00000000110C: BF8C007F | |
v_sub_i32_e32 v1, vcc, s0, v0 // 000000001110: 34020000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.text | |
.hsa_code_object_version 2,1 | |
.hsa_code_object_isa 8,0,3,"AMD","AMDGPU" | |
.globl myadd__kernel0 ; -- Begin function myadd__kernel0 | |
.p2align 8 | |
.type myadd__kernel0,@function | |
.amdgpu_hsa_kernel myadd__kernel0 | |
myadd__kernel0: ; @myadd__kernel0 | |
.amd_kernel_code_t | |
amd_code_version_major = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pylint: disable=invalid-name, no-member, too-many-locals, too-many-statements, too-many-arguments, too-many-branches | |
"""Schedule for conv2d_nchw with auto fusion""" | |
import tvm | |
from .. import util | |
from .. import tag | |
def conv2d_224_3_64(s, temp, temp_R, temp_S, Filter_S, Out, Out_L): | |
"""Schedule conv2d for specific feature_in_out_filter pattern""" | |
# scheduler params |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Benchmark script for performance on GPUs. For example, run the file with: `python cuda_imagenet_bench.py --model='mobilenet'`. For more details about how to set up the inference environment on GPUs, please refer to NNVM Tutorial: ImageNet Inference on the GPU """ | |
import time | |
import argparse | |
import numpy as np | |
import tvm | |
import nnvm.compiler | |
import nnvm.testing | |
from tvm.contrib import util, nvcc | |
from tvm.contrib import graph_runtime as runtime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pylint: disable=invalid-name, unused-argument | |
"""Definition of nn ops""" | |
from __future__ import absolute_import | |
import tvm | |
import topi | |
from topi.util import get_const_int | |
from .tensor import _fschedule_broadcast | |
from . import registry as reg | |
from .registry import OpPattern |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pylint: disable=invalid-name, unused-argument | |
"""Reduction ops""" | |
from __future__ import absolute_import | |
import tvm | |
import topi | |
import topi.cuda | |
from . import registry as reg | |
from .registry import OpPattern |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pylint: disable=invalid-name, unused-argument | |
"""Tensor ops""" | |
from __future__ import absolute_import | |
import tvm | |
import topi | |
import topi.cuda | |
from . import registry as reg | |
from .registry import OpPattern |
OlderNewer