Skip to content

Instantly share code, notes, and snippets.

View jerryyin's full-sized avatar

Zhuoran Yin jerryyin

View GitHub Profile
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
#map2 = affine_map<(d0, d1) -> (d1)>
module @module {
util.func public @fused_op_addmm_193375c332098e31212d197e370a78caca49a988_4096xbfloat16_150000x16384xbfloat16_16384x4096xbfloat16_perm_10$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view, %arg3: !hal.fence, %arg4: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.tensor.import wait(%arg3) => %arg0 : !hal.buffer_view -> tensor<4096xbf16>
This file has been truncated, but you can view the full file.
; *** IR Dump After Annotation2MetadataPass on [module] ***
; ModuleID = 'conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0'
source_filename = "conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
@__shared_memory___0 = private addrspace(3) global [128 x [132 x bfloat]] undef, align 16
@__shared_memory__ = private addrspace(3) global [64 x [132 x bfloat]] undef, align 16
; Function Attrs: alwaysinline
define amdgpu_kernel void @"conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0_matmul_like_64428x896x896_bf16xbf16xf32"(ptr addrspace(1) inreg noalias noundef nonnull readonly align 16 %0, ptr addrspace(1) inreg n
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
util.func public @conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<12x59x91x896xbf16>
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
util.func public @conv_2d_bfloat16_input_backward_16x1x21x192_nhwc_384x1x1x192_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<16x1x21x384xbf16>
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
util.func public @conv_2d_bfloat16_input_backward_16x1x21x192_nhwc_384x1x1x192_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<16x1x21x384xbf16>
%1 = hal.tensor.import wait(%arg2) => %arg1 : !hal.buffer_view -> tenso
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
util.func public @conv_2d_bfloat16_forward_16x38x19x64_nhwc_64x5x1x64_fhwc_nhwf_1x1s_2x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
%cst = arith.constant 0.000000e+00 : bf16
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<16x38x19x64xbf16>
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
util.func public @conv_2d_bfloat16_forward_2x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
%cst = arith.constant 0.000000e+00 : f32
%0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<2x59x91x896xbf16>
%1 = hal.tensor.import wait(%arg2) => %arg1 : !hal.buffer_view -> tensor<896x1
" Vim indent file
" Language: mlir
" Maintainer: The MLIR team
" Adapted from the LLVM vim indent file
" What this indent plugin currently does:
" - If no other rule matches copy indent from previous non-empty,
" non-commented line.
" - On '}' align the same as the line containing the matching '{'.
" - If previous line starts with a block label, increase indentation.
" - If the current line is a block label and ends with ':' indent at the same
" Vim syntax file
" Language: mlir
" Maintainer: The MLIR team, http://github.com/tensorflow/mlir/
" Version: $Revision$
" Some parts adapted from the LLVM vim syntax file.
if version < 600
syntax clear
elseif exists("b:current_syntax")
finish