Zhuoran Yin jerryyin

## addmm_lowering.mlir

// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
#map2 = affine_map<(d0, d1) -> (d1)>
module @module {
  util.func public @fused_op_addmm_193375c332098e31212d197e370a78caca49a988_4096xbfloat16_150000x16384xbfloat16_16384x4096xbfloat16_perm_10$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view, %arg3: !hal.fence, %arg4: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
    %cst = arith.constant 0.000000e+00 : f32
    %0 = hal.tensor.import wait(%arg3) => %arg0 : !hal.buffer_view -> tensor<4096xbf16>

## conv_2d_64428x896x896_lowering.ll

; *** IR Dump After Annotation2MetadataPass on [module] ***
; ModuleID = 'conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0'
source_filename = "conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
@__shared_memory___0 = private addrspace(3) global [128 x [132 x bfloat]] undef, align 16
@__shared_memory__ = private addrspace(3) global [64 x [132 x bfloat]] undef, align 16
; Function Attrs: alwaysinline
define amdgpu_kernel void @"conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0_matmul_like_64428x896x896_bf16xbf16xf32"(ptr addrspace(1) inreg noalias noundef nonnull readonly align 16 %0, ptr addrspace(1) inreg n

## conv_2d_64428x896x896_lowering.mlir

// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
  util.func public @conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
    %cst = arith.constant 0.000000e+00 : f32
    %0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<12x59x91x896xbf16>

## conv_input_backward_16x1x21x192_combinelayout_before_fuseandhoist.mlir
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
  util.func public @conv_2d_bfloat16_input_backward_16x1x21x192_nhwc_384x1x1x192_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
    %cst = arith.constant 0.000000e+00 : f32
    %0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<16x1x21x384xbf16>

## conv_input_backward_16x1x21x192.mlir
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
  util.func public @conv_2d_bfloat16_input_backward_16x1x21x192_nhwc_384x1x1x192_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
    %cst = arith.constant 0.000000e+00 : f32
    %0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<16x1x21x384xbf16>
    %1 = hal.tensor.import wait(%arg2) => %arg1 : !hal.buffer_view -> tenso

## no-c-promotion.mlir
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
  util.func public @conv_2d_bfloat16_forward_16x38x19x64_nhwc_64x5x1x64_fhwc_nhwf_1x1s_2x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
    %cst = arith.constant 0.000000e+00 : bf16
    %cst_0 = arith.constant 0.000000e+00 : f32
    %0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<16x38x19x64xbf16>

## progress2.mlir
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module @module {
  util.func public @conv_2d_bfloat16_forward_2x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub, preprocessing_pipeline = #util.preprocessing_pipeline<"iree-preprocessing-make-single-dispatch">} {
    %cst = arith.constant 0.000000e+00 : f32
    %0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<2x59x91x896xbf16>
    %1 = hal.tensor.import wait(%arg2) => %arg1 : !hal.buffer_view -> tensor<896x1

## mlir.vim (Vim indent file)
" Vim indent file
" Language:   mlir
" Maintainer: The MLIR team
" Adapted from the LLVM vim indent file
" What this indent plugin currently does:
"  - If no other rule matches copy indent from previous non-empty,
"    non-commented line.
"  - On '}' align the same as the line containing the matching '{'.
"  - If previous line starts with a block label, increase indentation.
"  - If the current line is a block label and ends with ':' indent at the same

## mlir.vim (Vim syntax file)
" Vim syntax file
" Language:   mlir
" Maintainer: The MLIR team, http://github.com/tensorflow/mlir/
" Version:      $Revision$
" Some parts adapted from the LLVM vim syntax file.

if version < 600
  syntax clear
elseif exists("b:current_syntax")
  finish

	// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
	#map = affine_map<(d0) -> (d0)>
	#map1 = affine_map<(d0, d1) -> (d0, d1)>
	#map2 = affine_map<(d0, d1) -> (d1)>
	module @module {
	util.func public @fused_op_addmm_193375c332098e31212d197e370a78caca49a988_4096xbfloat16_150000x16384xbfloat16_16384x4096xbfloat16_perm_10$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view, %arg3: !hal.fence, %arg4: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
	%cst = arith.constant 0.000000e+00 : f32
	%0 = hal.tensor.import wait(%arg3) => %arg0 : !hal.buffer_view -> tensor<4096xbf16>

	; *** IR Dump After Annotation2MetadataPass on [module] ***
	; ModuleID = 'conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0'
	source_filename = "conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0"
	target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
	@__shared_memory___0 = private addrspace(3) global [128 x [132 x bfloat]] undef, align 16
	@__shared_memory__ = private addrspace(3) global [64 x [132 x bfloat]] undef, align 16
	; Function Attrs: alwaysinline
	define amdgpu_kernel void @"conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async_dispatch_0_matmul_like_64428x896x896_bf16xbf16xf32"(ptr addrspace(1) inreg noalias noundef nonnull readonly align 16 %0, ptr addrspace(1) inreg n

	// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
	#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
	#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
	#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
	#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
	module @module {
	util.func public @conv_2d_bfloat16_forward_12x59x91x896_nhwc_896x1x1x896_fhwc_nhwf_1x1s_0x0p_1x1d_1g$async(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.fence, %arg3: !hal.fence) -> !hal.buffer_view attributes {inlining_policy = #util.inline.never, iree.abi.model = "coarse-fences", iree.abi.stub} {
	%cst = arith.constant 0.000000e+00 : f32
	%0 = hal.tensor.import wait(%arg2) => %arg0 : !hal.buffer_view -> tensor<12x59x91x896xbf16>
	" Vim indent file
	" Language: mlir
	" Maintainer: The MLIR team
	" Adapted from the LLVM vim indent file
	" What this indent plugin currently does:
	" - If no other rule matches copy indent from previous non-empty,
	" non-commented line.
	" - On '}' align the same as the line containing the matching '{'.
	" - If previous line starts with a block label, increase indentation.
	" - If the current line is a block label and ends with ':' indent at the same
	" Vim syntax file
	" Language: mlir
	" Maintainer: The MLIR team, http://github.com/tensorflow/mlir/
	" Version: $Revision$
	" Some parts adapted from the LLVM vim syntax file.

	if version < 600
	syntax clear
	elseif exists("b:current_syntax")
	finish