Skip to content

Instantly share code, notes, and snippets.

@makslevental
Created July 15, 2024 16:22
Show Gist options
  • Save makslevental/436e253190521e1712e6edb74e1c81fd to your computer and use it in GitHub Desktop.
Save makslevental/436e253190521e1712e6edb74e1c81fd to your computer and use it in GitHub Desktop.
// IREE-compiled module for a 128x256 x 256x128 -> 128x128 i32 matmul,
// targeting the "amd-aie-direct" device (AMD AIE, xclbin flatbuffer output).
// NOTE(review): target_arch is the placeholder "chip-tbd" — presumably filled
// in later in the pipeline; confirm before deploying.
module attributes {hal.device.targets = [#hal.device.target<"amd-aie-direct", [#hal.executable.target<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = ""}>]>]} {
// Executable holding the single matmul dispatch for the AIE target.
hal.executable private @matmul_i32_dispatch_0 {
hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = ""}>) {
// Export: pipeline layout declares one descriptor set with three
// storage buffers — bindings 0 and 1 read-only (the matmul operands),
// binding 2 read-write (the result). No push constants.
hal.executable.export public @matmul_i32_dispatch_0_matmul_128x128x256_i32 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} {
^bb0(%arg0: !hal.device):
// Workgroup count is deferred to the workload slice computed at the
// dispatch site (flow-level count-from-slice mechanism).
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
}
// Inner module: the device-side dispatch body, still at linalg level
// (not yet lowered to AIE code in this snapshot).
builtin.module {
func.func @matmul_i32_dispatch_0_matmul_128x128x256_i32() {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
// Bind the three interface buffers: LHS 128x256xi32 (read-only),
// RHS 256x128xi32 (read-only), result 128x128xi32 (write-only).
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xi32>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x128xi32>>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x128xi32>>
// Load full operand tensors (whole-tensor slices, unit strides).
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xi32>> -> tensor<128x256xi32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x128xi32>> -> tensor<256x128xi32>
// Zero-init the accumulator, then C = A * B as a linalg.matmul.
%5 = tensor.empty() : tensor<128x128xi32>
%6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<128x128xi32>) -> tensor<128x128xi32>
%7 = linalg.matmul ins(%3, %4 : tensor<128x256xi32>, tensor<256x128xi32>) outs(%6 : tensor<128x128xi32>) -> tensor<128x128xi32>
// Store the full 128x128 result back through binding 2.
flow.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [128, 128], strides = [1, 1] : tensor<128x128xi32> -> !flow.dispatch.tensor<writeonly:tensor<128x128xi32>>
return
}
}
}
}
// Host-side entry point (public ABI): takes two buffer views, validates
// them against the expected shapes/element type, runs the dispatch, and
// returns the result as a buffer view.
util.func public @matmul_i32(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_i32(%input0: tensor<128x256xi32>, %input1: tensor<256x128xi32>) -> (%output0: tensor<128x128xi32>)"}} {
// Byte sizes: 65536 = 128*128*4 (result), 131072 = 128*256*4 (each input).
%c65536 = arith.constant 65536 : index
%c131072 = arith.constant 131072 : index
%c0 = arith.constant 0 : index
%c256 = arith.constant 256 : index
%c128 = arith.constant 128 : index
%element_type_i32 = hal.element_type<i32> : i32
%dense_row_major = hal.encoding_type<dense_row_major> : i32
// Runtime shape/type checks on the incoming buffer views, then import
// them as external stream resources (no copy implied by the IR here).
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("input0") shape([%c128, %c256]) type(%element_type_i32) encoding(%dense_row_major)
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<128x256xi32> in !stream.resource<external>{%c131072}
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("input1") shape([%c256, %c128]) type(%element_type_i32) encoding(%dense_row_major)
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<256x128xi32> in !stream.resource<external>{%c131072}
// Allocate the uninitialized 64 KiB result buffer; alloca yields a
// timepoint the execution region waits on before dispatching.
%result, %result_timepoint = stream.resource.alloca uninitialized : !stream.resource<external>{%c65536} => !stream.timepoint
%2 = stream.cmd.execute await(%result_timepoint) => with(%0 as %arg2: !stream.resource<external>{%c131072}, %1 as %arg3: !stream.resource<external>{%c131072}, %result as %arg4: !stream.resource<external>{%c65536}) {
// Dispatch the device kernel: inputs read-only (ro), output
// write-only (wo), each covering its full byte range from offset 0.
stream.cmd.dispatch @matmul_i32_dispatch_0::@amdaie_xclbin_fb::@matmul_i32_dispatch_0_matmul_128x128x256_i32 {
ro %arg2[%c0 for %c131072] : !stream.resource<external>{%c131072},
ro %arg3[%c0 for %c131072] : !stream.resource<external>{%c131072},
wo %arg4[%c0 for %c65536] : !stream.resource<external>{%c65536}
}
} => !stream.timepoint
// Block until execution completes (matches the "sync func" ABI
// declaration above), then export the result as a buffer view.
%3 = stream.timepoint.await %2 => %result : !stream.resource<external>{%c65536}
%4 = stream.tensor.export %3 : tensor<128x128xi32> in !stream.resource<external>{%c65536} -> !hal.buffer_view
util.return %4 : !hal.buffer_view
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment