Skip to content

Instantly share code, notes, and snippets.

@makslevental
Created July 15, 2024 16:22
Show Gist options
  • Save makslevental/436e253190521e1712e6edb74e1c81fd to your computer and use it in GitHub Desktop.
Save makslevental/436e253190521e1712e6edb74e1c81fd to your computer and use it in GitHub Desktop.
// IREE-compiled module for a 128x256 x 256x128 -> 128x128 i32 matmul,
// targeting the "amd-aie-direct" device (AMD AIE, xclbin flatbuffer output).
// NOTE(review): target_arch is the placeholder "chip-tbd" — presumably filled
// in later in the pipeline; confirm before deploying.
module attributes {hal.device.targets = [#hal.device.target<"amd-aie-direct", [#hal.executable.target<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = ""}>]>]} {
// Executable holding the single matmul dispatch for the AIE target.
hal.executable private @matmul_i32_dispatch_0 {
hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie-direct", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = ""}>) {
// Export: pipeline layout declares one descriptor set with three
// storage buffers — bindings 0 and 1 read-only (the matmul operands),
// binding 2 read-write (the result). No push constants.
hal.executable.export public @matmul_i32_dispatch_0_matmul_128x128x256_i32 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} {
^bb0(%arg0: !hal.device):
// Workgroup count is deferred to the workload slice computed at the
// dispatch site (flow-level count-from-slice mechanism).
%x, %y, %z = flow.dispatch.workgroup_count_from_slice
hal.return %x, %y, %z : index, index, index
}
// Inner module: the device-side dispatch body, still at linalg level
// (not yet lowered to AIE code in this snapshot).
builtin.module {
func.func @matmul_i32_dispatch_0_matmul_128x128x256_i32() {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
// Bind the three interface buffers: LHS 128x256xi32 (read-only),
// RHS 256x128xi32 (read-only), result 128x128xi32 (write-only).
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xi32>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x128xi32>>
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<128x128xi32>>
// Load full operand tensors (whole-tensor slices, unit strides).
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xi32>> -> tensor<128x256xi32>
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x128xi32>> -> tensor<256x128xi32>
// Zero-init the accumulator, then C = A * B as a linalg.matmul.
%5 = tensor.empty() : tensor<128x128xi32>
%6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<128x128xi32>) -> tensor<128x128xi32>
%7 = linalg.matmul ins(%3, %4 : tensor<128x256xi32>, tensor<256x128xi32>) outs(%6 : tensor<128x128xi32>) -> tensor<128x128xi32>
// Store the full 128x128 result back through binding 2.
flow.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [128, 128], strides = [1, 1] : tensor<128x128xi32> -> !flow.dispatch.tensor<writeonly:tensor<128x128xi32>>
return
}
}
}
}
// Host-side entry point (public ABI): takes two buffer views, validates
// them against the expected shapes/element type, runs the dispatch, and
// returns the result as a buffer view.
util.func public @matmul_i32(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_i32(%input0: tensor<128x256xi32>, %input1: tensor<256x128xi32>) -> (%output0: tensor<128x128xi32>)"}} {
// Byte sizes: 65536 = 128*128*4 (result), 131072 = 128*256*4 (each input).
%c65536 = arith.constant 65536 : index
%c131072 = arith.constant 131072 : index
%c0 = arith.constant 0 : index
%c256 = arith.constant 256 : index
%c128 = arith.constant 128 : index
%element_type_i32 = hal.element_type<i32> : i32
%dense_row_major = hal.encoding_type<dense_row_major> : i32
// Runtime shape/type checks on the incoming buffer views, then import
// them as external stream resources (no copy implied by the IR here).
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("input0") shape([%c128, %c256]) type(%element_type_i32) encoding(%dense_row_major)
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<128x256xi32> in !stream.resource<external>{%c131072}
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("input1") shape([%c256, %c128]) type(%element_type_i32) encoding(%dense_row_major)
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<256x128xi32> in !stream.resource<external>{%c131072}
// Allocate the uninitialized 64 KiB result buffer; alloca yields a
// timepoint the execution region waits on before dispatching.
%result, %result_timepoint = stream.resource.alloca uninitialized : !stream.resource<external>{%c65536} => !stream.timepoint
%2 = stream.cmd.execute await(%result_timepoint) => with(%0 as %arg2: !stream.resource<external>{%c131072}, %1 as %arg3: !stream.resource<external>{%c131072}, %result as %arg4: !stream.resource<external>{%c65536}) {
// Dispatch the device kernel: inputs read-only (ro), output
// write-only (wo), each covering its full byte range from offset 0.
stream.cmd.dispatch @matmul_i32_dispatch_0::@amdaie_xclbin_fb::@matmul_i32_dispatch_0_matmul_128x128x256_i32 {
ro %arg2[%c0 for %c131072] : !stream.resource<external>{%c131072},
ro %arg3[%c0 for %c131072] : !stream.resource<external>{%c131072},
wo %arg4[%c0 for %c65536] : !stream.resource<external>{%c65536}
}
} => !stream.timepoint
// Block until execution completes (matches the "sync func" ABI
// declaration above), then export the result as a buffer view.
%3 = stream.timepoint.await %2 => %result : !stream.resource<external>{%c65536}
%4 = stream.tensor.export %3 : tensor<128x128xi32> in !stream.resource<external>{%c65536} -> !hal.buffer_view
util.return %4 : !hal.buffer_view
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment