Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vmurali/49466f72bcc80cdb2943d0636c0838cf to your computer and use it in GitHub Desktop.
Save vmurali/49466f72bcc80cdb2943d0636c0838cf to your computer and use it in GitHub Desktop.
module attributes {hal.device.targets = [#hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}>]}>]} {
// Executable containing one dispatch entry point that transposes a
// 50272x768 f32 tensor into a 768x50272 f32 tensor.
hal.executable private @encode_dispatch_285 {
  // The only variant in this executable: llvm-cpu / embedded-elf-arm_64.
  hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}> {
    // Export #0. Its pipeline layout declares one push constant and one
    // descriptor set with two storage buffers (binding 0 is ReadOnly).
    // The region turns the two workload values into a workgroup count.
    hal.executable.export public @encode_dispatch_285_generic_768x50272 ordinal(0) layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) {
    ^bb0(%dev: !hal.device, %workload_0: index, %workload_1: index):
      %count_x, %count_y, %count_z = flow.dispatch.workgroup_count_from_dag_root %workload_0, %workload_1
      hal.return %count_x, %count_y, %count_z : index, index, index
    }
    builtin.module {
      func.func @encode_dispatch_285_generic_768x50272() {
        // Push constant 0 supplies the offset of the source tensor inside
        // binding 0; it is zero-extended from i32 to index.
        %src_offset_i32 = hal.interface.constant.load[0] : i32
        %src_offset = arith.index_castui %src_offset_i32 {stream.alignment = 128 : index, stream.values = [118450304 : index, 343369728 : index]} : i32 to index

        // Source view: binding 0, read-only, at the dynamic offset.
        %src_binding = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%src_offset) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<50272x768xf32>>

        // Destination view: binding 1, write-only, at offset zero.
        %zero = arith.constant 0 : index
        %dst_binding = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%zero) : !flow.dispatch.tensor<writeonly:tensor<768x50272xf32>>

        // Load the whole source tensor and create the result placeholder.
        %dst_init = tensor.empty() : tensor<768x50272xf32>
        %src = flow.dispatch.tensor.load %src_binding, offsets = [0, 0], sizes = [50272, 768], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<50272x768xf32>> -> tensor<50272x768xf32>

        // Transpose: the input is indexed (d1, d0) and the output (d0, d1),
        // and the body forwards the input element unchanged.
        %transposed = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%src : tensor<50272x768xf32>) outs(%dst_init : tensor<768x50272xf32>) {
        ^bb0(%src_elem: f32, %dst_elem: f32):
          linalg.yield %src_elem : f32
        } -> tensor<768x50272xf32>

        // Write the full transposed tensor to the destination binding.
        flow.dispatch.tensor.store %transposed, %dst_binding, offsets = [0, 0], sizes = [768, 50272], strides = [1, 1] : tensor<768x50272xf32> -> !flow.dispatch.tensor<writeonly:tensor<768x50272xf32>>
        return
      }
    }
  }
}
// Module-level mutable global holding a !hal.buffer. The util.initializer in
// this module stores the allocated buffer here, and the benchmark function
// loads it to bind both descriptor-set entries of the dispatch.
util.global private mutable @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer
// Module initializer: allocates one buffer of size 656959744 from the shared
// device's allocator and stores it in the benchmark's buffer global.
util.initializer {
  // Get the allocator that belongs to the shared device.
  %dev = hal.ex.shared_device : !hal.device
  %alloc = hal.device.allocator<%dev : !hal.device> : !hal.allocator

  // One allocation, usable for transfers and for dispatch storage reads/writes.
  %total_size = arith.constant 656959744 : index
  %storage = hal.allocator.allocate<%alloc : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%total_size}

  // Store the buffer in the global.
  util.global.store %storage, @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer
  util.initializer.return
}
// Benchmark entry point (iree.benchmark = "dispatch"). Records %arg0
// back-to-back dispatches of @encode_dispatch_285_generic_768x50272 into one
// command buffer, submits it to the shared device, and blocks until the
// signal fence is reached.
func.func @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} {
  // Number of dispatches to record, taken from the i32 argument.
  %iters = arith.index_cast %arg0 : i32 to index

  // Create a one-shot command buffer on the shared device.
  %dev = hal.ex.shared_device : !hal.device
  %cb = hal.command_buffer.create device(%dev : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer
  %layout = hal.pipeline_layout.lookup device(%dev : !hal.device) layout(<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) : !hal.pipeline_layout

  // The single push constant is set to 0.
  %zero_i32 = arith.constant 0 : i32
  hal.command_buffer.push_constants<%cb : !hal.command_buffer> layout(%layout : !hal.pipeline_layout) offset(0) values([%zero_i32]) : i32

  // Both bindings are ranges of the same global buffer:
  //   binding 0 -> offset 0,         length 502524032
  //   binding 1 -> offset 502524160, length 154435584 (= 50272 * 768 * 4)
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %in_len = arith.constant 502524032 : index
  %out_off = arith.constant 502524160 : index
  %out_len = arith.constant 154435584 : index
  %buf = util.global.load @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer
  hal.command_buffer.push_descriptor_set<%cb : !hal.command_buffer> layout(%layout : !hal.pipeline_layout)[%zero] bindings([
    %zero = (%buf : !hal.buffer)[%zero, %in_len],
    %one = (%buf : !hal.buffer)[%out_off, %out_len]
  ])

  // Compute the workgroup count once, for the workload [768, 50272].
  %dim_768 = arith.constant 768 : index
  %dim_50272 = arith.constant 50272 : index
  %wg_x, %wg_y, %wg_z = hal.executable.calculate_workgroups device(%dev : !hal.device) target(@encode_dispatch_285::@embedded_elf_arm_64::@encode_dispatch_285_generic_768x50272) workload([%dim_768, %dim_50272]) : index, index, index

  // Record %iters dispatches, each followed by an execution barrier.
  scf.for %i = %zero to %iters step %one {
    hal.command_buffer.dispatch.symbol<%cb : !hal.command_buffer> target(@encode_dispatch_285::@embedded_elf_arm_64::@encode_dispatch_285_generic_768x50272) workgroups([%wg_x, %wg_y, %wg_z])
    hal.command_buffer.execution_barrier<%cb : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
  }
  hal.command_buffer.finalize<%cb : !hal.command_buffer>

  // Submit with a null wait fence and a new signal fence; affinity is -1.
  %neg1_i64 = arith.constant -1 : i64
  %no_wait = util.null : !hal.fence
  %done = hal.fence.create device(%dev : !hal.device) flags("None") : !hal.fence
  hal.device.queue.execute<%dev : !hal.device> affinity(%neg1_i64) wait(%no_wait) signal(%done) commands([%cb])

  // Await the signal fence with timeout_millis = -1, then check the status.
  %neg1_i32 = arith.constant -1 : i32
  %wait_status = hal.fence.await until([%done]) timeout_millis(%neg1_i32) : i32
  util.status.check_ok %wait_status, "failed to wait on timepoint"
  return
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment