Created
March 1, 2023 19:35
-
-
Save vmurali/afec50c1bab4b3a4c4cc786271d3ef7f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {hal.device.targets = [#hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}>]}>]} { | |
// Executable @encode_dispatch_285: a single variant for the llvm-cpu
// "embedded-elf-arm_64" target that exports one entry point,
// @encode_dispatch_285_generic_8x13.
hal.executable private @encode_dispatch_285 { | |
hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}> { | |
// Export ordinal 0. The pipeline layout declares 1 push constant and one
// descriptor set (set 0) with two storage-buffer bindings; binding 0 is
// marked ReadOnly.
hal.executable.export public @encode_dispatch_285_generic_8x13 ordinal(0) layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) { | |
// Workgroup-count region: receives the device and two index workload values
// and forwards the workload to flow.dispatch.workgroup_count_from_dag_root,
// returning the resulting x/y/z counts.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2 | |
hal.return %x, %y, %z : index, index, index | |
} | |
builtin.module { | |
// Kernel body: reads a 13x8 f32 tensor from binding 0 and writes its
// transpose (8x13 f32) to binding 1.
func.func @encode_dispatch_285_generic_8x13() { | |
%c0 = arith.constant 0 : index | |
// Push constant 0 is loaded as i32 and zero-extended to index; it is used
// below as the offset of the input subspan within binding 0.
// NOTE(review): stream.values lists 118450304 and 343369728 — presumably the
// offsets the compiler saw at the original call sites; confirm.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = arith.index_castui %0 {stream.alignment = 128 : index, stream.values = [118450304 : index, 343369728 : index]} : i32 to index | |
// Input view: set 0 / binding 0 at offset %1, typed readonly tensor<13x8xf32>.
%2 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%1) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<13x8xf32>> | |
// Output view: set 0 / binding 1 at offset 0, typed writeonly tensor<8x13xf32>.
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<8x13xf32>> | |
// Load the entire 13x8 input (offsets [0, 0], unit strides).
%4 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [13, 8], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<13x8xf32>> -> tensor<13x8xf32> | |
%5 = tensor.empty() : tensor<8x13xf32> | |
// Transpose: the input is indexed with (d1, d0) and the output with (d0, d1),
// and the region yields the input element unchanged, so out[i, j] = in[j, i].
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%4 : tensor<13x8xf32>) outs(%5 : tensor<8x13xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<8x13xf32> | |
// Store the entire 8x13 result into the output binding.
flow.dispatch.tensor.store %6, %3, offsets = [0, 0], sizes = [8, 13], strides = [1, 1] : tensor<8x13xf32> -> !flow.dispatch.tensor<writeonly:tensor<8x13xf32>> | |
return | |
} | |
} | |
} | |
} | |
// Module-level buffer used by the benchmark function for both dispatch
// bindings. It is allocated once by the initializer below.
util.global private mutable @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13_buffer : !hal.buffer | |
// Initializer: allocates one device-visible, device-local buffer usable for
// transfers and dispatch storage, then stores it in the global above.
util.initializer { | |
// Allocation size. 656959744 = 502524160 + 154435584, i.e. exactly the end
// of the second binding range used by the benchmark function in this module.
// NOTE(review): units are presumably bytes — confirm against HAL docs.
%c656959744 = arith.constant 656959744 : index | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
%buffer = hal.allocator.allocate<%allocator : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c656959744} | |
util.global.store %buffer, @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13_buffer : !hal.buffer | |
util.initializer.return | |
} | |
// Benchmark entry point (tagged iree.benchmark = "dispatch"): records %arg0
// back-to-back dispatches of @encode_dispatch_285_generic_8x13 into one
// command buffer, submits it, and waits for completion.
//   %arg0: number of dispatches to record (upper bound of the scf.for loop).
func.func @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} { | |
// -1 constants: used below as the fence-await timeout (i32) and the queue
// affinity (i64).
%c-1_i32 = arith.constant -1 : i32 | |
%c-1_i64 = arith.constant -1 : i64 | |
// Workload dimensions passed to the workgroup-count calculation: [8, 13].
%c13 = arith.constant 13 : index | |
%c8 = arith.constant 8 : index | |
// Operands of the two binding ranges (see push_descriptor_set below).
%c154435584 = arith.constant 154435584 : index | |
%c502524160 = arith.constant 502524160 : index | |
%c1 = arith.constant 1 : index | |
%c502524032 = arith.constant 502524032 : index | |
%c0 = arith.constant 0 : index | |
%c0_i32 = arith.constant 0 : i32 | |
// Iteration count as an index. The cast is signed, so a negative %arg0 yields
// a loop bound below 0 and the loop body never runs.
%0 = arith.index_cast %arg0 : i32 to index | |
%device = hal.ex.shared_device : !hal.device | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer | |
// Same layout as the executable export: 1 push constant, set 0 with two
// storage-buffer bindings.
%pipeline_layout = hal.pipeline_layout.lookup device(%device : !hal.device) layout(<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) : !hal.pipeline_layout | |
// Push constant 0 is set to 0, so the kernel's input subspan offset is 0 here.
// NOTE(review): the kernel's stream.values annotation lists 118450304 and
// 343369728 for this constant; the benchmark does not use either of them.
hal.command_buffer.push_constants<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout) offset(0) values([%c0_i32]) : i32 | |
%encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13_buffer = util.global.load @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13_buffer : !hal.buffer | |
// Both bindings of set 0 point into the same global buffer:
//   binding 0 -> [%c0, %c502524032]
//   binding 1 -> [%c502524160, %c154435584]
// NOTE(review): the bracketed pair is presumably (offset, length) — confirm
// against the HAL dialect reference.
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([ | |
%c0 = (%encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13_buffer : !hal.buffer)[%c0, %c502524032], | |
%c1 = (%encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_8x13_8x13_buffer : !hal.buffer)[%c502524160, %c154435584] | |
]) | |
// Workgroup counts are computed once, outside the loop, from workload [8, 13].
%workgroup_x, %workgroup_y, %workgroup_z = hal.executable.calculate_workgroups device(%device : !hal.device) target(@encode_dispatch_285::@embedded_elf_arm_64::@encode_dispatch_285_generic_8x13) workload([%c8, %c13]) : index, index, index | |
// Record one dispatch followed by one execution barrier per iteration.
// NOTE(review): the barrier presumably serializes consecutive dispatches —
// confirm.
scf.for %arg1 = %c0 to %0 step %c1 { | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@encode_dispatch_285::@embedded_elf_arm_64::@encode_dispatch_285_generic_8x13) workgroups([%workgroup_x, %workgroup_y, %workgroup_z]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None") | |
} | |
hal.command_buffer.finalize<%cmd : !hal.command_buffer> | |
// Submit with a null wait fence and a freshly created signal fence.
%1 = util.null : !hal.fence | |
%fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence | |
hal.device.queue.execute<%device : !hal.device> affinity(%c-1_i64) wait(%1) signal(%fence) commands([%cmd]) | |
// Wait on the signal fence with timeout -1, then check the returned status.
// NOTE(review): -1 presumably means "no timeout" — confirm.
%status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32 | |
util.status.check_ok %status, "failed to wait on timepoint" | |
return | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment