Created
February 28, 2023 17:45
-
-
Save vmurali/49466f72bcc80cdb2943d0636c0838cf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {hal.device.targets = [#hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}>]}>]} { | |
// Executable containing a single dispatch entry point. The body is a 2-D
// transpose: it reads a 50272x768xf32 tensor from binding 0 and writes the
// transposed 768x50272xf32 tensor to binding 1.
hal.executable private @encode_dispatch_285 { | |
hal.executable.variant public @embedded_elf_arm_64, target = <"llvm-cpu", "embedded-elf-arm_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "arm64-unknown-unknown-eabi-elf"}> { | |
// Export with ordinal 0. The pipeline layout declares one push constant and
// one descriptor set (set 0) with two storage-buffer bindings; binding 0 is
// marked ReadOnly.
hal.executable.export public @encode_dispatch_285_generic_768x50272 ordinal(0) layout(#hal.pipeline.layout<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) { | |
// Workgroup-count region: receives the device and two workload values and
// returns the x/y/z workgroup counts derived from them.
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index): | |
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2 | |
hal.return %x, %y, %z : index, index, index | |
} | |
builtin.module { | |
func.func @encode_dispatch_285_generic_768x50272() { | |
%c0 = arith.constant 0 : index | |
// Push constant 0 supplies the offset of the input within binding 0. It is
// loaded as i32 and widened with an unsigned cast to index.
// NOTE(review): the stream.values attribute presumably lists the offsets the
// compiler observed at the original call sites - confirm.
%0 = hal.interface.constant.load[0] : i32 | |
%1 = arith.index_castui %0 {stream.alignment = 128 : index, stream.values = [118450304 : index, 343369728 : index]} : i32 to index | |
// Binding 0: read-only input at the dynamic offset %1.
%2 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%1) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<50272x768xf32>> | |
// Binding 1: write-only output at offset 0.
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<768x50272xf32>> | |
// Load the whole input tensor (all 50272x768 elements, unit strides).
%4 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [50272, 768], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<50272x768xf32>> -> tensor<50272x768xf32> | |
%5 = tensor.empty() : tensor<768x50272xf32> | |
// Transpose: the input is indexed with (d1, d0) and the output with (d0, d1);
// the body yields the input element unchanged, so out[d0, d1] = in[d1, d0].
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%4 : tensor<50272x768xf32>) outs(%5 : tensor<768x50272xf32>) { | |
^bb0(%in: f32, %out: f32): | |
linalg.yield %in : f32 | |
} -> tensor<768x50272xf32> | |
// Store the full transposed result to the output binding.
flow.dispatch.tensor.store %6, %3, offsets = [0, 0], sizes = [768, 50272], strides = [1, 1] : tensor<768x50272xf32> -> !flow.dispatch.tensor<writeonly:tensor<768x50272xf32>> | |
return | |
} | |
} | |
} | |
} | |
// Module-level buffer used by the benchmark function below for both the
// dispatch input and output bindings. It is populated once by the initializer
// that follows.
util.global private mutable @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer | |
// Allocates the buffer from the shared device's allocator and stores it in the
// global above.
util.initializer { | |
// Allocation size. 656959744 = 502524160 + 154435584, i.e. the offset plus the
// length of the second binding range used by the benchmark function.
// NOTE(review): units are presumably bytes - confirm against the HAL op docs.
%c656959744 = arith.constant 656959744 : index | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
// Requested as device-visible/device-local memory, usable for transfers and
// for dispatch storage reads and writes.
%buffer = hal.allocator.allocate<%allocator : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c656959744} | |
util.global.store %buffer, @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer | |
util.initializer.return | |
} | |
// Benchmark entry point (tagged iree.benchmark = "dispatch"). Records the
// transpose dispatch %arg0 times into one command buffer, submits it, and
// blocks until the signalled fence is reached.
//   %arg0: number of times the dispatch is recorded (loop trip count).
func.func @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} { | |
%c-1_i32 = arith.constant -1 : i32 | |
%c-1_i64 = arith.constant -1 : i64 | |
%c50272 = arith.constant 50272 : index | |
%c768 = arith.constant 768 : index | |
// 154435584 = 768 * 50272 * 4, the size of one 768x50272 f32 tensor.
%c154435584 = arith.constant 154435584 : index | |
%c502524160 = arith.constant 502524160 : index | |
%c1 = arith.constant 1 : index | |
%c502524032 = arith.constant 502524032 : index | |
%c0 = arith.constant 0 : index | |
%c0_i32 = arith.constant 0 : i32 | |
// Trip count for the recording loop below.
%0 = arith.index_cast %arg0 : i32 to index | |
%device = hal.ex.shared_device : !hal.device | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer | |
// Must match the layout declared on the executable export: one push constant
// and two storage-buffer bindings in set 0.
%pipeline_layout = hal.pipeline_layout.lookup device(%device : !hal.device) layout(<push_constants = 1, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) : !hal.pipeline_layout | |
// The single push constant (the input offset read by the dispatch) is set to 0.
hal.command_buffer.push_constants<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout) offset(0) values([%c0_i32]) : i32 | |
%encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer = util.global.load @encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer | |
// Both bindings alias the same global buffer with disjoint ranges:
//   binding 0 (input):  offset 0,         length 502524032
//   binding 1 (output): offset 502524160, length 154435584
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([ | |
%c0 = (%encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer)[%c0, %c502524032], | |
%c1 = (%encode_dispatch_285_embedded_elf_arm_64_encode_dispatch_285_generic_768x50272_768x50272_buffer : !hal.buffer)[%c502524160, %c154435584] | |
]) | |
// Workgroup counts are computed once from the workload (768, 50272) and
// reused for every recorded dispatch.
%workgroup_x, %workgroup_y, %workgroup_z = hal.executable.calculate_workgroups device(%device : !hal.device) target(@encode_dispatch_285::@embedded_elf_arm_64::@encode_dispatch_285_generic_768x50272) workload([%c768, %c50272]) : index, index, index | |
// Record %0 dispatches, each followed by an execution barrier.
scf.for %arg1 = %c0 to %0 step %c1 { | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@encode_dispatch_285::@embedded_elf_arm_64::@encode_dispatch_285_generic_768x50272) workgroups([%workgroup_x, %workgroup_y, %workgroup_z]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None") | |
} | |
hal.command_buffer.finalize<%cmd : !hal.command_buffer> | |
// Submit with a null wait fence and a fresh signal fence, then wait on it.
// NOTE(review): affinity -1 and timeout -1 presumably mean "any queue" and
// "wait indefinitely" - confirm against the HAL op documentation.
%1 = util.null : !hal.fence | |
%fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence | |
hal.device.queue.execute<%device : !hal.device> affinity(%c-1_i64) wait(%1) signal(%fence) commands([%cmd]) | |
%status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32 | |
// Fails with the given message when the await status is not OK.
util.status.check_ok %status, "failed to wait on timepoint" | |
return | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment