IREE Runtime - `UNAVAILABLE; VK_ERROR_INITIALIZATION_FAILED` error
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 65536, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 1024], subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64, cooperative_matrix_properties_nv = []>>}>], legacy_sync}>]} {
  hal.executable private @forward_dispatch_28 {
    hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 65536, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 1024], subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64, cooperative_matrix_properties_nv = []>>}> {
      hal.executable.export public @forward_dispatch_28_matmul_4096x512x512 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
        %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
        hal.return %x, %y, %z : index, index, index
      }
      builtin.module {
        func.func @forward_dispatch_28_matmul_4096x512x512() {
          %c8489024 = arith.constant 8489024 : index
          %c578560 = arith.constant 578560 : index
          %c43008 = arith.constant 43008 : index
          %c12683328 = arith.constant 12683328 : index
          %cst = arith.constant 0.000000e+00 : f16
          %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c8489024) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>>
          %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c578560) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x512xf16>>
          %2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c43008) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf16>>
          %3 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c12683328) : !flow.dispatch.tensor<writeonly:tensor<512x4096xf16>>
          %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>> -> tensor<4096x512xf16>
          %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x512xf16>> -> tensor<512x512xf16>
          %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor<readonly:tensor<512xf16>> -> tensor<512xf16>
          %7 = tensor.empty() : tensor<512x4096xf16>
          %8 = tensor.empty() : tensor<4096x512xf16>
          %9 = linalg.fill ins(%cst : f16) outs(%8 : tensor<4096x512xf16>) -> tensor<4096x512xf16>
          %10 = linalg.matmul ins(%4, %5 : tensor<4096x512xf16>, tensor<512x512xf16>) outs(%9 : tensor<4096x512xf16>) -> tensor<4096x512xf16>
          %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%10, %6 : tensor<4096x512xf16>, tensor<512xf16>) outs(%7 : tensor<512x4096xf16>) {
          ^bb0(%in: f16, %in_0: f16, %out: f16):
            %12 = arith.addf %in, %in_0 : f16
            linalg.yield %12 : f16
          } -> tensor<512x4096xf16>
          flow.dispatch.tensor.store %11, %3, offsets = [0, 0], sizes = [512, 4096], strides = [1, 1] : tensor<512x4096xf16> -> !flow.dispatch.tensor<writeonly:tensor<512x4096xf16>>
          return
        }
      }
    }
  }
  util.global private mutable @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer
  util.initializer {
    %c1441153280 = arith.constant 1441153280 : index
    %device = hal.ex.shared_device : !hal.device
    %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
    %buffer = hal.allocator.allocate<%allocator : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c1441153280}
    util.global.store %buffer, @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer
    util.initializer.return
  }
  func.func @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} {
    %c-1_i32 = arith.constant -1 : i32
    %c-1_i64 = arith.constant -1 : i64
    %c512 = arith.constant 512 : index
    %c4096 = arith.constant 4096 : index
    %c770064384 = arith.constant 770064384 : index
    %c2 = arith.constant 2 : index
    %c98975488 = arith.constant 98975488 : index
    %c671088896 = arith.constant 671088896 : index
    %c1 = arith.constant 1 : index
    %c671088768 = arith.constant 671088768 : index
    %c0 = arith.constant 0 : index
    %0 = arith.index_cast %arg0 : i32 to index
    %device = hal.ex.shared_device : !hal.device
    %cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer
    %pipeline_layout = hal.pipeline_layout.lookup device(%device : !hal.device) layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) : !hal.pipeline_layout
    %forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer = util.global.load @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer
    hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([
      %c0 = (%forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer)[%c0, %c671088768],
      %c1 = (%forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer)[%c671088896, %c98975488],
      %c2 = (%forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer)[%c770064384, %c671088768]
    ])
    %workgroup_x, %workgroup_y, %workgroup_z = hal.executable.calculate_workgroups device(%device : !hal.device) target(@forward_dispatch_28::@vulkan_spirv_fb::@forward_dispatch_28_matmul_4096x512x512) workload([%c4096, %c512]) : index, index, index
    scf.for %arg1 = %c0 to %0 step %c1 {
      hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@forward_dispatch_28::@vulkan_spirv_fb::@forward_dispatch_28_matmul_4096x512x512) workgroups([%workgroup_x, %workgroup_y, %workgroup_z])
      hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
    }
    hal.command_buffer.finalize<%cmd : !hal.command_buffer>
    %1 = util.null : !hal.fence
    %fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
    hal.device.queue.execute<%device : !hal.device> affinity(%c-1_i64) wait(%1) signal(%fence) commands([%cmd])
    %status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32
    util.status.check_ok %status, "failed to wait on timepoint"
    return
  }
}
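
For context, a minimal sketch of how a dispatch benchmark module like the one above might be compiled and loaded on the Vulkan backend through the IREE Python bindings (the iree-compiler / iree-runtime pip packages). The file name below is hypothetical, and exact argument spellings vary across IREE releases; creating the Vulkan HAL driver/device is typically the point where an `UNAVAILABLE; VK_ERROR_INITIALIZATION_FAILED` status surfaces (for example when no usable Vulkan ICD or GPU is visible to the process).

import iree.compiler as ireec
import iree.runtime as ireert

# Compile the dispatch-benchmark IR above (saved to a hypothetical file name)
# for the Vulkan/SPIR-V backend; returns the .vmfb module as bytes.
vmfb = ireec.compile_file(
    "forward_dispatch_28_benchmark.mlir",
    target_backends=["vulkan-spirv"],
)

# Creating the Vulkan driver/device is where VK_ERROR_INITIALIZATION_FAILED
# can be reported if the Vulkan loader or a compatible GPU is unavailable.
config = ireert.Config("vulkan")
ctx = ireert.SystemContext(config=config)

# The same .vmfb can also be run from the command line with the
# iree-benchmark-module tool pointed at the Vulkan device.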