IREE Runtime - `UNAVAILABLE; VK_ERROR_INITIALIZATION_FAILED` error
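The module below is a standalone dispatch-benchmark dump that reproduces the failure. It isolates a single dispatch, `forward_dispatch_28` (a 4096x512x512 f16 matmul fused with a bias add), targets the Vulkan/SPIR-V backend on an AMD discrete GPU, and wraps the dispatch in an entry function that replays it `%arg0` times.

As a minimal sketch of how such a dump is typically recompiled and run (assuming the IREE Python bindings from the `iree-compiler` / `iree-runtime` pip packages; exact APIs vary between IREE releases, and the input file name here is hypothetical):

```python
# Sketch only: APIs are from the IREE Python bindings and may differ by release.
import iree.compiler as ireec
import iree.runtime as ireert

# Recompile the dumped benchmark module for the Vulkan/SPIR-V backend.
vmfb = ireec.compile_file("forward_dispatch_28_benchmark.mlir",
                          target_backends=["vulkan-spirv"])

# Config("vulkan") brings up the Vulkan HAL driver and default device;
# `UNAVAILABLE; VK_ERROR_INITIALIZATION_FAILED` surfaces around this point
# (or at the initializer's hal.allocator.allocate) when Vulkan instance or
# device creation fails.
config = ireert.Config("vulkan")
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(ireert.VmModule.from_flatbuffer(config.vm_instance, vmfb))

# The benchmark function takes the number of dispatch repetitions as an i32.
bench = getattr(
    ctx.modules.module,
    "forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512")
bench(100)
```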
module attributes {hal.device.targets = [#hal.device.target<"vulkan", {executable_targets = [#hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 65536, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 1024], subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64, cooperative_matrix_properties_nv = []>>}>], legacy_sync}>]} {
  // Executable holding the isolated dispatch: a 4096x512x512 f16 matmul fused
  // with a bias add whose result is written back transposed as 512x4096.
  hal.executable private @forward_dispatch_28 {
    hal.executable.variant public @vulkan_spirv_fb, target = <"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 65536, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 1024], subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64, cooperative_matrix_properties_nv = []>>}> {
      hal.executable.export public @forward_dispatch_28_matmul_4096x512x512 ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
        %x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg1, %arg2
        hal.return %x, %y, %z : index, index, index
      }
      builtin.module {
        func.func @forward_dispatch_28_matmul_4096x512x512() {
          %c8489024 = arith.constant 8489024 : index
          %c578560 = arith.constant 578560 : index
          %c43008 = arith.constant 43008 : index
          %c12683328 = arith.constant 12683328 : index
          %cst = arith.constant 0.000000e+00 : f16
          %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c8489024) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>>
          %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c578560) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512x512xf16>>
          %2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c43008) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<512xf16>>
          %3 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c12683328) : !flow.dispatch.tensor<writeonly:tensor<512x4096xf16>>
          %4 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4096, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<4096x512xf16>> -> tensor<4096x512xf16>
          %5 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [512, 512], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<512x512xf16>> -> tensor<512x512xf16>
          %6 = flow.dispatch.tensor.load %2, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor<readonly:tensor<512xf16>> -> tensor<512xf16>
          %7 = tensor.empty() : tensor<512x4096xf16>
          %8 = tensor.empty() : tensor<4096x512xf16>
          %9 = linalg.fill ins(%cst : f16) outs(%8 : tensor<4096x512xf16>) -> tensor<4096x512xf16>
          %10 = linalg.matmul ins(%4, %5 : tensor<4096x512xf16>, tensor<512x512xf16>) outs(%9 : tensor<4096x512xf16>) -> tensor<4096x512xf16>
          %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%10, %6 : tensor<4096x512xf16>, tensor<512xf16>) outs(%7 : tensor<512x4096xf16>) {
          ^bb0(%in: f16, %in_0: f16, %out: f16):
            %12 = arith.addf %in, %in_0 : f16
            linalg.yield %12 : f16
          } -> tensor<512x4096xf16>
          flow.dispatch.tensor.store %11, %3, offsets = [0, 0], sizes = [512, 4096], strides = [1, 1] : tensor<512x4096xf16> -> !flow.dispatch.tensor<writeonly:tensor<512x4096xf16>>
          return
        }
      }
    }
  }
  // A single 1441153280-byte device-local buffer backs all three descriptor
  // bindings used by the benchmark function below.
  util.global private mutable @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer
  util.initializer {
    %c1441153280 = arith.constant 1441153280 : index
    %device = hal.ex.shared_device : !hal.device
    %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
    %buffer = hal.allocator.allocate<%allocator : !hal.allocator> type("DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") : !hal.buffer{%c1441153280}
    util.global.store %buffer, @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer
    util.initializer.return
  }
  // Benchmark entry point: records %arg0 dispatch+barrier pairs into one
  // command buffer, submits it to the device queue, and blocks on a fence.
  func.func @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512(%arg0: i32) attributes {iree.abi.stub, iree.reflection = {iree.benchmark = "dispatch"}} {
    %c-1_i32 = arith.constant -1 : i32
    %c-1_i64 = arith.constant -1 : i64
    %c512 = arith.constant 512 : index
    %c4096 = arith.constant 4096 : index
    %c770064384 = arith.constant 770064384 : index
    %c2 = arith.constant 2 : index
    %c98975488 = arith.constant 98975488 : index
    %c671088896 = arith.constant 671088896 : index
    %c1 = arith.constant 1 : index
    %c671088768 = arith.constant 671088768 : index
    %c0 = arith.constant 0 : index
    %0 = arith.index_cast %arg0 : i32 to index
    %device = hal.ex.shared_device : !hal.device
    %cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories(Dispatch) : !hal.command_buffer
    %pipeline_layout = hal.pipeline_layout.lookup device(%device : !hal.device) layout(<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) : !hal.pipeline_layout
    %forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer = util.global.load @forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer
    hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%pipeline_layout : !hal.pipeline_layout)[%c0] bindings([
      %c0 = (%forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer)[%c0, %c671088768],
      %c1 = (%forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer)[%c671088896, %c98975488],
      %c2 = (%forward_dispatch_28_vulkan_spirv_fb_forward_dispatch_28_matmul_4096x512x512_4096x512_buffer : !hal.buffer)[%c770064384, %c671088768]
    ])
    %workgroup_x, %workgroup_y, %workgroup_z = hal.executable.calculate_workgroups device(%device : !hal.device) target(@forward_dispatch_28::@vulkan_spirv_fb::@forward_dispatch_28_matmul_4096x512x512) workload([%c4096, %c512]) : index, index, index
    scf.for %arg1 = %c0 to %0 step %c1 {
      hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@forward_dispatch_28::@vulkan_spirv_fb::@forward_dispatch_28_matmul_4096x512x512) workgroups([%workgroup_x, %workgroup_y, %workgroup_z])
      hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|CommandRetire") target("CommandIssue|Dispatch") flags("None")
    }
    hal.command_buffer.finalize<%cmd : !hal.command_buffer>
    %1 = util.null : !hal.fence
    %fence = hal.fence.create device(%device : !hal.device) flags("None") : !hal.fence
    hal.device.queue.execute<%device : !hal.device> affinity(%c-1_i64) wait(%1) signal(%fence) commands([%cmd])
    %status = hal.fence.await until([%fence]) timeout_millis(%c-1_i32) : i32
    util.status.check_ok %status, "failed to wait on timepoint"
    return
  }
}
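`VK_ERROR_INITIALIZATION_FAILED` usually comes out of Vulkan instance/device creation, so it tends to point at the loader/ICD/driver environment rather than at anything specific to the module above. A quick way to separate the two (again a sketch against the Python bindings; the `"vulkan"` driver name string and the eager-initialization behavior of `Config` are assumptions):

```python
import iree.runtime as ireert

# If this alone raises UNAVAILABLE; VK_ERROR_INITIALIZATION_FAILED, the
# Vulkan loader/ICD setup is at fault, independent of the dispatch above.
try:
    ireert.Config("vulkan")
    print("Vulkan HAL driver and default device initialized fine")
except Exception as e:
    print(f"Vulkan initialization failed: {e}")
```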