Skip to content

Instantly share code, notes, and snippets.

@maleadt
Last active October 10, 2021 11:05
Show Gist options
  • Save maleadt/8041818a1eb927826469c09d6b288c44 to your computer and use it in GitHub Desktop.
Save maleadt/8041818a1eb927826469c09d6b288c44 to your computer and use it in GitHub Desktop.
Bug MWE
#include <stdint.h>
// Kernel "dummy": writes the constant 42 to the int that `ptr` refers to in
// global memory. Every work-item that runs the kernel performs the same store.
kernel void dummy(__global int* ptr) {
  ptr[0] = 42;
}
; Module header: records the name of the originating source file and selects
; the spir64 target triple together with the data layout string used for it.
source_filename = "kernel.cl"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64"
; Kernel @dummy: takes its single argument as a raw i64, converts it with
; inttoptr to a pointer to i32 in address space 1, and stores the constant 2
; through that pointer using 1-byte alignment.
; NOTE(review): this differs from @dummy_original in this module (pointer-typed
; argument, stores 42) -- presumably a hand-edited variant made for the
; reproducer; confirm before regenerating this file from kernel.cl.
define spir_kernel void @dummy(i64) {
top:
%1 = inttoptr i64 %0 to i32 addrspace(1)*
store i32 2, i32 addrspace(1)* %1, align 1
ret void
}
; Kernel @dummy_original: takes a pointer to i32 in address space 1 directly and
; stores the constant 42 through it using 1-byte alignment.
; NOTE(review): presumably the unmodified compiler output for kernel.cl, kept
; for comparison with @dummy -- confirm.
define spir_kernel void @dummy_original(i32 addrspace(1)*) {
top:
store i32 42, i32 addrspace(1)* %0, align 1
ret void
}
; Module-level metadata. !llvm.module.flags carries one entry, the "wchar_size"
; flag with value 4. Each of the two !opencl.* named nodes refers to a pair of
; i32 values.
; NOTE(review): the pairs presumably read as (major, minor), i.e. OpenCL C 1.0
; and SPIR 1.2 -- confirm against the SPIR specification.
!llvm.module.flags = !{!0}
!opencl.ocl.version = !{!1}
!opencl.spir.version = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 0}
!2 = !{i32 1, i32 2}
; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 11
; Schema: 0
; Capabilities declared by this module.
OpCapability Addresses
OpCapability Kernel
; Imports the "OpenCL.std" extended instruction set as %1.
%1 = OpExtInstImport "OpenCL.std"
; Physical64 addressing model with the OpenCL memory model.
OpMemoryModel Physical64 OpenCL
; Function %5 is exposed as the Kernel entry point named "dummy".
OpEntryPoint Kernel %5 "dummy"
OpSource OpenCL_C 100000
OpName %top "top"
; Types and constants: %uint is a 32-bit integer type, %uint_2 is the constant 2
; of that type.
%uint = OpTypeInt 32 0
%uint_2 = OpConstant %uint 2
%void = OpTypeVoid
; %4 is the function type "void (%uint)".
; NOTE(review): the LLVM IR for @dummy in this gist declares the argument as
; i64, while this listing types it as the 32-bit %uint even though the module
; uses Physical64 addressing -- confirm whether this mismatch is the bug the
; reproducer is meant to show.
%4 = OpTypeFunction %void %uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
; Body of the "dummy" entry point: converts the integer parameter %6 to a
; CrossWorkgroup pointer to %uint and stores the constant 2 through it with
; alignment 1.
%5 = OpFunction %void None %4
%6 = OpFunctionParameter %uint
%top = OpLabel
%9 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %6
OpStore %9 %uint_2 Aligned 1
OpReturn
OpFunctionEnd
#include <cassert>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <level_zero/ze_api.h>
#include <limits>
#include <vector>
// Number of bytes requested from kernel.spv per fread call.
#define BUFFERSIZE 1024
// Evaluates a Level Zero call and forwards its result, together with the call
// site's file and line, to do_check(). Wrapped in do { } while (0) so that
// `check(x);` behaves as a single statement, including inside an unbraced
// if/else (a bare `{ ... }` followed by `;` would orphan the `else`).
#define check(ans)                                                             \
  do {                                                                         \
    do_check((ans), __FILE__, __LINE__);                                       \
  } while (0)
/// Terminates the process when a Level Zero call did not succeed.
///
/// @param code  Result returned by the Level Zero call.
/// @param file  Source file of the call site (reported in the diagnostic).
/// @param line  Source line of the call site (reported in the diagnostic).
///
/// Returns normally only for ZE_RESULT_SUCCESS; for any other result it prints
/// the numeric code and call site to stderr and exits with status 1.
void do_check(ze_result_t code, const char *file, int line) {
  if (code == ZE_RESULT_SUCCESS)
    return;

  fprintf(stderr, "Failed: %d at %s %d\n", code, file, line);
  exit(1);
}
int main() {
// Initialize driver
check(zeInit(ZE_INIT_FLAG_GPU_ONLY));
// Retrieve driver
uint32_t driverCount = 0;
check(zeDriverGet(&driverCount, nullptr));
ze_driver_handle_t driverHandle;
check(zeDriverGet(&driverCount, &driverHandle));
ze_context_desc_t contextDesc = {};
ze_context_handle_t context;
check(zeContextCreate(driverHandle, &contextDesc, &context));
// Retrieve device
uint32_t deviceCount = 0;
check(zeDeviceGet(driverHandle, &deviceCount, nullptr));
ze_device_handle_t device;
deviceCount = 1;
check(zeDeviceGet(driverHandle, &deviceCount, &device));
// Print some properties
ze_device_properties_t deviceProperties = {};
check(zeDeviceGetProperties(device, &deviceProperties));
// Create command queue
uint32_t numQueueGroups = 0;
check(
zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
return 1;
}
std::vector<ze_command_queue_group_properties_t> queueProperties(
numQueueGroups);
check(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
queueProperties.data()));
ze_command_queue_handle_t command_queue;
ze_command_queue_desc_t cmdQueueDesc = {};
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (queueProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
cmdQueueDesc.ordinal = i;
}
}
cmdQueueDesc.index = 0;
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
check(zeCommandQueueCreate(context, device, &cmdQueueDesc, &command_queue));
// Create command list
ze_command_list_handle_t command_list;
ze_command_list_desc_t cmdListDesc = {};
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
check(zeCommandListCreate(context, device, &cmdListDesc, &command_list));
uint8_t *buffer = (uint8_t *)malloc(BUFFERSIZE);
FILE *filp = fopen("kernel.spv", "rb");
size_t bytes_read = fread(buffer, sizeof(uint8_t), BUFFERSIZE, filp);
ze_module_desc_t module_desc = {};
module_desc.format = ZE_MODULE_FORMAT_IL_SPIRV;
module_desc.inputSize = bytes_read;
module_desc.pInputModule = buffer;
ze_module_handle_t module;
check(zeModuleCreate(context, device, &module_desc, &module, nullptr));
ze_kernel_desc_t kernel_desc = {};
kernel_desc.pKernelName = "dummy";
ze_kernel_handle_t kernel;
check(zeKernelCreate(module, &kernel_desc, &kernel));
size_t size = 1;
uint8_t *out = (uint8_t *)malloc(sizeof(uint8_t));
memset(out, -1, size);
ze_device_mem_alloc_desc_t device_desc;
device_desc.ordinal = 0;
void *mem;
check(zeMemAllocDevice(context, &device_desc, size, 0, device, &mem));
check(zeKernelSetArgumentValue(kernel, 0, sizeof(int *), &mem));
ze_group_count_t kernel_args = {1, 1, 1};
check(zeCommandListAppendLaunchKernel(command_list, kernel, &kernel_args,
nullptr, 0, nullptr));
check(zeCommandListAppendBarrier(command_list, nullptr, 0, nullptr));
check(zeCommandListAppendMemoryCopy(command_list, out, mem, size, nullptr, 0,
nullptr));
check(zeCommandListClose(command_list));
check(zeCommandQueueExecuteCommandLists(command_queue, 1, &command_list,
nullptr));
check(zeCommandQueueSynchronize(command_queue,
std::numeric_limits<uint64_t>::max()));
for (size_t i = 0; i < size; i++) {
printf("out[%zu] = %d\n", i, out[i]);
fflush(stdout);
assert(out[i] == 42);
}
}
# Builds the reproducer: the `main` executable (from main.cpp) and the
# kernel.spv module, produced from kernel.cl via kernel.ll and kernel.bc.
.PHONY: all
all: main

# Listing kernel.spv as a prerequisite builds the module alongside the
# executable.
main: kernel.spv
# Link against the Level Zero loader. Libraries go in LDLIBS and are placed
# after the source file on the link line: with linkers that default to
# --as-needed, a library named before the objects that use it is dropped.
main: LDLIBS=-lze_loader

# OpenCL C -> textual LLVM IR for the spir64 target.
%.ll: %.cl
	clang -cc1 -triple spir64 -O3 $< -emit-llvm -o $@

# Textual LLVM IR -> bitcode.
%.bc: %.ll
	llvm-as $< -o $@

# SPIR-V binary -> disassembly, for inspection only.
%.spt: %.spv
	spirv-dis $< -o $@

# LLVM bitcode -> SPIR-V binary.
%.spv: %.bc
	llvm-spirv $< -o $@

# Host program. $< is the .cpp matched by this pattern rule.
%: %.cpp
	clang++ ${CXXFLAGS} $< -o $@ ${LDLIBS}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment