// Single Thread Level-Zero Dispatch
// GitHub gist qedawkins/25568f6f06d86f0c820e90094451ac3e by @qedawkins, created September 7, 2022.
// Example of dispatching a SPIR-V kernel using Level Zero on Intel HD Graphics.
// Sample based on the test-suite examples from Level Zero:
// https://github.com/intel/compute-runtime/blob/master/level_zero/core/test/black_box_tests/zello_world_gpu.cpp
#include "ze_api.h"
#include <chrono>
#include <cstdint>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <vector>
// Checks the result of a Level Zero API call.
//
// The call expression is evaluated exactly once and its result is stored, so
// a failing call is not executed a second time just to print its error code.
// If the result differs from ZE_RESULT_SUCCESS, the stringified call, the
// enclosing function name, the line number and the result code (in hex) are
// written to std::cout and std::terminate() is called.
//
// The body is wrapped in do { ... } while (0) so the macro behaves like a
// single statement (safe inside an unbraced if/else); invoke it with a
// trailing semicolon. The argument is parenthesized so expressions containing
// low-precedence operators are compared as a whole.
#define VALIDATECALL(myZeCall)                                  \
    do {                                                        \
        const auto validateCallResult_ = (myZeCall);            \
        if (validateCallResult_ != ZE_RESULT_SUCCESS) {         \
            std::cout << "Error at "                            \
                      << #myZeCall << ": "                      \
                      << __FUNCTION__ << ": "                   \
                      << __LINE__ << std::endl;                 \
            std::cout << "Exit with Error Code: "               \
                      << "0x" << std::hex                       \
                      << validateCallResult_                    \
                      << std::dec << std::endl;                 \
            std::terminate();                                   \
        }                                                       \
    } while (0)
int main(int argc, char **argv) {
// Initialization
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));
// Get the driver
uint32_t driverCount = 0;
VALIDATECALL(zeDriverGet(&driverCount, nullptr));
ze_driver_handle_t driverHandle;
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle));
// Create the context
ze_context_desc_t contextDescription = {};
contextDescription.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC;
ze_context_handle_t context;
VALIDATECALL(zeContextCreate(driverHandle, &contextDescription, &context));
// Get the device
uint32_t deviceCount = 0;
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr));
ze_device_handle_t device;
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device));
// Print basic properties of the device
ze_device_properties_t deviceProperties = {};
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties));
std::cout << "Device : " << deviceProperties.name << "\n"
<< "Type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
<< "Vendor ID: " << std::hex << deviceProperties.vendorId << std::dec << "\n";
// Create a command queue
uint32_t numQueueGroups = 0;
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
std::cout << "No queue groups found\n";
std::terminate();
} else {
std::cout << "#Queue Groups: " << numQueueGroups << std::endl;
}
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data()));
ze_command_queue_handle_t cmdQueue;
ze_command_queue_desc_t cmdQueueDesc = {};
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
cmdQueueDesc.ordinal = i;
}
}
cmdQueueDesc.index = 0;
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
// Create a command list
ze_command_list_handle_t cmdList;
ze_command_list_desc_t cmdListDesc = {};
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
// Create two buffers
const uint32_t items = 1;
constexpr size_t allocSize = items * sizeof(int);
ze_device_mem_alloc_desc_t memAllocDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
//memAllocDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
memAllocDesc.ordinal = 0;
ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
//hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
void *inputBuffer = nullptr;
VALIDATECALL(zeMemAllocShared(context, &memAllocDesc, &hostDesc, allocSize, 1, device, &inputBuffer));
void *outputBuffer = nullptr;
VALIDATECALL(zeMemAllocShared(context, &memAllocDesc, &hostDesc, allocSize, 1, device, &outputBuffer));
// memory initialization
constexpr uint32_t value = 120021;
uint32_t *inputInt = static_cast<uint32_t *>(inputBuffer);
for (int i = 0; i < items; i++) {
inputInt[i] = value;
}
// Module Initialization
ze_module_handle_t module = nullptr;
ze_kernel_handle_t kernel = nullptr;
std::ifstream file("copy.spv", std::ios::binary);
if (file.is_open()) {
file.seekg(0, file.end);
auto length = file.tellg();
file.seekg(0, file.beg);
std::unique_ptr<char[]> spirvInput(new char[length]);
file.read(spirvInput.get(), length);
ze_module_desc_t moduleDesc = {};
ze_module_build_log_handle_t buildLog;
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(spirvInput.get());
moduleDesc.inputSize = length;
moduleDesc.pBuildFlags = "";
auto status = zeModuleCreate(context, device, &moduleDesc, &module, &buildLog);
printf("size of pointer:%zu\n",sizeof(void*));
printf("size of pointer:%zu\n",sizeof(uint64_t*));
if (status != ZE_RESULT_SUCCESS) {
// print log
size_t szLog = 0;
zeModuleBuildLogGetString(buildLog, &szLog, nullptr);
char* stringLog = (char*)malloc(szLog);
zeModuleBuildLogGetString(buildLog, &szLog, stringLog);
std::cout << "Build log: " << stringLog << std::endl;
}
VALIDATECALL(zeModuleBuildLogDestroy(buildLog));
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "copy";
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel));
uint32_t groupSizeX = 1u;
uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u;
VALIDATECALL(zeKernelSuggestGroupSize(kernel, items, items, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
std::cout << "Group X: " << groupSizeX << std::endl;
std::cout << "Group Y: " << groupSizeY << std::endl;
std::cout << "Group Z: " << groupSizeZ << std::endl;
// Push arguments
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(inputBuffer), &inputBuffer));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(outputBuffer), &outputBuffer));
// Kernel thread-dispatch
ze_group_count_t dispatch;
dispatch.groupCountX = groupSizeX;
dispatch.groupCountY = groupSizeY;
dispatch.groupCountZ = groupSizeZ;
// Launch kernel on the GPU
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatch, nullptr, 0, nullptr));
file.close();
} else {
std::cout << "SPIR-V binary file not found\n";
std::terminate();
}
// Close list abd submit for execution
VALIDATECALL(zeCommandListClose(cmdList));
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
// Validate
bool outputValidationSuccessful = true;
uint32_t *dstInt = static_cast<uint32_t *>(outputBuffer);
for (int i = 0; i < items; i++) {
if (dstInt[i] != value) {
std::cout << "Discrepancy: " << dstInt[i] << " != " << value << std::endl;
outputValidationSuccessful = false;
} else {
std::cout << "Validated: " << dstInt[i] << " == " << value << std::endl;
}
}
std::cout << "\nCopy validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
// Cleanup
VALIDATECALL(zeMemFree(context, outputBuffer));
VALIDATECALL(zeMemFree(context, inputBuffer));
VALIDATECALL(zeCommandListDestroy(cmdList));
VALIDATECALL(zeCommandQueueDestroy(cmdQueue));
VALIDATECALL(zeContextDestroy(context));
return 0;
}
// (End of gist; GitHub page footer omitted.)