Created
September 7, 2022 18:14
-
-
Save qedawkins/25568f6f06d86f0c820e90094451ac3e to your computer and use it in GitHub Desktop.
Single Thread Level-Zero Dispatch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example of dispatching a SPIR-V kernel using Level Zero on Intel HD Graphics.
// Sample based on the test-suite examples from Level Zero:
// https://github.com/intel/compute-runtime/blob/master/level_zero/core/test/black_box_tests/zello_world_gpu.cpp
#include "ze_api.h" | |
#include <chrono> | |
#include <cstring> | |
#include <fstream> | |
#include <iostream> | |
#include <limits> | |
#include <memory> | |
#include <vector> | |
// VALIDATECALL(call): runs `call` exactly once and, if its result differs from
// ZE_RESULT_SUCCESS, prints the stringified call, the enclosing function name,
// the line number and the result code in hex, then calls std::terminate().
//
// The result is captured in a local so the call is never re-executed just to
// print its error code, and the body is wrapped in do { } while (0) so the
// macro behaves as a single statement (safe inside a braceless if/else).
// Always follow the invocation with a semicolon.
#define VALIDATECALL(myZeCall)                                  \
    do {                                                        \
        const auto validateCallResult_ = (myZeCall);            \
        if (validateCallResult_ != ZE_RESULT_SUCCESS) {         \
            std::cout << "Error at "                            \
                      << #myZeCall << ": "                      \
                      << __FUNCTION__ << ": "                   \
                      << __LINE__ << std::endl;                 \
            std::cout << "Exit with Error Code: "               \
                      << "0x" << std::hex                       \
                      << validateCallResult_                    \
                      << std::dec << std::endl;                 \
            std::terminate();                                   \
        }                                                       \
    } while (0)
int main(int argc, char **argv) { | |
// Initialization | |
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY)); | |
// Get the driver | |
uint32_t driverCount = 0; | |
VALIDATECALL(zeDriverGet(&driverCount, nullptr)); | |
ze_driver_handle_t driverHandle; | |
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle)); | |
// Create the context | |
ze_context_desc_t contextDescription = {}; | |
contextDescription.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC; | |
ze_context_handle_t context; | |
VALIDATECALL(zeContextCreate(driverHandle, &contextDescription, &context)); | |
// Get the device | |
uint32_t deviceCount = 0; | |
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr)); | |
ze_device_handle_t device; | |
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device)); | |
// Print basic properties of the device | |
ze_device_properties_t deviceProperties = {}; | |
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties)); | |
std::cout << "Device : " << deviceProperties.name << "\n" | |
<< "Type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n" | |
<< "Vendor ID: " << std::hex << deviceProperties.vendorId << std::dec << "\n"; | |
// Create a command queue | |
uint32_t numQueueGroups = 0; | |
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); | |
if (numQueueGroups == 0) { | |
std::cout << "No queue groups found\n"; | |
std::terminate(); | |
} else { | |
std::cout << "#Queue Groups: " << numQueueGroups << std::endl; | |
} | |
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups); | |
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); | |
ze_command_queue_handle_t cmdQueue; | |
ze_command_queue_desc_t cmdQueueDesc = {}; | |
for (uint32_t i = 0; i < numQueueGroups; i++) { | |
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { | |
cmdQueueDesc.ordinal = i; | |
} | |
} | |
cmdQueueDesc.index = 0; | |
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; | |
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); | |
// Create a command list | |
ze_command_list_handle_t cmdList; | |
ze_command_list_desc_t cmdListDesc = {}; | |
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; | |
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); | |
// Create two buffers | |
const uint32_t items = 1; | |
constexpr size_t allocSize = items * sizeof(int); | |
ze_device_mem_alloc_desc_t memAllocDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; | |
//memAllocDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; | |
memAllocDesc.ordinal = 0; | |
ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; | |
//hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; | |
void *inputBuffer = nullptr; | |
VALIDATECALL(zeMemAllocShared(context, &memAllocDesc, &hostDesc, allocSize, 1, device, &inputBuffer)); | |
void *outputBuffer = nullptr; | |
VALIDATECALL(zeMemAllocShared(context, &memAllocDesc, &hostDesc, allocSize, 1, device, &outputBuffer)); | |
// memory initialization | |
constexpr uint32_t value = 120021; | |
uint32_t *inputInt = static_cast<uint32_t *>(inputBuffer); | |
for (int i = 0; i < items; i++) { | |
inputInt[i] = value; | |
} | |
// Module Initialization | |
ze_module_handle_t module = nullptr; | |
ze_kernel_handle_t kernel = nullptr; | |
std::ifstream file("copy.spv", std::ios::binary); | |
if (file.is_open()) { | |
file.seekg(0, file.end); | |
auto length = file.tellg(); | |
file.seekg(0, file.beg); | |
std::unique_ptr<char[]> spirvInput(new char[length]); | |
file.read(spirvInput.get(), length); | |
ze_module_desc_t moduleDesc = {}; | |
ze_module_build_log_handle_t buildLog; | |
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; | |
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(spirvInput.get()); | |
moduleDesc.inputSize = length; | |
moduleDesc.pBuildFlags = ""; | |
auto status = zeModuleCreate(context, device, &moduleDesc, &module, &buildLog); | |
printf("size of pointer:%zu\n",sizeof(void*)); | |
printf("size of pointer:%zu\n",sizeof(uint64_t*)); | |
if (status != ZE_RESULT_SUCCESS) { | |
// print log | |
size_t szLog = 0; | |
zeModuleBuildLogGetString(buildLog, &szLog, nullptr); | |
char* stringLog = (char*)malloc(szLog); | |
zeModuleBuildLogGetString(buildLog, &szLog, stringLog); | |
std::cout << "Build log: " << stringLog << std::endl; | |
} | |
VALIDATECALL(zeModuleBuildLogDestroy(buildLog)); | |
ze_kernel_desc_t kernelDesc = {}; | |
kernelDesc.pKernelName = "copy"; | |
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel)); | |
uint32_t groupSizeX = 1u; | |
uint32_t groupSizeY = 1u; | |
uint32_t groupSizeZ = 1u; | |
VALIDATECALL(zeKernelSuggestGroupSize(kernel, items, items, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); | |
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); | |
std::cout << "Group X: " << groupSizeX << std::endl; | |
std::cout << "Group Y: " << groupSizeY << std::endl; | |
std::cout << "Group Z: " << groupSizeZ << std::endl; | |
// Push arguments | |
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(inputBuffer), &inputBuffer)); | |
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(outputBuffer), &outputBuffer)); | |
// Kernel thread-dispatch | |
ze_group_count_t dispatch; | |
dispatch.groupCountX = groupSizeX; | |
dispatch.groupCountY = groupSizeY; | |
dispatch.groupCountZ = groupSizeZ; | |
// Launch kernel on the GPU | |
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatch, nullptr, 0, nullptr)); | |
file.close(); | |
} else { | |
std::cout << "SPIR-V binary file not found\n"; | |
std::terminate(); | |
} | |
// Close list abd submit for execution | |
VALIDATECALL(zeCommandListClose(cmdList)); | |
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); | |
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max())); | |
// Validate | |
bool outputValidationSuccessful = true; | |
uint32_t *dstInt = static_cast<uint32_t *>(outputBuffer); | |
for (int i = 0; i < items; i++) { | |
if (dstInt[i] != value) { | |
std::cout << "Discrepancy: " << dstInt[i] << " != " << value << std::endl; | |
outputValidationSuccessful = false; | |
} else { | |
std::cout << "Validated: " << dstInt[i] << " == " << value << std::endl; | |
} | |
} | |
std::cout << "\nCopy validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; | |
// Cleanup | |
VALIDATECALL(zeMemFree(context, outputBuffer)); | |
VALIDATECALL(zeMemFree(context, inputBuffer)); | |
VALIDATECALL(zeCommandListDestroy(cmdList)); | |
VALIDATECALL(zeCommandQueueDestroy(cmdQueue)); | |
VALIDATECALL(zeContextDestroy(context)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment