Last active
August 10, 2018 15:58
-
-
Save neXyon/859b2e52bac9a5a56b804d8a9d5fa4a5 to your computer and use it in GitHub Desktop.
Vulkan Device - Host - Device synchronization with VkEvent
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <chrono>
#include <cstring>
#include <exception>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include <vulkan/vulkan.h>
// Size in bytes of one buffer: 64 KB.
constexpr int BUFFER_SIZE = 64 * 1024;
// Number of int elements that fit into BUFFER_SIZE bytes (the size_t quotient is narrowed explicitly).
constexpr int NUMBER_COUNT = static_cast<int>(BUFFER_SIZE / sizeof(int));
// Index of the one element that is altered and checked when ALL is not defined.
constexpr int SINGLE_OFFSET = 64;

// alter and check all elements if defined, otherwise just a single element in the buffer
#define ALL

// define to use the bug workaround: the altered data is duplicated into the second half of the
// host buffer and the host-to-device copy reads from there
//#define SECOND_HALF
/**
 * Runs the usual two-call enumeration idiom and returns the result as a vector.
 *
 * queryFunc is first invoked as queryFunc(&count, nullptr) to obtain the number of
 * elements, then - if that number is not zero - as queryFunc(&count, data) to fill them in.
 * Any value returned by queryFunc is ignored, so it may return void or a status code.
 *
 * @tparam T element type of the enumerated list
 * @tparam F callable taking (uint32_t* count, T* data)
 * @param queryFunc the enumeration callback
 * @return the enumerated elements; empty if the callback reports no elements
 */
template <typename T, typename F>
std::vector<T> queryList(F queryFunc)
{
    // start from zero so a callback that fails without writing the count yields an empty list
    uint32_t count = 0;
    queryFunc(&count, nullptr);

    std::vector<T> list(count);
    if(count == 0)
        return list;

    queryFunc(&count, list.data());

    // the second call reports how many elements were actually written, which may be
    // fewer than announced by the first call - drop the unwritten tail
    if(count < list.size())
        list.resize(count);

    return list;
}
/**
 * Determines which of the required names are not provided by the available items.
 *
 * @tparam T type of the available items
 * @tparam F callable mapping an item of type T to its name (anything convertible to std::string)
 * @param available the items that are present
 * @param required the names that are needed
 * @param nameFunc extracts the name of an available item
 * @return the set of required names that no available item carries; empty if nothing is missing
 */
template <typename T, typename F>
std::set<std::string> checkAvailable(const std::vector<T>& available, const std::vector<const char*>& required, F nameFunc)
{
    // begin with everything that is required and strike out whatever is found
    std::set<std::string> missing(required.begin(), required.end());

    for(const auto& item : available)
        missing.erase(nameFunc(item));

    return missing;
}
/**
 * Creates an exclusive-sharing buffer, allocates memory for it and binds the two together.
 *
 * The memory is taken from the first memory type that is allowed by the buffer's memory
 * requirements and that has all requested property flags.
 *
 * @param phyiscal_device physical device whose memory types are searched
 * @param device logical device that owns the buffer and the memory
 * @param buffer receives the created buffer handle
 * @param memory receives the allocated memory handle
 * @param size size of the buffer in bytes
 * @param usage usage flags of the buffer
 * @param properties property flags the memory type has to provide
 * @throws std::runtime_error if buffer creation, memory type selection, allocation or binding
 *         fails; objects created up to that point are destroyed again before throwing
 */
void createBuffer(VkPhysicalDevice phyiscal_device, VkDevice device, VkBuffer& buffer, VkDeviceMemory& memory, VkDeviceSize size, VkBufferUsageFlags usage,
                  VkMemoryPropertyFlags properties)
{
    VkBufferCreateInfo buffer_create_info = {};
    buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    buffer_create_info.size = size;
    buffer_create_info.usage = usage;

    if(vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer) != VK_SUCCESS)
        throw std::runtime_error("failed to create buffer!");

    VkMemoryRequirements memory_requirements;
    vkGetBufferMemoryRequirements(device, buffer, &memory_requirements);

    VkPhysicalDeviceMemoryProperties memory_properties;
    vkGetPhysicalDeviceMemoryProperties(phyiscal_device, &memory_properties);

    VkMemoryAllocateInfo allocInfo = {};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = memory_requirements.size;

    bool found = false;

    for(uint32_t i = 0; i < memory_properties.memoryTypeCount; i++)
    {
        // unsigned literal: shifting a signed 1 into the sign bit would be undefined
        const bool allowed = (memory_requirements.memoryTypeBits & (1u << i)) != 0;
        const bool suitable = (memory_properties.memoryTypes[i].propertyFlags & properties) == properties;

        if(allowed && suitable)
        {
            allocInfo.memoryTypeIndex = i;
            found = true;
            break;
        }
    }

    // without this check memory type 0 would silently be used when nothing matches
    if(!found)
    {
        vkDestroyBuffer(device, buffer, nullptr);
        buffer = VK_NULL_HANDLE;
        throw std::runtime_error("failed to find a suitable memory type!");
    }

    if(vkAllocateMemory(device, &allocInfo, nullptr, &memory) != VK_SUCCESS)
    {
        vkDestroyBuffer(device, buffer, nullptr);
        buffer = VK_NULL_HANDLE;
        throw std::runtime_error("failed to allocate buffer memory!");
    }

    if(vkBindBufferMemory(device, buffer, memory, 0) != VK_SUCCESS)
    {
        vkDestroyBuffer(device, buffer, nullptr);
        buffer = VK_NULL_HANDLE;
        vkFreeMemory(device, memory, nullptr);
        memory = VK_NULL_HANDLE;
        throw std::runtime_error("failed to bind buffer memory!");
    }
}
/**
 * Debug report callback that writes the message to std::cerr, prefixed with a four letter
 * severity label.
 *
 * flags is a bit mask, so the individual bits are tested instead of comparing the whole value;
 * if several bits are set the most severe one determines the label. No label is printed if none
 * of the known bits is set.
 *
 * @param flags bit mask describing the kind of the report
 * @param objType unused
 * @param obj unused
 * @param location unused
 * @param code unused
 * @param layerPrefix unused
 * @param msg the message text to print
 * @param userData unused
 * @return always VK_FALSE
 */
VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, uint64_t obj, size_t location, int32_t code, const char* layerPrefix,
                                             const char* msg, void* userData)
{
    (void) objType;
    (void) obj;
    (void) location;
    (void) code;
    (void) layerPrefix;
    (void) userData;

    if(flags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
        std::cerr << "ERRR";
    else if(flags & VK_DEBUG_REPORT_WARNING_BIT_EXT)
        std::cerr << "WARN";
    else if(flags & VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)
        std::cerr << "PERF";
    else if(flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT)
        std::cerr << "INFO";
    else if(flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT)
        std::cerr << "DEBG";

    std::cerr << ": " << msg << std::endl;

    return VK_FALSE;
}
/**
 * Looks up the first queue family of a physical device that supports graphics operations.
 *
 * @param device the physical device whose queue families are enumerated
 * @return the index of the first queue family with VK_QUEUE_GRAPHICS_BIT set, or -1 if there is none
 */
int findQueueFamilyIndex(VkPhysicalDevice device)
{
    const std::vector<VkQueueFamilyProperties> families = queryList<VkQueueFamilyProperties>(
        [device](uint32_t* count, VkQueueFamilyProperties* props) { vkGetPhysicalDeviceQueueFamilyProperties(device, count, props); });

    const int family_count = static_cast<int>(families.size());

    for(int index = 0; index < family_count; ++index)
    {
        // VK_QUEUE_GRAPHICS_BIT is a single bit, so a non-zero result means it is set
        if((families[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
            return index;
    }

    return -1;
}
/**
 * Creates the Vulkan instance with the debug report extension and the standard validation
 * layer enabled, installs debugCallback for all report kinds and selects a physical device.
 *
 * The first enumerated physical device for which findQueueFamilyIndex() does not return a
 * negative value is selected.
 *
 * @param instance receives the created instance
 * @param physical_device receives the selected physical device
 * @param debug_report_callback receives the installed debug report callback
 * @throws std::runtime_error if a required extension or layer is missing, if the instance or
 *         the debug callback cannot be created, or if no suitable physical device exists
 */
void createInstance(VkInstance& instance, VkPhysicalDevice& physical_device, VkDebugReportCallbackEXT& debug_report_callback)
{
    std::vector<const char*> extensions, layers;

    extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
    layers.push_back("VK_LAYER_LUNARG_standard_validation");

    // verify that everything that is about to be enabled is actually present
    auto availableExtensions =
        queryList<VkExtensionProperties>([](uint32_t* count, VkExtensionProperties* prop) { return vkEnumerateInstanceExtensionProperties(nullptr, count, prop); });
    auto missingExtensions = checkAvailable(availableExtensions, extensions, [](const VkExtensionProperties& prop) { return prop.extensionName; });

    if(!missingExtensions.empty())
        throw std::runtime_error(*missingExtensions.begin() + " extensions requested, but not available!");

    auto availableLayers = queryList<VkLayerProperties>(vkEnumerateInstanceLayerProperties);
    auto missingLayers = checkAvailable(availableLayers, layers, [](const VkLayerProperties& prop) { return prop.layerName; });

    if(!missingLayers.empty())
        throw std::runtime_error(*missingLayers.begin() + " layer requested, but not available!");

    // create the instance
    VkApplicationInfo appInfo = {};
    appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    appInfo.pApplicationName = "Vulkan Application";
    appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
    appInfo.pEngineName = "No Engine";
    appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
    appInfo.apiVersion = VK_API_VERSION_1_0;

    VkInstanceCreateInfo instance_create_info = {};
    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    instance_create_info.pApplicationInfo = &appInfo;
    instance_create_info.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
    instance_create_info.ppEnabledExtensionNames = extensions.data();
    instance_create_info.enabledLayerCount = static_cast<uint32_t>(layers.size());
    instance_create_info.ppEnabledLayerNames = layers.data();

    if(vkCreateInstance(&instance_create_info, nullptr, &instance) != VK_SUCCESS)
        throw std::runtime_error("failed to create instance!");

    // install the debug report callback for every kind of report
    VkDebugReportCallbackCreateInfoEXT debug_report_callback_create_info = {};
    debug_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
    debug_report_callback_create_info.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
                                              VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT;
    debug_report_callback_create_info.pfnCallback = debugCallback;
    debug_report_callback_create_info.pUserData = nullptr;

    // extension entry points have to be looked up at runtime; a failed lookup yields a null
    // pointer that must not be called
    auto CreateDebugReportCallbackEXT = reinterpret_cast<PFN_vkCreateDebugReportCallbackEXT>(vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT"));

    if(CreateDebugReportCallbackEXT == nullptr ||
       CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, nullptr, &debug_report_callback) != VK_SUCCESS)
        throw std::runtime_error("failed to set up debug callback!");

    // pick the first physical device that has a usable queue family
    auto devices = queryList<VkPhysicalDevice>([&instance](uint32_t* count, VkPhysicalDevice* devices) { return vkEnumeratePhysicalDevices(instance, count, devices); });

    if(devices.empty())
        throw std::runtime_error("failed to find GPUs with Vulkan support!");

    for(const auto& device : devices)
    {
        if(findQueueFamilyIndex(device) < 0)
            continue;

        physical_device = device;
        return;
    }

    throw std::runtime_error("failed to find a suitable GPU!");
}
int main(int argc, char* argv[]) | |
{ | |
VkInstance instance; | |
VkPhysicalDevice physical_device; | |
VkDebugReportCallbackEXT debug_report_callback; | |
createInstance(instance, physical_device, debug_report_callback); | |
// create device | |
auto queue_family_index = findQueueFamilyIndex(physical_device); | |
float queue_priority = 1.0f; | |
VkDeviceQueueCreateInfo queueCreateInfo = {}; | |
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; | |
queueCreateInfo.queueFamilyIndex = queue_family_index; | |
queueCreateInfo.queueCount = 1; | |
queueCreateInfo.pQueuePriorities = &queue_priority; | |
VkPhysicalDeviceFeatures deviceFeatures = {}; | |
VkDeviceCreateInfo createInfo = {}; | |
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; | |
createInfo.pQueueCreateInfos = &queueCreateInfo; | |
createInfo.queueCreateInfoCount = 1; | |
createInfo.pEnabledFeatures = &deviceFeatures; | |
VkDevice device; | |
if(vkCreateDevice(physical_device, &createInfo, nullptr, &device) != VK_SUCCESS) | |
throw std::runtime_error("failed to create logical device!"); | |
VkQueue graphicsQueue; | |
vkGetDeviceQueue(device, queue_family_index, 0, &graphicsQueue); | |
// allocate memory and create buffers | |
VkBuffer device_buffer; | |
VkDeviceMemory device_memory; | |
createBuffer(physical_device, device, device_buffer, device_memory, BUFFER_SIZE, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, | |
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); | |
VkBuffer host_buffer; | |
VkDeviceMemory host_memory; | |
createBuffer(physical_device, device, host_buffer, host_memory, BUFFER_SIZE * 2, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, | |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); | |
VkBuffer result_buffer; | |
VkDeviceMemory result_memory; | |
createBuffer(physical_device, device, result_buffer, result_memory, BUFFER_SIZE, VK_BUFFER_USAGE_TRANSFER_DST_BIT, | |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); | |
// create events | |
VkEvent device_to_host_sync_event; | |
VkEvent host_to_device_sync_event; | |
VkEventCreateInfo event_create_info = {}; | |
event_create_info.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; | |
if(vkCreateEvent(device, &event_create_info, nullptr, &device_to_host_sync_event) != VK_SUCCESS) | |
throw std::runtime_error("failed to create event!"); | |
if(vkCreateEvent(device, &event_create_info, nullptr, &host_to_device_sync_event) != VK_SUCCESS) | |
throw std::runtime_error("failed to create event!"); | |
// create command pool and allocate command buffer | |
VkCommandPool command_pool; | |
VkCommandPoolCreateInfo command_pool_create_info = {}; | |
command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; | |
command_pool_create_info.queueFamilyIndex = queue_family_index; | |
command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; | |
if(vkCreateCommandPool(device, &command_pool_create_info, nullptr, &command_pool) != VK_SUCCESS) | |
throw std::runtime_error("failed to create command pool!"); | |
VkCommandBuffer command_buffer; | |
VkCommandBufferAllocateInfo command_buffer_allocation_info = {}; | |
command_buffer_allocation_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; | |
command_buffer_allocation_info.commandPool = command_pool; | |
command_buffer_allocation_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; | |
command_buffer_allocation_info.commandBufferCount = 1; | |
if(vkAllocateCommandBuffers(device, &command_buffer_allocation_info, &command_buffer) != VK_SUCCESS) | |
throw std::runtime_error("failed to allocate command buffers!"); | |
// data perparation | |
std::vector<int> data(NUMBER_COUNT); | |
for(int i = 0; i < NUMBER_COUNT; i++) | |
data[i] = i; | |
// record command buffer | |
VkCommandBufferBeginInfo begin_info = {}; | |
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; | |
VkMemoryBarrier barrier = {}; | |
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; | |
VkBufferCopy copy_region = {}; | |
vkBeginCommandBuffer(command_buffer, &begin_info); | |
// we load the data into the device buffer | |
vkCmdUpdateBuffer(command_buffer, device_buffer, 0, BUFFER_SIZE, data.data()); | |
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | |
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | |
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); | |
// copy device to host buffer | |
copy_region.srcOffset = 0; | |
copy_region.dstOffset = 0; | |
copy_region.size = BUFFER_SIZE; | |
vkCmdCopyBuffer(command_buffer, device_buffer, host_buffer, 1, ©_region); | |
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | |
barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; | |
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); | |
// set device to host sync event | |
vkCmdSetEvent(command_buffer, device_to_host_sync_event, VK_PIPELINE_STAGE_HOST_BIT); | |
barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; | |
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | |
// wait for host to device sync event | |
vkCmdWaitEvents(command_buffer, 1, &host_to_device_sync_event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 1, &barrier, 0, nullptr, 0, nullptr); | |
// copy host to device buffer | |
copy_region.dstOffset = 0; | |
copy_region.size = BUFFER_SIZE; | |
#ifdef SECOND_HALF | |
copyRegion.srcOffset = BUFFER_SIZE; | |
#else | |
copy_region.srcOffset = 0; | |
#endif | |
vkCmdCopyBuffer(command_buffer, host_buffer, device_buffer, 1, ©_region); | |
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | |
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | |
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); | |
// now copy device to result buffer to check if the data was copied correctly before | |
copy_region.dstOffset = 0; | |
#ifdef ALL | |
copy_region.srcOffset = 0; | |
copy_region.size = BUFFER_SIZE; | |
#else | |
copyRegion.srcOffset = SINGLE_OFFSET * sizeof(int); | |
copyRegion.size = sizeof(int); | |
#endif | |
vkCmdCopyBuffer(command_buffer, device_buffer, result_buffer, 1, ©_region); | |
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | |
barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; | |
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); | |
vkEndCommandBuffer(command_buffer); | |
// submit the command buffer | |
VkSubmitInfo submitInfo = {}; | |
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; | |
submitInfo.commandBufferCount = 1; | |
submitInfo.pCommandBuffers = &command_buffer; | |
std::vector<int> wrong(NUMBER_COUNT, -1); | |
vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE); | |
// wait for the device to host sync event | |
while(vkGetEventStatus(device, device_to_host_sync_event) != VK_EVENT_SET) | |
std::this_thread::sleep_for(std::chrono::microseconds(10)); | |
int* numbers; | |
// alter the data in the host buffer | |
vkMapMemory(device, host_memory, 0, 2 * BUFFER_SIZE, 0, reinterpret_cast<void**>(&numbers)); | |
#ifdef ALL | |
for(int i = 0; i < NUMBER_COUNT; i++) | |
#endif | |
{ | |
#ifndef ALL | |
int i = SINGLE_OFFSET; | |
#endif | |
numbers[i]++; | |
if(numbers[i] != data[i] + 1) | |
wrong[i] = numbers[i]; | |
} | |
// this prints if the device to host buffer copy wasn't completed before the first event was set! | |
for(int i = 0; i < NUMBER_COUNT; i++) | |
{ | |
if(wrong[i] != -1) | |
std::cout << "Wrong 1: " << i << " " << wrong[i] << " " << numbers[i] << " " << data[i] + 1 << std::endl; | |
} | |
#ifdef SECOND_HALF | |
std::memcpy(numbers + NUMBER_COUNT, numbers, BUFFER_SIZE); | |
#endif | |
vkUnmapMemory(device, host_memory); | |
// std::this_thread::sleep_for(std::chrono::milliseconds(1000)); | |
// set the host to device sync event | |
vkSetEvent(device, host_to_device_sync_event); | |
// wait until command buffer execution finishes | |
vkDeviceWaitIdle(device); | |
vkMapMemory(device, result_memory, 0, BUFFER_SIZE, 0, reinterpret_cast<void**>(&numbers)); | |
// this prints if the host to host device copy was started before the second event was set! | |
#ifdef ALL | |
for(int i = 0; i < NUMBER_COUNT; i++) | |
#endif | |
{ | |
#ifndef ALL | |
int i = SINGLE_OFFSET; | |
if(numbers[0] != data[i] + 1) | |
std::cout << "Wrong 2: " << i << " " << numbers[0] << " " << data[i] + 1 << std::endl; | |
#else | |
if(numbers[i] != data[i] + 1) | |
std::cout << "Wrong 2: " << i << " " << numbers[i] << " " << data[i] + 1 << std::endl; | |
#endif | |
} | |
vkUnmapMemory(device, result_memory); | |
// cleanup | |
vkDestroyCommandPool(device, command_pool, nullptr); | |
vkDestroyEvent(device, device_to_host_sync_event, nullptr); | |
vkDestroyEvent(device, host_to_device_sync_event, nullptr); | |
vkDestroyBuffer(device, device_buffer, nullptr); | |
vkFreeMemory(device, device_memory, nullptr); | |
vkDestroyBuffer(device, host_buffer, nullptr); | |
vkFreeMemory(device, host_memory, nullptr); | |
vkDestroyBuffer(device, result_buffer, nullptr); | |
vkFreeMemory(device, result_memory, nullptr); | |
vkDestroyDevice(device, nullptr); | |
auto DestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT) vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT"); | |
DestroyDebugReportCallbackEXT(instance, debug_report_callback, nullptr); | |
vkDestroyInstance(instance, nullptr); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment