Skip to content

Instantly share code, notes, and snippets.

@neXyon
Last active August 10, 2018 15:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neXyon/859b2e52bac9a5a56b804d8a9d5fa4a5 to your computer and use it in GitHub Desktop.
Save neXyon/859b2e52bac9a5a56b804d8a9d5fa4a5 to your computer and use it in GitHub Desktop.
Vulkan Device - Host - Device synchronization with VkEvent
#include <chrono>
#include <cstring>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>
#include <vulkan/vulkan.h>
// Size in bytes (64 KB) used for the device buffer and the result buffer; the host buffer is
// created with twice this size.
constexpr int BUFFER_SIZE = 64 * 1024;
// Number of int elements that fit into BUFFER_SIZE bytes.
constexpr int NUMBER_COUNT = BUFFER_SIZE / sizeof(int);
// Index of the one element that is altered and checked when ALL is not defined.
constexpr int SINGLE_OFFSET = 64;
// alter and check all elements if defined, otherwise just a single element in the buffer
#define ALL
// define to use the bug workaround: the host then copies its altered data into the second half
// of the host buffer and the host-to-device copy reads from that second half
//#define SECOND_HALF
/**
 * Runs a two-call enumeration: the first call asks for the element count, the second call
 * fetches the elements into storage of that size.
 *
 * @tparam T element type of the returned list.
 * @tparam F callable invoked as queryFunc(uint32_t* count, T* data); its return value is ignored.
 * @param queryFunc called once with a null data pointer to obtain the count and, only if that
 *                  count is non-zero, a second time with a pointer to `count` elements.
 * @return the queried elements; empty if the query reported zero elements or never wrote the count.
 */
template <typename T, typename F>
std::vector<T> queryList(F queryFunc)
{
    // Start from zero so that a query which fails without touching the count yields an empty
    // list instead of sizing the vector from an indeterminate value.
    uint32_t count = 0;
    queryFunc(&count, nullptr);

    std::vector<T> list(count);
    if(count)
    {
        queryFunc(&count, list.data());
        // The second call may report fewer elements than the first one announced; drop the
        // value-initialized tail so callers only see elements that were actually written.
        if(count < list.size())
            list.resize(count);
    }
    return list;
}
/**
 * Determines which of the required names are not offered by the available entries.
 *
 * @tparam T type of the available entries.
 * @tparam F callable mapping a `const T&` to something convertible to std::string.
 * @param available entries that are offered.
 * @param required names that are needed.
 * @param nameFunc extracts the name of an available entry.
 * @return the required names for which no available entry has that name.
 */
template <typename T, typename F>
std::set<std::string> checkAvailable(std::vector<T>& available, std::vector<const char*>& required, F nameFunc)
{
    // Assume everything is missing at first ...
    std::set<std::string> missing;
    for(const char* name : required)
        missing.insert(name);

    // ... then strike out every name that is actually offered.
    for(const auto& entry : available)
    {
        const std::string offered = nameFunc(entry);
        missing.erase(offered);
    }

    return missing;
}
/**
 * Creates an exclusive-sharing buffer, allocates memory for it and binds the two together.
 *
 * @param phyiscal_device physical device whose memory types are searched.
 * @param device logical device that owns the buffer and the allocation.
 * @param buffer receives the created buffer handle.
 * @param memory receives the allocated memory handle.
 * @param size buffer size in bytes.
 * @param usage buffer usage flags.
 * @param properties memory property flags that the chosen memory type must all have.
 * @throws std::runtime_error if the buffer cannot be created, no memory type satisfies both the
 *         buffer's requirements and `properties`, or the allocation or the bind fails.
 */
void createBuffer(VkPhysicalDevice phyiscal_device, VkDevice device, VkBuffer& buffer, VkDeviceMemory& memory, VkDeviceSize size, VkBufferUsageFlags usage,
                  VkMemoryPropertyFlags properties)
{
    VkBufferCreateInfo buffer_create_info = {};
    buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    buffer_create_info.size = size;
    buffer_create_info.usage = usage;
    if(vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer) != VK_SUCCESS)
        throw std::runtime_error("failed to create buffer!");

    VkMemoryRequirements memory_requirements;
    vkGetBufferMemoryRequirements(device, buffer, &memory_requirements);

    VkPhysicalDeviceMemoryProperties memory_properties;
    vkGetPhysicalDeviceMemoryProperties(phyiscal_device, &memory_properties);

    // Pick the first memory type that the buffer accepts and that has all requested properties.
    bool found = false;
    uint32_t memory_type_index = 0;
    for(uint32_t i = 0; i < memory_properties.memoryTypeCount; i++)
    {
        // unsigned literal: shifting a signed 1 into bit 31 would overflow
        const bool accepted = (memory_requirements.memoryTypeBits & (1u << i)) != 0;
        const bool has_properties = (memory_properties.memoryTypes[i].propertyFlags & properties) == properties;
        if(accepted && has_properties)
        {
            memory_type_index = i;
            found = true;
            break;
        }
    }
    // Without this check a failed search would silently allocate from memory type 0.
    if(!found)
        throw std::runtime_error("failed to find a suitable memory type!");

    VkMemoryAllocateInfo allocInfo = {};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = memory_requirements.size;
    allocInfo.memoryTypeIndex = memory_type_index;
    if(vkAllocateMemory(device, &allocInfo, nullptr, &memory) != VK_SUCCESS)
        throw std::runtime_error("failed to allocate buffer memory!");

    if(vkBindBufferMemory(device, buffer, memory, 0) != VK_SUCCESS)
        throw std::runtime_error("failed to bind buffer memory!");
}
/**
 * Debug report callback: writes a severity label followed by the message to std::cerr.
 *
 * `flags` is a bit mask, so the individual bits are tested instead of comparing the whole value;
 * this way a message reported with more than one bit set still gets a label. When several bits
 * are set the most severe one (error, warning, performance, info, debug) decides the label.
 * If none of the known bits is set, only ": <msg>" is printed.
 *
 * @param flags severity bits of the report.
 * @param msg message text to print.
 * @return always VK_FALSE.
 *
 * The remaining parameters are not used.
 */
VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, uint64_t obj, size_t location, int32_t code, const char* layerPrefix,
                                             const char* msg, void* userData)
{
    if(flags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
        std::cerr << "ERRR";
    else if(flags & VK_DEBUG_REPORT_WARNING_BIT_EXT)
        std::cerr << "WARN";
    else if(flags & VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)
        std::cerr << "PERF";
    else if(flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT)
        std::cerr << "INFO";
    else if(flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT)
        std::cerr << "DEBG";

    std::cerr << ": " << msg << std::endl;
    return VK_FALSE;
}
/**
 * Looks up the first queue family of a physical device that has the graphics bit set.
 *
 * @param device physical device whose queue families are queried.
 * @return index of the first queue family with VK_QUEUE_GRAPHICS_BIT, or -1 if there is none.
 */
int findQueueFamilyIndex(VkPhysicalDevice device)
{
    const auto families = queryList<VkQueueFamilyProperties>(
        [device](uint32_t* count, VkQueueFamilyProperties* props) { vkGetPhysicalDeviceQueueFamilyProperties(device, count, props); });

    for(std::size_t index = 0; index < families.size(); ++index)
    {
        const bool has_graphics = (families[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
        if(has_graphics)
            return static_cast<int>(index);
    }

    return -1;
}
/**
 * Creates the Vulkan instance with the debug report extension and the standard validation
 * layer, installs the debug report callback and selects a physical device.
 *
 * The first enumerated physical device for which findQueueFamilyIndex() returns a non-negative
 * index is selected.
 *
 * @param instance receives the created instance.
 * @param physical_device receives the selected physical device.
 * @param debug_report_callback receives the created debug report callback.
 * @throws std::runtime_error if the extension or the layer is not available, instance or
 *         callback creation fails, or no suitable physical device is found.
 */
void createInstance(VkInstance& instance, VkPhysicalDevice& physical_device, VkDebugReportCallbackEXT& debug_report_callback)
{
    std::vector<const char*> extensions, layers;
    extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
    layers.push_back("VK_LAYER_LUNARG_standard_validation");

    // make sure everything we are about to enable is actually offered
    auto availableExtensions =
        queryList<VkExtensionProperties>([](uint32_t* count, VkExtensionProperties* prop) { return vkEnumerateInstanceExtensionProperties(nullptr, count, prop); });
    auto missingExtensions = checkAvailable(availableExtensions, extensions, [](const VkExtensionProperties& prop) { return prop.extensionName; });
    if(!missingExtensions.empty())
        throw std::runtime_error(*missingExtensions.begin() + " extensions requested, but not available!");

    auto availableLayers = queryList<VkLayerProperties>(vkEnumerateInstanceLayerProperties);
    auto missingLayers = checkAvailable(availableLayers, layers, [](const VkLayerProperties& prop) { return prop.layerName; });
    if(!missingLayers.empty())
        throw std::runtime_error(*missingLayers.begin() + " layer requested, but not available!");

    // create the instance
    VkApplicationInfo appInfo = {};
    appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    appInfo.pApplicationName = "Vulkan Application";
    appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
    appInfo.pEngineName = "No Engine";
    appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
    appInfo.apiVersion = VK_API_VERSION_1_0;

    VkInstanceCreateInfo instance_create_info = {};
    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    instance_create_info.pApplicationInfo = &appInfo;
    instance_create_info.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
    instance_create_info.ppEnabledExtensionNames = extensions.data();
    instance_create_info.enabledLayerCount = static_cast<uint32_t>(layers.size());
    instance_create_info.ppEnabledLayerNames = layers.data();
    if(vkCreateInstance(&instance_create_info, nullptr, &instance) != VK_SUCCESS)
        throw std::runtime_error("failed to create instance!");

    // install the debug report callback for all report kinds
    VkDebugReportCallbackCreateInfoEXT debug_report_callback_create_info = {};
    debug_report_callback_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
    debug_report_callback_create_info.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
                                              VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT;
    debug_report_callback_create_info.pfnCallback = debugCallback;
    debug_report_callback_create_info.pUserData = nullptr;

    // The entry point is looked up at run time; guard against a null result so a failed lookup
    // is reported as an exception instead of a call through a null function pointer.
    auto CreateDebugReportCallbackEXT = reinterpret_cast<PFN_vkCreateDebugReportCallbackEXT>(vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT"));
    if(CreateDebugReportCallbackEXT == nullptr ||
       CreateDebugReportCallbackEXT(instance, &debug_report_callback_create_info, nullptr, &debug_report_callback) != VK_SUCCESS)
        throw std::runtime_error("failed to set up debug callback!");

    // select the first physical device that offers a graphics queue family
    auto devices = queryList<VkPhysicalDevice>([&instance](uint32_t* count, VkPhysicalDevice* list) { return vkEnumeratePhysicalDevices(instance, count, list); });
    if(devices.empty())
        throw std::runtime_error("failed to find GPUs with Vulkan support!");

    for(const auto& device : devices)
    {
        if(findQueueFamilyIndex(device) < 0)
            continue;
        physical_device = device;
        return;
    }

    throw std::runtime_error("failed to find a suitable GPU!");
}
/**
 * Exercises a device -> host -> device hand-over that is synchronized with two VkEvents.
 *
 * A single command buffer uploads `data` into the device buffer, copies it to the host buffer
 * and sets the first event. The host polls that event, increments the values in the host buffer
 * and sets the second event; the command buffer waits for it before copying the host buffer back
 * to the device buffer and from there to the result buffer.
 *
 * "Wrong 1" lines are printed for elements of the host buffer that did not hold the uploaded
 * value when the host altered them. "Wrong 2" lines are printed for elements of the result
 * buffer that do not hold the incremented value.
 *
 * @return 0 after the checks ran. Failures of the checked Vulkan calls are thrown as
 *         std::runtime_error and are not caught here.
 */
int main(int argc, char* argv[])
{
    VkInstance instance;
    VkPhysicalDevice physical_device;
    VkDebugReportCallbackEXT debug_report_callback;
    createInstance(instance, physical_device, debug_report_callback);

    // create device
    auto queue_family_index = findQueueFamilyIndex(physical_device);
    float queue_priority = 1.0f;
    VkDeviceQueueCreateInfo queueCreateInfo = {};
    queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    queueCreateInfo.queueFamilyIndex = queue_family_index;
    queueCreateInfo.queueCount = 1;
    queueCreateInfo.pQueuePriorities = &queue_priority;
    VkPhysicalDeviceFeatures deviceFeatures = {};
    VkDeviceCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    createInfo.pQueueCreateInfos = &queueCreateInfo;
    createInfo.queueCreateInfoCount = 1;
    createInfo.pEnabledFeatures = &deviceFeatures;
    VkDevice device;
    if(vkCreateDevice(physical_device, &createInfo, nullptr, &device) != VK_SUCCESS)
        throw std::runtime_error("failed to create logical device!");
    VkQueue graphicsQueue;
    vkGetDeviceQueue(device, queue_family_index, 0, &graphicsQueue);

    // allocate memory and create buffers
    VkBuffer device_buffer;
    VkDeviceMemory device_memory;
    createBuffer(physical_device, device, device_buffer, device_memory, BUFFER_SIZE, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    // the host buffer is twice as large so that SECOND_HALF can use its upper half
    VkBuffer host_buffer;
    VkDeviceMemory host_memory;
    createBuffer(physical_device, device, host_buffer, host_memory, BUFFER_SIZE * 2, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    VkBuffer result_buffer;
    VkDeviceMemory result_memory;
    createBuffer(physical_device, device, result_buffer, result_memory, BUFFER_SIZE, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

    // create events
    VkEvent device_to_host_sync_event;
    VkEvent host_to_device_sync_event;
    VkEventCreateInfo event_create_info = {};
    event_create_info.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
    if(vkCreateEvent(device, &event_create_info, nullptr, &device_to_host_sync_event) != VK_SUCCESS)
        throw std::runtime_error("failed to create event!");
    if(vkCreateEvent(device, &event_create_info, nullptr, &host_to_device_sync_event) != VK_SUCCESS)
        throw std::runtime_error("failed to create event!");

    // create command pool and allocate command buffer
    VkCommandPool command_pool;
    VkCommandPoolCreateInfo command_pool_create_info = {};
    command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    command_pool_create_info.queueFamilyIndex = queue_family_index;
    command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
    if(vkCreateCommandPool(device, &command_pool_create_info, nullptr, &command_pool) != VK_SUCCESS)
        throw std::runtime_error("failed to create command pool!");
    VkCommandBuffer command_buffer;
    VkCommandBufferAllocateInfo command_buffer_allocation_info = {};
    command_buffer_allocation_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    command_buffer_allocation_info.commandPool = command_pool;
    command_buffer_allocation_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    command_buffer_allocation_info.commandBufferCount = 1;
    if(vkAllocateCommandBuffers(device, &command_buffer_allocation_info, &command_buffer) != VK_SUCCESS)
        throw std::runtime_error("failed to allocate command buffers!");

    // data preparation: element i holds the value i
    std::vector<int> data(NUMBER_COUNT);
    for(int i = 0; i < NUMBER_COUNT; i++)
        data[i] = i;

    // record command buffer
    VkCommandBufferBeginInfo begin_info = {};
    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    VkMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    VkBufferCopy copy_region = {};
    vkBeginCommandBuffer(command_buffer, &begin_info);

    // we load the data into the device buffer
    vkCmdUpdateBuffer(command_buffer, device_buffer, 0, BUFFER_SIZE, data.data());
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr);

    // copy device to host buffer
    copy_region.srcOffset = 0;
    copy_region.dstOffset = 0;
    copy_region.size = BUFFER_SIZE;
    vkCmdCopyBuffer(command_buffer, device_buffer, host_buffer, 1, &copy_region);
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr);

    // set device to host sync event
    // NOTE(review): the valid-usage rules of vkCmdSetEvent appear to disallow
    // VK_PIPELINE_STAGE_HOST_BIT in the stage mask - confirm against the specification.
    // Left unchanged because it alters the synchronization this program is exercising.
    vkCmdSetEvent(command_buffer, device_to_host_sync_event, VK_PIPELINE_STAGE_HOST_BIT);

    // wait for host to device sync event
    barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    vkCmdWaitEvents(command_buffer, 1, &host_to_device_sync_event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 1, &barrier, 0, nullptr, 0, nullptr);

    // copy host to device buffer
    copy_region.dstOffset = 0;
    copy_region.size = BUFFER_SIZE;
#ifdef SECOND_HALF
    // workaround: read the altered data from the second half of the host buffer
    copy_region.srcOffset = BUFFER_SIZE;
#else
    copy_region.srcOffset = 0;
#endif
    vkCmdCopyBuffer(command_buffer, host_buffer, device_buffer, 1, &copy_region);
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr);

    // now copy device to result buffer to check if the data was copied correctly before
    copy_region.dstOffset = 0;
#ifdef ALL
    copy_region.srcOffset = 0;
    copy_region.size = BUFFER_SIZE;
#else
    // only the single checked element is copied; it ends up at index 0 of the result buffer
    copy_region.srcOffset = SINGLE_OFFSET * sizeof(int);
    copy_region.size = sizeof(int);
#endif
    vkCmdCopyBuffer(command_buffer, device_buffer, result_buffer, 1, &copy_region);
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr);
    vkEndCommandBuffer(command_buffer);

    // submit the command buffer
    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &command_buffer;
    // wrong[i] stays -1 unless element i of the host buffer held an unexpected value
    std::vector<int> wrong(NUMBER_COUNT, -1);
    // a failed submit would never set the first event, so the poll loop below would spin forever
    if(vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS)
        throw std::runtime_error("failed to submit command buffer!");

    // wait for the device to host sync event
    while(vkGetEventStatus(device, device_to_host_sync_event) != VK_EVENT_SET)
        std::this_thread::sleep_for(std::chrono::microseconds(10));

    int* numbers = nullptr;
    // alter the data in the host buffer
    if(vkMapMemory(device, host_memory, 0, 2 * BUFFER_SIZE, 0, reinterpret_cast<void**>(&numbers)) != VK_SUCCESS)
        throw std::runtime_error("failed to map host memory!");
#ifdef ALL
    for(int i = 0; i < NUMBER_COUNT; i++)
#endif
    {
#ifndef ALL
        int i = SINGLE_OFFSET;
#endif
        numbers[i]++;
        if(numbers[i] != data[i] + 1)
            wrong[i] = numbers[i];
    }
    // this prints if the device to host buffer copy wasn't completed before the first event was set!
    for(int i = 0; i < NUMBER_COUNT; i++)
    {
        if(wrong[i] != -1)
            std::cout << "Wrong 1: " << i << " " << wrong[i] << " " << numbers[i] << " " << data[i] + 1 << std::endl;
    }
#ifdef SECOND_HALF
    // workaround: duplicate the altered first half into the second half of the host buffer
    std::memcpy(numbers + NUMBER_COUNT, numbers, BUFFER_SIZE);
#endif
    vkUnmapMemory(device, host_memory);
    // std::this_thread::sleep_for(std::chrono::milliseconds(1000));

    // set the host to device sync event
    vkSetEvent(device, host_to_device_sync_event);
    // wait until command buffer execution finishes
    vkDeviceWaitIdle(device);

    if(vkMapMemory(device, result_memory, 0, BUFFER_SIZE, 0, reinterpret_cast<void**>(&numbers)) != VK_SUCCESS)
        throw std::runtime_error("failed to map result memory!");
    // this prints if the host to device buffer copy was started before the second event was set!
#ifdef ALL
    for(int i = 0; i < NUMBER_COUNT; i++)
#endif
    {
#ifndef ALL
        int i = SINGLE_OFFSET;
        if(numbers[0] != data[i] + 1)
            std::cout << "Wrong 2: " << i << " " << numbers[0] << " " << data[i] + 1 << std::endl;
#else
        if(numbers[i] != data[i] + 1)
            std::cout << "Wrong 2: " << i << " " << numbers[i] << " " << data[i] + 1 << std::endl;
#endif
    }
    vkUnmapMemory(device, result_memory);

    // cleanup
    vkDestroyCommandPool(device, command_pool, nullptr);
    vkDestroyEvent(device, device_to_host_sync_event, nullptr);
    vkDestroyEvent(device, host_to_device_sync_event, nullptr);
    vkDestroyBuffer(device, device_buffer, nullptr);
    vkFreeMemory(device, device_memory, nullptr);
    vkDestroyBuffer(device, host_buffer, nullptr);
    vkFreeMemory(device, host_memory, nullptr);
    vkDestroyBuffer(device, result_buffer, nullptr);
    vkFreeMemory(device, result_memory, nullptr);
    vkDestroyDevice(device, nullptr);
    auto DestroyDebugReportCallbackEXT = reinterpret_cast<PFN_vkDestroyDebugReportCallbackEXT>(vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT"));
    // the entry point is looked up at run time, so skip the call if the lookup failed
    if(DestroyDebugReportCallbackEXT != nullptr)
        DestroyDebugReportCallbackEXT(instance, debug_report_callback, nullptr);
    vkDestroyInstance(instance, nullptr);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment