Skip to content

Instantly share code, notes, and snippets.

@Saancreed
Created December 22, 2023 19:22
Show Gist options
  • Save Saancreed/227bc1d96b6dba3f2dd32012ee2116fb to your computer and use it in GitHub Desktop.
Reflex + OMM patches
diff --git a/src/d3d12/nvapi_d3d12_device.cpp b/src/d3d12/nvapi_d3d12_device.cpp
index 0d30d34..4ebe918 100644
--- a/src/d3d12/nvapi_d3d12_device.cpp
+++ b/src/d3d12/nvapi_d3d12_device.cpp
@@ -120,6 +120,91 @@ namespace dxvk {
return cubinDevice != nullptr;
}
+ bool NvapiD3d12Device::AreOpacityMicromapsSupported(ID3D12Device* device) {
+ auto ommDevice = GetOmmDevice(device);
+ return ommDevice != nullptr;
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::SetCreatePipelineStateOptions(ID3D12Device5* device, const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS* params) {
+ auto ommDevice = GetOmmDevice(device);
+ if (ommDevice == nullptr)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(ommDevice->SetCreatePipelineStateOptions(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::CheckDriverMatchingIdentifierEx(ID3D12Device5* device, NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS* params) {
+ auto ommDevice = GetOmmDevice(device);
+ if (ommDevice == nullptr)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(ommDevice->CheckDriverMatchingIdentifierEx(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::GetRaytracingAccelerationStructurePrebuildInfoEx(ID3D12Device5* device, NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS* params) {
+ auto ommDevice = GetOmmDevice(device);
+ if (ommDevice == nullptr)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(ommDevice->GetRaytracingAccelerationStructurePrebuildInfoEx(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::GetRaytracingOpacityMicromapArrayPrebuildInfo(ID3D12Device5* device, NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* params) {
+ auto ommDevice = GetOmmDevice(device);
+ if (ommDevice == nullptr)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(ommDevice->GetRaytracingOpacityMicromapArrayPrebuildInfo(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::BuildRaytracingAccelerationStructureEx(ID3D12GraphicsCommandList4* commandList, const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS* params) {
+ auto commandListExt = GetCommandListExt(commandList);
+ if (!commandListExt.has_value())
+ return std::nullopt;
+
+ auto commandListVer = commandListExt.value();
+ if (commandListVer.InterfaceVersion < 2)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(commandListVer.CommandListExt->BuildRaytracingAccelerationStructureEx(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::BuildRaytracingOpacityMicromapArray(ID3D12GraphicsCommandList4* commandList, NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* params) {
+ auto commandListExt = GetCommandListExt(commandList);
+ if (!commandListExt.has_value())
+ return std::nullopt;
+
+ auto commandListVer = commandListExt.value();
+ if (commandListVer.InterfaceVersion < 2)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(commandListVer.CommandListExt->BuildRaytracingOpacityMicromapArray(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::RelocateRaytracingOpacityMicromapArray(ID3D12GraphicsCommandList4* commandList, const NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* params) {
+ auto commandListExt = GetCommandListExt(commandList);
+ if (!commandListExt.has_value())
+ return std::nullopt;
+
+ auto commandListVer = commandListExt.value();
+ if (commandListVer.InterfaceVersion < 2)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(commandListVer.CommandListExt->RelocateRaytracingOpacityMicromapArray(params));
+ }
+
+ std::optional<NvAPI_Status> NvapiD3d12Device::EmitRaytracingOpacityMicromapArrayPostbuildInfo(ID3D12GraphicsCommandList4* commandList, const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* params) {
+ auto commandListExt = GetCommandListExt(commandList);
+ if (!commandListExt.has_value())
+ return std::nullopt;
+
+ auto commandListVer = commandListExt.value();
+ if (commandListVer.InterfaceVersion < 2)
+ return std::nullopt;
+
+ return static_cast<NvAPI_Status>(commandListVer.CommandListExt->EmitRaytracingOpacityMicromapArrayPostbuildInfo(params));
+ }
+
// We are going to have single map for storing devices with extensions D3D12_VK_NVX_BINARY_IMPORT & D3D12_VK_NVX_IMAGE_VIEW_HANDLE.
// These are specific to NVIDIA and both of these extensions goes together.
Com<ID3D12DeviceExt> NvapiD3d12Device::GetCubinDevice(ID3D12Device* device) {
@@ -128,15 +213,29 @@ namespace dxvk {
if (it != m_cubinDeviceMap.end())
return it->second;
- auto cubinDevice = GetDeviceExt(device, D3D12_VK_NVX_BINARY_IMPORT);
+ auto cubinDevice = GetDeviceExt<ID3D12DeviceExt>(device, D3D12_VK_NVX_BINARY_IMPORT);
if (cubinDevice != nullptr)
m_cubinDeviceMap.emplace(device, cubinDevice.ptr());
return cubinDevice;
}
- Com<ID3D12DeviceExt> NvapiD3d12Device::GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension) {
- Com<ID3D12DeviceExt> deviceExt;
+ Com<ID3D12DeviceExt1> NvapiD3d12Device::GetOmmDevice(ID3D12Device* device) {
+ std::scoped_lock lock(m_ommDeviceMutex);
+ auto it = m_ommDeviceMap.find(device);
+ if (it != m_ommDeviceMap.end())
+ return it->second;
+
+ auto ommDevice = GetDeviceExt<ID3D12DeviceExt1>(device, D3D12_VK_EXT_OPACITY_MICROMAP);
+ if (ommDevice != nullptr)
+ m_ommDeviceMap.emplace(device, ommDevice.ptr());
+
+ return ommDevice;
+ }
+
+ template <typename T>
+ Com<T> NvapiD3d12Device::GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension) {
+ Com<T> deviceExt;
if (FAILED(device->QueryInterface(IID_PPV_ARGS(&deviceExt))))
return nullptr;
@@ -152,15 +251,21 @@ namespace dxvk {
if (it != m_commandListMap.end())
return it->second;
+ Com<ID3D12GraphicsCommandListExt2> commandListExt2 = nullptr;
+ if (SUCCEEDED(commandList->QueryInterface(IID_PPV_ARGS(&commandListExt2)))) {
+ NvapiD3d12Device::CommandListExtWithVersion cmdListVer{commandListExt2.ptr(), 2};
+ return std::make_optional(m_commandListMap.emplace(commandList, cmdListVer).first->second);
+ }
+
Com<ID3D12GraphicsCommandListExt1> commandListExt1 = nullptr;
if (SUCCEEDED(commandList->QueryInterface(IID_PPV_ARGS(&commandListExt1)))) {
- NvapiD3d12Device::CommandListExtWithVersion cmdListVer{commandListExt1.ptr(), 1};
+ NvapiD3d12Device::CommandListExtWithVersion cmdListVer{reinterpret_cast<ID3D12GraphicsCommandListExt2*>(commandListExt1.ptr()), 1};
return std::make_optional(m_commandListMap.emplace(commandList, cmdListVer).first->second);
}
Com<ID3D12GraphicsCommandListExt> commandListExt = nullptr;
if (SUCCEEDED(commandList->QueryInterface(IID_PPV_ARGS(&commandListExt)))) {
- NvapiD3d12Device::CommandListExtWithVersion cmdListVer{reinterpret_cast<ID3D12GraphicsCommandListExt1*>(commandListExt.ptr()), 0};
+ NvapiD3d12Device::CommandListExtWithVersion cmdListVer{reinterpret_cast<ID3D12GraphicsCommandListExt2*>(commandListExt.ptr()), 0};
return std::make_optional(m_commandListMap.emplace(commandList, cmdListVer).first->second);
}
diff --git a/src/d3d12/nvapi_d3d12_device.h b/src/d3d12/nvapi_d3d12_device.h
index ef419d3..8ebe527 100644
--- a/src/d3d12/nvapi_d3d12_device.h
+++ b/src/d3d12/nvapi_d3d12_device.h
@@ -9,7 +9,7 @@ namespace dxvk {
class NvapiD3d12Device {
struct CommandListExtWithVersion {
- ID3D12GraphicsCommandListExt1* CommandListExt;
+ ID3D12GraphicsCommandListExt2* CommandListExt;
uint32_t InterfaceVersion;
};
@@ -28,22 +28,37 @@ namespace dxvk {
static bool CaptureUAVInfo(ID3D12Device* device, NVAPI_UAV_INFO* uavInfo);
static bool IsFatbinPTXSupported(ID3D12Device* device);
+ static bool AreOpacityMicromapsSupported(ID3D12Device* device);
+ static std::optional<NvAPI_Status> SetCreatePipelineStateOptions(ID3D12Device5* device, const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS* params);
+ static std::optional<NvAPI_Status> CheckDriverMatchingIdentifierEx(ID3D12Device5* device, NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS* params);
+ static std::optional<NvAPI_Status> GetRaytracingAccelerationStructurePrebuildInfoEx(ID3D12Device5* device, NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS* params);
+ static std::optional<NvAPI_Status> GetRaytracingOpacityMicromapArrayPrebuildInfo(ID3D12Device5* device, NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* params);
+ static std::optional<NvAPI_Status> BuildRaytracingAccelerationStructureEx(ID3D12GraphicsCommandList4* commandList, const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS* params);
+ static std::optional<NvAPI_Status> BuildRaytracingOpacityMicromapArray(ID3D12GraphicsCommandList4* commandList, NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* params);
+ static std::optional<NvAPI_Status> RelocateRaytracingOpacityMicromapArray(ID3D12GraphicsCommandList4* commandList, const NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* params);
+ static std::optional<NvAPI_Status> EmitRaytracingOpacityMicromapArrayPostbuildInfo(ID3D12GraphicsCommandList4* commandList, const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* params);
+
static void ClearCacheMaps();
private:
+ inline static std::unordered_map<ID3D12Device*, ID3D12DeviceExt1*> m_ommDeviceMap;
inline static std::unordered_map<ID3D12Device*, ID3D12DeviceExt*> m_cubinDeviceMap;
inline static std::unordered_map<ID3D12CommandQueue*, ID3D12CommandQueueExt*> m_commandQueueMap;
inline static std::unordered_map<ID3D12GraphicsCommandList*, CommandListExtWithVersion> m_commandListMap;
inline static std::unordered_map<NVDX_ObjectHandle, NvU32> m_cubinSmemMap;
inline static std::mutex m_commandListMutex;
inline static std::mutex m_commandQueueMutex;
+ inline static std::mutex m_ommDeviceMutex;
inline static std::mutex m_cubinDeviceMutex;
inline static std::mutex m_cubinSmemMutex;
+ [[nodiscard]] static Com<ID3D12DeviceExt1> GetOmmDevice(ID3D12Device* device);
[[nodiscard]] static Com<ID3D12DeviceExt> GetCubinDevice(ID3D12Device* device);
- [[nodiscard]] static Com<ID3D12DeviceExt> GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension);
[[nodiscard]] static Com<ID3D12CommandQueueExt> GetCommandQueueExt(ID3D12CommandQueue* commandQueue);
[[nodiscard]] static std::optional<CommandListExtWithVersion> GetCommandListExt(ID3D12GraphicsCommandList* commandList);
+
+ template <typename T>
+ [[nodiscard]] static Com<T> GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension);
};
}
diff --git a/src/nvapi_d3d12.cpp b/src/nvapi_d3d12.cpp
index aadd7f2..9f70133 100644
--- a/src/nvapi_d3d12.cpp
+++ b/src/nvapi_d3d12.cpp
@@ -270,12 +270,16 @@ extern "C" {
*(NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS*)pData = NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_NONE;
break;
- case NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP:
+ case NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP: {
if (dataSize != sizeof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS))
return InvalidArgument(n);
- *(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS*)pData = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE;
+ *(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS*)pData = NvapiD3d12Device::AreOpacityMicromapsSupported(pDevice)
+ ? NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD
+ : NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE;
+
break;
+ }
case NVAPI_D3D12_RAYTRACING_CAPS_TYPE_DISPLACEMENT_MICROMAP:
if (dataSize != sizeof(NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_CAPS))
@@ -291,10 +295,86 @@ extern "C" {
return Ok(str::format(n, " (", type, ")"));
}
+ NvAPI_Status __cdecl NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(ID3D12Device5* pDevice, NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* pParams) {
+ constexpr auto n = __func__;
+ static bool alreadyLoggedOk = false;
+
+ if (pDevice == nullptr || pParams == nullptr)
+ return InvalidArgument(n);
+
+ if (auto result = NvapiD3d12Device::GetRaytracingOpacityMicromapArrayPrebuildInfo(pDevice, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
+ return NotSupported(n);
+ }
+
+ NvAPI_Status __cdecl NvAPI_D3D12_SetCreatePipelineStateOptions(ID3D12Device5* pDevice, const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS* pState) {
+ constexpr auto n = __func__;
+ static bool alreadyLoggedOk = false;
+
+ if (pDevice == nullptr || pState == nullptr)
+ return InvalidArgument(n);
+
+ if (pState->version != NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1)
+ return IncompatibleStructVersion(n);
+
+ if (auto result = NvapiD3d12Device::SetCreatePipelineStateOptions(pDevice, pState); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(str::format(n, "(", pState->flags, ")"), alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, "(", pState->flags, "): ", value));
+ return value;
+ }
+ }
+
+ return NotSupported(str::format(n, "(", pState->flags, ")"));
+ }
+
+ NvAPI_Status __cdecl NvAPI_D3D12_CheckDriverMatchingIdentifierEx(ID3D12Device5* pDevice, NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS* pParams) {
+ constexpr auto n = __func__;
+ static bool alreadyLoggedOk = false;
+
+ if (pDevice == nullptr || pParams == nullptr)
+ return InvalidArgument(n);
+
+ if (auto result = NvapiD3d12Device::CheckDriverMatchingIdentifierEx(pDevice, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
+ if (pParams->version != NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1)
+ return IncompatibleStructVersion(n);
+
+ if (pParams->serializedDataType == NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_ACCELERATION_STRUCTURE_EX) {
+ pParams->checkStatus = pDevice->CheckDriverMatchingIdentifier(D3D12_SERIALIZED_DATA_RAYTRACING_ACCELERATION_STRUCTURE, pParams->pIdentifierToCheck);
+ return Ok(n, alreadyLoggedOk);
+ }
+
+ return NotSupported(n);
+ }
+
static bool ConvertBuildRaytracingAccelerationStructureInputs(const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX* nvDesc, std::vector<D3D12_RAYTRACING_GEOMETRY_DESC>& geometryDescs, D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS* d3dDesc) {
+ // assume that micromaps are not supported, allow only standard stuff to be passed
+ if ((nvDesc->flags & ~0x3f) != 0) {
+ log::write("Nonstandard flags passed to acceleration structure build");
+ return false;
+ }
+
d3dDesc->Type = nvDesc->type;
- // assume that OMM via VK_EXT_opacity_micromap and DMM via VK_NV_displacement_micromap are not supported, allow only standard flags to be passed
- d3dDesc->Flags = static_cast<D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS>(nvDesc->flags & 0x3f);
+ d3dDesc->Flags = static_cast<D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS>(nvDesc->flags);
d3dDesc->NumDescs = nvDesc->numDescs;
d3dDesc->DescsLayout = nvDesc->descsLayout;
@@ -304,6 +384,13 @@ extern "C" {
}
if (d3dDesc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL && d3dDesc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS) {
+ for (unsigned i = 0; i < nvDesc->numDescs; ++i) {
+ if (auto desc = nvDesc->ppGeometryDescs[i]; desc->type != NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX && desc->type != NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX) {
+ log::write("Triangles with micromap attachment passed to acceleration structure build when micromaps are not supported");
+ return false;
+ }
+ }
+
d3dDesc->ppGeometryDescs = reinterpret_cast<const D3D12_RAYTRACING_GEOMETRY_DESC* const*>(nvDesc->ppGeometryDescs);
return true;
}
@@ -326,10 +413,10 @@ extern "C" {
d3dGeoDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS;
d3dGeoDesc.AABBs = nvGeoDesc.aabbs;
break;
- case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX: // GetRaytracingCaps reports no OMM caps, we shouldn't reach this
+ case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX:
log::write("Triangles with OMM attachment passed to acceleration structure build when OMM is not supported");
return false;
- case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_DMM_TRIANGLES_EX: // GetRaytracingCaps reports no DMM caps, we shouldn't reach this
+ case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_DMM_TRIANGLES_EX:
log::write("Triangles with DMM attachment passed to acceleration structure build when DMM is not supported");
return false;
default:
@@ -352,6 +439,16 @@ extern "C" {
if (pDevice == nullptr || pParams == nullptr)
return InvalidArgument(n);
+ if (auto result = NvapiD3d12Device::GetRaytracingAccelerationStructurePrebuildInfoEx(pDevice, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
if (pParams->version != NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1)
return IncompatibleStructVersion(n);
@@ -362,13 +459,73 @@ extern "C" {
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS desc{};
if (!ConvertBuildRaytracingAccelerationStructureInputs(pParams->pDesc, geometryDescs, &desc))
- return InvalidArgument(n);
+ return NotSupported(n);
pDevice->GetRaytracingAccelerationStructurePrebuildInfo(&desc, pParams->pInfo);
return Ok(n, alreadyLoggedOk);
}
+ NvAPI_Status __cdecl NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(ID3D12GraphicsCommandList4* pCommandList, NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams) {
+ constexpr auto n = __func__;
+ static bool alreadyLoggedOk = false;
+
+ if (pCommandList == nullptr || pParams == nullptr)
+ return InvalidArgument(n);
+
+ if (auto result = NvapiD3d12Device::BuildRaytracingOpacityMicromapArray(pCommandList, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
+ return NotSupported(n);
+ }
+
+ NvAPI_Status __cdecl NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(ID3D12GraphicsCommandList4* pCommandList, const NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams) {
+ constexpr auto n = __func__;
+ static bool alreadyLoggedOk = false;
+
+ if (pCommandList == nullptr || pParams == nullptr)
+ return InvalidArgument(n);
+
+ if (auto result = NvapiD3d12Device::RelocateRaytracingOpacityMicromapArray(pCommandList, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
+ return NotSupported(n);
+ }
+
+ NvAPI_Status __cdecl NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(ID3D12GraphicsCommandList4* pCommandList, const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* pParams) {
+ constexpr auto n = __func__;
+ static bool alreadyLoggedOk = false;
+
+ if (pCommandList == nullptr || pParams == nullptr)
+ return InvalidArgument(n);
+
+ if (auto result = NvapiD3d12Device::EmitRaytracingOpacityMicromapArrayPostbuildInfo(pCommandList, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
+ return NotSupported(n);
+ }
+
NvAPI_Status __cdecl NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(ID3D12GraphicsCommandList4* pCommandList, const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS* pParams) {
constexpr auto n = __func__;
static bool alreadyLoggedOk = false;
@@ -376,6 +533,16 @@ extern "C" {
if (pCommandList == nullptr || pParams == nullptr)
return InvalidArgument(n);
+ if (auto result = NvapiD3d12Device::BuildRaytracingAccelerationStructureEx(pCommandList, pParams); result.has_value()) {
+ auto value = result.value();
+ if (value == NVAPI_OK) {
+ return Ok(n, alreadyLoggedOk);
+ } else {
+ log::write(str::format(n, ": ", value));
+ return value;
+ }
+ }
+
if (pParams->version != NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1)
return IncompatibleStructVersion(n);
@@ -391,7 +558,7 @@ extern "C" {
};
if (!ConvertBuildRaytracingAccelerationStructureInputs(&pParams->pDesc->inputs, geometryDescs, &desc.Inputs))
- return InvalidArgument(n);
+ return NotSupported(n);
pCommandList->BuildRaytracingAccelerationStructure(&desc, pParams->numPostbuildInfoDescs, pParams->pPostbuildInfoDescs);
diff --git a/src/nvapi_interface.cpp b/src/nvapi_interface.cpp
index b461166..6c8728c 100644
--- a/src/nvapi_interface.cpp
+++ b/src/nvapi_interface.cpp
@@ -67,7 +67,13 @@ extern "C" {
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetGraphicsCapabilities)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_IsFatbinPTXSupported)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetRaytracingCaps)
+ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo)
+ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_SetCreatePipelineStateOptions)
+ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CheckDriverMatchingIdentifierEx)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx)
+ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_BuildRaytracingOpacityMicromapArray)
+ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray)
+ INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D_GetObjectHandleForResource)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D_SetResourceHint)
diff --git a/src/vkd3d-proton/vkd3d-proton_interfaces.cpp b/src/vkd3d-proton/vkd3d-proton_interfaces.cpp
index 03a4acd..bf811d8 100644
--- a/src/vkd3d-proton/vkd3d-proton_interfaces.cpp
+++ b/src/vkd3d-proton/vkd3d-proton_interfaces.cpp
@@ -18,6 +18,8 @@
#include "vkd3d-proton_interfaces.h"
const GUID ID3D12DeviceExt::guid = {0x11ea7a1a, 0x0f6a, 0x49bf, {0xb6, 0x12, 0x3e, 0x30, 0xf8, 0xe2, 0x01, 0xdd}};
+const GUID ID3D12DeviceExt1::guid = {0x11ea7a1a, 0x0f6a, 0x49bf, {0xb6, 0x12, 0x3e, 0x30, 0xf8, 0xe2, 0x01, 0xde}};
const GUID ID3D12GraphicsCommandListExt::guid = {0x77a86b09, 0x2bea, 0x4801, {0xb8, 0x9a, 0x37, 0x64, 0x8e, 0x10, 0x4a, 0xf1}};
const GUID ID3D12GraphicsCommandListExt1::guid = {0xd53b0028, 0xafb4, 0x4b65, {0xa4, 0xf1, 0x7b, 0x0d, 0xaa, 0xa6, 0x5b, 0x4f}};
+const GUID ID3D12GraphicsCommandListExt2::guid = {0xd53b0028, 0xafb4, 0x4b65, {0xa4, 0xf1, 0x7b, 0x0d, 0xaa, 0xa6, 0x5b, 0x50}};
const GUID ID3D12CommandQueueExt::guid = {0x40ed3f96, 0xe773, 0xe9bc, {0xfc, 0x0c, 0xe9, 0x55, 0x60, 0xc9, 0x9a, 0xd6}};
diff --git a/src/vkd3d-proton/vkd3d-proton_interfaces.h b/src/vkd3d-proton/vkd3d-proton_interfaces.h
index 8f388ac..d03a215 100644
--- a/src/vkd3d-proton/vkd3d-proton_interfaces.h
+++ b/src/vkd3d-proton/vkd3d-proton_interfaces.h
@@ -28,6 +28,7 @@
inline GUID const& __mingw_uuidof<iface>() { return iface::guid; }
enum D3D12_VK_EXTENSION : uint32_t {
+ D3D12_VK_EXT_OPACITY_MICROMAP = 0x0,
D3D12_VK_NVX_BINARY_IMPORT = 0x1,
D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2,
D3D12_VK_NV_LOW_LATENCY_2 = 0x3
@@ -83,6 +84,22 @@ ID3D12DeviceExt : public IUnknown {
D3D12_UAV_INFO * uav_info) = 0;
};
+MIDL_INTERFACE("11ea7a1a-0f6a-49bf-b612-3e30f8e201de")
+ID3D12DeviceExt1 : public ID3D12DeviceExt {
+ static const GUID guid;
+ virtual HRESULT STDMETHODCALLTYPE SetCreatePipelineStateOptions(
+ const void* params) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE CheckDriverMatchingIdentifierEx(
+ void* params) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE GetRaytracingAccelerationStructurePrebuildInfoEx(
+ void* params) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE GetRaytracingOpacityMicromapArrayPrebuildInfo(
+ void* params) = 0;
+};
+
MIDL_INTERFACE("77a86b09-2bea-4801-b89a-37648e104af1")
ID3D12GraphicsCommandListExt : public IUnknown {
static const GUID guid;
@@ -114,15 +131,34 @@ ID3D12GraphicsCommandListExt1 : public ID3D12GraphicsCommandListExt {
UINT32 raw_params_count) = 0;
};
+MIDL_INTERFACE("d53b0028-afb4-4b65-a4f1-7b0daaa65b50")
+ID3D12GraphicsCommandListExt2 : public ID3D12GraphicsCommandListExt1 {
+ static const GUID guid;
+
+ virtual HRESULT STDMETHODCALLTYPE BuildRaytracingAccelerationStructureEx(
+ const void* params) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE BuildRaytracingOpacityMicromapArray(
+ void* params) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE RelocateRaytracingOpacityMicromapArray(
+ const void* params) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE EmitRaytracingOpacityMicromapArrayPostbuildInfo(
+ const void* params) = 0;
+};
+
MIDL_INTERFACE("40ed3f96-e773-e9bc-fc0c-e95560c99ad6")
ID3D12CommandQueueExt : public IUnknown {
static const GUID guid;
virtual HRESULT STDMETHODCALLTYPE NotifyOutOfBandCommandQueue(
D3D12_OUT_OF_BAND_CQ_TYPE type) = 0;
};
VKD3D_PROTON_GUID(ID3D12DeviceExt)
+VKD3D_PROTON_GUID(ID3D12DeviceExt1)
VKD3D_PROTON_GUID(ID3D12GraphicsCommandListExt)
VKD3D_PROTON_GUID(ID3D12GraphicsCommandListExt1)
+VKD3D_PROTON_GUID(ID3D12GraphicsCommandListExt2)
VKD3D_PROTON_GUID(ID3D12CommandQueueExt)
diff --git a/tests/nvapi_d3d12.cpp b/tests/nvapi_d3d12.cpp
index 7e58728..f78b116 100644
--- a/tests/nvapi_d3d12.cpp
+++ b/tests/nvapi_d3d12.cpp
@@ -29,6 +29,10 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
.LR_SIDE_EFFECT(*_2 = static_cast<ID3D12DeviceExt*>(&device))
.LR_SIDE_EFFECT(deviceRefCount++)
.RETURN(S_OK);
+ ALLOW_CALL(device, QueryInterface(ID3D12DeviceExt1::guid, _))
+ .LR_SIDE_EFFECT(*_2 = static_cast<ID3D12DeviceExt1*>(&device))
+ .LR_SIDE_EFFECT(deviceRefCount++)
+ .RETURN(S_OK);
ALLOW_CALL(device, AddRef())
.LR_SIDE_EFFECT(deviceRefCount++)
.RETURN(deviceRefCount);
@@ -48,7 +52,11 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
.LR_SIDE_EFFECT(commandListRefCount++)
.RETURN(S_OK);
ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt1::guid, _))
- .LR_SIDE_EFFECT(*_2 = static_cast<ID3D12GraphicsCommandListExt*>(&commandList))
+ .LR_SIDE_EFFECT(*_2 = static_cast<ID3D12GraphicsCommandListExt1*>(&commandList))
+ .LR_SIDE_EFFECT(commandListRefCount++)
+ .RETURN(S_OK);
+ ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt2::guid, _))
+ .LR_SIDE_EFFECT(*_2 = static_cast<ID3D12GraphicsCommandListExt2*>(&commandList))
.LR_SIDE_EFFECT(commandListRefCount++)
.RETURN(S_OK);
ALLOW_CALL(commandList, AddRef())
@@ -75,18 +83,30 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
SECTION("D3D12 methods without VKD3D-Proton return error") {
ALLOW_CALL(device, QueryInterface(ID3D12DeviceExt::guid, _))
.RETURN(E_NOINTERFACE);
+ ALLOW_CALL(device, QueryInterface(ID3D12DeviceExt1::guid, _))
+ .RETURN(E_NOINTERFACE);
ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt::guid, _))
.RETURN(E_NOINTERFACE);
ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt1::guid, _))
.RETURN(E_NOINTERFACE);
+ ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt2::guid, _))
+ .RETURN(E_NOINTERFACE);
FORBID_CALL(device, CreateCubinComputeShaderWithName(_, _, _, _, _, _, _));
FORBID_CALL(device, DestroyCubinComputeShader(_));
FORBID_CALL(device, GetCudaTextureObject(_, _, _));
FORBID_CALL(device, GetCudaSurfaceObject(_, _));
FORBID_CALL(device, CaptureUAVInfo(_));
+ FORBID_CALL(device, SetCreatePipelineStateOptions(_));
+ FORBID_CALL(device, CheckDriverMatchingIdentifierEx(_));
+ FORBID_CALL(device, GetRaytracingAccelerationStructurePrebuildInfoEx(_));
+ FORBID_CALL(device, GetRaytracingOpacityMicromapArrayPrebuildInfo(_));
FORBID_CALL(commandList, LaunchCubinShader(_, _, _, _, _, _));
FORBID_CALL(commandList, LaunchCubinShaderEx(_, _, _, _, _, _, _, _, _));
+ FORBID_CALL(commandList, BuildRaytracingAccelerationStructureEx(_));
+ FORBID_CALL(commandList, BuildRaytracingOpacityMicromapArray(_));
+ FORBID_CALL(commandList, RelocateRaytracingOpacityMicromapArray(_));
+ FORBID_CALL(commandList, EmitRaytracingOpacityMicromapArrayPostbuildInfo(_));
REQUIRE(NvAPI_D3D12_CreateCubinComputeShaderWithName(static_cast<ID3D12Device*>(&device), nullptr, 0, 0, 0, 0, "shader_name", nullptr) == NVAPI_ERROR);
REQUIRE(NvAPI_D3D12_CreateCubinComputeShader(static_cast<ID3D12Device*>(&device), nullptr, 0, 0, 0, 0, nullptr) == NVAPI_ERROR);
@@ -101,11 +121,46 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
REQUIRE(NvAPI_D3D12_IsFatbinPTXSupported(static_cast<ID3D12Device*>(&device), &isPTXSupported) == NVAPI_ERROR);
REQUIRE(NvAPI_D3D12_LaunchCubinShader(static_cast<ID3D12GraphicsCommandList*>(&commandList), reinterpret_cast<NVDX_ObjectHandle>(0), 0, 0, 0, nullptr, 0) == NVAPI_ERROR);
+
+ {
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS caps = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD;
+ REQUIRE(NvAPI_D3D12_GetRaytracingCaps(static_cast<ID3D12Device*>(&device), NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP, &caps, sizeof(caps)) == NVAPI_OK);
+ REQUIRE(caps == NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE);
+ }
+ {
+ NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS params{};
+ REQUIRE(NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS params{};
+ params.version = NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1;
+ params.flags = NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT;
+ REQUIRE(NvAPI_D3D12_SetCreatePipelineStateOptions(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS params{};
+ params.version = NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1;
+ params.serializedDataType = NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX;
+ REQUIRE(NvAPI_D3D12_CheckDriverMatchingIdentifierEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS params{};
+ REQUIRE(NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS params{};
+ REQUIRE(NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS params{};
+ REQUIRE(NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+
REQUIRE(deviceRefCount == 0);
REQUIRE(commandListRefCount == 0);
}
- SECTION("D3D12 methods without cubin extension return error") {
+ SECTION("D3D12 cubin methods without cubin extension return error") {
ALLOW_CALL(device, GetExtensionSupport(D3D12_VK_NVX_BINARY_IMPORT))
.RETURN(false);
@@ -130,6 +185,36 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
REQUIRE(commandListRefCount == 0);
}
+ SECTION("D3D12 OMM methods without OMM extension return not-supported") {
+ ALLOW_CALL(device, GetExtensionSupport(D3D12_VK_EXT_OPACITY_MICROMAP))
+ .RETURN(false);
+
+ FORBID_CALL(device, SetCreatePipelineStateOptions(_));
+ FORBID_CALL(device, CheckDriverMatchingIdentifierEx(_));
+ FORBID_CALL(device, GetRaytracingAccelerationStructurePrebuildInfoEx(_));
+ FORBID_CALL(device, GetRaytracingOpacityMicromapArrayPrebuildInfo(_));
+
+ {
+ NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS params{};
+ REQUIRE(NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS params{};
+ params.version = NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1;
+ params.flags = NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT;
+ REQUIRE(NvAPI_D3D12_SetCreatePipelineStateOptions(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+ {
+ NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS params{};
+ params.version = NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1;
+ params.serializedDataType = NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX;
+ REQUIRE(NvAPI_D3D12_CheckDriverMatchingIdentifierEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
+
SECTION("IsNvShaderExtnOpCodeSupported returns OK") {
auto supported = true;
REQUIRE(NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(&device, 1U, &supported) == NVAPI_OK);
@@ -434,6 +519,8 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
SECTION("Launch CuBIN without ID3D12GraphicsCommandListExt1 returns OK") {
ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt1::guid, _))
.RETURN(E_NOINTERFACE);
+ ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt2::guid, _))
+ .RETURN(E_NOINTERFACE);
auto shaderHandle = reinterpret_cast<D3D12_CUBIN_DATA_HANDLE*>(0xbadcf00d);
auto blockX = 1U;
@@ -457,8 +544,20 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
REQUIRE(caps == NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_NONE);
}
- SECTION("GetRaytracingCaps returns OK and claims that Opacity Micromap is not supported") {
- NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS caps;
+ SECTION("GetRaytracingCaps returns OK and claims that Opacity Micromaps are not supported if ID3D12DeviceExt1 interface can't be found") {
+ ALLOW_CALL(device, QueryInterface(ID3D12DeviceExt1::guid, _))
+ .RETURN(E_NOINTERFACE);
+
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS caps = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD;
+ REQUIRE(NvAPI_D3D12_GetRaytracingCaps(static_cast<ID3D12Device*>(&device), NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP, &caps, sizeof(caps)) == NVAPI_OK);
+ REQUIRE(caps == NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE);
+ }
+
+ SECTION("GetRaytracingCaps returns OK and claims that Opacity Micromaps are not supported if OMM extension is not supported") {
+ ALLOW_CALL(device, GetExtensionSupport(D3D12_VK_EXT_OPACITY_MICROMAP))
+ .RETURN(false);
+
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS caps = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD;
REQUIRE(NvAPI_D3D12_GetRaytracingCaps(static_cast<ID3D12Device*>(&device), NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP, &caps, sizeof(caps)) == NVAPI_OK);
REQUIRE(caps == NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE);
}
@@ -469,7 +568,10 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
REQUIRE(caps == NVAPI_D3D12_RAYTRACING_DISPLACEMENT_MICROMAP_CAP_NONE);
}
- SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx succeeds") {
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx succeeds when Opacity Micromaps are not supported") {
+ ALLOW_CALL(device, GetExtensionSupport(D3D12_VK_EXT_OPACITY_MICROMAP))
+ .RETURN(false);
+
SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx returns OK") {
NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX desc{};
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info{};
@@ -601,7 +703,181 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
}
}
- SECTION("BuildRaytracingAccelerationStructureEx succeeds") {
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx fails when Opacity Micromaps are used but they are not supported") {
+ ALLOW_CALL(device, GetExtensionSupport(D3D12_VK_EXT_OPACITY_MICROMAP))
+ .RETURN(false);
+
+ FORBID_CALL(device, GetRaytracingAccelerationStructurePrebuildInfo(_, _));
+ FORBID_CALL(device, GetRaytracingAccelerationStructurePrebuildInfoEx(_));
+
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX geometryDescEx{};
+ NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX desc{};
+ D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info{};
+ NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS params{};
+ params.version = NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1;
+ params.pDesc = &desc;
+ params.pInfo = &info;
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with allow OMM update flag") {
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with allow disable OMMs flag") {
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with allow OMM opacity states update flag") {
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with BLAS for array and OMM triangles") {
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+ desc.numDescs = 1;
+ desc.pGeometryDescs = &geometryDescEx;
+ desc.geometryDescStrideInBytes = sizeof(geometryDescEx);
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with BLAS for pointer array and OMM triangles") {
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX* geometryDescExArray[] = {&geometryDescEx};
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS;
+ desc.numDescs = 1;
+ desc.ppGeometryDescs = geometryDescExArray;
+ desc.geometryDescStrideInBytes = sizeof(geometryDescEx);
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) != NVAPI_OK);
+ }
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx succeeds when Opacity Micromaps are supported") {
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx returns OK and calls to ID3D12DeviceExt1") {
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX geometryDescEx{};
+ NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX desc{};
+ D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info{};
+ NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS params{};
+ params.version = NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1;
+ params.pDesc = &desc;
+ params.pInfo = &info;
+
+ REQUIRE_CALL(device, GetRaytracingAccelerationStructurePrebuildInfoEx(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with TLAS") {
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.instanceDescs = D3D12_GPU_VIRTUAL_ADDRESS{};
+ desc.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with BLAS for pointer array") {
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX* geometryDescExArray[] = {};
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS;
+ desc.ppGeometryDescs = geometryDescExArray;
+ desc.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with BLAS for array") {
+ desc.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+ desc.numDescs = 1;
+ desc.pGeometryDescs = &geometryDescEx;
+ desc.geometryDescStrideInBytes = sizeof(geometryDescEx);
+ desc.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX;
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx for OMM triangles geometry") {
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX;
+ geometryDescEx.ommTriangles.triangles.IndexBuffer = D3D12_GPU_VIRTUAL_ADDRESS{};
+ geometryDescEx.ommTriangles.ommAttachment.opacityMicromapArray = D3D12_GPU_VIRTUAL_ADDRESS{};
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx for triangles geometry") {
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX;
+ geometryDescEx.triangles.IndexBuffer = D3D12_GPU_VIRTUAL_ADDRESS{};
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ }
+
+ SECTION("GetRaytracingAccelerationStructurePrebuildInfoEx with AABBs geometry") {
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX;
+ geometryDescEx.aabbs.AABBCount = 3;
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ }
+ }
+ }
+ }
+
+ SECTION("GetRaytracingOpacityMicromapArrayPrebuildInfo returns OK") {
+ NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS params{};
+
+ REQUIRE_CALL(device, GetRaytracingOpacityMicromapArrayPrebuildInfo(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ REQUIRE(NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
+
+ SECTION("SetCreatePipelineStateOptions returns OK") {
+ NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS params{};
+ params.version = NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1;
+ params.flags = NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT;
+
+ REQUIRE_CALL(device, SetCreatePipelineStateOptions(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ REQUIRE(NvAPI_D3D12_SetCreatePipelineStateOptions(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
+
+ SECTION("CheckDriverMatchingIdentifierEx returns OK") {
+ NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS params{};
+ params.version = NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1;
+ params.serializedDataType = NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX;
+
+ REQUIRE_CALL(device, CheckDriverMatchingIdentifierEx(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ REQUIRE(NvAPI_D3D12_CheckDriverMatchingIdentifierEx(static_cast<ID3D12Device5*>(&device), &params) == NVAPI_OK);
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
+
+ SECTION("GetRaytracingCaps returns OK and claims that Opacity Micromaps are supported") {
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS caps = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE;
+ REQUIRE(NvAPI_D3D12_GetRaytracingCaps(static_cast<ID3D12Device*>(&device), NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP, &caps, sizeof(caps)) == NVAPI_OK);
+ REQUIRE(caps == NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx succeeds when ID3D12GraphicsCommandListExt2 interface can't be found") {
+ ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt2::guid, _))
+ .RETURN(E_NOINTERFACE);
+
SECTION("BuildRaytracingAccelerationStructureEx returns OK") {
NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX desc{};
NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS params{};
@@ -748,4 +1024,161 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_INCOMPATIBLE_STRUCT_VERSION);
}
}
+
+ SECTION("BuildRaytracingAccelerationStructureEx fails when Opacity Micromaps are used but ID3D12GraphicsCommandListExt2 interface can't be found") {
+ ALLOW_CALL(commandList, QueryInterface(ID3D12GraphicsCommandListExt2::guid, _))
+ .RETURN(E_NOINTERFACE);
+
+ FORBID_CALL(commandList, BuildRaytracingAccelerationStructure(_, _, _));
+ FORBID_CALL(commandList, BuildRaytracingAccelerationStructureEx(_));
+
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX geometryDescEx{};
+ NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX desc{};
+ NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS params{};
+ params.version = NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1;
+ params.pDesc = &desc;
+
+ SECTION("BuildRaytracingAccelerationStructureEx with allow OMM update flag") {
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.inputs.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx with allow disable OMMs flag") {
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.inputs.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx with allow OMM opacity states update flag") {
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.inputs.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx with BLAS for array and OMM triangles") {
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.inputs.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+ desc.inputs.numDescs = 1;
+ desc.inputs.pGeometryDescs = &geometryDescEx;
+ desc.inputs.geometryDescStrideInBytes = sizeof(geometryDescEx);
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx with BLAS for pointer array and OMM triangles") {
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX* geometryDescExArray[] = {&geometryDescEx};
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.inputs.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS;
+ desc.inputs.numDescs = 1;
+ desc.inputs.ppGeometryDescs = geometryDescExArray;
+ desc.inputs.geometryDescStrideInBytes = sizeof(geometryDescEx);
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) != NVAPI_OK);
+ }
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx succeeds when ID3D12GraphicsCommandListExt2 interface can be found") {
+ SECTION("BuildRaytracingAccelerationStructureEx returns OK and calls to ID3D12DeviceExt1") {
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX geometryDescEx{};
+ NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX desc{};
+ NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS params{};
+ params.version = NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1;
+ params.pDesc = &desc;
+
+ REQUIRE_CALL(commandList, BuildRaytracingAccelerationStructureEx(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ SECTION("BuildRaytracingAccelerationStructureEx with TLAS") {
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
+ desc.inputs.instanceDescs = D3D12_GPU_VIRTUAL_ADDRESS{};
+ desc.inputs.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx with BLAS for pointer array") {
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX* geometryDescExArray[] = {};
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.inputs.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS;
+ desc.inputs.ppGeometryDescs = geometryDescExArray;
+ desc.inputs.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx with BLAS for array") {
+ desc.inputs.type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
+ desc.inputs.descsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
+ desc.inputs.numDescs = 1;
+ desc.inputs.pGeometryDescs = &geometryDescEx;
+ desc.inputs.geometryDescStrideInBytes = sizeof(geometryDescEx);
+ desc.inputs.flags = NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX;
+
+ SECTION("BuildRaytracingAccelerationStructureEx for OMM triangles geometry") {
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX;
+ geometryDescEx.ommTriangles.triangles.IndexBuffer = D3D12_GPU_VIRTUAL_ADDRESS{};
+ geometryDescEx.ommTriangles.ommAttachment.opacityMicromapArray = D3D12_GPU_VIRTUAL_ADDRESS{};
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx for triangles geometry") {
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX;
+ geometryDescEx.triangles.IndexBuffer = D3D12_GPU_VIRTUAL_ADDRESS{};
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ }
+
+ SECTION("BuildRaytracingAccelerationStructureEx for AABBs geometry") {
+ geometryDescEx.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX;
+ geometryDescEx.aabbs.AABBCount = 3;
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ }
+ }
+ }
+ }
+
+ SECTION("BuildRaytracingOpacityMicromapArray returns OK") {
+ NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS params{};
+
+ REQUIRE_CALL(commandList, BuildRaytracingOpacityMicromapArray(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ REQUIRE(NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
+
+ SECTION("RelocateRaytracingOpacityMicromapArray returns OK") {
+ NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS params{};
+
+ REQUIRE_CALL(commandList, RelocateRaytracingOpacityMicromapArray(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ REQUIRE(NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
+
+ SECTION("EmitRaytracingOpacityMicromapArrayPostbuildInfo returns OK") {
+ NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS params{};
+
+ REQUIRE_CALL(commandList, EmitRaytracingOpacityMicromapArrayPostbuildInfo(&params))
+ .RETURN(NVAPI_OK)
+ .TIMES(1);
+
+ REQUIRE(NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(static_cast<ID3D12GraphicsCommandList4*>(&commandList), &params) == NVAPI_OK);
+ REQUIRE(deviceRefCount == 0);
+ REQUIRE(commandListRefCount == 0);
+ }
}
diff --git a/tests/nvapi_d3d12_mocks.h b/tests/nvapi_d3d12_mocks.h
index a3cff71..6a357f2 100644
--- a/tests/nvapi_d3d12_mocks.h
+++ b/tests/nvapi_d3d12_mocks.h
@@ -3,7 +3,7 @@
#include "nvapi_tests_private.h"
#include "../src/vkd3d-proton/vkd3d-proton_interfaces.h"
-class ID3D12Vkd3dDevice : public ID3D12Device5, public ID3D12DeviceExt {};
+class ID3D12Vkd3dDevice : public ID3D12Device5, public ID3D12DeviceExt1 {};
class D3D12Vkd3dDeviceMock final : public trompeloeil::mock_interface<ID3D12Vkd3dDevice> {
MAKE_MOCK2(QueryInterface, HRESULT(REFIID, void**), override);
@@ -78,9 +78,13 @@ class D3D12Vkd3dDeviceMock final : public trompeloeil::mock_interface<ID3D12Vkd3
IMPLEMENT_MOCK3(GetCudaTextureObject);
IMPLEMENT_MOCK2(GetCudaSurfaceObject);
IMPLEMENT_MOCK1(CaptureUAVInfo);
+ IMPLEMENT_MOCK1(SetCreatePipelineStateOptions);
+ IMPLEMENT_MOCK1(CheckDriverMatchingIdentifierEx);
+ IMPLEMENT_MOCK1(GetRaytracingAccelerationStructurePrebuildInfoEx);
+ IMPLEMENT_MOCK1(GetRaytracingOpacityMicromapArrayPrebuildInfo);
};
-class ID3D12Vkd3dGraphicsCommandList : public ID3D12GraphicsCommandList4, public ID3D12GraphicsCommandListExt1 {};
+class ID3D12Vkd3dGraphicsCommandList : public ID3D12GraphicsCommandList4, public ID3D12GraphicsCommandListExt2 {};
class D3D12Vkd3dGraphicsCommandListMock final : public trompeloeil::mock_interface<ID3D12Vkd3dGraphicsCommandList> {
MAKE_MOCK2(QueryInterface, HRESULT(REFIID, void**), override);
@@ -163,4 +167,8 @@ class D3D12Vkd3dGraphicsCommandListMock final : public trompeloeil::mock_interfa
IMPLEMENT_MOCK1(GetVulkanHandle);
IMPLEMENT_MOCK6(LaunchCubinShader);
IMPLEMENT_MOCK9(LaunchCubinShaderEx);
+ IMPLEMENT_MOCK1(BuildRaytracingAccelerationStructureEx);
+ IMPLEMENT_MOCK1(BuildRaytracingOpacityMicromapArray);
+ IMPLEMENT_MOCK1(RelocateRaytracingOpacityMicromapArray);
+ IMPLEMENT_MOCK1(EmitRaytracingOpacityMicromapArrayPostbuildInfo);
};
From 2518e1c71e64679be3f38c622f8294e9354e44a0 Mon Sep 17 00:00:00 2001
From: Eric Sullivan <esullivan@nvidia.com>
Date: Mon, 9 Oct 2023 01:51:16 -0700
Subject: [PATCH 2/2] Add VK_NV_low_latency2 support
This commit adds support for the VK_NV_low_latency2 extension, and
implements the ID3DLowLatencyDevice interface.
---
src/d3d11/d3d11_device.cpp | 168 +++++++++++++++++++++++++++++++---
src/d3d11/d3d11_device.h | 82 ++++++++++++++---
src/d3d11/d3d11_interfaces.h | 58 +++++++++++-
src/d3d11/d3d11_swapchain.cpp | 36 +++++++-
src/d3d11/d3d11_swapchain.h | 18 +++-
src/dxvk/dxvk_adapter.cpp | 13 ++-
src/dxvk/dxvk_cmdlist.cpp | 17 +++-
src/dxvk/dxvk_cmdlist.h | 5 +-
src/dxvk/dxvk_device.cpp | 2 +
src/dxvk/dxvk_device.h | 51 ++++++++++-
src/dxvk/dxvk_device_info.h | 3 +-
src/dxvk/dxvk_extensions.h | 1 +
src/dxvk/dxvk_presenter.cpp | 99 +++++++++++++++++++-
src/dxvk/dxvk_presenter.h | 43 +++++++++
src/dxvk/dxvk_queue.cpp | 4 +-
src/dxvk/dxvk_queue.h | 1 +
src/vulkan/vulkan_loader.h | 8 ++
17 files changed, 563 insertions(+), 46 deletions(-)
diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp
index 9398e484208..22760b48922 100644
--- a/src/d3d11/d3d11_device.cpp
+++ b/src/d3d11/d3d11_device.cpp
@@ -15,6 +15,7 @@
#include "d3d11_device.h"
#include "d3d11_fence.h"
#include "d3d11_input_layout.h"
+#include "d3d11_interfaces.h"
#include "d3d11_interop.h"
#include "d3d11_query.h"
#include "d3d11_resource.h"
@@ -2469,12 +2470,14 @@ namespace dxvk {
return deviceFeatures.nvxBinaryImport
&& deviceFeatures.vk12.bufferDeviceAddress;
+ case D3D11_VK_NV_LOW_LATENCY_2:
+ return deviceFeatures.nvLowLatency2;
+
default:
return false;
}
}
-
-
+
bool STDMETHODCALLTYPE D3D11DeviceExt::GetCudaTextureObjectNVX(uint32_t srvDriverHandle, uint32_t samplerDriverHandle, uint32_t* pCudaTextureHandle) {
ID3D11ShaderResourceView* srv = HandleToSrvNVX(srvDriverHandle);
@@ -2783,8 +2786,133 @@ namespace dxvk {
+
+ D3D11LowLatencyDevice::D3D11LowLatencyDevice(
+ D3D11DXGIDevice* pContainer,
+ D3D11Device* pDevice)
+ : m_container(pContainer), m_device(pDevice) {
+
+ }
+ ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::AddRef() {
+ return m_container->AddRef();
+ }
+
+
+ ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::Release() {
+ return m_container->Release();
+ }
+
+
+ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::QueryInterface(
+ REFIID riid,
+ void** ppvObject) {
+ return m_container->QueryInterface(riid, ppvObject);
+ }
+
+ BOOL STDMETHODCALLTYPE D3D11LowLatencyDevice::SupportsLowLatency() {
+ return m_device->GetDXVKDevice()->features().nvLowLatency2;
+ }
+
+ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::LatencySleep() {
+ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) {
+ return E_NOINTERFACE;
+ }
+
+ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain();
+ if (pSwapChain && pSwapChain->LowLatencyEnabled()) {
+ VkResult res = pSwapChain->LatencySleep();
+ if (res != VK_SUCCESS) {
+ return S_FALSE;
+ }
+ }
+
+ return S_OK;
+ }
+
+ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencySleepMode(BOOL lowLatencyMode, BOOL lowLatencyBoost, uint32_t minimumIntervalUs) {
+ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) {
+ return E_NOINTERFACE;
+ }
+
+ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain();
+ if (pSwapChain) {
+ VkResult res = pSwapChain->SetLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs);
+ if (res != VK_SUCCESS) {
+ return S_FALSE;
+ }
+ }
+
+ return S_OK;
+ }
+
+ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencyMarker(uint64_t frameID, uint32_t markerType) {
+ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) {
+ return E_NOINTERFACE;
+ }
+
+ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain();
+ VkLatencyMarkerNV marker = static_cast<VkLatencyMarkerNV>(markerType);
+ uint64_t internalFrameId = frameID + DXGI_MAX_SWAP_CHAIN_BUFFERS;
+
+ m_device->GetDXVKDevice()->setLatencyMarker(marker, internalFrameId);
+
+ if (pSwapChain && pSwapChain->LowLatencyEnabled()) {
+ pSwapChain->SetLatencyMarker(marker, internalFrameId);
+ }
+
+ return S_OK;
+ }
+
+ HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::GetLatencyInfo(D3D11_LATENCY_RESULTS* latencyResults)
+ {
+ if (!m_device->GetDXVKDevice()->features().nvLowLatency2) {
+ return E_NOINTERFACE;
+ }
+
+ constexpr uint32_t frameReportSize = 64;
+ D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain();
+
+ if (pSwapChain && pSwapChain->LowLatencyEnabled()) {
+ std::vector<VkLatencyTimingsFrameReportNV> frameReports;
+ pSwapChain->GetLatencyTimings(frameReports);
+
+ if (frameReports.size() >= frameReportSize) {
+ for (uint32_t i = 0; i < frameReportSize; i++) {
+ VkLatencyTimingsFrameReportNV& frameReport = frameReports[i];
+ latencyResults->frame_reports[i].frameID = frameReport.presentID - DXGI_MAX_SWAP_CHAIN_BUFFERS;
+ latencyResults->frame_reports[i].inputSampleTime = frameReport.inputSampleTimeUs;
+ latencyResults->frame_reports[i].simStartTime = frameReport.simStartTimeUs;
+ latencyResults->frame_reports[i].simEndTime = frameReport.simEndTimeUs;
+ latencyResults->frame_reports[i].renderSubmitStartTime = frameReport.renderSubmitStartTimeUs;
+ latencyResults->frame_reports[i].renderSubmitEndTime = frameReport.renderSubmitEndTimeUs;
+ latencyResults->frame_reports[i].presentStartTime = frameReport.presentStartTimeUs;
+ latencyResults->frame_reports[i].presentEndTime = frameReport.presentEndTimeUs;
+ latencyResults->frame_reports[i].driverStartTime = frameReport.driverStartTimeUs;
+ latencyResults->frame_reports[i].driverEndTime = frameReport.driverEndTimeUs;
+ latencyResults->frame_reports[i].osRenderQueueStartTime = frameReport.osRenderQueueStartTimeUs;
+ latencyResults->frame_reports[i].osRenderQueueEndTime = frameReport.osRenderQueueEndTimeUs;
+ latencyResults->frame_reports[i].gpuRenderStartTime = frameReport.gpuRenderStartTimeUs;
+ latencyResults->frame_reports[i].gpuRenderEndTime = frameReport.gpuRenderEndTimeUs;
+ latencyResults->frame_reports[i].gpuActiveRenderTimeUs =
+ frameReport.gpuRenderEndTimeUs - frameReport.gpuRenderStartTimeUs;
+ latencyResults->frame_reports[i].gpuFrameTimeUs = 0;
+
+ if (i) {
+ latencyResults->frame_reports[i].gpuFrameTimeUs =
+ frameReports[i].gpuRenderEndTimeUs - frameReports[i - 1].gpuRenderEndTimeUs;
+ }
+ }
+ }
+ }
+
+ return S_OK;
+ }
+
+
+
+
D3D11VideoDevice::D3D11VideoDevice(
D3D11DXGIDevice* pContainer,
D3D11Device* pDevice)
@@ -3021,7 +3149,11 @@ namespace dxvk {
Com<D3D11SwapChain> presenter = new D3D11SwapChain(
m_container, m_device, pSurfaceFactory, pDesc);
-
+
+ if (m_device->GetDXVKDevice()->features().nvLowLatency2) {
+ m_device->AddSwapchain(presenter.ref());
+ }
+
*ppSwapChain = presenter.ref();
return S_OK;
} catch (const DxvkError& e) {
@@ -3078,17 +3210,18 @@ namespace dxvk {
Rc<DxvkDevice> pDxvkDevice,
D3D_FEATURE_LEVEL FeatureLevel,
UINT FeatureFlags)
- : m_dxgiAdapter (pAdapter),
- m_dxvkInstance (pDxvkInstance),
- m_dxvkAdapter (pDxvkAdapter),
- m_dxvkDevice (pDxvkDevice),
- m_d3d11Device (this, FeatureLevel, FeatureFlags),
- m_d3d11DeviceExt(this, &m_d3d11Device),
- m_d3d11Interop (this, &m_d3d11Device),
- m_d3d11Video (this, &m_d3d11Device),
- m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue),
- m_metaDevice (this),
- m_dxvkFactory (this, &m_d3d11Device) {
+ : m_dxgiAdapter (pAdapter),
+ m_dxvkInstance (pDxvkInstance),
+ m_dxvkAdapter (pDxvkAdapter),
+ m_dxvkDevice (pDxvkDevice),
+ m_d3d11Device (this, FeatureLevel, FeatureFlags),
+ m_d3d11DeviceExt (this, &m_d3d11Device),
+ m_d3d11Interop (this, &m_d3d11Device),
+ m_d3dLowLatencyDevice (this, &m_d3d11Device),
+ m_d3d11Video (this, &m_d3d11Device),
+ m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue),
+ m_metaDevice (this),
+ m_dxvkFactory (this, &m_d3d11Device) {
}
@@ -3142,7 +3275,12 @@ namespace dxvk {
*ppvObject = ref(&m_d3d11DeviceExt);
return S_OK;
}
-
+
+ if (riid == __uuidof(ID3DLowLatencyDevice)) {
+ *ppvObject = ref(&m_d3dLowLatencyDevice);
+ return S_OK;
+ }
+
if (riid == __uuidof(IDXGIDXVKDevice)) {
*ppvObject = ref(&m_metaDevice);
return S_OK;
diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h
index 7a44b5ad99c..7372bbec168 100644
--- a/src/d3d11/d3d11_device.h
+++ b/src/d3d11/d3d11_device.h
@@ -24,6 +24,7 @@
#include "d3d11_options.h"
#include "d3d11_shader.h"
#include "d3d11_state.h"
+#include "d3d11_swapchain.h"
#include "d3d11_util.h"
namespace dxvk {
@@ -428,6 +429,22 @@ namespace dxvk {
bool Is11on12Device() const;
+ void AddSwapchain(D3D11SwapChain* swapchain) {
+ m_swapchains.push_back(swapchain);
+ }
+
+    void RemoveSwapchain(D3D11SwapChain* swapchain) {
+      m_swapchains.erase(std::remove(m_swapchains.begin(), m_swapchains.end(), swapchain), m_swapchains.end());
+    }
+
+ UINT GetSwapchainCount() {
+ return m_swapchains.size();
+ }
+
+ D3D11SwapChain* GetLowLatencySwapChain() {
+ return (m_swapchains.size()) == 1 ? m_swapchains[0] : nullptr;
+ }
+
static D3D_FEATURE_LEVEL GetMaxFeatureLevel(
const Rc<DxvkInstance>& Instance,
const Rc<DxvkAdapter>& Adapter);
@@ -464,6 +481,8 @@ namespace dxvk {
D3D_FEATURE_LEVEL m_maxFeatureLevel;
D3D11DeviceFeatures m_deviceFeatures;
+ std::vector<D3D11SwapChain*> m_swapchains;
+
HRESULT CreateShaderModule(
D3D11CommonShader* pShaderModule,
DxvkShaderKey ShaderKey,
@@ -545,28 +564,28 @@ namespace dxvk {
uint64_t* gpuVAStart,
uint64_t* gpuVASize);
- bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX(
+ bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX(
ID3D11Resource* pResource,
const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc,
ID3D11UnorderedAccessView** ppUAV,
uint32_t* pDriverHandle);
- bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX(
+ bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX(
ID3D11Resource* pResource,
const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc,
ID3D11ShaderResourceView** ppSRV,
uint32_t* pDriverHandle);
- bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX(
+ bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX(
const D3D11_SAMPLER_DESC* pSamplerDesc,
ID3D11SamplerState** ppSamplerState,
uint32_t* pDriverHandle);
-
+
private:
D3D11DXGIDevice* m_container;
D3D11Device* m_device;
-
+
void AddSamplerAndHandleNVX(
ID3D11SamplerState* pSampler,
uint32_t Handle);
@@ -586,6 +605,46 @@ namespace dxvk {
std::unordered_map<uint32_t, ID3D11ShaderResourceView*> m_srvHandleToPtr;
};
+ /**
+   * \brief D3D11 low latency device
+ */
+ class D3D11LowLatencyDevice : public ID3DLowLatencyDevice {
+
+ public:
+
+ D3D11LowLatencyDevice(
+ D3D11DXGIDevice* pContainer,
+ D3D11Device* pDevice);
+
+ ULONG STDMETHODCALLTYPE AddRef();
+
+ ULONG STDMETHODCALLTYPE Release();
+
+ HRESULT STDMETHODCALLTYPE QueryInterface(
+ REFIID riid,
+ void** ppvObject);
+
+ BOOL STDMETHODCALLTYPE SupportsLowLatency();
+
+ HRESULT STDMETHODCALLTYPE LatencySleep();
+
+ HRESULT STDMETHODCALLTYPE SetLatencySleepMode(
+ BOOL lowLatencyMode,
+ BOOL lowLatencyBoost,
+ uint32_t minimumIntervalUs);
+
+ HRESULT STDMETHODCALLTYPE SetLatencyMarker(
+ uint64_t frameID,
+ uint32_t markerType);
+
+ HRESULT STDMETHODCALLTYPE GetLatencyInfo(
+ D3D11_LATENCY_RESULTS* latencyResults);
+
+ private:
+
+ D3D11DXGIDevice* m_container;
+ D3D11Device* m_device;
+ };
/**
* \brief D3D11 video device
@@ -856,12 +915,13 @@ namespace dxvk {
Rc<DxvkAdapter> m_dxvkAdapter;
Rc<DxvkDevice> m_dxvkDevice;
- D3D11Device m_d3d11Device;
- D3D11DeviceExt m_d3d11DeviceExt;
- D3D11VkInterop m_d3d11Interop;
- D3D11VideoDevice m_d3d11Video;
- D3D11on12Device m_d3d11on12;
- DXGIDXVKDevice m_metaDevice;
+ D3D11Device m_d3d11Device;
+ D3D11DeviceExt m_d3d11DeviceExt;
+ D3D11VkInterop m_d3d11Interop;
+ D3D11LowLatencyDevice m_d3dLowLatencyDevice;
+ D3D11VideoDevice m_d3d11Video;
+ D3D11on12Device m_d3d11on12;
+ DXGIDXVKDevice m_metaDevice;
DXGIVkSwapChainFactory m_dxvkFactory;
diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h
index 587cde1394e..49b301b0fdb 100644
--- a/src/d3d11/d3d11_interfaces.h
+++ b/src/d3d11/d3d11_interfaces.h
@@ -16,6 +16,7 @@ enum D3D11_VK_EXTENSION : uint32_t {
D3D11_VK_EXT_BARRIER_CONTROL = 3,
D3D11_VK_NVX_BINARY_IMPORT = 4,
D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5,
+ D3D11_VK_NV_LOW_LATENCY_2 = 6
};
@@ -27,6 +28,33 @@ enum D3D11_VK_BARRIER_CONTROL : uint32_t {
D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1,
};
+/**
+ * \brief Frame Report Info
+ */
+typedef struct D3D11_LATENCY_RESULTS
+{
+ UINT32 version;
+ struct D3D11_FRAME_REPORT {
+ UINT64 frameID;
+ UINT64 inputSampleTime;
+ UINT64 simStartTime;
+ UINT64 simEndTime;
+ UINT64 renderSubmitStartTime;
+ UINT64 renderSubmitEndTime;
+ UINT64 presentStartTime;
+ UINT64 presentEndTime;
+ UINT64 driverStartTime;
+ UINT64 driverEndTime;
+ UINT64 osRenderQueueStartTime;
+ UINT64 osRenderQueueEndTime;
+ UINT64 gpuRenderStartTime;
+ UINT64 gpuRenderEndTime;
+ UINT32 gpuActiveRenderTimeUs;
+ UINT32 gpuFrameTimeUs;
+ UINT8 rsvd[120];
+ } frame_reports[64];
+ UINT8 rsvd[32];
+} D3D11_LATENCY_RESULTS;
/**
* \brief Extended shader interface
@@ -114,6 +142,33 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice {
uint32_t* pCudaTextureHandle) = 0;
};
+/**
+ * \brief Low latency D3D11 device
+ *
+ * Introduces methods to enable and configure the NV Reflex
+ * low latency mode, perform latency sleeps, submit latency
+ * markers and query frame latency reports.
+ */
+MIDL_INTERFACE("f3112584-41f9-348d-a59b-00b7e1d285d6")
+ID3DLowLatencyDevice : public IUnknown {
+ static const GUID guid;
+
+ virtual BOOL STDMETHODCALLTYPE SupportsLowLatency() = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE LatencySleep() = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE SetLatencySleepMode(
+ BOOL lowLatencyMode,
+ BOOL lowLatencyBoost,
+ uint32_t minimumIntervalUs) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE SetLatencyMarker(
+ uint64_t frameID,
+ uint32_t markerType) = 0;
+
+ virtual HRESULT STDMETHODCALLTYPE GetLatencyInfo(
+ D3D11_LATENCY_RESULTS* latencyResults) = 0;
+};
/**
* \brief Extended D3D11 context
@@ -182,17 +237,18 @@ ID3D11VkExtContext1 : public ID3D11VkExtContext {
uint32_t numWriteResources) = 0;
};
-
#ifdef _MSC_VER
struct __declspec(uuid("bb8a4fb9-3935-4762-b44b-35189a26414a")) ID3D11VkExtShader;
struct __declspec(uuid("8a6e3c42-f74c-45b7-8265-a231b677ca17")) ID3D11VkExtDevice;
struct __declspec(uuid("cfcf64ef-9586-46d0-bca4-97cf2ca61b06")) ID3D11VkExtDevice1;
struct __declspec(uuid("fd0bca13-5cb6-4c3a-987e-4750de2ca791")) ID3D11VkExtContext;
struct __declspec(uuid("874b09b2-ae0b-41d8-8476-5f3b7a0e879d")) ID3D11VkExtContext1;
+struct __declspec(uuid("f3112584-41f9-348d-a59b-00b7e1d285d6")) ID3DLowLatencyDevice;
#else
__CRT_UUID_DECL(ID3D11VkExtShader, 0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a);
__CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17);
__CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06);
__CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91);
__CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d);
+__CRT_UUID_DECL(ID3DLowLatencyDevice, 0xf3112584,0x41f9,0x348d,0xa5,0x9b,0x00,0xb7,0xe1,0xd2,0x85,0xd6);
#endif
diff --git a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp
index 0e823f410ef..4faffa00e48 100644
--- a/src/d3d11/d3d11_swapchain.cpp
+++ b/src/d3d11/d3d11_swapchain.cpp
@@ -351,6 +351,34 @@ namespace dxvk {
*pFrameStatistics = m_frameStatistics;
}
+ VkResult D3D11SwapChain::SetLatencySleepMode(
+ bool lowLatencyMode,
+ bool lowLatencyBoost,
+ uint32_t minimumIntervalUs) {
+ if (lowLatencyMode && !LowLatencyEnabled()) {
+ RecreateSwapChain();
+ }
+ return m_presenter->setLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs);
+ }
+
+ VkResult D3D11SwapChain::LatencySleep() {
+ return m_presenter->latencySleep();
+ }
+
+ void D3D11SwapChain::SetLatencyMarker(
+ VkLatencyMarkerNV marker,
+ uint64_t presentId) {
+ m_presenter->setLatencyMarker(marker, presentId);
+ }
+
+ VkResult D3D11SwapChain::GetLatencyTimings(
+ std::vector<VkLatencyTimingsFrameReportNV>& frameReports) {
+ return m_presenter->getLatencyTimings(frameReports);
+ }
+
+ bool D3D11SwapChain::LowLatencyEnabled() {
+ return m_presenter->lowLatencyEnabled();
+ }
HRESULT D3D11SwapChain::PresentImage(UINT SyncInterval) {
// Flush pending rendering commands before
@@ -410,9 +438,11 @@ namespace dxvk {
uint32_t Repeat) {
auto lock = pContext->LockContext();
- // Bump frame ID as necessary
- if (!Repeat)
- m_frameId += 1;
+ if (!Repeat) {
+ m_frameId = (m_presenter->lowLatencyEnabled() && m_device->getLatencyMarkers().present) ?
+ m_device->getLatencyMarkers().present :
+ m_frameId + 1;
+ }
// Present from CS thread so that we don't
// have to synchronize with it first.
diff --git a/src/d3d11/d3d11_swapchain.h b/src/d3d11/d3d11_swapchain.h
index 00073d7690e..a3ecf634381 100644
--- a/src/d3d11/d3d11_swapchain.h
+++ b/src/d3d11/d3d11_swapchain.h
@@ -86,6 +86,22 @@ namespace dxvk {
void STDMETHODCALLTYPE GetFrameStatistics(
DXGI_VK_FRAME_STATISTICS* pFrameStatistics);
+ VkResult SetLatencySleepMode(
+ bool lowLatencyMode,
+ bool lowLatencyBoost,
+ uint32_t minimumIntervalUs);
+
+ VkResult LatencySleep();
+
+ void SetLatencyMarker(
+ VkLatencyMarkerNV marker,
+ uint64_t presentId);
+
+ VkResult GetLatencyTimings(
+ std::vector<VkLatencyTimingsFrameReportNV>& frameReports);
+
+ bool LowLatencyEnabled();
+
private:
enum BindingIds : uint32_t {
@@ -176,4 +192,4 @@ namespace dxvk {
};
-}
\ No newline at end of file
+}
diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp
index cf4c3cce68f..6a3b4f08180 100644
--- a/src/dxvk/dxvk_adapter.cpp
+++ b/src/dxvk/dxvk_adapter.cpp
@@ -927,6 +927,9 @@ namespace dxvk {
m_deviceFeatures.khrPresentWait.pNext = std::exchange(m_deviceFeatures.core.pNext, &m_deviceFeatures.khrPresentWait);
}
+ if (m_deviceExtensions.supports(VK_NV_LOW_LATENCY_2_EXTENSION_NAME))
+ m_deviceFeatures.nvLowLatency2 = VK_TRUE;
+
if (m_deviceExtensions.supports(VK_NVX_BINARY_IMPORT_EXTENSION_NAME))
m_deviceFeatures.nvxBinaryImport = VK_TRUE;
@@ -994,6 +997,7 @@ namespace dxvk {
&devExtensions.khrPresentWait,
&devExtensions.khrSwapchain,
&devExtensions.khrWin32KeyedMutex,
+ &devExtensions.nvLowLatency2,
&devExtensions.nvxBinaryImport,
&devExtensions.nvxImageViewHandle,
}};
@@ -1133,8 +1137,13 @@ namespace dxvk {
enabledFeatures.khrPresentWait.pNext = std::exchange(enabledFeatures.core.pNext, &enabledFeatures.khrPresentWait);
}
- if (devExtensions.nvxBinaryImport)
+ if (devExtensions.nvxBinaryImport) {
enabledFeatures.nvxBinaryImport = VK_TRUE;
+ }
+
+ if (devExtensions.nvLowLatency2) {
+ enabledFeatures.nvLowLatency2 = VK_TRUE;
+ }
if (devExtensions.nvxImageViewHandle)
enabledFeatures.nvxImageViewHandle = VK_TRUE;
@@ -1279,6 +1288,8 @@ namespace dxvk {
"\n presentId : ", features.khrPresentId.presentId ? "1" : "0",
"\n", VK_KHR_PRESENT_WAIT_EXTENSION_NAME,
"\n presentWait : ", features.khrPresentWait.presentWait ? "1" : "0",
+ "\n", VK_NV_LOW_LATENCY_2_EXTENSION_NAME,
+ "\n extension supported : ", features.nvLowLatency2 ? "1" : "0",
"\n", VK_NVX_BINARY_IMPORT_EXTENSION_NAME,
"\n extension supported : ", features.nvxBinaryImport ? "1" : "0",
"\n", VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME,
diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp
index 3bd3aa953d4..54b50ea533c 100644
--- a/src/dxvk/dxvk_cmdlist.cpp
+++ b/src/dxvk/dxvk_cmdlist.cpp
@@ -56,10 +56,12 @@ namespace dxvk {
VkResult DxvkCommandSubmission::submit(
DxvkDevice* device,
- VkQueue queue) {
+ VkQueue queue,
+ uint64_t frameId) {
auto vk = device->vkd();
VkSubmitInfo2 submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2 };
+ VkLatencySubmissionPresentIdNV latencySubmitInfo = { VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV };
if (!m_semaphoreWaits.empty()) {
submitInfo.waitSemaphoreInfoCount = m_semaphoreWaits.size();
@@ -76,6 +78,11 @@ namespace dxvk {
submitInfo.pSignalSemaphoreInfos = m_semaphoreSignals.data();
}
+ if (device->features().nvLowLatency2 && frameId && !m_commandBuffers.empty()) {
+ latencySubmitInfo.presentID = frameId;
+ latencySubmitInfo.pNext = std::exchange(submitInfo.pNext, &latencySubmitInfo);
+ }
+
VkResult vr = VK_SUCCESS;
if (!this->isEmpty())
@@ -206,7 +213,7 @@ namespace dxvk {
}
- VkResult DxvkCommandList::submit() {
+ VkResult DxvkCommandList::submit(uint64_t frameId) {
VkResult status = VK_SUCCESS;
const auto& graphics = m_device->queues().graphics;
@@ -238,7 +245,7 @@ namespace dxvk {
// for any prior submissions, then block any subsequent ones
m_commandSubmission.signalSemaphore(m_bindSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT);
- if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle)))
+ if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId)))
return status;
sparseBind->waitSemaphore(m_bindSemaphore, 0);
@@ -259,7 +266,7 @@ namespace dxvk {
if (m_device->hasDedicatedTransferQueue() && !m_commandSubmission.isEmpty()) {
m_commandSubmission.signalSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT);
- if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle)))
+ if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle, frameId)))
return status;
m_commandSubmission.waitSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT);
@@ -297,7 +304,7 @@ namespace dxvk {
}
// Finally, submit all graphics commands of the current submission
- if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle)))
+ if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId)))
return status;
}
diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h
index b9b9a165dd3..f9527516e17 100644
--- a/src/dxvk/dxvk_cmdlist.h
+++ b/src/dxvk/dxvk_cmdlist.h
@@ -94,7 +94,8 @@ namespace dxvk {
*/
VkResult submit(
DxvkDevice* device,
- VkQueue queue);
+ VkQueue queue,
+ uint64_t frameId);
/**
* \brief Resets object
@@ -199,7 +200,7 @@ namespace dxvk {
* \brief Submits command list
* \returns Submission status
*/
- VkResult submit();
+ VkResult submit(uint64_t frameId);
/**
* \brief Stat counters
diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp
index 9a053791a7b..44d208c41aa 100644
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@@ -18,6 +18,7 @@ namespace dxvk {
m_properties (adapter->devicePropertiesExt()),
m_perfHints (getPerfHints()),
m_objects (this),
+ m_latencyMarkers ({}),
m_queues (queues),
m_submissionQueue (this, queueCallback) {
@@ -274,6 +275,7 @@ namespace dxvk {
DxvkSubmitStatus* status) {
DxvkSubmitInfo submitInfo = { };
submitInfo.cmdList = commandList;
+ submitInfo.frameId = m_latencyMarkers.render;
m_submissionQueue.submit(submitInfo, status);
std::lock_guard<sync::Spinlock> statLock(m_statLock);
diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h
index a24ee311bf5..cfef76a0874 100644
--- a/src/dxvk/dxvk_device.h
+++ b/src/dxvk/dxvk_device.h
@@ -66,7 +66,16 @@ namespace dxvk {
DxvkDeviceQueue transfer;
DxvkDeviceQueue sparse;
};
-
+
+ /**
+ * \brief Latency marker frame ids
+ */
+ struct DxvkDeviceLowLatencyMarkers {
+ uint64_t simulation;
+ uint64_t render;
+ uint64_t present;
+ };
+
/**
* \brief DXVK device
*
@@ -534,6 +543,44 @@ namespace dxvk {
* used by the GPU can be safely destroyed.
*/
void waitForIdle();
+
+ /**
+ * \brief Updates the frame id for the given frame marker
+ *
+ * \param [in] marker The marker to set the frame ID for
+ * \param [in] id The frame ID to set
+ */
+ void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t id) {
+ switch (marker) {
+ case VK_LATENCY_MARKER_SIMULATION_START_NV:
+ m_latencyMarkers.simulation = id;
+ break;
+ case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV:
+ m_latencyMarkers.render = id;
+ break;
+ case VK_LATENCY_MARKER_PRESENT_START_NV:
+ m_latencyMarkers.present = id;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * \brief Resets the latency markers back to zero
+ */
+ void resetLatencyMarkers() {
+ m_latencyMarkers = {};
+ }
+
+ /**
+ * \brief Returns the current set of latency marker frame IDs
+ *
+ * \returns The current set of frame marker IDs
+ */
+ DxvkDeviceLowLatencyMarkers getLatencyMarkers() {
+ return m_latencyMarkers;
+ }
private:
@@ -549,6 +596,8 @@ namespace dxvk {
DxvkDevicePerfHints m_perfHints;
DxvkObjects m_objects;
+ DxvkDeviceLowLatencyMarkers m_latencyMarkers;
+
sync::Spinlock m_statLock;
DxvkStatCounters m_statCounters;
diff --git a/src/dxvk/dxvk_device_info.h b/src/dxvk/dxvk_device_info.h
index e23a0e1812e..ec0bc5a645e 100644
--- a/src/dxvk/dxvk_device_info.h
+++ b/src/dxvk/dxvk_device_info.h
@@ -68,9 +68,10 @@ namespace dxvk {
VkPhysicalDeviceMaintenance5FeaturesKHR khrMaintenance5;
VkPhysicalDevicePresentIdFeaturesKHR khrPresentId;
VkPhysicalDevicePresentWaitFeaturesKHR khrPresentWait;
+ VkBool32 nvLowLatency2;
VkBool32 nvxBinaryImport;
VkBool32 nvxImageViewHandle;
VkBool32 khrWin32KeyedMutex;
};
-}
\ No newline at end of file
+}
diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h
index 8164ccf6ad6..041d00c3cee 100644
--- a/src/dxvk/dxvk_extensions.h
+++ b/src/dxvk/dxvk_extensions.h
@@ -325,6 +325,7 @@ namespace dxvk {
DxvkExt khrPresentWait = { VK_KHR_PRESENT_WAIT_EXTENSION_NAME, DxvkExtMode::Optional };
DxvkExt khrSwapchain = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, DxvkExtMode::Required };
DxvkExt khrWin32KeyedMutex = { VK_KHR_WIN32_KEYED_MUTEX_EXTENSION_NAME, DxvkExtMode::Optional };
+ DxvkExt nvLowLatency2 = { VK_NV_LOW_LATENCY_2_EXTENSION_NAME, DxvkExtMode::Optional };
DxvkExt nvxBinaryImport = { VK_NVX_BINARY_IMPORT_EXTENSION_NAME, DxvkExtMode::Disabled };
DxvkExt nvxImageViewHandle = { VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, DxvkExtMode::Disabled };
};
diff --git a/src/dxvk/dxvk_presenter.cpp b/src/dxvk/dxvk_presenter.cpp
index 10f13da2783..9f7c6a0def0 100644
--- a/src/dxvk/dxvk_presenter.cpp
+++ b/src/dxvk/dxvk_presenter.cpp
@@ -18,6 +18,15 @@ namespace dxvk {
// with present operations and periodically signals the event
if (m_device->features().khrPresentWait.presentWait && m_signal != nullptr)
m_frameThread = dxvk::thread([this] { runFrameThread(); });
+
+ // If nvLowLatency2 is supported create the fence
+ if (m_device->features().nvLowLatency2) {
+ DxvkFenceCreateInfo info = {};
+ info.initialValue = 0;
+ info.sharedType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_FLAG_BITS_MAX_ENUM;
+
+ m_lowLatencyFence = DxvkFenceValuePair(m_device->createFence(info), 0u);
+ }
}
@@ -48,6 +57,7 @@ namespace dxvk {
VkResult Presenter::acquireNextImage(PresenterSync& sync, uint32_t& index) {
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
sync = m_semaphores.at(m_frameIndex);
// Don't acquire more than one image at a time
@@ -68,11 +78,13 @@ namespace dxvk {
VkResult Presenter::presentImage(
VkPresentModeKHR mode,
uint64_t frameId) {
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
+
PresenterSync sync = m_semaphores.at(m_frameIndex);
VkPresentIdKHR presentId = { VK_STRUCTURE_TYPE_PRESENT_ID_KHR };
presentId.swapchainCount = 1;
- presentId.pPresentIds = &frameId;
+ presentId.pPresentIds = &frameId;
VkSwapchainPresentModeInfoEXT modeInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT };
modeInfo.swapchainCount = 1;
@@ -151,6 +163,8 @@ namespace dxvk {
VkResult Presenter::recreateSwapChain(const PresenterDesc& desc) {
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
+
if (m_swapchain)
destroySwapchain();
@@ -293,6 +307,9 @@ namespace dxvk {
modeInfo.presentModeCount = compatibleModes.size();
modeInfo.pPresentModes = compatibleModes.data();
+ VkSwapchainLatencyCreateInfoNV lowLatencyInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV };
+ lowLatencyInfo.latencyModeEnable = VK_TRUE;
+
VkSwapchainCreateInfoKHR swapInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR };
swapInfo.surface = m_surface;
swapInfo.minImageCount = m_info.imageCount;
@@ -314,6 +331,9 @@ namespace dxvk {
if (m_device->features().extSwapchainMaintenance1.swapchainMaintenance1)
modeInfo.pNext = std::exchange(swapInfo.pNext, &modeInfo);
+ if (m_device->features().nvLowLatency2)
+ lowLatencyInfo.pNext = std::exchange(swapInfo.pNext, &lowLatencyInfo);
+
Logger::info(str::format(
"Presenter: Actual swap chain properties:"
"\n Format: ", m_info.format.format,
@@ -322,11 +342,21 @@ namespace dxvk {
"\n Buffer size: ", m_info.imageExtent.width, "x", m_info.imageExtent.height,
"\n Image count: ", m_info.imageCount,
"\n Exclusive FS: ", desc.fullScreenExclusive));
-
+
if ((status = m_vkd->vkCreateSwapchainKHR(m_vkd->device(),
&swapInfo, nullptr, &m_swapchain)))
return status;
-
+
+ if (m_device->features().nvLowLatency2) {
+ VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV };
+ sleepModeInfo.lowLatencyMode = m_lowLatencyEnabled;
+ sleepModeInfo.lowLatencyBoost = m_lowLatencyBoost;
+ sleepModeInfo.minimumIntervalUs = m_minimumIntervalUs;
+
+ if ((status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo)))
+ return status;
+ }
+
// Acquire images and create views
std::vector<VkImage> images;
@@ -422,6 +452,69 @@ namespace dxvk {
m_vkd->vkSetHdrMetadataEXT(m_vkd->device(), 1, &m_swapchain, &hdrMetadata);
}
+ VkResult Presenter::setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs) {
+ VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV };
+ sleepModeInfo.lowLatencyMode = lowLatencyMode;
+ sleepModeInfo.lowLatencyBoost = lowLatencyBoost;
+ sleepModeInfo.minimumIntervalUs = minimumIntervalUs;
+
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
+ VkResult status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo);
+
+ m_lowLatencyEnabled = lowLatencyMode;
+ m_lowLatencyBoost = lowLatencyBoost;
+ m_minimumIntervalUs = minimumIntervalUs;
+
+ if (!lowLatencyMode)
+ m_device->resetLatencyMarkers();
+
+ return status;
+ }
+
+ VkResult Presenter::latencySleep() {
+ VkSemaphore sem = m_lowLatencyFence.fence->handle();
+ uint64_t waitValue = m_lowLatencyFence.value + 1;
+ m_lowLatencyFence.value++;
+
+ VkLatencySleepInfoNV sleepInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV };
+ sleepInfo.signalSemaphore = sem;
+ sleepInfo.value = waitValue;
+
+ {
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
+ m_vkd->vkLatencySleepNV(m_vkd->device(), m_swapchain, &sleepInfo);
+ }
+
+ m_lowLatencyFence.fence->wait(waitValue);
+
+ return VK_SUCCESS;
+ }
+
+ void Presenter::setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId) {
+ VkSetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV };
+ markerInfo.presentID = presentId;
+ markerInfo.marker = marker;
+
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
+ m_vkd->vkSetLatencyMarkerNV(m_vkd->device(), m_swapchain, &markerInfo);
+ }
+
+ VkResult Presenter::getLatencyTimings(std::vector<VkLatencyTimingsFrameReportNV>& frameReports) {
+ VkGetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV };
+ uint32_t timingCount = 0;
+
+ std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
+ m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo);
+
+ if (timingCount != 0) {
+ frameReports.resize(timingCount, { VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV });
+ markerInfo.pTimings = frameReports.data();
+
+ m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo);
+ }
+
+ return VK_SUCCESS;
+ }
VkResult Presenter::getSupportedFormats(std::vector<VkSurfaceFormatKHR>& formats, VkFullScreenExclusiveEXT fullScreenExclusive) const {
uint32_t numFormats = 0;
diff --git a/src/dxvk/dxvk_presenter.h b/src/dxvk/dxvk_presenter.h
index c5ba1273364..aa52b97b4ce 100644
--- a/src/dxvk/dxvk_presenter.h
+++ b/src/dxvk/dxvk_presenter.h
@@ -15,6 +15,7 @@
#include "../vulkan/vulkan_loader.h"
#include "dxvk_format.h"
+#include "dxvk_fence.h"
namespace dxvk {
@@ -224,6 +225,42 @@ namespace dxvk {
*/
void setHdrMetadata(const VkHdrMetadataEXT& hdrMetadata);
+ /**
+ * \brief Set the latency mode of the swapchain
+ *
+     * \param [in] lowLatencyMode Determines if the low latency
+     *   mode should be enabled or disabled
+ */
+ VkResult setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs);
+
+ /**
+ * \brief Delay rendering work for lower latency
+ */
+ VkResult latencySleep();
+
+ /**
+ * \brief Set a latency marker for the given stage
+ *
+ * \param [in] marker The stage this marker is for
+ * \param [in] presentId The presentId this marker is for
+ */
+ void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId);
+
+ /**
+ * \brief Get the low latency timing info
+ *
+ * \param [out] latencyInfo The structure to place
+ * the latency timings into
+ */
+ VkResult getLatencyTimings(std::vector<VkLatencyTimingsFrameReportNV>& frameReports);
+
+ /**
+ * \brief Returns the low latency enabled state
+ */
+ bool lowLatencyEnabled() {
+ return m_lowLatencyEnabled;
+ }
+
private:
Rc<DxvkDevice> m_device;
@@ -237,6 +274,11 @@ namespace dxvk {
VkSurfaceKHR m_surface = VK_NULL_HANDLE;
VkSwapchainKHR m_swapchain = VK_NULL_HANDLE;
+ DxvkFenceValuePair m_lowLatencyFence = {};
+ bool m_lowLatencyEnabled = false;
+ bool m_lowLatencyBoost = false;
+ uint32_t m_minimumIntervalUs = 0;
+
std::vector<PresenterImage> m_images;
std::vector<PresenterSync> m_semaphores;
@@ -250,6 +292,7 @@ namespace dxvk {
FpsLimiter m_fpsLimiter;
dxvk::mutex m_frameMutex;
+ dxvk::mutex m_lowLatencyMutex;
dxvk::condition_variable m_frameCond;
dxvk::thread m_frameThread;
std::queue<PresenterFrame> m_frameQueue;
diff --git a/src/dxvk/dxvk_queue.cpp b/src/dxvk/dxvk_queue.cpp
index 7273a37d608..546a1f838b8 100644
--- a/src/dxvk/dxvk_queue.cpp
+++ b/src/dxvk/dxvk_queue.cpp
@@ -126,7 +126,7 @@ namespace dxvk {
m_callback(true);
if (entry.submit.cmdList != nullptr)
- entry.result = entry.submit.cmdList->submit();
+ entry.result = entry.submit.cmdList->submit(entry.submit.frameId);
else if (entry.present.presenter != nullptr)
entry.result = entry.present.presenter->presentImage(entry.present.presentMode, entry.present.frameId);
@@ -226,4 +226,4 @@ namespace dxvk {
}
}
-}
\ No newline at end of file
+}
diff --git a/src/dxvk/dxvk_queue.h b/src/dxvk/dxvk_queue.h
index 38d91f5dd09..a3c6e581b31 100644
--- a/src/dxvk/dxvk_queue.h
+++ b/src/dxvk/dxvk_queue.h
@@ -32,6 +32,7 @@ namespace dxvk {
*/
struct DxvkSubmitInfo {
Rc<DxvkCommandList> cmdList;
+ uint64_t frameId;
};
diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h
index 1741ccb8722..6b0f80ea248 100644
--- a/src/vulkan/vulkan_loader.h
+++ b/src/vulkan/vulkan_loader.h
@@ -452,6 +452,14 @@ namespace dxvk::vk {
VULKAN_FN(wine_vkAcquireKeyedMutex);
VULKAN_FN(wine_vkReleaseKeyedMutex);
#endif
+
+ #ifdef VK_NV_LOW_LATENCY_2_EXTENSION_NAME
+ VULKAN_FN(vkSetLatencySleepModeNV);
+ VULKAN_FN(vkLatencySleepNV);
+ VULKAN_FN(vkSetLatencyMarkerNV);
+ VULKAN_FN(vkGetLatencyTimingsNV);
+ VULKAN_FN(vkQueueNotifyOutOfBandNV);
+ #endif
};
}
diff --git a/src/dxvk/dxvk_presenter.cpp b/src/dxvk/dxvk_presenter.cpp
index 9f7c6a0..dbe5b80 100644
--- a/src/dxvk/dxvk_presenter.cpp
+++ b/src/dxvk/dxvk_presenter.cpp
@@ -501,16 +501,15 @@ namespace dxvk {
VkResult Presenter::getLatencyTimings(std::vector<VkLatencyTimingsFrameReportNV>& frameReports) {
VkGetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV };
- uint32_t timingCount = 0;
std::lock_guard<dxvk::mutex> lock(m_lowLatencyMutex);
- m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo);
+ m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &markerInfo);
- if (timingCount != 0) {
- frameReports.resize(timingCount, { VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV });
+ if (markerInfo.timingCount != 0) {
+ frameReports.resize(markerInfo.timingCount, { VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV });
markerInfo.pTimings = frameReports.data();
- m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo);
+ m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &markerInfo);
}
return VK_SUCCESS;
diff --git a/include/private/nvapi.h b/include/private/nvapi.h
new file mode 100644
index 00000000..96ced08d
--- /dev/null
+++ b/include/private/nvapi.h
@@ -0,0 +1,2795 @@
+/*****************************************************************************\
+|* *|
+|* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. *|
+|* *|
+|* Permission is hereby granted, free of charge, to any person obtaining a *|
+|* copy of this software and associated documentation files (the "Software"), *|
+|* to deal in the Software without restriction, including without limitation *|
+|* the rights to use, copy, modify, merge, publish, distribute, sublicense, *|
+|* and/or sell copies of the Software, and to permit persons to whom the *|
+|* Software is furnished to do so, subject to the following conditions: *|
+|* *|
+|* The above copyright notice and this permission notice shall be included in *|
+|* all copies or substantial portions of the Software. *|
+|* *|
+|* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *|
+|* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *|
+|* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *|
+|* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *|
+|* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *|
+|* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *|
+|* DEALINGS IN THE SOFTWARE. *|
+|* *|
+|* *|
+\*****************************************************************************/
+///////////////////////////////////////////////////////////////////////////////
+//
+// Date: Feb 27, 2023
+// File: nvapi.h
+//
+// NvAPI provides an interface to NVIDIA devices. This file contains the
+// interface constants, structure definitions and function prototypes.
+//
+// Target Profile: Open-Source
+// Target Platform: windows
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NVAPI_H
+#define _NVAPI_H
+
+#include "vkd3d_d3d12.h"
+
+#pragma pack(push,8) // Make sure we have consistent structure packings
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// ====================================================
+// Universal NvAPI Definitions
+// ====================================================
+#ifndef _WIN32
+#define __cdecl
+#endif
+
+// ====================================================
+// SAL related support
+// ====================================================
+
+#ifndef __ecount
+ #define __nvapi_undef__ecount
+ #define __ecount(size)
+#endif
+#ifndef __bcount
+ #define __nvapi_undef__bcount
+ #define __bcount(size)
+#endif
+#ifndef __in
+ #define __nvapi_undef__in
+ #define __in
+#endif
+#ifndef __in_ecount
+ #define __nvapi_undef__in_ecount
+ #define __in_ecount(size)
+#endif
+#ifndef __in_bcount
+ #define __nvapi_undef__in_bcount
+ #define __in_bcount(size)
+#endif
+#ifndef __in_z
+ #define __nvapi_undef__in_z
+ #define __in_z
+#endif
+#ifndef __in_ecount_z
+ #define __nvapi_undef__in_ecount_z
+ #define __in_ecount_z(size)
+#endif
+#ifndef __in_bcount_z
+ #define __nvapi_undef__in_bcount_z
+ #define __in_bcount_z(size)
+#endif
+#ifndef __in_nz
+ #define __nvapi_undef__in_nz
+ #define __in_nz
+#endif
+#ifndef __in_ecount_nz
+ #define __nvapi_undef__in_ecount_nz
+ #define __in_ecount_nz(size)
+#endif
+#ifndef __in_bcount_nz
+ #define __nvapi_undef__in_bcount_nz
+ #define __in_bcount_nz(size)
+#endif
+#ifndef __out
+ #define __nvapi_undef__out
+ #define __out
+#endif
+#ifndef __out_ecount
+ #define __nvapi_undef__out_ecount
+ #define __out_ecount(size)
+#endif
+#ifndef __out_bcount
+ #define __nvapi_undef__out_bcount
+ #define __out_bcount(size)
+#endif
+#ifndef __out_ecount_part
+ #define __nvapi_undef__out_ecount_part
+ #define __out_ecount_part(size,length)
+#endif
+#ifndef __out_bcount_part
+ #define __nvapi_undef__out_bcount_part
+ #define __out_bcount_part(size,length)
+#endif
+#ifndef __out_ecount_full
+ #define __nvapi_undef__out_ecount_full
+ #define __out_ecount_full(size)
+#endif
+#ifndef __out_bcount_full
+ #define __nvapi_undef__out_bcount_full
+ #define __out_bcount_full(size)
+#endif
+#ifndef __out_z
+ #define __nvapi_undef__out_z
+ #define __out_z
+#endif
+#ifndef __out_z_opt
+ #define __nvapi_undef__out_z_opt
+ #define __out_z_opt
+#endif
+#ifndef __out_ecount_z
+ #define __nvapi_undef__out_ecount_z
+ #define __out_ecount_z(size)
+#endif
+#ifndef __out_bcount_z
+ #define __nvapi_undef__out_bcount_z
+ #define __out_bcount_z(size)
+#endif
+#ifndef __out_ecount_part_z
+ #define __nvapi_undef__out_ecount_part_z
+ #define __out_ecount_part_z(size,length)
+#endif
+#ifndef __out_bcount_part_z
+ #define __nvapi_undef__out_bcount_part_z
+ #define __out_bcount_part_z(size,length)
+#endif
+#ifndef __out_ecount_full_z
+ #define __nvapi_undef__out_ecount_full_z
+ #define __out_ecount_full_z(size)
+#endif
+#ifndef __out_bcount_full_z
+ #define __nvapi_undef__out_bcount_full_z
+ #define __out_bcount_full_z(size)
+#endif
+#ifndef __out_nz
+ #define __nvapi_undef__out_nz
+ #define __out_nz
+#endif
+#ifndef __out_nz_opt
+ #define __nvapi_undef__out_nz_opt
+ #define __out_nz_opt
+#endif
+#ifndef __out_ecount_nz
+ #define __nvapi_undef__out_ecount_nz
+ #define __out_ecount_nz(size)
+#endif
+#ifndef __out_bcount_nz
+ #define __nvapi_undef__out_bcount_nz
+ #define __out_bcount_nz(size)
+#endif
+#ifndef __inout
+ #define __nvapi_undef__inout
+ #define __inout
+#endif
+#ifndef __inout_ecount
+ #define __nvapi_undef__inout_ecount
+ #define __inout_ecount(size)
+#endif
+#ifndef __inout_bcount
+ #define __nvapi_undef__inout_bcount
+ #define __inout_bcount(size)
+#endif
+#ifndef __inout_ecount_part
+ #define __nvapi_undef__inout_ecount_part
+ #define __inout_ecount_part(size,length)
+#endif
+#ifndef __inout_bcount_part
+ #define __nvapi_undef__inout_bcount_part
+ #define __inout_bcount_part(size,length)
+#endif
+#ifndef __inout_ecount_full
+ #define __nvapi_undef__inout_ecount_full
+ #define __inout_ecount_full(size)
+#endif
+#ifndef __inout_bcount_full
+ #define __nvapi_undef__inout_bcount_full
+ #define __inout_bcount_full(size)
+#endif
+#ifndef __inout_z
+ #define __nvapi_undef__inout_z
+ #define __inout_z
+#endif
+#ifndef __inout_ecount_z
+ #define __nvapi_undef__inout_ecount_z
+ #define __inout_ecount_z(size)
+#endif
+#ifndef __inout_bcount_z
+ #define __nvapi_undef__inout_bcount_z
+ #define __inout_bcount_z(size)
+#endif
+#ifndef __inout_nz
+ #define __nvapi_undef__inout_nz
+ #define __inout_nz
+#endif
+#ifndef __inout_ecount_nz
+ #define __nvapi_undef__inout_ecount_nz
+ #define __inout_ecount_nz(size)
+#endif
+#ifndef __inout_bcount_nz
+ #define __nvapi_undef__inout_bcount_nz
+ #define __inout_bcount_nz(size)
+#endif
+#ifndef __ecount_opt
+ #define __nvapi_undef__ecount_opt
+ #define __ecount_opt(size)
+#endif
+#ifndef __bcount_opt
+ #define __nvapi_undef__bcount_opt
+ #define __bcount_opt(size)
+#endif
+#ifndef __in_opt
+ #define __nvapi_undef__in_opt
+ #define __in_opt
+#endif
+#ifndef __in_ecount_opt
+ #define __nvapi_undef__in_ecount_opt
+ #define __in_ecount_opt(size)
+#endif
+#ifndef __in_bcount_opt
+ #define __nvapi_undef__in_bcount_opt
+ #define __in_bcount_opt(size)
+#endif
+#ifndef __in_z_opt
+ #define __nvapi_undef__in_z_opt
+ #define __in_z_opt
+#endif
+#ifndef __in_ecount_z_opt
+ #define __nvapi_undef__in_ecount_z_opt
+ #define __in_ecount_z_opt(size)
+#endif
+#ifndef __in_bcount_z_opt
+ #define __nvapi_undef__in_bcount_z_opt
+ #define __in_bcount_z_opt(size)
+#endif
+#ifndef __in_nz_opt
+ #define __nvapi_undef__in_nz_opt
+ #define __in_nz_opt
+#endif
+#ifndef __in_ecount_nz_opt
+ #define __nvapi_undef__in_ecount_nz_opt
+ #define __in_ecount_nz_opt(size)
+#endif
+#ifndef __in_bcount_nz_opt
+ #define __nvapi_undef__in_bcount_nz_opt
+ #define __in_bcount_nz_opt(size)
+#endif
+#ifndef __out_opt
+ #define __nvapi_undef__out_opt
+ #define __out_opt
+#endif
+#ifndef __out_ecount_opt
+ #define __nvapi_undef__out_ecount_opt
+ #define __out_ecount_opt(size)
+#endif
+#ifndef __out_bcount_opt
+ #define __nvapi_undef__out_bcount_opt
+ #define __out_bcount_opt(size)
+#endif
+#ifndef __out_ecount_part_opt
+ #define __nvapi_undef__out_ecount_part_opt
+ #define __out_ecount_part_opt(size,length)
+#endif
+#ifndef __out_bcount_part_opt
+ #define __nvapi_undef__out_bcount_part_opt
+ #define __out_bcount_part_opt(size,length)
+#endif
+#ifndef __out_ecount_full_opt
+ #define __nvapi_undef__out_ecount_full_opt
+ #define __out_ecount_full_opt(size)
+#endif
+#ifndef __out_bcount_full_opt
+ #define __nvapi_undef__out_bcount_full_opt
+ #define __out_bcount_full_opt(size)
+#endif
+#ifndef __out_ecount_z_opt
+ #define __nvapi_undef__out_ecount_z_opt
+ #define __out_ecount_z_opt(size)
+#endif
+#ifndef __out_bcount_z_opt
+ #define __nvapi_undef__out_bcount_z_opt
+ #define __out_bcount_z_opt(size)
+#endif
+#ifndef __out_ecount_part_z_opt
+ #define __nvapi_undef__out_ecount_part_z_opt
+ #define __out_ecount_part_z_opt(size,length)
+#endif
+#ifndef __out_bcount_part_z_opt
+ #define __nvapi_undef__out_bcount_part_z_opt
+ #define __out_bcount_part_z_opt(size,length)
+#endif
+#ifndef __out_ecount_full_z_opt
+ #define __nvapi_undef__out_ecount_full_z_opt
+ #define __out_ecount_full_z_opt(size)
+#endif
+#ifndef __out_bcount_full_z_opt
+ #define __nvapi_undef__out_bcount_full_z_opt
+ #define __out_bcount_full_z_opt(size)
+#endif
+#ifndef __out_ecount_nz_opt
+ #define __nvapi_undef__out_ecount_nz_opt
+ #define __out_ecount_nz_opt(size)
+#endif
+#ifndef __out_bcount_nz_opt
+ #define __nvapi_undef__out_bcount_nz_opt
+ #define __out_bcount_nz_opt(size)
+#endif
+#ifndef __inout_opt
+ #define __nvapi_undef__inout_opt
+ #define __inout_opt
+#endif
+#ifndef __inout_ecount_opt
+ #define __nvapi_undef__inout_ecount_opt
+ #define __inout_ecount_opt(size)
+#endif
+#ifndef __inout_bcount_opt
+ #define __nvapi_undef__inout_bcount_opt
+ #define __inout_bcount_opt(size)
+#endif
+#ifndef __inout_ecount_part_opt
+ #define __nvapi_undef__inout_ecount_part_opt
+ #define __inout_ecount_part_opt(size,length)
+#endif
+#ifndef __inout_bcount_part_opt
+ #define __nvapi_undef__inout_bcount_part_opt
+ #define __inout_bcount_part_opt(size,length)
+#endif
+#ifndef __inout_ecount_full_opt
+ #define __nvapi_undef__inout_ecount_full_opt
+ #define __inout_ecount_full_opt(size)
+#endif
+#ifndef __inout_bcount_full_opt
+ #define __nvapi_undef__inout_bcount_full_opt
+ #define __inout_bcount_full_opt(size)
+#endif
+#ifndef __inout_z_opt
+ #define __nvapi_undef__inout_z_opt
+ #define __inout_z_opt
+#endif
+#ifndef __inout_ecount_z_opt
+ #define __nvapi_undef__inout_ecount_z_opt
+ #define __inout_ecount_z_opt(size)
+#endif
+#ifndef __inout_ecount_z_opt
+ #define __nvapi_undef__inout_ecount_z_opt
+ #define __inout_ecount_z_opt(size)
+#endif
+#ifndef __inout_bcount_z_opt
+ #define __nvapi_undef__inout_bcount_z_opt
+ #define __inout_bcount_z_opt(size)
+#endif
+#ifndef __inout_nz_opt
+ #define __nvapi_undef__inout_nz_opt
+ #define __inout_nz_opt
+#endif
+#ifndef __inout_ecount_nz_opt
+ #define __nvapi_undef__inout_ecount_nz_opt
+ #define __inout_ecount_nz_opt(size)
+#endif
+#ifndef __inout_bcount_nz_opt
+ #define __nvapi_undef__inout_bcount_nz_opt
+ #define __inout_bcount_nz_opt(size)
+#endif
+#ifndef __deref_ecount
+ #define __nvapi_undef__deref_ecount
+ #define __deref_ecount(size)
+#endif
+#ifndef __deref_bcount
+ #define __nvapi_undef__deref_bcount
+ #define __deref_bcount(size)
+#endif
+#ifndef __deref_out
+ #define __nvapi_undef__deref_out
+ #define __deref_out
+#endif
+#ifndef __deref_out_ecount
+ #define __nvapi_undef__deref_out_ecount
+ #define __deref_out_ecount(size)
+#endif
+#ifndef __deref_out_bcount
+ #define __nvapi_undef__deref_out_bcount
+ #define __deref_out_bcount(size)
+#endif
+#ifndef __deref_out_ecount_part
+ #define __nvapi_undef__deref_out_ecount_part
+ #define __deref_out_ecount_part(size,length)
+#endif
+#ifndef __deref_out_bcount_part
+ #define __nvapi_undef__deref_out_bcount_part
+ #define __deref_out_bcount_part(size,length)
+#endif
+#ifndef __deref_out_ecount_full
+ #define __nvapi_undef__deref_out_ecount_full
+ #define __deref_out_ecount_full(size)
+#endif
+#ifndef __deref_out_bcount_full
+ #define __nvapi_undef__deref_out_bcount_full
+ #define __deref_out_bcount_full(size)
+#endif
+#ifndef __deref_out_z
+ #define __nvapi_undef__deref_out_z
+ #define __deref_out_z
+#endif
+#ifndef __deref_out_ecount_z
+ #define __nvapi_undef__deref_out_ecount_z
+ #define __deref_out_ecount_z(size)
+#endif
+#ifndef __deref_out_bcount_z
+ #define __nvapi_undef__deref_out_bcount_z
+ #define __deref_out_bcount_z(size)
+#endif
+#ifndef __deref_out_nz
+ #define __nvapi_undef__deref_out_nz
+ #define __deref_out_nz
+#endif
+#ifndef __deref_out_ecount_nz
+ #define __nvapi_undef__deref_out_ecount_nz
+ #define __deref_out_ecount_nz(size)
+#endif
+#ifndef __deref_out_bcount_nz
+ #define __nvapi_undef__deref_out_bcount_nz
+ #define __deref_out_bcount_nz(size)
+#endif
+#ifndef __deref_inout
+ #define __nvapi_undef__deref_inout
+ #define __deref_inout
+#endif
+#ifndef __deref_inout_z
+ #define __nvapi_undef__deref_inout_z
+ #define __deref_inout_z
+#endif
+#ifndef __deref_inout_ecount
+ #define __nvapi_undef__deref_inout_ecount
+ #define __deref_inout_ecount(size)
+#endif
+#ifndef __deref_inout_bcount
+ #define __nvapi_undef__deref_inout_bcount
+ #define __deref_inout_bcount(size)
+#endif
+#ifndef __deref_inout_ecount_part
+ #define __nvapi_undef__deref_inout_ecount_part
+ #define __deref_inout_ecount_part(size,length)
+#endif
+#ifndef __deref_inout_bcount_part
+ #define __nvapi_undef__deref_inout_bcount_part
+ #define __deref_inout_bcount_part(size,length)
+#endif
+#ifndef __deref_inout_ecount_full
+ #define __nvapi_undef__deref_inout_ecount_full
+ #define __deref_inout_ecount_full(size)
+#endif
+#ifndef __deref_inout_bcount_full
+ #define __nvapi_undef__deref_inout_bcount_full
+ #define __deref_inout_bcount_full(size)
+#endif
+#ifndef __deref_inout_z
+ #define __nvapi_undef__deref_inout_z
+ #define __deref_inout_z
+#endif
+#ifndef __deref_inout_ecount_z
+ #define __nvapi_undef__deref_inout_ecount_z
+ #define __deref_inout_ecount_z(size)
+#endif
+#ifndef __deref_inout_bcount_z
+ #define __nvapi_undef__deref_inout_bcount_z
+ #define __deref_inout_bcount_z(size)
+#endif
+#ifndef __deref_inout_nz
+ #define __nvapi_undef__deref_inout_nz
+ #define __deref_inout_nz
+#endif
+#ifndef __deref_inout_ecount_nz
+ #define __nvapi_undef__deref_inout_ecount_nz
+ #define __deref_inout_ecount_nz(size)
+#endif
+#ifndef __deref_inout_bcount_nz
+ #define __nvapi_undef__deref_inout_bcount_nz
+ #define __deref_inout_bcount_nz(size)
+#endif
+#ifndef __deref_ecount_opt
+ #define __nvapi_undef__deref_ecount_opt
+ #define __deref_ecount_opt(size)
+#endif
+#ifndef __deref_bcount_opt
+ #define __nvapi_undef__deref_bcount_opt
+ #define __deref_bcount_opt(size)
+#endif
+#ifndef __deref_out_opt
+ #define __nvapi_undef__deref_out_opt
+ #define __deref_out_opt
+#endif
+#ifndef __deref_out_ecount_opt
+ #define __nvapi_undef__deref_out_ecount_opt
+ #define __deref_out_ecount_opt(size)
+#endif
+#ifndef __deref_out_bcount_opt
+ #define __nvapi_undef__deref_out_bcount_opt
+ #define __deref_out_bcount_opt(size)
+#endif
+#ifndef __deref_out_ecount_part_opt
+ #define __nvapi_undef__deref_out_ecount_part_opt
+ #define __deref_out_ecount_part_opt(size,length)
+#endif
+#ifndef __deref_out_bcount_part_opt
+ #define __nvapi_undef__deref_out_bcount_part_opt
+ #define __deref_out_bcount_part_opt(size,length)
+#endif
+#ifndef __deref_out_ecount_full_opt
+ #define __nvapi_undef__deref_out_ecount_full_opt
+ #define __deref_out_ecount_full_opt(size)
+#endif
+#ifndef __deref_out_bcount_full_opt
+ #define __nvapi_undef__deref_out_bcount_full_opt
+ #define __deref_out_bcount_full_opt(size)
+#endif
+#ifndef __deref_out_z_opt
+ #define __nvapi_undef__deref_out_z_opt
+ #define __deref_out_z_opt
+#endif
+#ifndef __deref_out_ecount_z_opt
+ #define __nvapi_undef__deref_out_ecount_z_opt
+ #define __deref_out_ecount_z_opt(size)
+#endif
+#ifndef __deref_out_bcount_z_opt
+ #define __nvapi_undef__deref_out_bcount_z_opt
+ #define __deref_out_bcount_z_opt(size)
+#endif
+#ifndef __deref_out_nz_opt
+ #define __nvapi_undef__deref_out_nz_opt
+ #define __deref_out_nz_opt
+#endif
+#ifndef __deref_out_ecount_nz_opt
+ #define __nvapi_undef__deref_out_ecount_nz_opt
+ #define __deref_out_ecount_nz_opt(size)
+#endif
+#ifndef __deref_out_bcount_nz_opt
+ #define __nvapi_undef__deref_out_bcount_nz_opt
+ #define __deref_out_bcount_nz_opt(size)
+#endif
+#ifndef __deref_inout_opt
+ #define __nvapi_undef__deref_inout_opt
+ #define __deref_inout_opt
+#endif
+#ifndef __deref_inout_ecount_opt
+ #define __nvapi_undef__deref_inout_ecount_opt
+ #define __deref_inout_ecount_opt(size)
+#endif
+#ifndef __deref_inout_bcount_opt
+ #define __nvapi_undef__deref_inout_bcount_opt
+ #define __deref_inout_bcount_opt(size)
+#endif
+#ifndef __deref_inout_ecount_part_opt
+ #define __nvapi_undef__deref_inout_ecount_part_opt
+ #define __deref_inout_ecount_part_opt(size,length)
+#endif
+#ifndef __deref_inout_bcount_part_opt
+ #define __nvapi_undef__deref_inout_bcount_part_opt
+ #define __deref_inout_bcount_part_opt(size,length)
+#endif
+#ifndef __deref_inout_ecount_full_opt
+ #define __nvapi_undef__deref_inout_ecount_full_opt
+ #define __deref_inout_ecount_full_opt(size)
+#endif
+#ifndef __deref_inout_bcount_full_opt
+ #define __nvapi_undef__deref_inout_bcount_full_opt
+ #define __deref_inout_bcount_full_opt(size)
+#endif
+#ifndef __deref_inout_z_opt
+ #define __nvapi_undef__deref_inout_z_opt
+ #define __deref_inout_z_opt
+#endif
+#ifndef __deref_inout_ecount_z_opt
+ #define __nvapi_undef__deref_inout_ecount_z_opt
+ #define __deref_inout_ecount_z_opt(size)
+#endif
+#ifndef __deref_inout_bcount_z_opt
+ #define __nvapi_undef__deref_inout_bcount_z_opt
+ #define __deref_inout_bcount_z_opt(size)
+#endif
+#ifndef __deref_inout_nz_opt
+ #define __nvapi_undef__deref_inout_nz_opt
+ #define __deref_inout_nz_opt
+#endif
+#ifndef __deref_inout_ecount_nz_opt
+ #define __nvapi_undef__deref_inout_ecount_nz_opt
+ #define __deref_inout_ecount_nz_opt(size)
+#endif
+#ifndef __deref_inout_bcount_nz_opt
+ #define __nvapi_undef__deref_inout_bcount_nz_opt
+ #define __deref_inout_bcount_nz_opt(size)
+#endif
+#ifndef __deref_opt_ecount
+ #define __nvapi_undef__deref_opt_ecount
+ #define __deref_opt_ecount(size)
+#endif
+#ifndef __deref_opt_bcount
+ #define __nvapi_undef__deref_opt_bcount
+ #define __deref_opt_bcount(size)
+#endif
+#ifndef __deref_opt_out
+ #define __nvapi_undef__deref_opt_out
+ #define __deref_opt_out
+#endif
+#ifndef __deref_opt_out_z
+ #define __nvapi_undef__deref_opt_out_z
+ #define __deref_opt_out_z
+#endif
+#ifndef __deref_opt_out_ecount
+ #define __nvapi_undef__deref_opt_out_ecount
+ #define __deref_opt_out_ecount(size)
+#endif
+#ifndef __deref_opt_out_bcount
+ #define __nvapi_undef__deref_opt_out_bcount
+ #define __deref_opt_out_bcount(size)
+#endif
+#ifndef __deref_opt_out_ecount_part
+ #define __nvapi_undef__deref_opt_out_ecount_part
+ #define __deref_opt_out_ecount_part(size,length)
+#endif
+#ifndef __deref_opt_out_bcount_part
+ #define __nvapi_undef__deref_opt_out_bcount_part
+ #define __deref_opt_out_bcount_part(size,length)
+#endif
+#ifndef __deref_opt_out_ecount_full
+ #define __nvapi_undef__deref_opt_out_ecount_full
+ #define __deref_opt_out_ecount_full(size)
+#endif
+#ifndef __deref_opt_out_bcount_full
+ #define __nvapi_undef__deref_opt_out_bcount_full
+ #define __deref_opt_out_bcount_full(size)
+#endif
+#ifndef __deref_opt_inout
+ #define __nvapi_undef__deref_opt_inout
+ #define __deref_opt_inout
+#endif
+#ifndef __deref_opt_inout_ecount
+ #define __nvapi_undef__deref_opt_inout_ecount
+ #define __deref_opt_inout_ecount(size)
+#endif
+#ifndef __deref_opt_inout_bcount
+ #define __nvapi_undef__deref_opt_inout_bcount
+ #define __deref_opt_inout_bcount(size)
+#endif
+#ifndef __deref_opt_inout_ecount_part
+ #define __nvapi_undef__deref_opt_inout_ecount_part
+ #define __deref_opt_inout_ecount_part(size,length)
+#endif
+#ifndef __deref_opt_inout_bcount_part
+ #define __nvapi_undef__deref_opt_inout_bcount_part
+ #define __deref_opt_inout_bcount_part(size,length)
+#endif
+#ifndef __deref_opt_inout_ecount_full
+ #define __nvapi_undef__deref_opt_inout_ecount_full
+ #define __deref_opt_inout_ecount_full(size)
+#endif
+#ifndef __deref_opt_inout_bcount_full
+ #define __nvapi_undef__deref_opt_inout_bcount_full
+ #define __deref_opt_inout_bcount_full(size)
+#endif
+#ifndef __deref_opt_inout_z
+ #define __nvapi_undef__deref_opt_inout_z
+ #define __deref_opt_inout_z
+#endif
+#ifndef __deref_opt_inout_ecount_z
+ #define __nvapi_undef__deref_opt_inout_ecount_z
+ #define __deref_opt_inout_ecount_z(size)
+#endif
+#ifndef __deref_opt_inout_bcount_z
+ #define __nvapi_undef__deref_opt_inout_bcount_z
+ #define __deref_opt_inout_bcount_z(size)
+#endif
+#ifndef __deref_opt_inout_nz
+ #define __nvapi_undef__deref_opt_inout_nz
+ #define __deref_opt_inout_nz
+#endif
+#ifndef __deref_opt_inout_ecount_nz
+ #define __nvapi_undef__deref_opt_inout_ecount_nz
+ #define __deref_opt_inout_ecount_nz(size)
+#endif
+#ifndef __deref_opt_inout_bcount_nz
+ #define __nvapi_undef__deref_opt_inout_bcount_nz
+ #define __deref_opt_inout_bcount_nz(size)
+#endif
+#ifndef __deref_opt_ecount_opt
+ #define __nvapi_undef__deref_opt_ecount_opt
+ #define __deref_opt_ecount_opt(size)
+#endif
+#ifndef __deref_opt_bcount_opt
+ #define __nvapi_undef__deref_opt_bcount_opt
+ #define __deref_opt_bcount_opt(size)
+#endif
+#ifndef __deref_opt_out_opt
+ #define __nvapi_undef__deref_opt_out_opt
+ #define __deref_opt_out_opt
+#endif
+#ifndef __deref_opt_out_ecount_opt
+ #define __nvapi_undef__deref_opt_out_ecount_opt
+ #define __deref_opt_out_ecount_opt(size)
+#endif
+#ifndef __deref_opt_out_bcount_opt
+ #define __nvapi_undef__deref_opt_out_bcount_opt
+ #define __deref_opt_out_bcount_opt(size)
+#endif
+#ifndef __deref_opt_out_ecount_part_opt
+ #define __nvapi_undef__deref_opt_out_ecount_part_opt
+ #define __deref_opt_out_ecount_part_opt(size,length)
+#endif
+#ifndef __deref_opt_out_bcount_part_opt
+ #define __nvapi_undef__deref_opt_out_bcount_part_opt
+ #define __deref_opt_out_bcount_part_opt(size,length)
+#endif
+#ifndef __deref_opt_out_ecount_full_opt
+ #define __nvapi_undef__deref_opt_out_ecount_full_opt
+ #define __deref_opt_out_ecount_full_opt(size)
+#endif
+#ifndef __deref_opt_out_bcount_full_opt
+ #define __nvapi_undef__deref_opt_out_bcount_full_opt
+ #define __deref_opt_out_bcount_full_opt(size)
+#endif
+#ifndef __deref_opt_out_z_opt
+ #define __nvapi_undef__deref_opt_out_z_opt
+ #define __deref_opt_out_z_opt
+#endif
+#ifndef __deref_opt_out_ecount_z_opt
+ #define __nvapi_undef__deref_opt_out_ecount_z_opt
+ #define __deref_opt_out_ecount_z_opt(size)
+#endif
+#ifndef __deref_opt_out_bcount_z_opt
+ #define __nvapi_undef__deref_opt_out_bcount_z_opt
+ #define __deref_opt_out_bcount_z_opt(size)
+#endif
+#ifndef __deref_opt_out_nz_opt
+ #define __nvapi_undef__deref_opt_out_nz_opt
+ #define __deref_opt_out_nz_opt
+#endif
+#ifndef __deref_opt_out_ecount_nz_opt
+ #define __nvapi_undef__deref_opt_out_ecount_nz_opt
+ #define __deref_opt_out_ecount_nz_opt(size)
+#endif
+#ifndef __deref_opt_out_bcount_nz_opt
+ #define __nvapi_undef__deref_opt_out_bcount_nz_opt
+ #define __deref_opt_out_bcount_nz_opt(size)
+#endif
+#ifndef __deref_opt_inout_opt
+ #define __nvapi_undef__deref_opt_inout_opt
+ #define __deref_opt_inout_opt
+#endif
+#ifndef __deref_opt_inout_ecount_opt
+ #define __nvapi_undef__deref_opt_inout_ecount_opt
+ #define __deref_opt_inout_ecount_opt(size)
+#endif
+#ifndef __deref_opt_inout_bcount_opt
+ #define __nvapi_undef__deref_opt_inout_bcount_opt
+ #define __deref_opt_inout_bcount_opt(size)
+#endif
+#ifndef __deref_opt_inout_ecount_part_opt
+ #define __nvapi_undef__deref_opt_inout_ecount_part_opt
+ #define __deref_opt_inout_ecount_part_opt(size,length)
+#endif
+#ifndef __deref_opt_inout_bcount_part_opt
+ #define __nvapi_undef__deref_opt_inout_bcount_part_opt
+ #define __deref_opt_inout_bcount_part_opt(size,length)
+#endif
+#ifndef __deref_opt_inout_ecount_full_opt
+ #define __nvapi_undef__deref_opt_inout_ecount_full_opt
+ #define __deref_opt_inout_ecount_full_opt(size)
+#endif
+#ifndef __deref_opt_inout_bcount_full_opt
+ #define __nvapi_undef__deref_opt_inout_bcount_full_opt
+ #define __deref_opt_inout_bcount_full_opt(size)
+#endif
+#ifndef __deref_opt_inout_z_opt
+ #define __nvapi_undef__deref_opt_inout_z_opt
+ #define __deref_opt_inout_z_opt
+#endif
+#ifndef __deref_opt_inout_ecount_z_opt
+ #define __nvapi_undef__deref_opt_inout_ecount_z_opt
+ #define __deref_opt_inout_ecount_z_opt(size)
+#endif
+#ifndef __deref_opt_inout_bcount_z_opt
+ #define __nvapi_undef__deref_opt_inout_bcount_z_opt
+ #define __deref_opt_inout_bcount_z_opt(size)
+#endif
+#ifndef __deref_opt_inout_nz_opt
+ #define __nvapi_undef__deref_opt_inout_nz_opt
+ #define __deref_opt_inout_nz_opt
+#endif
+#ifndef __deref_opt_inout_ecount_nz_opt
+ #define __nvapi_undef__deref_opt_inout_ecount_nz_opt
+ #define __deref_opt_inout_ecount_nz_opt(size)
+#endif
+#ifndef __deref_opt_inout_bcount_nz_opt
+ #define __nvapi_undef__deref_opt_inout_bcount_nz_opt
+ #define __deref_opt_inout_bcount_nz_opt(size)
+#endif
+#ifndef __success
+ #define __nvapi_success
+ #define __success(expr)
+#endif
+#ifndef _Ret_notnull_
+ #define __nvapi__Ret_notnull_
+ #define _Ret_notnull_
+#endif
+#ifndef _Post_writable_byte_size_
+ #define __nvapi__Post_writable_byte_size_
+ #define _Post_writable_byte_size_(n)
+#endif
+#ifndef _Outptr_
+ #define __nvapi_Outptr_
+ #define _Outptr_
+#endif
+
+
+#define NVAPI_INTERFACE extern __success(return == NVAPI_OK) NvAPI_Status __cdecl
+
+#if (defined(WIN32) || defined(_WIN32)) && defined(_MSC_VER) && (_MSC_VER > 1399) && !defined(NVAPI_INTERNAL) && !defined(NVAPI_DEPRECATED_OLD)
+#ifndef __nvapi_deprecated_function
+#define __nvapi_deprecated_function(message) __declspec(deprecated(message))
+#endif
+#ifndef __nvapi_deprecated_datatype
+#define __nvapi_deprecated_datatype(FirstRelease) __declspec(deprecated("Do not use this data type - it is deprecated in release " #FirstRelease "."))
+#endif
+#else
+#ifndef __nvapi_deprecated_function
+#define __nvapi_deprecated_function(message)
+#endif
+#ifndef __nvapi_deprecated_datatype
+#define __nvapi_deprecated_datatype(FirstRelease)
+#endif
+#endif
+
+
+/* 64-bit types for compilers that support them, plus some obsolete variants */
+#if defined(__GNUC__) || defined(__arm) || defined(__IAR_SYSTEMS_ICC__) || defined(__ghs__) || defined(_WIN64)
+typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
+typedef long long NvS64; /* -9223372036854775808 to 9223372036854775807 */
+#else
+typedef unsigned __int64 NvU64; /* 0 to 18446744073709551615 */
+typedef __int64 NvS64; /* -9223372036854775808 to 9223372036854775807 */
+#endif
+
+// mac os 32-bit still needs this
+#if (defined(macintosh) || defined(__APPLE__)) && !defined(__LP64__)
+typedef signed long NvS32; /* -2147483648 to 2147483647 */
+#else
+typedef signed int NvS32; /* -2147483648 to 2147483647 */
+#endif
+
+#ifndef __unix
+// mac os 32-bit still needs this
+#if ( (defined(macintosh) && defined(__LP64__) && (__NVAPI_RESERVED0__)) || \
+ (!defined(macintosh) && defined(__NVAPI_RESERVED0__)) )
+typedef unsigned int NvU32; /* 0 to 4294967295 */
+#else
+typedef unsigned long NvU32; /* 0 to 4294967295 */
+#endif
+#else
+typedef unsigned int NvU32; /* 0 to 4294967295 */
+#endif
+
+typedef unsigned long temp_NvU32; /* 0 to 4294967295 */
+typedef signed short NvS16;
+typedef unsigned short NvU16;
+typedef unsigned char NvU8;
+typedef signed char NvS8;
+typedef float NvF32;
+typedef double NvF64;
+
+/*!
+ * Macro to convert NvU32 to NvF32.
+ */
+#define NvU32TONvF32(_pData) *(NvF32 *)(_pData)
+/*!
+ * Macro to convert NvF32 to NvU32.
+ */
+#define NvF32TONvU32(_pData) *(NvU32 *)(_pData)
+
+/* Boolean type */
+typedef NvU8 NvBool;
+#define NV_TRUE ((NvBool)(0 == 0))
+#define NV_FALSE ((NvBool)(0 != 0))
+
+typedef struct _NV_RECT
+{
+ NvU32 left;
+ NvU32 top;
+ NvU32 right;
+ NvU32 bottom;
+} NV_RECT;
+
+
+#define NV_DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name
+
+//! \addtogroup nvapihandles
+//! NVAPI Handles - These handles are retrieved from various calls and passed in to others in NvAPI
+//! These are meant to be opaque types. Do not assume they correspond to indices, HDCs,
+//! display indexes or anything else.
+//!
+//! Most handles remain valid until a display re-configuration (display mode set) or GPU
+//! reconfiguration (going into or out of SLI modes) occurs. If NVAPI_HANDLE_INVALIDATED
+//! is received by an app, it should discard all handles, and re-enumerate them.
+//! @{
+NV_DECLARE_HANDLE(NvLogicalGpuHandle); //!< One or more physical GPUs acting in concert (SLI)
+NV_DECLARE_HANDLE(NvPhysicalGpuHandle); //!< A single physical GPU
+NV_DECLARE_HANDLE(NvDisplayHandle); //!< Display Device driven by NVIDIA GPU(s) (an attached display)
+NV_DECLARE_HANDLE(NvMonitorHandle); //!< Monitor handle
+NV_DECLARE_HANDLE(NvUnAttachedDisplayHandle); //!< Unattached Display Device driven by NVIDIA GPU(s)
+NV_DECLARE_HANDLE(NvVisualComputingDeviceHandle); //!< A handle to a Visual Computing Device
+NV_DECLARE_HANDLE(NvEventHandle); //!< A handle to an event registration instance
+
+
+NV_DECLARE_HANDLE(NvHICHandle); //!< A handle to a Host Interface Card
+NV_DECLARE_HANDLE(NvGSyncDeviceHandle); //!< A handle to a Sync device
+NV_DECLARE_HANDLE(NvVioHandle); //!< A handle to an SDI device
+NV_DECLARE_HANDLE(NvTransitionHandle); //!< A handle to address a single transition request
+NV_DECLARE_HANDLE(NvAudioHandle); //!< NVIDIA HD Audio Device
+NV_DECLARE_HANDLE(Nv3DVPContextHandle); //!< A handle for a 3D Vision Pro (3DVP) context
+NV_DECLARE_HANDLE(Nv3DVPTransceiverHandle); //!< A handle for a 3DVP RF transceiver
+NV_DECLARE_HANDLE(Nv3DVPGlassesHandle); //!< A handle for a pair of 3DVP RF shutter glasses
+NV_DECLARE_HANDLE(NvPcfClientHandle); //!< A handle for NVPCF clients
+
+typedef void* StereoHandle; //!< A stereo handle, that corresponds to the device interface
+
+NV_DECLARE_HANDLE(NvSourceHandle); //!< Unique source handle on the system
+NV_DECLARE_HANDLE(NvTargetHandle); //!< Unique target handle on the system
+NV_DECLARE_HANDLE(NVDX_SwapChainHandle); //!< DirectX SwapChain objects
+static const NVDX_SwapChainHandle NVDX_SWAPCHAIN_NONE = 0;
+NV_DECLARE_HANDLE(NvPresentBarrierClientHandle); //!< PresentBarrier client object
+//! @}
+
+//! \ingroup nvapihandles
+//! @{
+#define NVAPI_DEFAULT_HANDLE 0
+#define NV_BIT(x) (1 << (x))
+//! @}
+
+
+
+//! \addtogroup nvapitypes
+//! @{
+#define NVAPI_GENERIC_STRING_MAX 4096
+#define NVAPI_LONG_STRING_MAX 256
+#define NVAPI_SHORT_STRING_MAX 64
+
+typedef struct
+{
+ NvS32 sX;
+ NvS32 sY;
+ NvS32 sWidth;
+ NvS32 sHeight;
+} NvSBox;
+
+#ifndef NvGUID_Defined
+#define NvGUID_Defined
+
+typedef struct
+{
+ NvU32 data1;
+ NvU16 data2;
+ NvU16 data3;
+ NvU8 data4[8];
+} NvGUID, NvLUID;
+
+
+#endif //#ifndef NvGUID_Defined
+#define NVAPI_MAX_PHYSICAL_GPUS 64
+
+
+#define NVAPI_MAX_PHYSICAL_BRIDGES 100
+#define NVAPI_PHYSICAL_GPUS 32
+#define NVAPI_MAX_LOGICAL_GPUS 64
+#define NVAPI_MAX_AVAILABLE_GPU_TOPOLOGIES 256
+#define NVAPI_MAX_AVAILABLE_SLI_GROUPS 256
+#define NVAPI_MAX_GPU_TOPOLOGIES NVAPI_MAX_PHYSICAL_GPUS
+#define NVAPI_MAX_GPU_PER_TOPOLOGY 8
+#define NVAPI_MAX_DISPLAY_HEADS 2
+#define NVAPI_ADVANCED_DISPLAY_HEADS 4
+#define NVAPI_MAX_DISPLAYS (NVAPI_PHYSICAL_GPUS * NVAPI_ADVANCED_DISPLAY_HEADS)
+#define NVAPI_MAX_ACPI_IDS 16
+#define NVAPI_MAX_VIEW_MODES 8
+
+
+#define NVAPI_SYSTEM_MAX_HWBCS 128
+#define NVAPI_SYSTEM_HWBC_INVALID_ID 0xffffffff
+
+#define NVAPI_SYSTEM_MAX_DISPLAYS (NVAPI_MAX_PHYSICAL_GPUS * NV_MAX_HEADS)
+#define NV_MAX_HEADS 4 //!< Maximum heads, each with NVAPI_DESKTOP_RES resolution
+#define NVAPI_MAX_HEADS_PER_GPU 32
+#define NV_MAX_VID_STREAMS 4 //!< Maximum number of input video streams, each with a #NVAPI_VIDEO_SRC_INFO
+#define NV_MAX_VID_STREAMS_EX 20 //!< Increasing MAX no. of input video streams, each with a #NVAPI_VIDEO_SRC_INFO
+#define NV_MAX_VID_PROFILES 4 //!< Maximum number of output video profiles supported
+
+#define NVAPI_MAX_AUDIO_DEVICES 16
+
+
+typedef char NvAPI_String[NVAPI_GENERIC_STRING_MAX];
+typedef char NvAPI_LongString[NVAPI_LONG_STRING_MAX];
+typedef char NvAPI_ShortString[NVAPI_SHORT_STRING_MAX];
+typedef NvU16 NvAPI_UnicodeShortString[NVAPI_SHORT_STRING_MAX];
+//! @}
+
+
+// =========================================================================================
+//! NvAPI Version Definition \n
+//! Maintain per structure specific version define using the MAKE_NVAPI_VERSION macro. \n
+//! Usage: #define NV_GENLOCK_STATUS_VER MAKE_NVAPI_VERSION(NV_GENLOCK_STATUS, 1)
+//! \ingroup nvapitypes
+// =========================================================================================
+#define MAKE_NVAPI_VERSION(typeName,ver) (NvU32)(sizeof(typeName) | ((ver)<<16))
+
+//! \ingroup nvapitypes
+#define GET_NVAPI_VERSION(ver) (NvU32)((ver)>>16)
+
+//! \ingroup nvapitypes
+#define GET_NVAPI_SIZE(ver) (NvU32)((ver) & 0xffff)
+
+
+// ====================================================
+//! NvAPI Status Values
+//! All NvAPI functions return one of these codes.
+//! \ingroup nvapistatus
+// ====================================================
+
+
+typedef enum _NvAPI_Status
+{
+ NVAPI_OK = 0, //!< Success. Request is completed.
+ NVAPI_ERROR = -1, //!< Generic error
+ NVAPI_LIBRARY_NOT_FOUND = -2, //!< NVAPI support library cannot be loaded.
+ NVAPI_NO_IMPLEMENTATION = -3, //!< not implemented in current driver installation
+ NVAPI_API_NOT_INITIALIZED = -4, //!< NvAPI_Initialize has not been called (successfully)
+ NVAPI_INVALID_ARGUMENT = -5, //!< The argument/parameter value is not valid or NULL.
+ NVAPI_NVIDIA_DEVICE_NOT_FOUND = -6, //!< No NVIDIA display driver, or NVIDIA GPU driving a display, was found.
+ NVAPI_END_ENUMERATION = -7, //!< No more items to enumerate
+ NVAPI_INVALID_HANDLE = -8, //!< Invalid handle
+ NVAPI_INCOMPATIBLE_STRUCT_VERSION = -9, //!< An argument's structure version is not supported
+ NVAPI_HANDLE_INVALIDATED = -10, //!< The handle is no longer valid (likely due to GPU or display re-configuration)
+ NVAPI_OPENGL_CONTEXT_NOT_CURRENT = -11, //!< No NVIDIA OpenGL context is current (but needs to be)
+ NVAPI_INVALID_POINTER = -14, //!< An invalid pointer, usually NULL, was passed as a parameter
+ NVAPI_NO_GL_EXPERT = -12, //!< OpenGL Expert is not supported by the current drivers
+ NVAPI_INSTRUMENTATION_DISABLED = -13, //!< OpenGL Expert is supported, but driver instrumentation is currently disabled
+ NVAPI_NO_GL_NSIGHT = -15, //!< OpenGL does not support Nsight
+
+ NVAPI_EXPECTED_LOGICAL_GPU_HANDLE = -100, //!< Expected a logical GPU handle for one or more parameters
+ NVAPI_EXPECTED_PHYSICAL_GPU_HANDLE = -101, //!< Expected a physical GPU handle for one or more parameters
+ NVAPI_EXPECTED_DISPLAY_HANDLE = -102, //!< Expected an NV display handle for one or more parameters
+ NVAPI_INVALID_COMBINATION = -103, //!< The combination of parameters is not valid.
+ NVAPI_NOT_SUPPORTED = -104, //!< Requested feature is not supported in the selected GPU
+ NVAPI_PORTID_NOT_FOUND = -105, //!< No port ID was found for the I2C transaction
+ NVAPI_EXPECTED_UNATTACHED_DISPLAY_HANDLE = -106, //!< Expected an unattached display handle as one of the input parameters.
+ NVAPI_INVALID_PERF_LEVEL = -107, //!< Invalid perf level
+ NVAPI_DEVICE_BUSY = -108, //!< Device is busy; request not fulfilled
+ NVAPI_NV_PERSIST_FILE_NOT_FOUND = -109, //!< NV persist file is not found
+ NVAPI_PERSIST_DATA_NOT_FOUND = -110, //!< NV persist data is not found
+ NVAPI_EXPECTED_TV_DISPLAY = -111, //!< Expected a TV output display
+ NVAPI_EXPECTED_TV_DISPLAY_ON_DCONNECTOR = -112, //!< Expected a TV output on the D Connector - HDTV_EIAJ4120.
+ NVAPI_NO_ACTIVE_SLI_TOPOLOGY = -113, //!< SLI is not active on this device.
+ NVAPI_SLI_RENDERING_MODE_NOTALLOWED = -114, //!< Setup of SLI rendering mode is not possible right now.
+ NVAPI_EXPECTED_DIGITAL_FLAT_PANEL = -115, //!< Expected a digital flat panel.
+ NVAPI_ARGUMENT_EXCEED_MAX_SIZE = -116, //!< Argument exceeds the expected size.
+ NVAPI_DEVICE_SWITCHING_NOT_ALLOWED = -117, //!< Inhibit is ON due to one of the flags in NV_GPU_DISPLAY_CHANGE_INHIBIT or SLI active.
+ NVAPI_TESTING_CLOCKS_NOT_SUPPORTED = -118, //!< Testing of clocks is not supported.
+ NVAPI_UNKNOWN_UNDERSCAN_CONFIG = -119, //!< The specified underscan config is from an unknown source (e.g. INF)
+ NVAPI_TIMEOUT_RECONFIGURING_GPU_TOPO = -120, //!< Timeout while reconfiguring GPUs
+ NVAPI_DATA_NOT_FOUND = -121, //!< Requested data was not found
+ NVAPI_EXPECTED_ANALOG_DISPLAY = -122, //!< Expected an analog display
+ NVAPI_NO_VIDLINK = -123, //!< No SLI video bridge is present
+ NVAPI_REQUIRES_REBOOT = -124, //!< NVAPI requires a reboot for the settings to take effect
+ NVAPI_INVALID_HYBRID_MODE = -125, //!< The function is not supported with the current Hybrid mode.
+ NVAPI_MIXED_TARGET_TYPES = -126, //!< The target types are not all the same
+ NVAPI_SYSWOW64_NOT_SUPPORTED = -127, //!< The function is not supported from 32-bit on a 64-bit system.
+ NVAPI_IMPLICIT_SET_GPU_TOPOLOGY_CHANGE_NOT_ALLOWED = -128, //!< There is no implicit GPU topology active. Use NVAPI_SetHybridMode to change topology.
+ NVAPI_REQUEST_USER_TO_CLOSE_NON_MIGRATABLE_APPS = -129, //!< Prompt the user to close all non-migratable applications.
+ NVAPI_OUT_OF_MEMORY = -130, //!< Could not allocate sufficient memory to complete the call.
+ NVAPI_WAS_STILL_DRAWING = -131, //!< The previous operation that is transferring information to or from this surface is incomplete.
+ NVAPI_FILE_NOT_FOUND = -132, //!< The file was not found.
+ NVAPI_TOO_MANY_UNIQUE_STATE_OBJECTS = -133, //!< There are too many unique instances of a particular type of state object.
+ NVAPI_INVALID_CALL = -134, //!< The method call is invalid. For example, a method's parameter may not be a valid pointer.
+ NVAPI_D3D10_1_LIBRARY_NOT_FOUND = -135, //!< d3d10_1.dll cannot be loaded.
+ NVAPI_FUNCTION_NOT_FOUND = -136, //!< Couldn't find the function in the loaded DLL.
+ NVAPI_INVALID_USER_PRIVILEGE = -137, //!< The application will require Administrator privileges to access this API.
+ //!< The application can be elevated to a higher permission level by selecting "Run as Administrator".
+ NVAPI_EXPECTED_NON_PRIMARY_DISPLAY_HANDLE = -138, //!< The handle corresponds to GDIPrimary.
+ NVAPI_EXPECTED_COMPUTE_GPU_HANDLE = -139, //!< Setting Physx GPU requires that the GPU is compute-capable.
+ NVAPI_STEREO_NOT_INITIALIZED = -140, //!< The Stereo part of NVAPI failed to initialize completely. Check if the stereo driver is installed.
+ NVAPI_STEREO_REGISTRY_ACCESS_FAILED = -141, //!< Access to stereo-related registry keys or values has failed.
+ NVAPI_STEREO_REGISTRY_PROFILE_TYPE_NOT_SUPPORTED = -142, //!< The given registry profile type is not supported.
+ NVAPI_STEREO_REGISTRY_VALUE_NOT_SUPPORTED = -143, //!< The given registry value is not supported.
+ NVAPI_STEREO_NOT_ENABLED = -144, //!< Stereo is not enabled and the function needed it to execute completely.
+ NVAPI_STEREO_NOT_TURNED_ON = -145, //!< Stereo is not turned on and the function needed it to execute completely.
+ NVAPI_STEREO_INVALID_DEVICE_INTERFACE = -146, //!< Invalid device interface.
+ NVAPI_STEREO_PARAMETER_OUT_OF_RANGE = -147, //!< Separation percentage or JPEG image capture quality is out of [0-100] range.
+ NVAPI_STEREO_FRUSTUM_ADJUST_MODE_NOT_SUPPORTED = -148, //!< The given frustum adjust mode is not supported.
+ NVAPI_TOPO_NOT_POSSIBLE = -149, //!< The mosaic topology is not possible given the current state of the hardware.
+ NVAPI_MODE_CHANGE_FAILED = -150, //!< An attempt to do a display resolution mode change has failed.
+ NVAPI_D3D11_LIBRARY_NOT_FOUND = -151, //!< d3d11.dll/d3d11_beta.dll cannot be loaded.
+ NVAPI_INVALID_ADDRESS = -152, //!< Address is outside of valid range.
+ NVAPI_STRING_TOO_SMALL = -153, //!< The pre-allocated string is too small to hold the result.
+ NVAPI_MATCHING_DEVICE_NOT_FOUND = -154, //!< The input does not match any of the available devices.
+ NVAPI_DRIVER_RUNNING = -155, //!< Driver is running.
+ NVAPI_DRIVER_NOTRUNNING = -156, //!< Driver is not running.
+ NVAPI_ERROR_DRIVER_RELOAD_REQUIRED = -157, //!< A driver reload is required to apply these settings.
+ NVAPI_SET_NOT_ALLOWED = -158, //!< Intended setting is not allowed.
+ NVAPI_ADVANCED_DISPLAY_TOPOLOGY_REQUIRED = -159, //!< Information can't be returned due to "advanced display topology".
+ NVAPI_SETTING_NOT_FOUND = -160, //!< Setting is not found.
+ NVAPI_SETTING_SIZE_TOO_LARGE = -161, //!< Setting size is too large.
+ NVAPI_TOO_MANY_SETTINGS_IN_PROFILE = -162, //!< There are too many settings for a profile.
+ NVAPI_PROFILE_NOT_FOUND = -163, //!< Profile is not found.
+ NVAPI_PROFILE_NAME_IN_USE = -164, //!< Profile name is duplicated.
+ NVAPI_PROFILE_NAME_EMPTY = -165, //!< Profile name is empty.
+ NVAPI_EXECUTABLE_NOT_FOUND = -166, //!< Application not found in the Profile.
+ NVAPI_EXECUTABLE_ALREADY_IN_USE = -167, //!< Application already exists in the other profile.
+ NVAPI_DATATYPE_MISMATCH = -168, //!< Data Type mismatch
+ NVAPI_PROFILE_REMOVED = -169, //!< The profile passed as parameter has been removed and is no longer valid.
+ NVAPI_UNREGISTERED_RESOURCE = -170, //!< An unregistered resource was passed as a parameter.
+ NVAPI_ID_OUT_OF_RANGE = -171, //!< The DisplayId corresponds to a display which is not within the normal outputId range.
+ NVAPI_DISPLAYCONFIG_VALIDATION_FAILED = -172, //!< Display topology is not valid so the driver cannot do a mode set on this configuration.
+ NVAPI_DPMST_CHANGED = -173, //!< Display Port Multi-Stream topology has been changed.
+ NVAPI_INSUFFICIENT_BUFFER = -174, //!< Input buffer is insufficient to hold the contents.
+ NVAPI_ACCESS_DENIED = -175, //!< No access to the caller.
+ NVAPI_MOSAIC_NOT_ACTIVE = -176, //!< The requested action cannot be performed without Mosaic being enabled.
+ NVAPI_SHARE_RESOURCE_RELOCATED = -177, //!< The surface is relocated away from video memory.
+ NVAPI_REQUEST_USER_TO_DISABLE_DWM = -178, //!< The user should disable DWM before calling NvAPI.
+ NVAPI_D3D_DEVICE_LOST = -179, //!< D3D device status is D3DERR_DEVICELOST or D3DERR_DEVICENOTRESET - the user has to reset the device.
+ NVAPI_INVALID_CONFIGURATION = -180, //!< The requested action cannot be performed in the current state.
+ NVAPI_STEREO_HANDSHAKE_NOT_DONE = -181, //!< Call failed as stereo handshake not completed.
+ NVAPI_EXECUTABLE_PATH_IS_AMBIGUOUS = -182, //!< The path provided was too short to determine the correct NVDRS_APPLICATION
+ NVAPI_DEFAULT_STEREO_PROFILE_IS_NOT_DEFINED = -183, //!< Default stereo profile is not currently defined
+ NVAPI_DEFAULT_STEREO_PROFILE_DOES_NOT_EXIST = -184, //!< Default stereo profile does not exist
+ NVAPI_CLUSTER_ALREADY_EXISTS = -185, //!< A cluster is already defined with the given configuration.
+ NVAPI_DPMST_DISPLAY_ID_EXPECTED = -186, //!< The input display id is not that of a multi stream enabled connector or a display device in a multi stream topology
+ NVAPI_INVALID_DISPLAY_ID = -187, //!< The input display id is not valid or the monitor associated to it does not support the current operation
+ NVAPI_STREAM_IS_OUT_OF_SYNC = -188, //!< While playing secure audio stream, stream goes out of sync
+ NVAPI_INCOMPATIBLE_AUDIO_DRIVER = -189, //!< Older audio driver version than required
+ NVAPI_VALUE_ALREADY_SET = -190, //!< Value already set, setting again not allowed.
+ NVAPI_TIMEOUT = -191, //!< Requested operation timed out
+ NVAPI_GPU_WORKSTATION_FEATURE_INCOMPLETE = -192, //!< The requested workstation feature set has incomplete driver internal allocation resources
+ NVAPI_STEREO_INIT_ACTIVATION_NOT_DONE = -193, //!< Call failed because InitActivation was not called.
+ NVAPI_SYNC_NOT_ACTIVE = -194, //!< The requested action cannot be performed without Sync being enabled.
+ NVAPI_SYNC_MASTER_NOT_FOUND = -195, //!< The requested action cannot be performed without Sync Master being enabled.
+ NVAPI_INVALID_SYNC_TOPOLOGY = -196, //!< Invalid displays passed in the NV_GSYNC_DISPLAY pointer.
+ NVAPI_ECID_SIGN_ALGO_UNSUPPORTED = -197, //!< The specified signing algorithm is not supported. Either an incorrect value was entered or the current installed driver/hardware does not support the input value.
+ NVAPI_ECID_KEY_VERIFICATION_FAILED = -198, //!< The encrypted public key verification has failed.
+ NVAPI_FIRMWARE_OUT_OF_DATE = -199, //!< The device's firmware is out of date.
+ NVAPI_FIRMWARE_REVISION_NOT_SUPPORTED = -200, //!< The device's firmware is not supported.
+ NVAPI_LICENSE_CALLER_AUTHENTICATION_FAILED = -201, //!< The caller is not authorized to modify the License.
+ NVAPI_D3D_DEVICE_NOT_REGISTERED = -202, //!< The user tried to use a deferred context without registering the device first
+ NVAPI_RESOURCE_NOT_ACQUIRED = -203, //!< Head or SourceId was not reserved for the VR Display before doing the Modeset or the dedicated display.
+ NVAPI_TIMING_NOT_SUPPORTED = -204, //!< Provided timing is not supported.
+ NVAPI_HDCP_ENCRYPTION_FAILED = -205, //!< HDCP Encryption Failed for the device. Would be applicable when the device is HDCP Capable.
+ NVAPI_PCLK_LIMITATION_FAILED = -206, //!< Provided mode is over sink device pclk limitation.
+ NVAPI_NO_CONNECTOR_FOUND = -207, //!< No connector on GPU found.
+ NVAPI_HDCP_DISABLED = -208, //!< When a non-HDCP capable HMD is connected, we would inform user by this code.
+ NVAPI_API_IN_USE = -209, //!< Atleast an API is still being called
+ NVAPI_NVIDIA_DISPLAY_NOT_FOUND = -210, //!< No display found on Nvidia GPU(s).
+ NVAPI_PRIV_SEC_VIOLATION = -211, //!< Priv security violation, improper access to a secured register.
+ NVAPI_INCORRECT_VENDOR = -212, //!< NVAPI cannot be called by this vendor
+ NVAPI_DISPLAY_IN_USE = -213, //!< DirectMode Display is already in use
+ NVAPI_UNSUPPORTED_CONFIG_NON_HDCP_HMD = -214, //!< The Config is having Non-NVidia GPU with Non-HDCP HMD connected
+ NVAPI_MAX_DISPLAY_LIMIT_REACHED = -215, //!< GPU's Max Display Limit has Reached
+ NVAPI_INVALID_DIRECT_MODE_DISPLAY = -216, //!< DirectMode not Enabled on the Display
+ NVAPI_GPU_IN_DEBUG_MODE = -217, //!< GPU is in debug mode, OC is NOT allowed.
+ NVAPI_D3D_CONTEXT_NOT_FOUND = -218, //!< No NvAPI context was found for this D3D object
+ NVAPI_STEREO_VERSION_MISMATCH = -219, //!< there is version mismatch between stereo driver and dx driver
+ NVAPI_GPU_NOT_POWERED = -220, //!< GPU is not powered and so the request cannot be completed.
+ NVAPI_ERROR_DRIVER_RELOAD_IN_PROGRESS = -221, //!< The display driver update in progress.
+ NVAPI_WAIT_FOR_HW_RESOURCE = -222, //!< Wait for HW resources allocation
+ NVAPI_REQUIRE_FURTHER_HDCP_ACTION = -223, //!< operation requires further HDCP action
+ NVAPI_DISPLAY_MUX_TRANSITION_FAILED = -224, //!< Dynamic Mux transition failure
+ NVAPI_INVALID_DSC_VERSION = -225, //!< Invalid DSC version
+ NVAPI_INVALID_DSC_SLICECOUNT = -226, //!< Invalid DSC slice count
+ NVAPI_INVALID_DSC_OUTPUT_BPP = -227, //!< Invalid DSC output BPP
+ NVAPI_FAILED_TO_LOAD_FROM_DRIVER_STORE = -228, //!< There was an error while loading nvapi.dll from the driver store.
+ NVAPI_NO_VULKAN = -229, //!< OpenGL does not export Vulkan fake extensions
+ NVAPI_REQUEST_PENDING = -230, //!< A request for NvTOPPs telemetry CData has already been made and is pending a response.
+ NVAPI_RESOURCE_IN_USE = -231, //!< Operation cannot be performed because the resource is in use.
+ NVAPI_INVALID_IMAGE = -232, //!< Device kernel image is invalid
+ NVAPI_INVALID_PTX = -233, //!< PTX JIT compilation failed
+ NVAPI_NVLINK_UNCORRECTABLE = -234, //!< Uncorrectable NVLink error was detected during the execution
+ NVAPI_JIT_COMPILER_NOT_FOUND = -235, //!< PTX JIT compiler library was not found.
+ NVAPI_INVALID_SOURCE = -236, //!< Device kernel source is invalid.
+ NVAPI_ILLEGAL_INSTRUCTION = -237, //!< While executing a kernel, the device encountered an illegal instruction.
+ NVAPI_INVALID_PC = -238, //!< While executing a kernel, the device program counter wrapped its address space
+ NVAPI_LAUNCH_FAILED = -239, //!< An exception occurred on the device while executing a kernel
+ NVAPI_NOT_PERMITTED = -240, //!< Attempted operation is not permitted.
+ NVAPI_CALLBACK_ALREADY_REGISTERED = -241, //!< The callback function has already been registered.
+ NVAPI_CALLBACK_NOT_FOUND = -242, //!< The callback function is not found or not registered.
+} NvAPI_Status;
+
+
+//! @}
+
+#if defined(__vkd3d_d3d12_h__)
+//! Flags specifying raytracing thread reordering hardware support.
+//! Additional flags will be added as support becomes available.
+//!
+//! \note Reported through NvAPI_D3D12_GetRaytracingCaps() when queried with
+//!       #NVAPI_D3D12_RAYTRACING_CAPS_TYPE_THREAD_REORDERING.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS
+{
+    NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_NONE     = 0x0,       //!< Thread reordering acts as a no-op
+    NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAP_STANDARD = NV_BIT(0)  //!< Standard thread reordering is supported
+} NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS;
+
+//! Flags specifying raytracing Opacity Micromap support.
+//! Additional flags will be added as support becomes available.
+//!
+//! \note Reported through NvAPI_D3D12_GetRaytracingCaps() when queried with
+//!       #NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE     = 0x0,       //!< Opacity Micromap support is not available.
+                                                                     //!< The application must not attempt to use any OMM entrypoints or flags.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD = NV_BIT(0)  //!< Standard Opacity Micromap support is available
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS;
+
+//! List of Raytracing CAPS types that can be queried.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_CAPS_TYPE
+{
+    NVAPI_D3D12_RAYTRACING_CAPS_TYPE_THREAD_REORDERING = 0,   //!< Query #NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS.
+    NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP  = 1,   //!< Query #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS.
+    NVAPI_D3D12_RAYTRACING_CAPS_TYPE_INVALID           = -1   //!< Sentinel; not a valid caps type to pass to NvAPI_D3D12_GetRaytracingCaps().
+} NVAPI_D3D12_RAYTRACING_CAPS_TYPE;
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_GetRaytracingCaps
+//
+//! DESCRIPTION: Query raytracing capabilities of a device.
+//!
+//! \note The caps structure written to \p pData is selected by \p type;
+//!       see #NVAPI_D3D12_RAYTRACING_CAPS_TYPE for the type-to-structure mapping.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pDevice Pointer to the device on which raytracing caps should be queried from.
+//! \param [in] type Raytracing caps type requested. (ex: NVAPI_D3D12_RAYTRACING_CAPS_TYPE_THREAD_REORDERING)
+//! \param [out] pData Pointer to memory that receives caps. (ex: NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS*)
+//! \param [in] dataSize Size in bytes to return to pData. Must match the size of the caps data requested. (ex: sizeof(NVAPI_D3D12_RAYTRACING_THREAD_REORDERING_CAPS))
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \retval ::NVAPI_OK Completed request
+//! \retval ::NVAPI_INVALID_POINTER A null pointer was passed as an argument
+//! \retval ::NVAPI_INVALID_ARGUMENT At least one of the arguments are invalid
+//! \retval ::NVAPI_ERROR Error occurred
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingCaps(
+    __in ID3D12Device* pDevice,
+    __in NVAPI_D3D12_RAYTRACING_CAPS_TYPE type,
+    __out void* pData,
+    __in size_t dataSize);
+#endif // defined(__vkd3d_d3d12_h__)
+
+//! SUPPORTED OS: Windows 10 and higher
+//!
+#if defined(__vkd3d_d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__))
+
+// Types used by both device and command list functions.
+
+//! Flags specifying building instructions and hints when constructing an OMM Array.
+//!
+//! \note Supplied via #NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS::flags and applies to every OMM in the build.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_NONE              = 0x0,       //!< No options specified for the OMM Array build.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE = NV_BIT(0), //!< Allow the OMM Array build to take a little longer in order to optimize for traversal performance.
+                                                                                           //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD = NV_BIT(1)  //!< Spend as little time as possible on the OMM Array build with some potential loss to traversal performance.
+                                                                                           //!< This flag is incompatible with #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS;
+
+//! Specifies the input Opacity Micromap formats.
+//! The OC1 (Opacity Compression 1) format follows the space-filling curve in barycentric space over the uniformly tessellated micro-triangles.
+//!
+//! \note This is a 16-bit value when used in #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC.
+//! \note Value 0 is not a defined format; valid enumerators start at 0x1.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_2_STATE = 0x1,  //!< 2-state (Transparent/Opaque) format.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_4_STATE = 0x2   //!< 4-state (Transparent/Opaque, Known/Unknown) format.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT;
+
+//! Number of OMMs of a specific configuration in an OMM Array.
+//! Used to compute conservative buffer size estimates for OMM Array builds.
+//!
+//! \note Referenced by the \c pOMMUsageCounts arrays of #NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT
+{
+    NvU32 count;                                            //!< Total number of OMMs in the OMM Array with the particular \p subdivisionLevel and \p format specified in this descriptor.
+    NvU32 subdivisionLevel;                                 //!< Number of subdivisions for the OMM; valid inputs are [0, 12] (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL).
+                                                            //!< The total number of micro-triangles is 4<sup><tt>subdivisionLevel</tt></sup>.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT format;  //!< Opacity Micromap format.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT;
+
+//! Describes one Opacity Micromap.
+//!
+//! \note Entries of the GPU array addressed by
+//!       #NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS::perOMMDescs use this layout.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC
+{
+    NvU32 byteOffset;        //!< Byte offset from the \c inputBuffer, specified in the input structure #NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS, to where the input OMM data is located.
+    NvU16 subdivisionLevel;  //!< Number of subdivisions for the OMM; valid inputs are [0, 12] (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL).
+                             //!< The total number of micro-triangles is 4<sup><tt>subdivisionLevel</tt></sup>.
+    NvU16 format;            //!< Format of the OMM of type #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT (stored as a 16-bit value).
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC;
+
+//! Input structure to OMM Array construction.
+//! Individual OMMs are accessed via indices when used in bottom-level acceleration structure (BLAS) construction.
+//!
+//! \note Consumed by NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() through
+//!       #NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS::pDesc.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS flags;            //!< Flags which apply to all OMMs in the array.
+    NvU32 numOMMUsageCounts;                                                    //!< Number of OMM usage count entries in the \p pOMMUsageCounts array.
+    const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT* pOMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the OMM entries in the build.
+    D3D12_GPU_VIRTUAL_ADDRESS inputBuffer;                                      //!< Address for raw OMM input data; it must be 256-byte aligned.
+                                                                                //!< It is recommended to try to organize OMMs together in memory that are expected to be used close together spatially.
+    D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE perOMMDescs;                           //!< GPU array with one #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_DESC entry per OMM.
+} NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS;
+
+#endif // defined(__vkd3d_d3d12_h__) && (defined(__ID3D12Device5_INTERFACE_DEFINED__) || defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__))
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+//! Conservative memory requirements for building an OMM Array.
+//!
+//! \note Filled in by NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO
+{
+    NvU64 resultDataMaxSizeInBytes;  //!< Size required to hold the result of an OMM Array build based on the specified inputs.
+    NvU64 scratchDataSizeInBytes;    //!< Scratch storage on GPU required during OMM Array build based on the specified inputs.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO;
+
+//! Parameters given to NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo().
+//!
+//! \see NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo
+//!
+//! \ingroup dx
+typedef struct _NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1
+{
+    NvU32 version;                                                           //!< [in]  Structure version; it should be set to #NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER.
+    const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS* pDesc; //!< [in]  Description of the OMM Array build.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO* pInfo;      //!< [out] Result of the query.
+} NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1;
+#define NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1, 1)
+typedef NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_V1 NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS;
+#define NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo
+//
+//! DESCRIPTION: Query conservative memory requirements for building an OMM (Opacity Micromap) Array.
+//!              The returned size is conservative for OMM Array builds containing
+//!              a lower or equal number of entries for each resolution and format combination.
+//!
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in]     pDevice Device on which the OMM Array will be built.
+//! \param [in,out] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \see NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo(
+    __in    ID3D12Device5* pDevice,
+    __inout NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+//! Pipeline creation state flags.
+//!
+//! \note A bitwise OR of these values is supplied via #NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS::flags.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS
+{
+    NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_NONE               = 0,         //!< [in] No pipeline flags.
+    NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT = NV_BIT(0), //!< [in] Change whether raytracing pipelines are created with support for Opacity Micromaps.
+                                                                              //!< If a triangle with an OMM is encountered during traversal and the pipeline was not created with support for them, behavior is undefined.
+                                                                              //!< Support should only be enabled if there are OMMs present, since it may incur a small penalty on traversal performance overall.
+} NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS;
+
+//! State used when creating new pipelines.
+//!
+//! \see NvAPI_D3D12_SetCreatePipelineStateOptions
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1
+{
+    NvU32 version;  //!< [in] Structure version; it should be set to #NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER.
+    NvU32 flags;    //!< [in] A bitwise OR of one or more #NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS flags for raytracing pipeline creation.
+} NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1;
+#define NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1, 1)
+typedef NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_V1 NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS;
+#define NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_SetCreatePipelineStateOptions
+//
+//! DESCRIPTION: Globally change the state affecting pipeline creations.
+//!              This affects all pipelines created after this call, and until this function is called again.
+//!
+//! \note Only supported on GPUs capable of DXR.
+//!       Some of the flags and fields have further restrictions, in which case their description will include a note with more details.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pDevice Device on which the pipelines will be created.
+//! \param [in] pState  State to be applied to all future pipeline creations.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_SetCreatePipelineStateOptions(
+    __in       ID3D12Device5* pDevice,
+    __in const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS* pState);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+//! Type of serialized data.
+//!
+//! \note The first section mirrors the plain \c D3D12_SERIALIZED_DATA_TYPE values
+//!       (presumably so acceleration-structure blobs keep the upstream encoding — verify against d3d12.h);
+//!       the second section adds OMM-specific types.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX
+{
+    // D3D12_SERIALIZED_DATA_TYPE flags
+    NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_ACCELERATION_STRUCTURE_EX = 0x0,  //!< Serialized data contains a raytracing acceleration structure.
+                                                                             //!< Starting from offset 0, the first bytes of the serialized acceleration structure can be reinterpreted as \c D3D12_SERIALIZED_RAYTRACING_ACCELERATION_STRUCTURE_HEADER.
+                                                                             //!< That structure contains the identifier to be passed along to NvAPI_D3D12_CheckDriverMatchingIdentifierEx().
+
+    // NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX specific flags
+    NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX = 0x1,  //!< Data blob contains an OMM Array.
+                                                                             //!< Starting from offset 0, the first bytes of the OMM Array can be reinterpreted as \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER.
+
+} NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX;
+
+//! Parameters given to NvAPI_D3D12_CheckDriverMatchingIdentifierEx().
+//!
+//! \see NvAPI_D3D12_CheckDriverMatchingIdentifierEx
+//!
+//! \ingroup dx
+typedef struct _NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1
+{
+    NvU32 version;                                                            //!< [in]  Structure version; it should be set to #NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER.
+    NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX serializedDataType;                   //!< [in]  Type of data to be deserialized; see #NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX.
+    const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER* pIdentifierToCheck; //!< [in]  Identifier from the header of the serialized data to check with the driver; see \c D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER.
+                                                                              //!< Information about how to retrieve that identifier can be found in the description of each #NVAPI_D3D12_SERIALIZED_DATA_TYPE_EX enum.
+    D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS checkStatus;                      //!< [out] Result of the check; see \c D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS.
+} NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1;
+#define NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1, 1)
+typedef NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_V1 NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS;
+#define NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_CheckDriverMatchingIdentifierEx
+//
+//! DESCRIPTION: This function is an extension of <tt>ID3D12Device5::CheckDriverMatchingIdentifier()</tt> with additional serialized data types.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in]     pDevice Device on which the data will be deserialized.
+//! \param [in,out] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//!         If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \see NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_CheckDriverMatchingIdentifierEx(
+    __in    ID3D12Device5* pDevice,
+    __inout NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+//! This enum extends \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS with modified and additional values.
+//! Only modified/new values are fully described; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX
+{
+    // D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS flags
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE_EX                             = 0x0,       //!< No options specified for the acceleration structure build.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE_EX                     = NV_BIT(0), //!< Allow the acceleration structure to later be updated (via the flag #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE_EX), rather than always requiring a full rebuild.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION_EX                 = NV_BIT(1), //!< Allow for the acceleration structure to later be compacted.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE_EX                = NV_BIT(2), //!< Favor higher raytracing performance at the cost of longer build times.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD_EX                = NV_BIT(3), //!< Favor faster build times at the cost of lower raytracing performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY_EX                  = NV_BIT(4), //!< Minimize the memory footprint of the produced acceleration structure, potentially at the cost of longer build time or lower raytracing performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE_EX                   = NV_BIT(5), //!< Instead of rebuilding the acceleration structure from scratch, the existing acceleration structure will be updated.
+                                                                                                             //!< Added behaviour: If #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX is specified, OMM references may be changed along with positions when an update is performed.
+
+    // NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX specific flags
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX                 = NV_BIT(6), //!< The acceleration structure (AS) supports updating OMM contents (base OMM Array and/or indices).
+                                                                                                             //!< Specifying this flag may result in larger AS size and may reduce traversal performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX               = NV_BIT(7), //!< Only applicable for BLAS builds. If enabled, any instances referencing this BLAS are allowed to disable the OMM test through the #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX flag.
+                                                                                                             //!< Specifying this build flag may result in some reductions in traversal performance.
+    NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX  = NV_BIT(8), //!< The acceleration structure (AS) supports updating OMM data (encoded opacity values).
+                                                                                                             //!< Specifying this flag may reduce traversal performance.
+
+} NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX;
+
+//! This enum extends \c D3D12_RAYTRACING_GEOMETRY_TYPE with additional values.
+//! Only new values are fully described below; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \note The first section mirrors the plain \c D3D12_RAYTRACING_GEOMETRY_TYPE values; only the OMM-triangles type is new.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX
+{
+    // D3D12_RAYTRACING_GEOMETRY_TYPE flags
+    NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX                  = 0x0,  //!< This geometry is made of basic triangles.
+    NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX = 0x1,  //!< This geometry is made of axis-aligned bounding boxes (AABBs).
+
+    // NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX specific flags
+    NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX              = 0x2,  //!< Shares most fields with the basic triangle geometry type, but allows an OMM Array to be attached to the geometry.
+                                                                               //!< The basic triangle type and this OMM-enabled type geometries may be mixed in the same BLAS build.
+
+
+} NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX;
+
+//! If a triangle has a uniform OMM state in a BLAS build, it is preferable to signal this explicitly rather than attaching a single state OMM.
+//! This can be accomplished by supplying these special indices as entries in \c opacityMicromapIndexBuffer, in #NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC.
+//!
+//! \note These negative sentinel values occupy entries of the OMM index buffer in place of
+//!       non-negative indices into the attached OMM Array.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX
+{
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_TRANSPARENT         = -1,  //!< Uniform transparent OMM state.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_OPAQUE              = -2,  //!< Uniform opaque OMM state.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_TRANSPARENT = -3,  //!< Uniform unknown-transparent OMM state.
+    NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX_FULLY_UNKNOWN_OPAQUE      = -4   //!< Uniform unknown-opaque OMM state.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX;
+
+//! Geometry descriptor attachment with Opacity Micromaps.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC
+{
+ D3D12_GPU_VIRTUAL_ADDRESS_AND_STRIDE opacityMicromapIndexBuffer; //!< Optional buffer specifying which OMM index to use for each triangle; if \c NULL, there is a 1:1 mapping between input triangles and OMM Array entries.
+ //!< Special values can be used to encode OMMs with uniform state for individual triangles (see #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_SPECIAL_INDEX).
+ //!< For BLAS updates, this input buffer must match that of the original build if the #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX build flag is not set.
+ DXGI_FORMAT opacityMicromapIndexFormat; //!< Format of \c opacityMicromapIndexBuffer, either \c DXGI_FORMAT_R32_UINT or \c DXGI_FORMAT_R16_UINT.
+ NvU32 opacityMicromapBaseLocation; //!< Constant added to all non-negative OMM indices in \p opacityMicromapIndexBuffer.
+ D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< Pointer to an OMM Array used by this geometry; it may be set to \c NULL if no non-uniform OMMs are used.
+ //!< Unlike vertex, index, and transform buffers, this resource is dereferenced during raytracing.
+
+ NvU32 numOMMUsageCounts; //!< Number of OMM usage count entries in the \p pOMMUsageCounts array.
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT* pOMMUsageCounts; //!< Usage counts for each subdivision level and format combination across all the OMM entries referred-to by the OMM index buffer specified by this geometry.
+
+} NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC;
+
+//! Geometry triangle descriptor with attached augmented Opacity Micromaps.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC
+{
+ D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Triangle mesh descriptor.
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC ommAttachment; //!< Opacity Micromap attachment descriptor.
+} NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC;
+
+//! This structure extends \c D3D12_RAYTRACING_GEOMETRY_DESC by supporting additional geometry types.
+//! Only new members are fully described below; for more information on the other members, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX
+{
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX type; //!< The type of geometry stored in the union of this structure.
+ D3D12_RAYTRACING_GEOMETRY_FLAGS flags; //!< Flags affecting how this geometry is processed by the raytracing pipeline.
+ union
+ {
+ D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC triangles; //!< Describes triangle geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX.
+ //!< Otherwise, this parameter is unused (space repurposed in a union).
+ D3D12_RAYTRACING_GEOMETRY_AABBS_DESC aabbs; //!< Describes AABB geometry if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX.
+ //!< Otherwise, this parameter is unused (space repurposed in a union).
+ NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_TRIANGLES_DESC ommTriangles; //!< Describes triangle geometry which may optionally use Opacity Micromaps, if \c type is #NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX.
+ //!< Otherwise, this parameter is unused (space repurposed in a union).
+ };
+} NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX;
+
+//! This enum extends \c D3D12_RAYTRACING_INSTANCE_FLAGS with additional values.
+//! Only new values are fully described below; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX
+{
+ // D3D12_RAYTRACING_INSTANCE_FLAGS flags
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_NONE_EX = 0x0, //!< No options specified for this instance.
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE_EX = NV_BIT(0), //!< Disable triangle culling for this instance.
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE_EX = NV_BIT(1), //!< Use counter-clockwise winding for defining front faces, instead of the default of clockwise winding.
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OPAQUE_EX = NV_BIT(2), //!< Force all geometries in this instance to be opaque.
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_NON_OPAQUE_EX = NV_BIT(3), //!< All geometries in this instance will be processed as if they never had the \c D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE flag applied to them.
+
+ // NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX specific flags
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_FORCE_OMM_2_STATE_EX = NV_BIT(4), //!< Ignore the Unknown state and only consider the Transparent/Opaque bit for all 4-state OMMs encountered during traversal.
+ //!< This flag has no effect if #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX is set.
+ NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX = NV_BIT(5) //!< Disable OMMs for all triangles, and revert to using geometry opaque/non-opaque state instead (legacy behavior).
+ //!< This flag is only valid if the referenced BLAS was built with the #NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX flag; omitting that flag during BLAS build will result in undefined behavior.
+} NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX;
+
+//! This structure extends \c D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS by supporting additional geometry types.
+//! Only modified members are fully described below; for more information on the other members, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX
+{
+ D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE type; //!< Whether a top-level acceleration structure (TLAS) or bottom-level acceleration structure (BLAS) will be built using this information.
+ NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX flags; //!< Options influencing how the acceleration structure is built and which of its features can be used.
+ NvU32 numDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TOP_LEVEL, it represents the number of descriptions stored in \c instanceDescs.
+ //!< Otherwise, it contains the number of geometry descriptions stored in \c pGeometryDescs or \c ppGeometryDescs.
+ D3D12_ELEMENTS_LAYOUT descsLayout; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL, it specifies which of \c pGeometryDescs and \c ppGeometryDescs to use.
+ //!< Otherwise, this parameter is unused.
+ NvU32 geometryDescStrideInBytes; //!< Stride between consecutive geometry descriptors. Should typically be set to sizeof(NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX).
+                                                                        //!< Only used if \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL and \c descsLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY.
+ //!< This field guarantees backwards compatibility, even if the geometry descriptor size increases in future NVAPI versions.
+ union
+ {
+        D3D12_GPU_VIRTUAL_ADDRESS instanceDescs;                        //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TOP_LEVEL, the referenced instance structures can use the extended set of flags #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAGS_EX in place of the \c D3D12_RAYTRACING_INSTANCE_FLAGS mentioned in \c D3D12_RAYTRACING_INSTANCE_DESC.
+ //!< Otherwise, this parameter is unused (space repurposed in a union).
+        const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX* pGeometryDescs;  //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL and \c descsLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY, it contains the descriptions of all geometries to be built into a BLAS.
+ //!< Otherwise, this parameter is unused (space repurposed in a union).
+        const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX*const* ppGeometryDescs; //!< If \c type is \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BOTTOM_LEVEL and \c descsLayout is \c D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS, it contains the addresses of descriptions for all geometries to be built into a BLAS.
+ //!< Otherwise, this parameter is unused (space repurposed in a union).
+ };
+} NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX;
+
+//! Parameters given to NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1
+{
+ NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER.
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX* pDesc; //!< [in] Description of the acceleration-structure build.
+ D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO* pInfo; //!< [out] Result of the query.
+} NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1;
+#define NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1, 1)
+typedef NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_V1 NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS;
+#define NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx
+//
+//! DESCRIPTION: This function is an extension of <tt>ID3D12Device5::GetRaytracingAccelerationStructurePrebuildInfo()</tt> with additional input types.
+//!
+//! \note Only supported on GPUs capable of DXR.
+//! Some of the flags and fields have further restrictions, in which case their description will include a note with more details.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pDevice Device on which the acceleration structure will be built.
+//! \param [in,out] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_GetRaytracingAccelerationStructurePrebuildInfoEx(
+ __in ID3D12Device5* pDevice,
+ __inout NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12Device5_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! Description of the inputs and memory areas used during the building of OMM Arrays.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC
+{
+ D3D12_GPU_VIRTUAL_ADDRESS destOpacityMicromapArrayData; //!< Output location for the OMM Array build.
+ //!< NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() reports the amount of memory required for the result given a set of input parameters.
+ //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT).
+ NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS inputs; //!< Description of the input data for the OMM Array build.
+ D3D12_GPU_VIRTUAL_ADDRESS scratchOpacityMicromapArrayData; //!< Location where the build will store temporary data.
+ //!< NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() reports the amount of scratch memory the implementation will need for a given set of input parameters.
+ //!< The address must be aligned to 256 bytes (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT).
+ //!< Contents of this memory going into a build on the GPU timeline are irrelevant and will not be preserved.
+ //!< After the build is complete on the GPU timeline, the memory is left with whatever undefined contents the build finished with.
+ //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS.
+} NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC;
+
+//! Structure emitted by NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(), and optionally NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(), when \c type equals #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC
+{
+ NvU64 currentSizeInBytes; //!< Size of the OMM Array buffer.
+ //!< The queried size may be smaller than the size reported by NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo().
+ //!< This allows the application to move and relocate the OMM Array to a smaller buffer to reclaim any unused memory after the OMM Array build is complete.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC;
+
+//! Type of postbuild info to emit after an OMM Array build.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE
+{
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE = 0x0 //!< Size of the current OMM Array. May be smaller than reported by the NvAPI_D3D12_GetRaytracingOpacityMicromapArrayPrebuildInfo() call.
+ //!< Unused memory can be reclaimed by copying the OMM Array into a new resource; see #NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_CURRENT_SIZE_DESC.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE;
+
+//! Description of the postbuild information to generate from an OMM Array.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC
+{
+ D3D12_GPU_VIRTUAL_ADDRESS destBuffer; //!< Result storage.
+ //!< Size required and the layout of the contents written by the system depend on \p infoType.
+ //!< The memory pointed to must be in state \c D3D12_RESOURCE_STATE_UNORDERED_ACCESS.
+ //!< The memory must be aligned to the natural alignment for the members of the particular output structure being generated (e.g. 8 bytes for a struct with the largest member being \c NvU64).
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_TYPE infoType; //!< Type of postbuild information to retrieve.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC;
+
+//! Parameters given to NvAPI_D3D12_BuildRaytracingOpacityMicromapArray().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1
+{
+ NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER.
+ const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC* pDesc; //!< [in] Description of the OMM Array build.
+ NvU32 numPostbuildInfoDescs; //!< [in] Size of postbuild info desc array. Set to 0 if none are needed.
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pPostbuildInfoDescs; //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the acceleration structure that was built.
+                                                                                               //!< [in] Any given postbuild info type, \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry.
+} NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1;
+#define NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1, 1)
+typedef NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS;
+#define NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingOpacityMicromapArray
+//
+//! DESCRIPTION: Construct OMM Array for a collection of OMMs on the GPU.
+//! The CPU-side input buffers are not referenced after this call.
+//! The GPU-side input resources are not referenced after the build has concluded after <tt>ExecuteCommandList()</tt>.
+//! Additionally, the application may optionally output postbuild information immediately after the build.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList Command list on which the command will execute.
+//! \param [in] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \retval NVAPI_INVALID_COMBINATION <tt>pParams->pPostbuildInfoDescs</tt> was set to \c NULL while <tt>pParams->numPostbuildInfoDescs</tt> is non zero.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingOpacityMicromapArray(
+ __in ID3D12GraphicsCommandList4* pCommandList,
+ __in NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! Parameters given to NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1
+{
+ NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER.
+ D3D12_GPU_VIRTUAL_ADDRESS opacityMicromapArray; //!< [in] OMM Array current memory address; it must be 256-byte aligned (#NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT).
+} NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1;
+#define NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1, 1)
+typedef NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_V1 NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS;
+#define NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray
+//
+//! DESCRIPTION: Makes the OMM Array usable at its current location in memory.
+//! An OMM Array that has been copied to a new location must be relocated using this function before it may be attached to any BLAS.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList Command list on which the command will execute.
+//! \param [in] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_RelocateRaytracingOpacityMicromapArray(
+ __in ID3D12GraphicsCommandList4* pCommandList,
+ __in const NVAPI_RELOCATE_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! Parameters given to NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1
+{
+ NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER.
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC* pDesc; //!< [in] Description of which postbuild info to emit.
+ NvU32 numSources; //!< [in] Number of OMM Arrays in \p pSources.
+ const D3D12_GPU_VIRTUAL_ADDRESS* pSources; //!< [in] List of OMM Arrays for which postbuild info should be emitted.
+} NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1;
+#define NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1, 1)
+typedef NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_V1 NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS;
+#define NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo
+//
+//! DESCRIPTION: Emits information about one or more OMM Arrays, only available after the OMM Array constructions have finished.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList Command list on which the command will execute.
+//! \param [in] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_EmitRaytracingOpacityMicromapArrayPostbuildInfo(
+ __in ID3D12GraphicsCommandList4* pCommandList,
+ __in const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+//! This structure extends \c D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC by supporting additional geometry types as inputs.
+//! For more information on the different members, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef struct _NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX
+{
+ D3D12_GPU_VIRTUAL_ADDRESS destAccelerationStructureData; //!< Memory where the resulting acceleration structure will be stored.
+ NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX inputs; //!< The inputs to the build process.
+ D3D12_GPU_VIRTUAL_ADDRESS sourceAccelerationStructureData; //!< The acceleration structure to be updated.
+ //!< Otherwise if the acceleration structure should be rebuilt entirely, this value must be \c NULL.
+ D3D12_GPU_VIRTUAL_ADDRESS scratchAccelerationStructureData; //!< Memory that will be temporarily used during the building process.
+} NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX;
+
+//! Parameters given to NvAPI_D3D12_BuildRaytracingAccelerationStructureEx().
+//!
+//! \ingroup dx
+typedef struct _NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1
+{
+ NvU32 version; //!< [in] Structure version; it should be set to #NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER.
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX* pDesc; //!< [in] Description of the acceleration structure to build.
+ NvU32 numPostbuildInfoDescs; //!< [in] Size of postbuild info desc array. Set to 0 if none are needed.
+ const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC* pPostbuildInfoDescs; //!< [in] Optional array of descriptions for postbuild info to generate describing properties of the acceleration structure that was built.
+                                                                                          //!< Any given postbuild info type, \c D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_TYPE, can only be selected for output by at most one array entry.
+} NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1;
+#define NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1 MAKE_NVAPI_VERSION(NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1, 1)
+typedef NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_V1 NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS;
+#define NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// FUNCTION NAME: NvAPI_D3D12_BuildRaytracingAccelerationStructureEx
+//
+//! DESCRIPTION: Perform an acceleration structure build on the GPU.
+//! Also optionally output postbuild information immediately after the build.
+//! This function is an extension of <tt>ID3D12GraphicsCommandList4::BuildRaytracingAccelerationStructure()</tt> with additional serialized data types.
+//!
+//! \note Only supported on GPUs capable of DXR.
+//! Some of the flags and fields have further restrictions, in which case their description will include a note with more details.
+//!
+//! SUPPORTED OS: Windows 10 and higher
+//!
+//!
+//! \since Release: 520
+//!
+//! \param [in] pCommandList Command list on which the command will execute.
+//! \param [in] pParams Wrapper around the inputs and outputs of the function.
+//!
+//! \return This API can return any of the error codes enumerated in #NvAPI_Status.
+//! If there are return error codes with specific meaning for this API, they are listed below.
+//!
+//! \retval NVAPI_INVALID_COMBINATION <tt>pParams->pPostbuildInfoDescs</tt> was set to \c NULL while <tt>pParams->numPostbuildInfoDescs</tt> is non zero.
+//!
+//! \ingroup dx
+///////////////////////////////////////////////////////////////////////////////
+NVAPI_INTERFACE NvAPI_D3D12_BuildRaytracingAccelerationStructureEx(
+ __in ID3D12GraphicsCommandList4* pCommandList,
+ __in const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS* pParams);
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#if defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Miscellaneous
+//
+///////////////////////////////////////////////////////////////////////////////
+
+//! Opacity Micromap micro-triangle states.
+//! Not part of any input, but listed here for convenience.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE
+{
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_TRANSPARENT = 0, //!< Transparent OMM state: hit is ignored.
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_OPAQUE = 1, //!< Opaque OMM state: hit is committed.
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT = 2, //!< Unknown-transparent OMM state.
+ //!< * If operating in 2-state mode, ignore hit.
+ //!< * If operating in 4-state mode, invoke any-hit shader.
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE = 3 //!< Unknown-opaque OMM state.
+ //!< * If operating in 2-state mode, commit hit.
+ //!< * If operating in 4-state mode, invoke any-hit shader.
+} NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_STATE;
+
+//! Mandatory alignment for the address of an OMM Array.
+//!
+//! \ingroup dx
+#define NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BYTE_ALIGNMENT 256
+
+//! Highest subdivision-level allowed with OC1.
+//!
+//! \ingroup dx
+#define NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_OC1_MAX_SUBDIVISION_LEVEL 12
+
+//! A list of flags that can be given to the \c TraceRay() function in HLSL.
+//! Only new or modified values are fully described below; for more information on the other values, please check Microsoft's DirectX Raytracing Specification.
+//!
+//! \ingroup dx
+typedef enum _NVAPI_RAY_FLAGS_EX
+{
+ // RAY_FLAGS flags
+ NVAPI_RAY_FLAG_NONE_EX = 0x0, //!< No flag specified.
+ NVAPI_RAY_FLAG_FORCE_OPAQUE_EX = NV_BIT( 0), //!< Consider all intersected geometries to be opaque, regardless of the flags specified at the geometry and instance level.
+ NVAPI_RAY_FLAG_FORCE_NON_OPAQUE_EX = NV_BIT( 1), //!< Consider all intersected geometries to be non-opaque, regardless of the flags specified at the geometry and instance level.
+ NVAPI_RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH_EX = NV_BIT( 2), //!< End the traversal as soon as a geometry is hit, and that hit is not ignored by the any hit shader.
+ NVAPI_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER_EX = NV_BIT( 3), //!< Do not invoke the closest hit shader once the traversal ends.
+ NVAPI_RAY_FLAG_CULL_BACK_FACING_TRIANGLES_EX = NV_BIT( 4), //!< Never intersect triangle geometries that are back facing with regard to the ray.
+ NVAPI_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES_EX = NV_BIT( 5), //!< Never intersect triangle geometries that are front facing with regard to the ray.
+ NVAPI_RAY_FLAG_CULL_OPAQUE_EX = NV_BIT( 6), //!< Never intersect geometries that were flagged as opaque.
+ NVAPI_RAY_FLAG_CULL_NON_OPAQUE_EX = NV_BIT( 7), //!< Never intersect geometries that were not flagged as opaque.
+ NVAPI_RAY_FLAG_SKIP_TRIANGLES_EX = NV_BIT( 8), //!< Never intersect triangle geometries.
+ NVAPI_RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES_EX = NV_BIT( 9), //!< Never intersect AABB geometries.
+
+ // NVAPI_RAY_FLAGS_EX specific flags
+ NVAPI_RAY_FLAG_FORCE_OMM_2_STATE_EX = NV_BIT(10), //!< Treat unknown-opaque and unknown-transparent as opaque and transparent, respectively, during traversal.
+ //!< If an instance is flagged with #NVAPI_D3D12_RAYTRACING_INSTANCE_FLAG_DISABLE_OMMS_EX, that takes precedence over this flag.
+} NVAPI_RAY_FLAG_EX;
+
+#endif // defined(__vkd3d_d3d12_h__) && defined(__ID3D12GraphicsCommandList4_INTERFACE_DEFINED__)
+
+#ifndef __NVAPI_EMPTY_SAL
+#ifdef __nvapi_undef__ecount
+ #undef __ecount
+ #undef __nvapi_undef__ecount
+#endif
+#ifdef __nvapi_undef__bcount
+ #undef __bcount
+ #undef __nvapi_undef__bcount
+#endif
+#ifdef __nvapi_undef__in
+ #undef __in
+ #undef __nvapi_undef__in
+#endif
+#ifdef __nvapi_undef__in_ecount
+ #undef __in_ecount
+ #undef __nvapi_undef__in_ecount
+#endif
+#ifdef __nvapi_undef__in_bcount
+ #undef __in_bcount
+ #undef __nvapi_undef__in_bcount
+#endif
+#ifdef __nvapi_undef__in_z
+ #undef __in_z
+ #undef __nvapi_undef__in_z
+#endif
+#ifdef __nvapi_undef__in_ecount_z
+ #undef __in_ecount_z
+ #undef __nvapi_undef__in_ecount_z
+#endif
+#ifdef __nvapi_undef__in_bcount_z
+ #undef __in_bcount_z
+ #undef __nvapi_undef__in_bcount_z
+#endif
+#ifdef __nvapi_undef__in_nz
+ #undef __in_nz
+ #undef __nvapi_undef__in_nz
+#endif
+#ifdef __nvapi_undef__in_ecount_nz
+ #undef __in_ecount_nz
+ #undef __nvapi_undef__in_ecount_nz
+#endif
+#ifdef __nvapi_undef__in_bcount_nz
+ #undef __in_bcount_nz
+ #undef __nvapi_undef__in_bcount_nz
+#endif
+#ifdef __nvapi_undef__out
+ #undef __out
+ #undef __nvapi_undef__out
+#endif
+#ifdef __nvapi_undef__out_ecount
+ #undef __out_ecount
+ #undef __nvapi_undef__out_ecount
+#endif
+#ifdef __nvapi_undef__out_bcount
+ #undef __out_bcount
+ #undef __nvapi_undef__out_bcount
+#endif
+#ifdef __nvapi_undef__out_ecount_part
+ #undef __out_ecount_part
+ #undef __nvapi_undef__out_ecount_part
+#endif
+#ifdef __nvapi_undef__out_bcount_part
+ #undef __out_bcount_part
+ #undef __nvapi_undef__out_bcount_part
+#endif
+#ifdef __nvapi_undef__out_ecount_full
+ #undef __out_ecount_full
+ #undef __nvapi_undef__out_ecount_full
+#endif
+#ifdef __nvapi_undef__out_bcount_full
+ #undef __out_bcount_full
+ #undef __nvapi_undef__out_bcount_full
+#endif
+#ifdef __nvapi_undef__out_z
+ #undef __out_z
+ #undef __nvapi_undef__out_z
+#endif
+#ifdef __nvapi_undef__out_z_opt
+ #undef __out_z_opt
+ #undef __nvapi_undef__out_z_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_z
+ #undef __out_ecount_z
+ #undef __nvapi_undef__out_ecount_z
+#endif
+#ifdef __nvapi_undef__out_bcount_z
+ #undef __out_bcount_z
+ #undef __nvapi_undef__out_bcount_z
+#endif
+#ifdef __nvapi_undef__out_ecount_part_z
+ #undef __out_ecount_part_z
+ #undef __nvapi_undef__out_ecount_part_z
+#endif
+#ifdef __nvapi_undef__out_bcount_part_z
+ #undef __out_bcount_part_z
+ #undef __nvapi_undef__out_bcount_part_z
+#endif
+#ifdef __nvapi_undef__out_ecount_full_z
+ #undef __out_ecount_full_z
+ #undef __nvapi_undef__out_ecount_full_z
+#endif
+#ifdef __nvapi_undef__out_bcount_full_z
+ #undef __out_bcount_full_z
+ #undef __nvapi_undef__out_bcount_full_z
+#endif
+#ifdef __nvapi_undef__out_nz
+ #undef __out_nz
+ #undef __nvapi_undef__out_nz
+#endif
+#ifdef __nvapi_undef__out_nz_opt
+ #undef __out_nz_opt
+ #undef __nvapi_undef__out_nz_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_nz
+ #undef __out_ecount_nz
+ #undef __nvapi_undef__out_ecount_nz
+#endif
+#ifdef __nvapi_undef__out_bcount_nz
+ #undef __out_bcount_nz
+ #undef __nvapi_undef__out_bcount_nz
+#endif
+#ifdef __nvapi_undef__inout
+ #undef __inout
+ #undef __nvapi_undef__inout
+#endif
+#ifdef __nvapi_undef__inout_ecount
+ #undef __inout_ecount
+ #undef __nvapi_undef__inout_ecount
+#endif
+#ifdef __nvapi_undef__inout_bcount
+ #undef __inout_bcount
+ #undef __nvapi_undef__inout_bcount
+#endif
+#ifdef __nvapi_undef__inout_ecount_part
+ #undef __inout_ecount_part
+ #undef __nvapi_undef__inout_ecount_part
+#endif
+#ifdef __nvapi_undef__inout_bcount_part
+ #undef __inout_bcount_part
+ #undef __nvapi_undef__inout_bcount_part
+#endif
+#ifdef __nvapi_undef__inout_ecount_full
+ #undef __inout_ecount_full
+ #undef __nvapi_undef__inout_ecount_full
+#endif
+#ifdef __nvapi_undef__inout_bcount_full
+ #undef __inout_bcount_full
+ #undef __nvapi_undef__inout_bcount_full
+#endif
+#ifdef __nvapi_undef__inout_z
+ #undef __inout_z
+ #undef __nvapi_undef__inout_z
+#endif
+#ifdef __nvapi_undef__inout_ecount_z
+ #undef __inout_ecount_z
+ #undef __nvapi_undef__inout_ecount_z
+#endif
+#ifdef __nvapi_undef__inout_bcount_z
+ #undef __inout_bcount_z
+ #undef __nvapi_undef__inout_bcount_z
+#endif
+#ifdef __nvapi_undef__inout_nz
+ #undef __inout_nz
+ #undef __nvapi_undef__inout_nz
+#endif
+#ifdef __nvapi_undef__inout_ecount_nz
+ #undef __inout_ecount_nz
+ #undef __nvapi_undef__inout_ecount_nz
+#endif
+#ifdef __nvapi_undef__inout_bcount_nz
+ #undef __inout_bcount_nz
+ #undef __nvapi_undef__inout_bcount_nz
+#endif
+#ifdef __nvapi_undef__ecount_opt
+ #undef __ecount_opt
+ #undef __nvapi_undef__ecount_opt
+#endif
+#ifdef __nvapi_undef__bcount_opt
+ #undef __bcount_opt
+ #undef __nvapi_undef__bcount_opt
+#endif
+#ifdef __nvapi_undef__in_opt
+ #undef __in_opt
+ #undef __nvapi_undef__in_opt
+#endif
+#ifdef __nvapi_undef__in_ecount_opt
+ #undef __in_ecount_opt
+ #undef __nvapi_undef__in_ecount_opt
+#endif
+#ifdef __nvapi_undef__in_bcount_opt
+ #undef __in_bcount_opt
+ #undef __nvapi_undef__in_bcount_opt
+#endif
+#ifdef __nvapi_undef__in_z_opt
+ #undef __in_z_opt
+ #undef __nvapi_undef__in_z_opt
+#endif
+#ifdef __nvapi_undef__in_ecount_z_opt
+ #undef __in_ecount_z_opt
+ #undef __nvapi_undef__in_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__in_bcount_z_opt
+ #undef __in_bcount_z_opt
+ #undef __nvapi_undef__in_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__in_nz_opt
+ #undef __in_nz_opt
+ #undef __nvapi_undef__in_nz_opt
+#endif
+#ifdef __nvapi_undef__in_ecount_nz_opt
+ #undef __in_ecount_nz_opt
+ #undef __nvapi_undef__in_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__in_bcount_nz_opt
+ #undef __in_bcount_nz_opt
+ #undef __nvapi_undef__in_bcount_nz_opt
+#endif
+#ifdef __nvapi_undef__out_opt
+ #undef __out_opt
+ #undef __nvapi_undef__out_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_opt
+ #undef __out_ecount_opt
+ #undef __nvapi_undef__out_ecount_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_opt
+ #undef __out_bcount_opt
+ #undef __nvapi_undef__out_bcount_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_part_opt
+ #undef __out_ecount_part_opt
+ #undef __nvapi_undef__out_ecount_part_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_part_opt
+ #undef __out_bcount_part_opt
+ #undef __nvapi_undef__out_bcount_part_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_full_opt
+ #undef __out_ecount_full_opt
+ #undef __nvapi_undef__out_ecount_full_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_full_opt
+ #undef __out_bcount_full_opt
+ #undef __nvapi_undef__out_bcount_full_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_z_opt
+ #undef __out_ecount_z_opt
+ #undef __nvapi_undef__out_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_z_opt
+ #undef __out_bcount_z_opt
+ #undef __nvapi_undef__out_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_part_z_opt
+ #undef __out_ecount_part_z_opt
+ #undef __nvapi_undef__out_ecount_part_z_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_part_z_opt
+ #undef __out_bcount_part_z_opt
+ #undef __nvapi_undef__out_bcount_part_z_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_full_z_opt
+ #undef __out_ecount_full_z_opt
+ #undef __nvapi_undef__out_ecount_full_z_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_full_z_opt
+ #undef __out_bcount_full_z_opt
+ #undef __nvapi_undef__out_bcount_full_z_opt
+#endif
+#ifdef __nvapi_undef__out_ecount_nz_opt
+ #undef __out_ecount_nz_opt
+ #undef __nvapi_undef__out_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__out_bcount_nz_opt
+ #undef __out_bcount_nz_opt
+ #undef __nvapi_undef__out_bcount_nz_opt
+#endif
+#ifdef __nvapi_undef__inout_opt
+ #undef __inout_opt
+ #undef __nvapi_undef__inout_opt
+#endif
+#ifdef __nvapi_undef__inout_ecount_opt
+ #undef __inout_ecount_opt
+ #undef __nvapi_undef__inout_ecount_opt
+#endif
+#ifdef __nvapi_undef__inout_bcount_opt
+ #undef __inout_bcount_opt
+ #undef __nvapi_undef__inout_bcount_opt
+#endif
+#ifdef __nvapi_undef__inout_ecount_part_opt
+ #undef __inout_ecount_part_opt
+ #undef __nvapi_undef__inout_ecount_part_opt
+#endif
+#ifdef __nvapi_undef__inout_bcount_part_opt
+ #undef __inout_bcount_part_opt
+ #undef __nvapi_undef__inout_bcount_part_opt
+#endif
+#ifdef __nvapi_undef__inout_ecount_full_opt
+ #undef __inout_ecount_full_opt
+ #undef __nvapi_undef__inout_ecount_full_opt
+#endif
+#ifdef __nvapi_undef__inout_bcount_full_opt
+ #undef __inout_bcount_full_opt
+ #undef __nvapi_undef__inout_bcount_full_opt
+#endif
+#ifdef __nvapi_undef__inout_z_opt
+ #undef __inout_z_opt
+ #undef __nvapi_undef__inout_z_opt
+#endif
+#ifdef __nvapi_undef__inout_ecount_z_opt
+ #undef __inout_ecount_z_opt
+ #undef __nvapi_undef__inout_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__inout_ecount_z_opt
+ #undef __inout_ecount_z_opt
+ #undef __nvapi_undef__inout_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__inout_bcount_z_opt
+ #undef __inout_bcount_z_opt
+ #undef __nvapi_undef__inout_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__inout_nz_opt
+ #undef __inout_nz_opt
+ #undef __nvapi_undef__inout_nz_opt
+#endif
+#ifdef __nvapi_undef__inout_ecount_nz_opt
+ #undef __inout_ecount_nz_opt
+ #undef __nvapi_undef__inout_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__inout_bcount_nz_opt
+ #undef __inout_bcount_nz_opt
+ #undef __nvapi_undef__inout_bcount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_ecount
+ #undef __deref_ecount
+ #undef __nvapi_undef__deref_ecount
+#endif
+#ifdef __nvapi_undef__deref_bcount
+ #undef __deref_bcount
+ #undef __nvapi_undef__deref_bcount
+#endif
+#ifdef __nvapi_undef__deref_out
+ #undef __deref_out
+ #undef __nvapi_undef__deref_out
+#endif
+#ifdef __nvapi_undef__deref_out_ecount
+ #undef __deref_out_ecount
+ #undef __nvapi_undef__deref_out_ecount
+#endif
+#ifdef __nvapi_undef__deref_out_bcount
+ #undef __deref_out_bcount
+ #undef __nvapi_undef__deref_out_bcount
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_part
+ #undef __deref_out_ecount_part
+ #undef __nvapi_undef__deref_out_ecount_part
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_part
+ #undef __deref_out_bcount_part
+ #undef __nvapi_undef__deref_out_bcount_part
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_full
+ #undef __deref_out_ecount_full
+ #undef __nvapi_undef__deref_out_ecount_full
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_full
+ #undef __deref_out_bcount_full
+ #undef __nvapi_undef__deref_out_bcount_full
+#endif
+#ifdef __nvapi_undef__deref_out_z
+ #undef __deref_out_z
+ #undef __nvapi_undef__deref_out_z
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_z
+ #undef __deref_out_ecount_z
+ #undef __nvapi_undef__deref_out_ecount_z
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_z
+ #undef __deref_out_bcount_z
+ #undef __nvapi_undef__deref_out_bcount_z
+#endif
+#ifdef __nvapi_undef__deref_out_nz
+ #undef __deref_out_nz
+ #undef __nvapi_undef__deref_out_nz
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_nz
+ #undef __deref_out_ecount_nz
+ #undef __nvapi_undef__deref_out_ecount_nz
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_nz
+ #undef __deref_out_bcount_nz
+ #undef __nvapi_undef__deref_out_bcount_nz
+#endif
+#ifdef __nvapi_undef__deref_inout
+ #undef __deref_inout
+ #undef __nvapi_undef__deref_inout
+#endif
+#ifdef __nvapi_undef__deref_inout_z
+ #undef __deref_inout_z
+ #undef __nvapi_undef__deref_inout_z
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount
+ #undef __deref_inout_ecount
+ #undef __nvapi_undef__deref_inout_ecount
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount
+ #undef __deref_inout_bcount
+ #undef __nvapi_undef__deref_inout_bcount
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_part
+ #undef __deref_inout_ecount_part
+ #undef __nvapi_undef__deref_inout_ecount_part
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_part
+ #undef __deref_inout_bcount_part
+ #undef __nvapi_undef__deref_inout_bcount_part
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_full
+ #undef __deref_inout_ecount_full
+ #undef __nvapi_undef__deref_inout_ecount_full
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_full
+ #undef __deref_inout_bcount_full
+ #undef __nvapi_undef__deref_inout_bcount_full
+#endif
+#ifdef __nvapi_undef__deref_inout_z
+ #undef __deref_inout_z
+ #undef __nvapi_undef__deref_inout_z
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_z
+ #undef __deref_inout_ecount_z
+ #undef __nvapi_undef__deref_inout_ecount_z
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_z
+ #undef __deref_inout_bcount_z
+ #undef __nvapi_undef__deref_inout_bcount_z
+#endif
+#ifdef __nvapi_undef__deref_inout_nz
+ #undef __deref_inout_nz
+ #undef __nvapi_undef__deref_inout_nz
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_nz
+ #undef __deref_inout_ecount_nz
+ #undef __nvapi_undef__deref_inout_ecount_nz
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_nz
+ #undef __deref_inout_bcount_nz
+ #undef __nvapi_undef__deref_inout_bcount_nz
+#endif
+#ifdef __nvapi_undef__deref_ecount_opt
+ #undef __deref_ecount_opt
+ #undef __nvapi_undef__deref_ecount_opt
+#endif
+#ifdef __nvapi_undef__deref_bcount_opt
+ #undef __deref_bcount_opt
+ #undef __nvapi_undef__deref_bcount_opt
+#endif
+#ifdef __nvapi_undef__deref_out_opt
+ #undef __deref_out_opt
+ #undef __nvapi_undef__deref_out_opt
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_opt
+ #undef __deref_out_ecount_opt
+ #undef __nvapi_undef__deref_out_ecount_opt
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_opt
+ #undef __deref_out_bcount_opt
+ #undef __nvapi_undef__deref_out_bcount_opt
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_part_opt
+ #undef __deref_out_ecount_part_opt
+ #undef __nvapi_undef__deref_out_ecount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_part_opt
+ #undef __deref_out_bcount_part_opt
+ #undef __nvapi_undef__deref_out_bcount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_full_opt
+ #undef __deref_out_ecount_full_opt
+ #undef __nvapi_undef__deref_out_ecount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_full_opt
+ #undef __deref_out_bcount_full_opt
+ #undef __nvapi_undef__deref_out_bcount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_out_z_opt
+ #undef __deref_out_z_opt
+ #undef __nvapi_undef__deref_out_z_opt
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_z_opt
+ #undef __deref_out_ecount_z_opt
+ #undef __nvapi_undef__deref_out_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_z_opt
+ #undef __deref_out_bcount_z_opt
+ #undef __nvapi_undef__deref_out_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_out_nz_opt
+ #undef __deref_out_nz_opt
+ #undef __nvapi_undef__deref_out_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_out_ecount_nz_opt
+ #undef __deref_out_ecount_nz_opt
+ #undef __nvapi_undef__deref_out_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_out_bcount_nz_opt
+ #undef __deref_out_bcount_nz_opt
+ #undef __nvapi_undef__deref_out_bcount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_opt
+ #undef __deref_inout_opt
+ #undef __nvapi_undef__deref_inout_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_opt
+ #undef __deref_inout_ecount_opt
+ #undef __nvapi_undef__deref_inout_ecount_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_opt
+ #undef __deref_inout_bcount_opt
+ #undef __nvapi_undef__deref_inout_bcount_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_part_opt
+ #undef __deref_inout_ecount_part_opt
+ #undef __nvapi_undef__deref_inout_ecount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_part_opt
+ #undef __deref_inout_bcount_part_opt
+ #undef __nvapi_undef__deref_inout_bcount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_full_opt
+ #undef __deref_inout_ecount_full_opt
+ #undef __nvapi_undef__deref_inout_ecount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_full_opt
+ #undef __deref_inout_bcount_full_opt
+ #undef __nvapi_undef__deref_inout_bcount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_z_opt
+ #undef __deref_inout_z_opt
+ #undef __nvapi_undef__deref_inout_z_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_z_opt
+ #undef __deref_inout_ecount_z_opt
+ #undef __nvapi_undef__deref_inout_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_z_opt
+ #undef __deref_inout_bcount_z_opt
+ #undef __nvapi_undef__deref_inout_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_nz_opt
+ #undef __deref_inout_nz_opt
+ #undef __nvapi_undef__deref_inout_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_ecount_nz_opt
+ #undef __deref_inout_ecount_nz_opt
+ #undef __nvapi_undef__deref_inout_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_inout_bcount_nz_opt
+ #undef __deref_inout_bcount_nz_opt
+ #undef __nvapi_undef__deref_inout_bcount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_ecount
+ #undef __deref_opt_ecount
+ #undef __nvapi_undef__deref_opt_ecount
+#endif
+#ifdef __nvapi_undef__deref_opt_bcount
+ #undef __deref_opt_bcount
+ #undef __nvapi_undef__deref_opt_bcount
+#endif
+#ifdef __nvapi_undef__deref_opt_out
+ #undef __deref_opt_out
+ #undef __nvapi_undef__deref_opt_out
+#endif
+#ifdef __nvapi_undef__deref_opt_out_z
+ #undef __deref_opt_out_z
+ #undef __nvapi_undef__deref_opt_out_z
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount
+ #undef __deref_opt_out_ecount
+ #undef __nvapi_undef__deref_opt_out_ecount
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount
+ #undef __deref_opt_out_bcount
+ #undef __nvapi_undef__deref_opt_out_bcount
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_part
+ #undef __deref_opt_out_ecount_part
+ #undef __nvapi_undef__deref_opt_out_ecount_part
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_part
+ #undef __deref_opt_out_bcount_part
+ #undef __nvapi_undef__deref_opt_out_bcount_part
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_full
+ #undef __deref_opt_out_ecount_full
+ #undef __nvapi_undef__deref_opt_out_ecount_full
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_full
+ #undef __deref_opt_out_bcount_full
+ #undef __nvapi_undef__deref_opt_out_bcount_full
+#endif
+#ifdef __nvapi_undef__deref_opt_inout
+ #undef __deref_opt_inout
+ #undef __nvapi_undef__deref_opt_inout
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount
+ #undef __deref_opt_inout_ecount
+ #undef __nvapi_undef__deref_opt_inout_ecount
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount
+ #undef __deref_opt_inout_bcount
+ #undef __nvapi_undef__deref_opt_inout_bcount
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_part
+ #undef __deref_opt_inout_ecount_part
+ #undef __nvapi_undef__deref_opt_inout_ecount_part
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_part
+ #undef __deref_opt_inout_bcount_part
+ #undef __nvapi_undef__deref_opt_inout_bcount_part
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_full
+ #undef __deref_opt_inout_ecount_full
+ #undef __nvapi_undef__deref_opt_inout_ecount_full
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_full
+ #undef __deref_opt_inout_bcount_full
+ #undef __nvapi_undef__deref_opt_inout_bcount_full
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_z
+ #undef __deref_opt_inout_z
+ #undef __nvapi_undef__deref_opt_inout_z
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_z
+ #undef __deref_opt_inout_ecount_z
+ #undef __nvapi_undef__deref_opt_inout_ecount_z
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_z
+ #undef __deref_opt_inout_bcount_z
+ #undef __nvapi_undef__deref_opt_inout_bcount_z
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_nz
+ #undef __deref_opt_inout_nz
+ #undef __nvapi_undef__deref_opt_inout_nz
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_nz
+ #undef __deref_opt_inout_ecount_nz
+ #undef __nvapi_undef__deref_opt_inout_ecount_nz
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_nz
+ #undef __deref_opt_inout_bcount_nz
+ #undef __nvapi_undef__deref_opt_inout_bcount_nz
+#endif
+#ifdef __nvapi_undef__deref_opt_ecount_opt
+ #undef __deref_opt_ecount_opt
+ #undef __nvapi_undef__deref_opt_ecount_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_bcount_opt
+ #undef __deref_opt_bcount_opt
+ #undef __nvapi_undef__deref_opt_bcount_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_opt
+ #undef __deref_opt_out_opt
+ #undef __nvapi_undef__deref_opt_out_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_opt
+ #undef __deref_opt_out_ecount_opt
+ #undef __nvapi_undef__deref_opt_out_ecount_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_opt
+ #undef __deref_opt_out_bcount_opt
+ #undef __nvapi_undef__deref_opt_out_bcount_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_part_opt
+ #undef __deref_opt_out_ecount_part_opt
+ #undef __nvapi_undef__deref_opt_out_ecount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_part_opt
+ #undef __deref_opt_out_bcount_part_opt
+ #undef __nvapi_undef__deref_opt_out_bcount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_full_opt
+ #undef __deref_opt_out_ecount_full_opt
+ #undef __nvapi_undef__deref_opt_out_ecount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_full_opt
+ #undef __deref_opt_out_bcount_full_opt
+ #undef __nvapi_undef__deref_opt_out_bcount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_z_opt
+ #undef __deref_opt_out_z_opt
+ #undef __nvapi_undef__deref_opt_out_z_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_z_opt
+ #undef __deref_opt_out_ecount_z_opt
+ #undef __nvapi_undef__deref_opt_out_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_z_opt
+ #undef __deref_opt_out_bcount_z_opt
+ #undef __nvapi_undef__deref_opt_out_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_nz_opt
+ #undef __deref_opt_out_nz_opt
+ #undef __nvapi_undef__deref_opt_out_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_ecount_nz_opt
+ #undef __deref_opt_out_ecount_nz_opt
+ #undef __nvapi_undef__deref_opt_out_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_out_bcount_nz_opt
+ #undef __deref_opt_out_bcount_nz_opt
+ #undef __nvapi_undef__deref_opt_out_bcount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_opt
+ #undef __deref_opt_inout_opt
+ #undef __nvapi_undef__deref_opt_inout_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_opt
+ #undef __deref_opt_inout_ecount_opt
+ #undef __nvapi_undef__deref_opt_inout_ecount_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_opt
+ #undef __deref_opt_inout_bcount_opt
+ #undef __nvapi_undef__deref_opt_inout_bcount_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_part_opt
+ #undef __deref_opt_inout_ecount_part_opt
+ #undef __nvapi_undef__deref_opt_inout_ecount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_part_opt
+ #undef __deref_opt_inout_bcount_part_opt
+ #undef __nvapi_undef__deref_opt_inout_bcount_part_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_full_opt
+ #undef __deref_opt_inout_ecount_full_opt
+ #undef __nvapi_undef__deref_opt_inout_ecount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_full_opt
+ #undef __deref_opt_inout_bcount_full_opt
+ #undef __nvapi_undef__deref_opt_inout_bcount_full_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_z_opt
+ #undef __deref_opt_inout_z_opt
+ #undef __nvapi_undef__deref_opt_inout_z_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_z_opt
+ #undef __deref_opt_inout_ecount_z_opt
+ #undef __nvapi_undef__deref_opt_inout_ecount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_z_opt
+ #undef __deref_opt_inout_bcount_z_opt
+ #undef __nvapi_undef__deref_opt_inout_bcount_z_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_nz_opt
+ #undef __deref_opt_inout_nz_opt
+ #undef __nvapi_undef__deref_opt_inout_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_ecount_nz_opt
+ #undef __deref_opt_inout_ecount_nz_opt
+ #undef __nvapi_undef__deref_opt_inout_ecount_nz_opt
+#endif
+#ifdef __nvapi_undef__deref_opt_inout_bcount_nz_opt
+ #undef __deref_opt_inout_bcount_nz_opt
+ #undef __nvapi_undef__deref_opt_inout_bcount_nz_opt
+#endif
+#ifdef __nvapi_success
+ #undef __success
+ #undef __nvapi_success
+#endif
+#ifdef __nvapi__Ret_notnull_
+ #undef __nvapi__Ret_notnull_
+ #undef _Ret_notnull_
+#endif
+#ifdef __nvapi__Post_writable_byte_size_
+ #undef __nvapi__Post_writable_byte_size_
+ #undef _Post_writable_byte_size_
+#endif
+#ifdef __nvapi_Outptr_
+ #undef __nvapi_Outptr_
+ #undef _Outptr_
+#endif
+
+#endif // __NVAPI_EMPTY_SAL
+
+#ifdef __cplusplus
+}; //extern "C" {
+
+#endif
+
+#pragma pack(pop)
+
+#endif // _NVAPI_H
\ No newline at end of file
diff --git a/include/vkd3d_command_list_vkd3d_ext.idl b/include/vkd3d_command_list_vkd3d_ext.idl
index e57fe145..f8870974 100644
--- a/include/vkd3d_command_list_vkd3d_ext.idl
+++ b/include/vkd3d_command_list_vkd3d_ext.idl
@@ -40,3 +40,17 @@ interface ID3D12GraphicsCommandListExt1 : ID3D12GraphicsCommandListExt
{
HRESULT LaunchCubinShaderEx(D3D12_CUBIN_DATA_HANDLE *handle, UINT32 block_x, UINT32 block_y, UINT32 block_z, UINT32 smem_size, const void *params, UINT32 param_size, const void *raw_params, UINT32 raw_params_count);
}
+
+[
+ uuid(d53b0028-afb4-4b65-a4f1-7b0daaa65b50),
+ object,
+ local,
+ pointer_default(unique)
+]
+interface ID3D12GraphicsCommandListExt2 : ID3D12GraphicsCommandListExt1
+{
+ HRESULT BuildRaytracingAccelerationStructureEx(const void *params);
+ HRESULT BuildRaytracingOpacityMicromapArray(void *params);
+ HRESULT RelocateRaytracingOpacityMicromapArray(const void *params);
+ HRESULT EmitRaytracingOpacityMicromapArrayPostbuildInfo(const void *params);
+}
diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl
index 3e615d76..41dab628 100644
--- a/include/vkd3d_device_vkd3d_ext.idl
+++ b/include/vkd3d_device_vkd3d_ext.idl
@@ -35,6 +35,20 @@ interface ID3D12DeviceExt : IUnknown
HRESULT CaptureUAVInfo(D3D12_UAV_INFO *uav_info);
}
+[
+ uuid(11ea7a1a-0f6a-49bf-b612-3e30f8e201de),
+ object,
+ local,
+ pointer_default(unique)
+]
+interface ID3D12DeviceExt1 : ID3D12DeviceExt
+{
+ HRESULT SetCreatePipelineStateOptions(const void *params);
+ HRESULT CheckDriverMatchingIdentifierEx(void *params);
+ HRESULT GetRaytracingAccelerationStructurePrebuildInfoEx(void *params);
+ HRESULT GetRaytracingOpacityMicromapArrayPrebuildInfo(void *params);
+}
+
[
uuid(39da4e09-bd1c-4198-9fae-86bbe3be41fd),
object,
diff --git a/include/vkd3d_shader.h b/include/vkd3d_shader.h
index 9066814a..e22c7d3f 100644
--- a/include/vkd3d_shader.h
+++ b/include/vkd3d_shader.h
@@ -368,6 +368,7 @@ enum vkd3d_shader_target_extension
VKD3D_SHADER_TARGET_EXTENSION_SUPPORT_FP16_DENORM_PRESERVE,
VKD3D_SHADER_TARGET_EXTENSION_SUPPORT_FP64_DENORM_PRESERVE,
VKD3D_SHADER_TARGET_EXTENSION_SUPPORT_SUBGROUP_PARTITIONED_NV,
+ VKD3D_SHADER_TARGET_EXTENSION_OPACITY_MICROMAP,
VKD3D_SHADER_TARGET_EXTENSION_COUNT,
};
diff --git a/include/vkd3d_vk_includes.h b/include/vkd3d_vk_includes.h
index c43e0189..a4afd34b 100644
--- a/include/vkd3d_vk_includes.h
+++ b/include/vkd3d_vk_includes.h
@@ -40,6 +40,7 @@ typedef enum VkImageLayout VkImageLayout;
typedef enum D3D12_VK_EXTENSION
{
+ D3D12_VK_EXT_OPACITY_MICROMAP = 0x0,
D3D12_VK_NVX_BINARY_IMPORT = 0x1,
D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2,
D3D12_VK_NV_LOW_LATENCY_2 = 0x3
diff --git a/libs/vkd3d-shader/dxil.c b/libs/vkd3d-shader/dxil.c
index dd2f4423..a0aa9229 100644
--- a/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d-shader/dxil.c
@@ -919,6 +919,17 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
goto end;
}
}
+ else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_OPACITY_MICROMAP)
+ {
+ static const dxil_spv_option_opacity_micromap helper =
+ { { DXIL_SPV_OPTION_OPACITY_MICROMAP }, DXIL_SPV_TRUE };
+ if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
+ {
+ ERR("dxil-spirv does not support OPACITY_MICROMAP.\n");
+ ret = VKD3D_ERROR_NOT_IMPLEMENTED;
+ goto end;
+ }
+ }
}
if (dxil_spv_converter_add_option(converter, &denorm_preserve.base) != DXIL_SPV_SUCCESS)
@@ -1549,6 +1560,17 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
goto end;
}
}
+ else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_OPACITY_MICROMAP)
+ {
+ static const dxil_spv_option_opacity_micromap helper =
+ { { DXIL_SPV_OPTION_OPACITY_MICROMAP }, DXIL_SPV_TRUE };
+ if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
+ {
+ ERR("dxil-spirv does not support OPACITY_MICROMAP.\n");
+ ret = VKD3D_ERROR_NOT_IMPLEMENTED;
+ goto end;
+ }
+ }
}
}
diff --git a/libs/vkd3d/acceleration_structure.c b/libs/vkd3d/acceleration_structure.c
index 5e9cda01..19482682 100644
--- a/libs/vkd3d/acceleration_structure.c
+++ b/libs/vkd3d/acceleration_structure.c
@@ -40,6 +40,22 @@ static VkBuildAccelerationStructureFlagsKHR d3d12_build_flags_to_vk(
return vk_flags;
}
+static VkBuildAccelerationStructureFlagsKHR nv_build_flags_to_vk(
+ NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS_EX flags)
+{
+ VkBuildAccelerationStructureFlagsKHR vk_flags = d3d12_build_flags_to_vk(
+ (D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS)flags);
+
+ if (flags & NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_UPDATE_EX)
+ vk_flags |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_OPACITY_MICROMAP_UPDATE_EXT;
+ if (flags & NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_DISABLE_OMMS_EX)
+ vk_flags |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DISABLE_OPACITY_MICROMAPS_EXT;
+ if (flags & NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_OMM_OPACITY_STATES_UPDATE_EX)
+ vk_flags |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_OPACITY_MICROMAP_DATA_UPDATE_EXT;
+
+ return vk_flags;
+}
+
static VkGeometryFlagsKHR d3d12_geometry_flags_to_vk(D3D12_RAYTRACING_GEOMETRY_FLAGS flags)
{
VkGeometryFlagsKHR vk_flags = 0;
@@ -61,6 +77,15 @@ uint32_t vkd3d_acceleration_structure_get_geometry_count(
return desc->NumDescs;
}
+uint32_t vkd3d_acceleration_structure_get_geometry_count_nv(
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX *desc)
+{
+ if (desc->type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
+ return 1;
+ else
+ return desc->numDescs;
+}
+
bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *device,
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc,
VkAccelerationStructureBuildGeometryInfoKHR *build_info,
@@ -251,6 +276,274 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
return true;
}
+bool vkd3d_acceleration_structure_convert_inputs_nv(struct d3d12_device *device,
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX *desc,
+ VkAccelerationStructureBuildGeometryInfoKHR *build_info,
+ VkAccelerationStructureGeometryKHR *geometry_infos,
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *omm_infos,
+ VkAccelerationStructureBuildRangeInfoKHR *range_infos,
+ uint32_t *primitive_counts)
+{
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *opacity_micromap;
+ VkAccelerationStructureGeometryTrianglesDataKHR *triangles;
+ VkAccelerationStructureGeometryAabbsDataKHR *aabbs;
+ const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX *geom_desc;
+ bool have_triangles, have_aabbs;
+ uint32_t primitive_count;
+ unsigned int i;
+
+ RT_TRACE("Converting inputs.\n");
+ RT_TRACE("=====================\n");
+
+ memset(build_info, 0, sizeof(*build_info));
+ build_info->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
+
+ if (desc->type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
+ {
+ build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
+ RT_TRACE("Top level build.\n");
+ }
+ else
+ {
+ build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
+ RT_TRACE("Bottom level build.\n");
+ }
+
+ build_info->flags = nv_build_flags_to_vk(desc->flags);
+
+ if (desc->flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
+ {
+ RT_TRACE("BUILD_FLAG_PERFORM_UPDATE.\n");
+ build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
+ }
+ else
+ build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
+
+ if (desc->type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
+ {
+ memset(geometry_infos, 0, sizeof(*geometry_infos));
+ geometry_infos[0].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
+ geometry_infos[0].geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+ geometry_infos[0].geometry.instances.sType =
+ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
+ geometry_infos[0].geometry.instances.arrayOfPointers =
+ desc->descsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? VK_TRUE : VK_FALSE;
+ geometry_infos[0].geometry.instances.data.deviceAddress = desc->instanceDescs;
+
+ if (primitive_counts)
+ primitive_counts[0] = desc->numDescs;
+
+ if (range_infos)
+ {
+ range_infos[0].primitiveCount = desc->numDescs;
+ range_infos[0].firstVertex = 0;
+ range_infos[0].primitiveOffset = 0;
+ range_infos[0].transformOffset = 0;
+ }
+
+ build_info->geometryCount = 1;
+ RT_TRACE(" ArrayOfPointers: %u.\n",
+ desc->descsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? 1 : 0);
+ RT_TRACE(" NumDescs: %u.\n", desc->numDescs);
+ }
+ else
+ {
+ have_triangles = false;
+ have_aabbs = false;
+
+ memset(geometry_infos, 0, sizeof(*geometry_infos) * desc->numDescs);
+ memset(omm_infos, 0, sizeof(*omm_infos) * desc->numDescs);
+
+ if (primitive_counts)
+ memset(primitive_counts, 0, sizeof(*primitive_counts) * desc->numDescs);
+
+ build_info->geometryCount = desc->numDescs;
+
+ for (i = 0; i < desc->numDescs; i++)
+ {
+ geometry_infos[i].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
+ RT_TRACE(" Geom %u:\n", i);
+
+ if (desc->descsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
+ {
+ geom_desc = desc->ppGeometryDescs[i];
+ RT_TRACE(" ArrayOfPointers\n");
+ }
+ else
+ {
+ geom_desc = (const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX *)(((const char *)desc->pGeometryDescs) + desc->geometryDescStrideInBytes * i);
+ RT_TRACE(" PointerToArray\n");
+ }
+
+ geometry_infos[i].flags = d3d12_geometry_flags_to_vk(geom_desc->flags);
+ RT_TRACE(" Flags = #%x\n", geom_desc->flags);
+
+ switch (geom_desc->type)
+ {
+ case D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES:
+ if (have_aabbs)
+ {
+ ERR("Cannot mix and match geometry types in a BLAS.\n");
+ return false;
+ }
+ have_triangles = true;
+
+ geometry_infos[i].geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
+ triangles = &geometry_infos[i].geometry.triangles;
+ triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
+ triangles->indexData.deviceAddress = geom_desc->triangles.IndexBuffer;
+ if (geom_desc->triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
+ {
+ if (!geom_desc->triangles.IndexBuffer)
+ WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
+
+ triangles->indexType =
+ geom_desc->triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
+ VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+ primitive_count = geom_desc->triangles.IndexCount / 3;
+ RT_TRACE(" Indexed : Index count = %u (%u bits)\n",
+ geom_desc->triangles.IndexCount,
+ triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
+ RT_TRACE(" Vertex count: %u\n", geom_desc->triangles.VertexCount);
+ RT_TRACE(" IBO VA: %"PRIx64".\n", geom_desc->triangles.IndexBuffer);
+ }
+ else
+ {
+ primitive_count = geom_desc->triangles.VertexCount / 3;
+ triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
+ RT_TRACE(" Triangle list : Vertex count: %u\n", geom_desc->triangles.VertexCount);
+ }
+
+ triangles->maxVertex = max(1, geom_desc->triangles.VertexCount) - 1;
+ triangles->vertexStride = geom_desc->triangles.VertexBuffer.StrideInBytes;
+ triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->triangles.VertexFormat);
+ triangles->vertexData.deviceAddress = geom_desc->triangles.VertexBuffer.StartAddress;
+ triangles->transformData.deviceAddress = geom_desc->triangles.Transform3x4;
+
+ RT_TRACE(" Transform3x4: %s\n", geom_desc->triangles.Transform3x4 ? "on" : "off");
+ RT_TRACE(" Vertex format: %s\n", debug_dxgi_format(geom_desc->triangles.VertexFormat));
+ RT_TRACE(" VBO VA: %"PRIx64"\n", geom_desc->triangles.VertexBuffer.StartAddress);
+ RT_TRACE(" Vertex stride: %"PRIu64" bytes\n", geom_desc->triangles.VertexBuffer.StrideInBytes);
+ break;
+
+ case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
+ if (have_triangles)
+ {
+ ERR("Cannot mix and match geometry types in a BLAS.\n");
+ return false;
+ }
+ have_aabbs = true;
+
+ geometry_infos[i].geometryType = VK_GEOMETRY_TYPE_AABBS_KHR;
+ aabbs = &geometry_infos[i].geometry.aabbs;
+ aabbs->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR;
+ aabbs->stride = geom_desc->aabbs.AABBs.StrideInBytes;
+ aabbs->data.deviceAddress = geom_desc->aabbs.AABBs.StartAddress;
+ primitive_count = geom_desc->aabbs.AABBCount;
+ RT_TRACE(" AABB stride: %"PRIu64" bytes\n", geom_desc->aabbs.AABBs.StrideInBytes);
+ break;
+
+ case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX:
+ if (have_aabbs)
+ {
+ ERR("Cannot mix and match geometry types in a BLAS.\n");
+ return false;
+ }
+ have_triangles = true;
+
+ geometry_infos[i].geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
+ triangles = &geometry_infos[i].geometry.triangles;
+ triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
+ triangles->indexData.deviceAddress = geom_desc->ommTriangles.triangles.IndexBuffer;
+ if (geom_desc->ommTriangles.triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
+ {
+ if (!geom_desc->ommTriangles.triangles.IndexBuffer)
+ WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
+
+ triangles->indexType =
+ geom_desc->ommTriangles.triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
+ VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+ primitive_count = geom_desc->ommTriangles.triangles.IndexCount / 3;
+ RT_TRACE(" Indexed : Index count = %u (%u bits)\n",
+ geom_desc->ommTriangles.triangles.IndexCount,
+ triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
+ RT_TRACE(" Vertex count: %u\n", geom_desc->ommTriangles.triangles.VertexCount);
+ RT_TRACE(" IBO VA: %"PRIx64".\n", geom_desc->ommTriangles.triangles.IndexBuffer);
+ }
+ else
+ {
+ primitive_count = geom_desc->ommTriangles.triangles.VertexCount / 3;
+ triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
+ RT_TRACE(" Triangle list : Vertex count: %u\n", geom_desc->ommTriangles.triangles.VertexCount);
+ }
+
+ triangles->maxVertex = max(1, geom_desc->ommTriangles.triangles.VertexCount) - 1;
+ triangles->vertexStride = geom_desc->ommTriangles.triangles.VertexBuffer.StrideInBytes;
+ triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->ommTriangles.triangles.VertexFormat);
+ triangles->vertexData.deviceAddress = geom_desc->ommTriangles.triangles.VertexBuffer.StartAddress;
+ triangles->transformData.deviceAddress = geom_desc->ommTriangles.triangles.Transform3x4;
+
+ RT_TRACE(" Transform3x4: %s\n", geom_desc->ommTriangles.triangles.Transform3x4 ? "on" : "off");
+ RT_TRACE(" Vertex format: %s\n", debug_dxgi_format(geom_desc->ommTriangles.triangles.VertexFormat));
+ RT_TRACE(" VBO VA: %"PRIx64"\n", geom_desc->ommTriangles.triangles.VertexBuffer.StartAddress);
+ RT_TRACE(" Vertex stride: %"PRIu64" bytes\n", geom_desc->ommTriangles.triangles.VertexBuffer.StrideInBytes);
+
+ triangles->pNext = opacity_micromap = &omm_infos[i];
+ opacity_micromap->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_TRIANGLES_OPACITY_MICROMAP_EXT;
+ opacity_micromap->pNext = NULL;
+ opacity_micromap->indexType =
+ geom_desc->ommTriangles.ommAttachment.opacityMicromapIndexFormat == DXGI_FORMAT_R16_UINT ?
+ VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+ opacity_micromap->indexBuffer.deviceAddress = geom_desc->ommTriangles.ommAttachment.opacityMicromapIndexBuffer.StartAddress;
+ opacity_micromap->indexStride = geom_desc->ommTriangles.ommAttachment.opacityMicromapIndexBuffer.StrideInBytes;
+ opacity_micromap->baseTriangle = geom_desc->ommTriangles.ommAttachment.opacityMicromapBaseLocation;
+
+ if (geom_desc->ommTriangles.ommAttachment.numOMMUsageCounts && geom_desc->ommTriangles.ommAttachment.pOMMUsageCounts)
+ {
+ STATIC_ASSERT(sizeof(VkMicromapUsageEXT) == sizeof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT));
+ STATIC_ASSERT(offsetof(VkMicromapUsageEXT, count) == offsetof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT, count));
+ STATIC_ASSERT(offsetof(VkMicromapUsageEXT, subdivisionLevel) == offsetof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT, subdivisionLevel));
+ STATIC_ASSERT(offsetof(VkMicromapUsageEXT, format) == offsetof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT, format));
+ STATIC_ASSERT(sizeof(uint32_t) == sizeof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT));
+ opacity_micromap->pUsageCounts = (const void *)geom_desc->ommTriangles.ommAttachment.pOMMUsageCounts;
+ opacity_micromap->usageCountsCount = geom_desc->ommTriangles.ommAttachment.numOMMUsageCounts;
+ }
+
+ if (geom_desc->ommTriangles.ommAttachment.opacityMicromapArray)
+ opacity_micromap->micromap = vkd3d_va_map_place_opacity_micromap(&device->memory_allocator.va_map, device, geom_desc->ommTriangles.ommAttachment.opacityMicromapArray);
+
+ RT_TRACE(" OMM Index type: %s\n", debug_dxgi_format(geom_desc->ommTriangles.ommAttachment.opacityMicromapIndexFormat));
+ RT_TRACE(" OMM IBO VA: %"PRIx64"\n", geom_desc->ommTriangles.ommAttachment.opacityMicromapIndexBuffer.StartAddress);
+ RT_TRACE(" OMM Index stride: %"PRIu64" bytes\n", geom_desc->ommTriangles.ommAttachment.opacityMicromapIndexBuffer.StrideInBytes);
+ RT_TRACE(" OMM Base: %u\n", geom_desc->ommTriangles.ommAttachment.opacityMicromapBaseLocation);
+ RT_TRACE(" OMM Usage counts: %u\n", geom_desc->ommTriangles.ommAttachment.numOMMUsageCounts);
+ RT_TRACE(" OMM Micromap VA: %"PRIx64"\n", geom_desc->ommTriangles.ommAttachment.opacityMicromapArray);
+ break;
+
+ default:
+ FIXME("Unsupported geometry type %u.\n", geom_desc->type);
+ return false;
+ }
+
+ if (primitive_counts)
+ primitive_counts[i] = primitive_count;
+
+ if (range_infos)
+ {
+ range_infos[i].primitiveCount = primitive_count;
+ range_infos[i].firstVertex = 0;
+ range_infos[i].primitiveOffset = 0;
+ range_infos[i].transformOffset = 0;
+ }
+
+ RT_TRACE(" Primitive count %u.\n", primitive_count);
+ }
+ }
+
+ RT_TRACE("=====================\n");
+ return true;
+}
+
static void vkd3d_acceleration_structure_end_barrier(struct d3d12_command_list *list)
{
/* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS. */
diff --git a/libs/vkd3d/breadcrumbs.c b/libs/vkd3d/breadcrumbs.c
index 8dc7b0a0..e36a88c7 100644
--- a/libs/vkd3d/breadcrumbs.c
+++ b/libs/vkd3d/breadcrumbs.c
@@ -76,6 +76,10 @@ static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_co
return "copy_rtas";
case VKD3D_BREADCRUMB_COMMAND_EMIT_RTAS_POSTBUILD:
return "emit_rtas_postbuild";
+ case VKD3D_BREADCRUMB_COMMAND_BUILD_OMM:
+ return "build_omm";
+ case VKD3D_BREADCRUMB_COMMAND_EMIT_OMM_POSTBUILD:
+ return "emit_omm_postbuild";
case VKD3D_BREADCRUMB_COMMAND_TRACE_RAYS:
return "trace_rays";
case VKD3D_BREADCRUMB_COMMAND_BARRIER:
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 65bfe41d..bb04fe65 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -74,11 +74,9 @@ static VkImageLayout d3d12_command_list_get_depth_stencil_resource_layout(const
static void d3d12_command_list_decay_optimal_dsv_resource(struct d3d12_command_list *list,
const struct d3d12_resource *resource, uint32_t plane_optimal_mask,
struct d3d12_command_list_barrier_batch *batch);
-static void d3d12_command_list_end_transfer_batch(struct d3d12_command_list *list);
static void d3d12_command_list_end_wbi_batch(struct d3d12_command_list *list);
static inline void d3d12_command_list_ensure_transfer_batch(struct d3d12_command_list *list, enum vkd3d_batch_type type);
static void d3d12_command_list_free_rtas_batch(struct d3d12_command_list *list);
-static void d3d12_command_list_flush_rtas_batch(struct d3d12_command_list *list);
static void d3d12_command_list_clear_rtas_batch(struct d3d12_command_list *list);
static void d3d12_command_list_flush_query_resolves(struct d3d12_command_list *list);
@@ -1934,7 +1932,6 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm
}
static void d3d12_command_list_invalidate_all_state(struct d3d12_command_list *list);
-static void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list, bool suspend);
static void d3d12_command_list_begin_new_sequence(struct d3d12_command_list *list)
{
@@ -4569,7 +4566,7 @@ cleanup:
return result;
}
-static void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list, bool suspend)
+void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list, bool suspend)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VkMemoryBarrier2 vk_barrier;
@@ -4729,6 +4726,13 @@ static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d
*stages |= VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
*access |= VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR |
VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+
+ if (device->device_info.opacity_micromap_features.micromap)
+ {
+ *stages |= VK_PIPELINE_STAGE_2_MICROMAP_BUILD_BIT_EXT;
+ *access |= VK_ACCESS_2_MICROMAP_READ_BIT_EXT |
+ VK_ACCESS_2_MICROMAP_WRITE_BIT_EXT;
+ }
}
break;
@@ -4740,6 +4744,9 @@ static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d
VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR;
*access |= VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR |
VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR;
+
+ if (device->device_info.opacity_micromap_features.micromap)
+ *access |= VK_ACCESS_2_MICROMAP_READ_BIT_EXT;
}
break;
@@ -4766,6 +4773,10 @@ static void vk_access_and_stage_flags_from_d3d12_resource_state(const struct d3d
/* Vertex / index / transform buffer inputs are NON_PIXEL_SHADER_RESOURCES in DXR.
* They access SHADER_READ_BIT in Vulkan, so just need to add the stage. */
*stages |= VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
+
+ if (device->device_info.opacity_micromap_features.micromap)
+ *stages |= VK_PIPELINE_STAGE_2_MICROMAP_BUILD_BIT_EXT;
+
}
break;
@@ -4968,7 +4979,8 @@ HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(d3d12_command_list_i
}
if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandListExt)
- || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandListExt1))
+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandListExt1)
+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandListExt2))
{
struct d3d12_command_list *command_list = impl_from_ID3D12GraphicsCommandList(iface);
d3d12_command_list_vkd3d_ext_AddRef(&command_list->ID3D12GraphicsCommandListExt_iface);
@@ -8078,7 +8090,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(d3d12_command_list
VKD3D_BREADCRUMB_COMMAND(COPY);
}
-static void d3d12_command_list_end_transfer_batch(struct d3d12_command_list *list)
+void d3d12_command_list_end_transfer_batch(struct d3d12_command_list *list)
{
struct d3d12_command_list_barrier_batch barriers;
size_t i;
@@ -14639,13 +14651,15 @@ static void d3d12_command_list_free_rtas_batch(struct d3d12_command_list *list)
vkd3d_free(rtas_batch->build_infos);
vkd3d_free(rtas_batch->geometry_infos);
+ vkd3d_free(rtas_batch->omm_infos);
vkd3d_free(rtas_batch->range_infos);
vkd3d_free(rtas_batch->range_ptrs);
}
-static bool d3d12_command_list_allocate_rtas_build_info(struct d3d12_command_list *list, uint32_t geometry_count,
+bool d3d12_command_list_allocate_rtas_build_info(struct d3d12_command_list *list, uint32_t geometry_count,
VkAccelerationStructureBuildGeometryInfoKHR **build_info,
VkAccelerationStructureGeometryKHR **geometry_infos,
+ VkAccelerationStructureTrianglesOpacityMicromapEXT **omm_infos,
VkAccelerationStructureBuildRangeInfoKHR **range_infos)
{
struct d3d12_rtas_batch_state *rtas_batch = &list->rtas_batch;
@@ -14664,6 +14678,13 @@ static bool d3d12_command_list_allocate_rtas_build_info(struct d3d12_command_lis
return false;
}
+ if (!vkd3d_array_reserve((void **)&rtas_batch->omm_infos, &rtas_batch->omm_info_size,
+ rtas_batch->geometry_info_count + geometry_count, sizeof(*rtas_batch->omm_infos)))
+ {
+ ERR("Failed to allocate opacity micromap info array.\n");
+ return false;
+ }
+
if (!vkd3d_array_reserve((void **)&rtas_batch->range_infos, &rtas_batch->range_info_size,
rtas_batch->geometry_info_count + geometry_count, sizeof(*rtas_batch->range_infos)))
{
@@ -14673,6 +14694,7 @@ static bool d3d12_command_list_allocate_rtas_build_info(struct d3d12_command_lis
*build_info = &rtas_batch->build_infos[rtas_batch->build_info_count];
*geometry_infos = &rtas_batch->geometry_infos[rtas_batch->geometry_info_count];
+ *omm_infos = &rtas_batch->omm_infos[rtas_batch->geometry_info_count];
*range_infos = &rtas_batch->range_infos[rtas_batch->geometry_info_count];
rtas_batch->build_info_count += 1;
@@ -14689,7 +14711,7 @@ static void d3d12_command_list_clear_rtas_batch(struct d3d12_command_list *list)
rtas_batch->geometry_info_count = 0;
}
-static void d3d12_command_list_flush_rtas_batch(struct d3d12_command_list *list)
+void d3d12_command_list_flush_rtas_batch(struct d3d12_command_list *list)
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct d3d12_rtas_batch_state *rtas_batch = &list->rtas_batch;
@@ -14737,6 +14759,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStru
struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList(iface);
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct d3d12_rtas_batch_state *rtas_batch = &list->rtas_batch;
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *omm_infos;
VkAccelerationStructureBuildGeometryInfoKHR *build_info;
VkAccelerationStructureBuildRangeInfoKHR *range_infos;
VkAccelerationStructureGeometryKHR *geometry_infos;
@@ -14768,6 +14791,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStru
vk_barrier.dstStageMask = VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
vk_barrier.dstAccessMask = VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+ if (list->device->device_info.opacity_micromap_features.micromap)
+ {
+ vk_barrier.srcAccessMask |= VK_ACCESS_2_MICROMAP_READ_BIT_EXT;
+ vk_barrier.dstAccessMask |= VK_ACCESS_2_MICROMAP_READ_BIT_EXT;
+ }
+
memset(&dep_info, 0, sizeof(dep_info));
dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
dep_info.memoryBarrierCount = 1;
@@ -14786,7 +14815,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStru
#endif
if (!d3d12_command_list_allocate_rtas_build_info(list, geometry_count,
- &build_info, &geometry_infos, &range_infos))
+ &build_info, &geometry_infos, &omm_infos, &range_infos))
return;
if (!vkd3d_acceleration_structure_convert_inputs(list->device, &desc->Inputs,
@@ -15779,7 +15808,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma
return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList_iface);
}
-extern CONST_VTBL struct ID3D12GraphicsCommandListExt1Vtbl d3d12_command_list_vkd3d_ext_vtbl;
+extern CONST_VTBL struct ID3D12GraphicsCommandListExt2Vtbl d3d12_command_list_vkd3d_ext_vtbl;
static void d3d12_command_list_init_attachment_info(VkRenderingAttachmentInfo *attachment_info)
{
diff --git a/libs/vkd3d/command_list_vkd3d_ext.c b/libs/vkd3d/command_list_vkd3d_ext.c
index 7e54fae6..dd867978 100644
--- a/libs/vkd3d/command_list_vkd3d_ext.c
+++ b/libs/vkd3d/command_list_vkd3d_ext.c
@@ -120,7 +120,373 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_LaunchCubinShader(
0 /* raw_params_count */);
}
-CONST_VTBL struct ID3D12GraphicsCommandListExt1Vtbl d3d12_command_list_vkd3d_ext_vtbl =
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_BuildRaytracingAccelerationStructureEx(d3d12_command_list_vkd3d_ext_iface *iface,
+ const void *params)
+{
+ struct d3d12_command_list *list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+ const NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS *nvParams = params;
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX *desc;
+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ struct d3d12_rtas_batch_state *rtas_batch = &list->rtas_batch;
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *omm_infos;
+ VkAccelerationStructureBuildGeometryInfoKHR *build_info;
+ VkAccelerationStructureBuildRangeInfoKHR *range_infos;
+ VkAccelerationStructureGeometryKHR *geometry_infos;
+ uint32_t *primitive_counts = NULL;
+ VkMemoryBarrier2 vk_barrier;
+ VkDependencyInfo dep_info;
+ uint32_t geometry_count;
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!nvParams)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_EX_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (!nvParams->pDesc || (nvParams->numPostbuildInfoDescs && !nvParams->pPostbuildInfoDescs))
+ return NVAPI_INVALID_ARGUMENT;
+
+ desc = nvParams->pDesc;
+
+ if (!d3d12_device_supports_ray_tracing_tier_1_0(list->device))
+ {
+ WARN("Acceleration structure is not supported. Calling this is invalid.\n");
+ return NVAPI_ERROR;
+ }
+
+ /* Do not batch TLAS and BLAS builds into the same command, since doing so
+ * is disallowed if there are data dependencies between the builds. This
+ * happens in Cyberpunk 2077, which does not emit appropriate UAV barriers. */
+ if (rtas_batch->build_info_count && rtas_batch->build_type != desc->inputs.type)
+ {
+ d3d12_command_list_flush_rtas_batch(list);
+
+ memset(&vk_barrier, 0, sizeof(vk_barrier));
+ vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
+ vk_barrier.srcStageMask = VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
+ vk_barrier.srcAccessMask = VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+ vk_barrier.dstStageMask = VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR;
+ vk_barrier.dstAccessMask = VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+
+ if (list->device->device_info.opacity_micromap_features.micromap)
+ {
+ vk_barrier.srcAccessMask |= VK_ACCESS_2_MICROMAP_READ_BIT_EXT;
+ vk_barrier.dstAccessMask |= VK_ACCESS_2_MICROMAP_READ_BIT_EXT;
+ }
+
+ memset(&dep_info, 0, sizeof(dep_info));
+ dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
+ dep_info.memoryBarrierCount = 1;
+ dep_info.pMemoryBarriers = &vk_barrier;
+
+ VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info));
+ }
+
+ rtas_batch->build_type = desc->inputs.type;
+
+ geometry_count = vkd3d_acceleration_structure_get_geometry_count_nv(&desc->inputs);
+
+#ifdef VKD3D_ENABLE_BREADCRUMBS
+ if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
+ primitive_counts = vkd3d_malloc(geometry_count * sizeof(*primitive_counts));
+#endif
+
+ if (!d3d12_command_list_allocate_rtas_build_info(list, geometry_count,
+ &build_info, &geometry_infos, &omm_infos, &range_infos))
+ return NVAPI_OUT_OF_MEMORY;
+
+ if (!vkd3d_acceleration_structure_convert_inputs_nv(list->device, &desc->inputs,
+ build_info, geometry_infos, omm_infos, range_infos, primitive_counts))
+ {
+ ERR("Failed to convert inputs.\n");
+ return NVAPI_ERROR;
+ }
+
+ if (desc->destAccelerationStructureData)
+ {
+ build_info->dstAccelerationStructure =
+ vkd3d_va_map_place_acceleration_structure(&list->device->memory_allocator.va_map,
+ list->device, desc->destAccelerationStructureData);
+ if (build_info->dstAccelerationStructure == VK_NULL_HANDLE)
+ {
+ ERR("Failed to place destAccelerationStructure. Dropping call.\n");
+ return NVAPI_ERROR;
+ }
+ }
+
+ if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
+ desc->sourceAccelerationStructureData)
+ {
+ build_info->srcAccelerationStructure =
+ vkd3d_va_map_place_acceleration_structure(&list->device->memory_allocator.va_map,
+ list->device, desc->sourceAccelerationStructureData);
+ if (build_info->srcAccelerationStructure == VK_NULL_HANDLE)
+ {
+ ERR("Failed to place srcAccelerationStructure. Dropping call.\n");
+ return NVAPI_ERROR;
+ }
+ }
+
+ build_info->scratchData.deviceAddress = desc->scratchAccelerationStructureData;
+
+ /* Immediately execute the RTAS build command here
+ * so that we don't have to copy micromap usage counts */
+ d3d12_command_list_flush_rtas_batch(list);
+
+#ifdef VKD3D_ENABLE_BREADCRUMBS
+ if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
+ {
+ VKD3D_BREADCRUMB_TAG("RTAS build [Dest VA, Source VA, Scratch VA]");
+ VKD3D_BREADCRUMB_AUX64(desc->destAccelerationStructureData);
+ VKD3D_BREADCRUMB_AUX64(desc->sourceAccelerationStructureData);
+ VKD3D_BREADCRUMB_AUX64(desc->scratchAccelerationStructureData);
+ VKD3D_BREADCRUMB_TAG((desc->inputs.flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE) ?
+ "Update" : "Create");
+ VKD3D_BREADCRUMB_TAG(desc->inputs.type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL ? "Top" : "Bottom");
+ {
+ VkAccelerationStructureBuildSizesInfoKHR size_info;
+
+ memset(&size_info, 0, sizeof(size_info));
+ size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
+
+ if (desc->inputs.flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
+ {
+ build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
+ build_info->flags |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR;
+ }
+ VK_CALL(vkGetAccelerationStructureBuildSizesKHR(list->device->vk_device,
+ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, build_info,
+ primitive_counts, &size_info));
+ VKD3D_BREADCRUMB_TAG("Build requirements [Size, Build Scratch, Update Scratch]");
+ VKD3D_BREADCRUMB_AUX64(size_info.accelerationStructureSize);
+ VKD3D_BREADCRUMB_AUX64(size_info.buildScratchSize);
+ VKD3D_BREADCRUMB_AUX64(size_info.updateScratchSize);
+
+ if (desc->inputs.type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
+ {
+ VKD3D_BREADCRUMB_AUX64(desc->inputs.instanceDescs);
+ VKD3D_BREADCRUMB_AUX32(desc->inputs.numDescs);
+ }
+ else
+ {
+ unsigned int i;
+ for (i = 0; i < desc->inputs.numDescs; i++)
+ {
+ const NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX *geom;
+ if (desc->inputs.descsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY)
+ geom = &desc->inputs.pGeometryDescs[i];
+ else
+ geom = desc->inputs.ppGeometryDescs[i];
+
+ switch (geom->type)
+ {
+ case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES_EX:
+ VKD3D_BREADCRUMB_TAG("Triangle [Flags, VBO VA, VBO stride, IBO, Transform, VBO format, IBO format, V count, I count]");
+ VKD3D_BREADCRUMB_AUX32(geom->flags);
+ VKD3D_BREADCRUMB_AUX64(geom->triangles.VertexBuffer.StartAddress);
+ VKD3D_BREADCRUMB_AUX64(geom->triangles.VertexBuffer.StrideInBytes);
+ VKD3D_BREADCRUMB_AUX64(geom->triangles.IndexBuffer);
+ VKD3D_BREADCRUMB_AUX64(geom->triangles.Transform3x4);
+ VKD3D_BREADCRUMB_AUX32(geom->triangles.VertexFormat);
+ VKD3D_BREADCRUMB_AUX32(geom->triangles.IndexFormat);
+ VKD3D_BREADCRUMB_AUX32(geom->triangles.VertexCount);
+ VKD3D_BREADCRUMB_AUX32(geom->triangles.IndexCount);
+ break;
+
+ case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS_EX:
+ VKD3D_BREADCRUMB_TAG("AABB [Flags, VA, stride, count]");
+ VKD3D_BREADCRUMB_AUX32(geom->flags);
+ VKD3D_BREADCRUMB_AUX64(geom->aabbs.AABBs.StartAddress);
+ VKD3D_BREADCRUMB_AUX64(geom->aabbs.AABBs.StrideInBytes);
+ VKD3D_BREADCRUMB_AUX64(geom->aabbs.AABBCount);
+ break;
+
+ case NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_OMM_TRIANGLES_EX:
+ VKD3D_BREADCRUMB_TAG("OMM Triangle [Flags, VBO VA, VBO stride, IBO, Transform, VBO format, IBO format, V count, I count, OMM IBO VA, OMM IBO stride, OMM IBO format, OMM base location, OMM array, OMM usage count]");
+ VKD3D_BREADCRUMB_AUX32(geom->flags);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.triangles.VertexBuffer.StartAddress);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.triangles.VertexBuffer.StrideInBytes);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.triangles.IndexBuffer);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.triangles.Transform3x4);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.triangles.VertexFormat);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.triangles.IndexFormat);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.triangles.VertexCount);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.triangles.IndexCount);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.ommAttachment.opacityMicromapIndexBuffer.StartAddress);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.ommAttachment.opacityMicromapIndexBuffer.StrideInBytes);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.ommAttachment.opacityMicromapIndexFormat);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.ommAttachment.opacityMicromapBaseLocation);
+ VKD3D_BREADCRUMB_AUX64(geom->ommTriangles.ommAttachment.opacityMicromapArray);
+ VKD3D_BREADCRUMB_AUX32(geom->ommTriangles.ommAttachment.numOMMUsageCounts);
+ break;
+ }
+ }
+ }
+ }
+
+ vkd3d_free(primitive_counts);
+ }
+#endif
+
+ if (nvParams->numPostbuildInfoDescs)
+ {
+ vkd3d_acceleration_structure_emit_immediate_postbuild_info(list,
+ nvParams->numPostbuildInfoDescs, nvParams->pPostbuildInfoDescs,
+ build_info->dstAccelerationStructure);
+ }
+
+ VKD3D_BREADCRUMB_COMMAND(BUILD_RTAS);
+
+ return NVAPI_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_BuildRaytracingOpacityMicromapArray(d3d12_command_list_vkd3d_ext_iface *iface,
+ void *params)
+{
+ struct d3d12_command_list *list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+ NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS *nvParams = params;
+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_DESC *desc;
+ struct vkd3d_opacity_micromap_build_info build_info;
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!nvParams)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (!nvParams->pDesc || (nvParams->numPostbuildInfoDescs && !nvParams->pPostbuildInfoDescs))
+ return NVAPI_INVALID_ARGUMENT;
+
+ desc = nvParams->pDesc;
+
+ if (!list->device->device_info.opacity_micromap_features.micromap)
+ {
+ ERR("Opacity micromap is not supported. Calling this is invalid.\n");
+ return NVAPI_NOT_SUPPORTED;
+ }
+
+ if (!vkd3d_opacity_micromap_convert_inputs_nv(list->device, &build_info, &nvParams->pDesc->inputs))
+ {
+ ERR("Failed to convert inputs.\n");
+ return NVAPI_ERROR;
+ }
+
+ if (desc->destOpacityMicromapArrayData)
+ {
+ build_info.build_info.dstMicromap =
+ vkd3d_va_map_place_opacity_micromap(&list->device->memory_allocator.va_map,
+ list->device, desc->destOpacityMicromapArrayData);
+ if (build_info.build_info.dstMicromap == VK_NULL_HANDLE)
+ {
+ ERR("Failed to place dstMicromap. Dropping call.\n");
+ return NVAPI_ERROR;
+ }
+ }
+
+ build_info.build_info.scratchData.deviceAddress = desc->scratchOpacityMicromapArrayData;
+
+ d3d12_command_list_end_current_render_pass(list, true);
+ d3d12_command_list_end_transfer_batch(list);
+
+ VK_CALL(vkCmdBuildMicromapsEXT(list->cmd.vk_command_buffer, 1,
+ &build_info.build_info));
+
+#ifdef VKD3D_ENABLE_BREADCRUMBS
+ VKD3D_BREADCRUMB_TAG("OMM build [Dest VA, Scratch VA]");
+ VKD3D_BREADCRUMB_AUX64(desc->destOpacityMicromapArrayData);
+ VKD3D_BREADCRUMB_AUX64(desc->scratchOpacityMicromapArrayData);
+ {
+ VkMicromapBuildSizesInfoEXT size_info;
+ unsigned int i;
+
+ memset(&size_info, 0, sizeof(size_info));
+ size_info.sType = VK_STRUCTURE_TYPE_MICROMAP_BUILD_SIZES_INFO_EXT;
+
+ VK_CALL(vkGetMicromapBuildSizesEXT(list->device->vk_device,
+ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info.build_info,
+ &size_info));
+ VKD3D_BREADCRUMB_TAG("Build requirements [Size, Build Scratch, Discardable]");
+ VKD3D_BREADCRUMB_AUX64(size_info.micromapSize);
+ VKD3D_BREADCRUMB_AUX64(size_info.buildScratchSize);
+ VKD3D_BREADCRUMB_AUX32(size_info.discardable);
+
+ VKD3D_BREADCRUMB_TAG("Inputs [Flags, VA, Descs VA, Descs stride]");
+ VKD3D_BREADCRUMB_AUX32(desc->inputs.flags);
+ VKD3D_BREADCRUMB_AUX64(desc->inputs.inputBuffer);
+ VKD3D_BREADCRUMB_AUX64(desc->inputs.perOMMDescs.StartAddress);
+ VKD3D_BREADCRUMB_AUX64(desc->inputs.perOMMDescs.StrideInBytes);
+
+ for (i = 0; i < desc->inputs.numOMMUsageCounts; i++)
+ {
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT *usage_count = &desc->inputs.pOMMUsageCounts[i];
+
+ VKD3D_BREADCRUMB_TAG("Usage Count [Count, Subdivision Level, Format]");
+ VKD3D_BREADCRUMB_AUX32(usage_count->count);
+ VKD3D_BREADCRUMB_AUX32(usage_count->subdivisionLevel);
+ VKD3D_BREADCRUMB_AUX32(usage_count->format);
+ }
+ }
+#endif
+
+ vkd3d_opacity_micromap_build_info_cleanup(&build_info);
+
+ if (nvParams->numPostbuildInfoDescs)
+ {
+ vkd3d_opacity_micromap_emit_immediate_postbuild_info_nv(list,
+ nvParams->numPostbuildInfoDescs, nvParams->pPostbuildInfoDescs,
+ build_info.build_info.dstMicromap);
+ }
+
+ VKD3D_BREADCRUMB_COMMAND(BUILD_OMM);
+
+ return NVAPI_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_RelocateRaytracingOpacityMicromapArray(d3d12_command_list_vkd3d_ext_iface *iface,
+ const void *params)
+{
+ FIXME("iface %p, params %p stub.\n", iface, params);
+ return NVAPI_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_EmitRaytracingOpacityMicromapArrayPostbuildInfo(d3d12_command_list_vkd3d_ext_iface *iface,
+ const void *params)
+{
+ struct d3d12_command_list *list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+ const NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS *nvParams = params;
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!params)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (!nvParams)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_EMIT_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (!nvParams->pDesc || (nvParams->numSources && !nvParams->pSources))
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (!list->device->device_info.opacity_micromap_features.micromap)
+ {
+ ERR("Opacity micromap is not supported. Calling this is invalid.\n");
+ return NVAPI_NOT_SUPPORTED;
+ }
+
+ d3d12_command_list_end_current_render_pass(list, true);
+ vkd3d_opacity_micromap_emit_postbuild_info_nv(list,
+ nvParams->pDesc, nvParams->numSources, nvParams->pSources);
+
+ VKD3D_BREADCRUMB_COMMAND(EMIT_OMM_POSTBUILD);
+
+ return NVAPI_OK;
+}
+
+CONST_VTBL struct ID3D12GraphicsCommandListExt2Vtbl d3d12_command_list_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_command_list_vkd3d_ext_QueryInterface,
@@ -133,5 +499,11 @@ CONST_VTBL struct ID3D12GraphicsCommandListExt1Vtbl d3d12_command_list_vkd3d_ext
/* ID3D12GraphicsCommandListExt1 methods */
d3d12_command_list_vkd3d_ext_LaunchCubinShaderEx,
+
+ /* ID3D12GraphicsCommandListExt2 methods */
+ d3d12_command_list_vkd3d_ext_BuildRaytracingAccelerationStructureEx,
+ d3d12_command_list_vkd3d_ext_BuildRaytracingOpacityMicromapArray,
+ d3d12_command_list_vkd3d_ext_RelocateRaytracingOpacityMicromapArray,
+ d3d12_command_list_vkd3d_ext_EmitRaytracingOpacityMicromapArrayPostbuildInfo,
};
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index 13c10dca..6542c3a4 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -105,6 +105,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(EXT_DYNAMIC_RENDERING_UNUSED_ATTACHMENTS, EXT_dynamic_rendering_unused_attachments),
VK_EXTENSION(EXT_LINE_RASTERIZATION, EXT_line_rasterization),
VK_EXTENSION(EXT_IMAGE_COMPRESSION_CONTROL, EXT_image_compression_control),
+ VK_EXTENSION_DISABLE_COND(EXT_OPACITY_MICROMAP, EXT_opacity_micromap, VKD3D_CONFIG_FLAG_NO_DXR),
/* AMD extensions */
VK_EXTENSION(AMD_BUFFER_MARKER, AMD_buffer_marker),
VK_EXTENSION(AMD_DEVICE_COHERENT_MEMORY, AMD_device_coherent_memory),
@@ -1783,6 +1784,12 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->properties2, &info->memory_decompression_properties);
}
+ if (vulkan_info->EXT_opacity_micromap)
+ {
+ info->opacity_micromap_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_FEATURES_EXT;
+ vk_prepend_struct(&info->features2, &info->opacity_micromap_features);
+ }
+
VK_CALL(vkGetPhysicalDeviceFeatures2(device->vk_physical_device, &info->features2));
VK_CALL(vkGetPhysicalDeviceProperties2(device->vk_physical_device, &info->properties2));
}
@@ -2189,6 +2196,11 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev
TRACE(" VkPhysicalDeviceLineRasterizationFeaturesEXT:\n");
TRACE(" rectangularLines: %u\n", info->line_rasterization_features.rectangularLines);
TRACE(" smoothLines: %u\n", info->line_rasterization_features.smoothLines);
+
+ TRACE(" VkPhysicalDeviceOpacityMicromapFeaturesEXT:\n");
+ TRACE(" micromap: %#x\n", info->opacity_micromap_features.micromap);
+ TRACE(" micromapCaptureReplay: %#x\n", info->opacity_micromap_features.micromapCaptureReplay);
+ TRACE(" micromapHostCommands: %#x\n", info->opacity_micromap_features.micromapHostCommands);
}
static HRESULT vkd3d_init_device_extensions(struct d3d12_device *device,
@@ -3225,7 +3237,8 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
return S_OK;
}
- if (IsEqualGUID(riid, &IID_ID3D12DeviceExt))
+ if (IsEqualGUID(riid, &IID_ID3D12DeviceExt)
+ || IsEqualGUID(riid, &IID_ID3D12DeviceExt1))
{
struct d3d12_device *device = impl_from_ID3D12Device(iface);
d3d12_device_vkd3d_ext_AddRef(&device->ID3D12DeviceExt_iface);
@@ -8070,6 +8083,12 @@ static void vkd3d_init_shader_extensions(struct d3d12_device *device)
device->vk_info.shader_extensions[device->vk_info.shader_extension_count++] =
VKD3D_SHADER_TARGET_EXTENSION_SUPPORT_SUBGROUP_PARTITIONED_NV;
}
+
+ if (device->device_info.opacity_micromap_features.micromap)
+ {
+ device->vk_info.shader_extensions[device->vk_info.shader_extension_count++] =
+ VKD3D_SHADER_TARGET_EXTENSION_OPACITY_MICROMAP;
+ }
}
static void vkd3d_compute_shader_interface_key(struct d3d12_device *device)
@@ -8185,7 +8204,7 @@ static void d3d12_device_replace_vtable(struct d3d12_device *device)
}
}
-extern CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl;
+extern CONST_VTBL struct ID3D12DeviceExt1Vtbl d3d12_device_vkd3d_ext_vtbl;
extern CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl;
extern CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl;
@@ -8351,6 +8370,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
vkd3d_renderdoc_begin_capture(device->vkd3d_instance->vk_instance);
#endif
+ device->global_ray_tracing_pipeline_create_flags = 0;
+
return S_OK;
out_cleanup_descriptor_qa_global_info:
diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c
index 5bb7eca8..7dc70847 100644
--- a/libs/vkd3d/device_vkd3d_ext.c
+++ b/libs/vkd3d/device_vkd3d_ext.c
@@ -69,6 +69,9 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D
TRACE("iface %p, extension %u \n", iface, extension);
switch (extension)
{
+ case D3D12_VK_EXT_OPACITY_MICROMAP:
+ ret_val = device->vk_info.EXT_opacity_micromap;
+ break;
case D3D12_VK_NVX_BINARY_IMPORT:
ret_val = device->vk_info.NVX_binary_import;
break;
@@ -215,7 +218,208 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(ID3D12Dev
return S_OK;
}
-CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl =
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_SetCreatePipelineStateOptions(d3d12_device_vkd3d_ext_iface *iface,
+ const void *params)
+{
+ const NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS *nvParams = params;
+ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
+
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!nvParams)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (nvParams->flags & NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT)
+ {
+ if (!device->device_info.opacity_micromap_features.micromap)
+ {
+ ERR("Opacity micromap is not supported. Calling this is invalid.\n");
+ return NVAPI_NOT_SUPPORTED;
+ }
+
+ device->global_ray_tracing_pipeline_create_flags |= VK_PIPELINE_CREATE_RAY_TRACING_OPACITY_MICROMAP_BIT_EXT;
+ }
+ else
+ {
+ device->global_ray_tracing_pipeline_create_flags &= ~VK_PIPELINE_CREATE_RAY_TRACING_OPACITY_MICROMAP_BIT_EXT;
+ }
+
+    TRACE("flags %#x.\n", device->global_ray_tracing_pipeline_create_flags);
+
+ return NVAPI_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CheckDriverMatchingIdentifierEx(d3d12_device_vkd3d_ext_iface *iface,
+ void *params)
+{
+ NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS *nvParams = params;
+
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!nvParams)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_CHECK_DRIVER_MATCHING_IDENTIFIER_EX_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (nvParams->serializedDataType == NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_ACCELERATION_STRUCTURE_EX ||
+ nvParams->serializedDataType == NVAPI_D3D12_SERIALIZED_DATA_RAYTRACING_OPACITY_MICROMAP_ARRAY_EX)
+ nvParams->checkStatus = D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED;
+ else
+ nvParams->checkStatus = D3D12_DRIVER_MATCHING_IDENTIFIER_UNSUPPORTED_TYPE;
+
+ return NVAPI_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetRaytracingAccelerationStructurePrebuildInfoEx(d3d12_device_vkd3d_ext_iface *iface,
+ void *params)
+{
+ NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS *nvParams = params;
+ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
+
+ VkAccelerationStructureTrianglesOpacityMicromapEXT omms_stack[VKD3D_BUILD_INFO_STACK_COUNT];
+ VkAccelerationStructureGeometryKHR geometries_stack[VKD3D_BUILD_INFO_STACK_COUNT];
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ uint32_t primitive_counts_stack[VKD3D_BUILD_INFO_STACK_COUNT];
+ D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info;
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *omms;
+ VkAccelerationStructureBuildGeometryInfoKHR build_info;
+ VkAccelerationStructureBuildSizesInfoKHR size_info;
+ VkAccelerationStructureGeometryKHR *geometries;
+ uint32_t *primitive_counts;
+ uint32_t geometry_count;
+ HRESULT ns = NVAPI_OK;
+
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!params)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_GET_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO_EX_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (!nvParams->pDesc || !nvParams->pInfo)
+ return NVAPI_INVALID_ARGUMENT;
+
+ info = nvParams->pInfo;
+
+ if (!d3d12_device_supports_ray_tracing_tier_1_0(device))
+ {
+ ERR("Acceleration structure is not supported. Calling this is invalid.\n");
+ memset(info, 0, sizeof(*info));
+ return NVAPI_NOT_SUPPORTED;
+ }
+
+ geometry_count = vkd3d_acceleration_structure_get_geometry_count_nv(nvParams->pDesc);
+ primitive_counts = primitive_counts_stack;
+ geometries = geometries_stack;
+ omms = omms_stack;
+
+ if (geometry_count > VKD3D_BUILD_INFO_STACK_COUNT)
+ {
+ primitive_counts = vkd3d_malloc(geometry_count * sizeof(*primitive_counts));
+ geometries = vkd3d_malloc(geometry_count * sizeof(*geometries));
+ omms = vkd3d_malloc(geometry_count * sizeof(*omms));
+ }
+
+ if (!vkd3d_acceleration_structure_convert_inputs_nv(device,
+ nvParams->pDesc, &build_info, geometries, omms, NULL, primitive_counts))
+ {
+ ERR("Failed to convert inputs.\n");
+ memset(info, 0, sizeof(*info));
+ ns = NVAPI_ERROR;
+ goto cleanup;
+ }
+
+ build_info.pGeometries = geometries;
+
+ memset(&size_info, 0, sizeof(size_info));
+ size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
+
+ VK_CALL(vkGetAccelerationStructureBuildSizesKHR(device->vk_device,
+ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info,
+ primitive_counts, &size_info));
+
+ info->ResultDataMaxSizeInBytes = size_info.accelerationStructureSize;
+ info->ScratchDataSizeInBytes = size_info.buildScratchSize;
+ info->UpdateScratchDataSizeInBytes = size_info.updateScratchSize;
+
+ TRACE("ResultDataMaxSizeInBytes: %"PRIu64".\n", info->ResultDataMaxSizeInBytes);
+    TRACE("ScratchDataSizeInBytes: %"PRIu64".\n", info->ScratchDataSizeInBytes);
+ TRACE("UpdateScratchDataSizeInBytes: %"PRIu64".\n", info->UpdateScratchDataSizeInBytes);
+
+cleanup:
+
+ if (geometry_count > VKD3D_BUILD_INFO_STACK_COUNT)
+ {
+ vkd3d_free(primitive_counts);
+ vkd3d_free(geometries);
+ vkd3d_free(omms);
+ }
+
+ return ns;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetRaytracingOpacityMicromapArrayPrebuildInfo(d3d12_device_vkd3d_ext_iface *iface,
+ void *params)
+{
+ NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS *nvParams = params;
+ struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO *info;
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ struct vkd3d_opacity_micromap_build_info build_info;
+ VkMicromapBuildSizesInfoEXT size_info;
+
+ TRACE("iface %p, params %p.\n", iface, params);
+
+ if (!nvParams)
+ return NVAPI_INVALID_ARGUMENT;
+
+ if (nvParams->version != NVAPI_GET_RAYTRACING_OPACITY_MICROMAP_ARRAY_PREBUILD_INFO_PARAMS_VER1)
+ return NVAPI_INCOMPATIBLE_STRUCT_VERSION;
+
+ if (!nvParams->pDesc || !nvParams->pInfo)
+ return NVAPI_INVALID_ARGUMENT;
+
+ info = nvParams->pInfo;
+
+ if (!device->device_info.opacity_micromap_features.micromap)
+ {
+ ERR("Opacity micromap is not supported. Calling this is invalid.\n");
+ memset(info, 0, sizeof(*info));
+ return NVAPI_NOT_SUPPORTED;
+ }
+
+ if (!vkd3d_opacity_micromap_convert_inputs_nv(device, &build_info, nvParams->pDesc))
+ {
+ ERR("Failed to convert inputs.\n");
+ memset(info, 0, sizeof(*info));
+ return NVAPI_ERROR;
+ }
+
+ memset(&size_info, 0, sizeof(size_info));
+ size_info.sType = VK_STRUCTURE_TYPE_MICROMAP_BUILD_SIZES_INFO_EXT;
+
+ VK_CALL(vkGetMicromapBuildSizesEXT(device->vk_device,
+ VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
+ &build_info.build_info, &size_info));
+
+ vkd3d_opacity_micromap_build_info_cleanup(&build_info);
+
+ info->resultDataMaxSizeInBytes = size_info.micromapSize;
+ info->scratchDataSizeInBytes = size_info.buildScratchSize;
+
+ TRACE("ResultDataMaxSizeInBytes: %"PRIu64".\n", info->resultDataMaxSizeInBytes);
+    TRACE("ScratchDataSizeInBytes: %"PRIu64".\n", info->scratchDataSizeInBytes);
+ TRACE("Micromap %s discardable.\n", size_info.discardable ? "is" : "is not");
+
+ return NVAPI_OK;
+}
+
+CONST_VTBL struct ID3D12DeviceExt1Vtbl d3d12_device_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_device_vkd3d_ext_QueryInterface,
@@ -229,7 +433,13 @@ CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl =
d3d12_device_vkd3d_ext_DestroyCubinComputeShader,
d3d12_device_vkd3d_ext_GetCudaTextureObject,
d3d12_device_vkd3d_ext_GetCudaSurfaceObject,
- d3d12_device_vkd3d_ext_CaptureUAVInfo
+ d3d12_device_vkd3d_ext_CaptureUAVInfo,
+
+ /* ID3D12DeviceExt1 methods */
+ d3d12_device_vkd3d_ext_SetCreatePipelineStateOptions,
+ d3d12_device_vkd3d_ext_CheckDriverMatchingIdentifierEx,
+ d3d12_device_vkd3d_ext_GetRaytracingAccelerationStructurePrebuildInfoEx,
+ d3d12_device_vkd3d_ext_GetRaytracingOpacityMicromapArrayPrebuildInfo,
};
diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build
index de322997..20feb0f9 100644
--- a/libs/vkd3d/meson.build
+++ b/libs/vkd3d/meson.build
@@ -74,6 +74,7 @@ vkd3d_src = [
'vkd3d_main.c',
'raytracing_pipeline.c',
'acceleration_structure.c',
+ 'opacity_micromap.c',
'swapchain.c'
]
diff --git a/libs/vkd3d/opacity_micromap.c b/libs/vkd3d/opacity_micromap.c
new file mode 100644
index 00000000..64a325ef
--- /dev/null
+++ b/libs/vkd3d/opacity_micromap.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2023 Krzysztof Bogacki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+#include "vkd3d_private.h"
+
+#define RT_TRACE TRACE
+
+void vkd3d_opacity_micromap_build_info_cleanup(
+ struct vkd3d_opacity_micromap_build_info *info)
+{
+ if (info->usages != info->usages_stack)
+ vkd3d_free(info->usages);
+}
+
+static VkBuildMicromapFlagsEXT nv_d3d12_build_flags_to_vk(
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAGS flags)
+{
+ VkBuildMicromapFlagsEXT vk_flags = 0;
+
+ if (flags & NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_TRACE)
+ vk_flags |= VK_BUILD_MICROMAP_PREFER_FAST_TRACE_BIT_EXT;
+ if (flags & NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_BUILD_FLAG_PREFER_FAST_BUILD)
+ vk_flags |= VK_BUILD_MICROMAP_PREFER_FAST_BUILD_BIT_EXT;
+
+ return vk_flags;
+}
+
+static VkOpacityMicromapFormatEXT nv_d3d12_format_to_vk(
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT format)
+{
+ switch (format)
+ {
+ case NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_2_STATE:
+ return VK_OPACITY_MICROMAP_FORMAT_2_STATE_EXT;
+ case NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_4_STATE:
+ return VK_OPACITY_MICROMAP_FORMAT_4_STATE_EXT;
+ default:
+ FIXME("Unrecognized format #%x.\n", format);
+ return (VkOpacityMicromapFormatEXT)format;
+ }
+}
+
+static char const* debug_omm_format(
+ NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT format)
+{
+ switch (format)
+ {
+ #define ENUM_NAME(x) case x: return #x;
+ ENUM_NAME(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_2_STATE)
+ ENUM_NAME(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT_OC1_4_STATE)
+ #undef ENUM_NAME
+ }
+
+ return vkd3d_dbg_sprintf("Unknown NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_FORMAT (%u)",
+ (uint32_t)format);
+}
+
+bool vkd3d_opacity_micromap_convert_inputs_nv(const struct d3d12_device *device,
+ struct vkd3d_opacity_micromap_build_info *info,
+ const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS *desc)
+{
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_USAGE_COUNT *usage_count;
+ VkMicromapBuildInfoEXT *build_info;
+ VkMicromapUsageEXT *usage;
+ unsigned int i;
+
+ RT_TRACE("Converting inputs.\n");
+ RT_TRACE("=====================\n");
+
+ build_info = &info->build_info;
+ memset(build_info, 0, sizeof(*build_info));
+ build_info->sType = VK_STRUCTURE_TYPE_MICROMAP_BUILD_INFO_EXT;
+ build_info->type = VK_MICROMAP_TYPE_OPACITY_MICROMAP_EXT;
+ build_info->flags = nv_d3d12_build_flags_to_vk(desc->flags);
+ build_info->mode = VK_BUILD_MICROMAP_MODE_BUILD_EXT;
+ build_info->usageCountsCount = desc->numOMMUsageCounts;
+
+ info->usages = info->usages_stack;
+
+ if (desc->numOMMUsageCounts <= VKD3D_BUILD_INFO_STACK_COUNT)
+ memset(info->usages, 0, sizeof(*info->usages) * desc->numOMMUsageCounts);
+ else
+ info->usages = vkd3d_calloc(desc->numOMMUsageCounts, sizeof(*info->usages));
+
+ for (i = 0; i < desc->numOMMUsageCounts; i++)
+ {
+ RT_TRACE(" Usage %u:\n", i);
+
+ usage_count = &desc->pOMMUsageCounts[i];
+ usage = &info->usages[i];
+
+ usage->count = usage_count->count;
+ usage->subdivisionLevel = usage_count->subdivisionLevel;
+ usage->format = nv_d3d12_format_to_vk(usage_count->format);
+
+ RT_TRACE(" Count: %u\n", usage_count->count);
+ RT_TRACE(" Subdivision level: %u\n", usage_count->subdivisionLevel);
+ RT_TRACE(" Format: %s\n", debug_omm_format(usage_count->format));
+ }
+
+ build_info->pUsageCounts = info->usages;
+ build_info->data.deviceAddress = desc->inputBuffer;
+ build_info->triangleArray.deviceAddress = desc->perOMMDescs.StartAddress;
+ build_info->triangleArrayStride = desc->perOMMDescs.StrideInBytes;
+
+ RT_TRACE(" IBO VA: %"PRIx64"\n", desc->inputBuffer);
+ RT_TRACE(" Triangles VA: %"PRIx64"\n", desc->perOMMDescs.StartAddress);
+ RT_TRACE(" Triangles stride: %"PRIu64" bytes\n", desc->perOMMDescs.StrideInBytes);
+
+ RT_TRACE("=====================\n");
+ return true;
+}
+
+static void vkd3d_opacity_micromap_end_barrier(struct d3d12_command_list *list)
+{
+ /* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS. */
+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ VkDependencyInfo dep_info;
+ VkMemoryBarrier2 barrier;
+
+ memset(&barrier, 0, sizeof(barrier));
+ barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
+ barrier.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT;
+ barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT;
+ barrier.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+
+ memset(&dep_info, 0, sizeof(dep_info));
+ dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
+ dep_info.memoryBarrierCount = 1;
+ dep_info.pMemoryBarriers = &barrier;
+
+ VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info));
+}
+
+static void vkd3d_opacity_micromap_write_postbuild_info(
+ struct d3d12_command_list *list,
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC *desc,
+ VkDeviceSize desc_offset,
+ VkMicromapEXT vk_opacity_micromap)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ const struct vkd3d_unique_resource *resource;
+ VkBuffer vk_buffer;
+ uint32_t offset;
+
+ resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, desc->destBuffer);
+ if (!resource)
+ {
+ ERR("Invalid resource.\n");
+ return;
+ }
+
+ vk_buffer = resource->vk_buffer;
+ offset = desc->destBuffer - resource->va;
+ offset += desc_offset;
+
+ FIXME("Unsupported InfoType %u.\n", desc->infoType);
+ /* TODO: CURRENT_SIZE is something we cannot query in Vulkan, so
+ * we'll need to keep around a buffer to handle this.
+ * For now, just clear to 0. */
+ VK_CALL(vkCmdFillBuffer(list->cmd.vk_command_buffer, vk_buffer, offset,
+ sizeof(uint64_t), 0));
+}
+
+void vkd3d_opacity_micromap_emit_postbuild_info_nv(
+ struct d3d12_command_list *list,
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC *desc,
+ uint32_t count,
+ const D3D12_GPU_VIRTUAL_ADDRESS *addresses)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ VkMicromapEXT vk_opacity_micromap;
+ VkDependencyInfo dep_info;
+ VkMemoryBarrier2 barrier;
+ uint32_t i;
+
+ /* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS. */
+ memset(&barrier, 0, sizeof(barrier));
+ barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
+ barrier.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+ barrier.dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT;
+ barrier.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT;
+
+ memset(&dep_info, 0, sizeof(dep_info));
+ dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
+ dep_info.memoryBarrierCount = 1;
+ dep_info.pMemoryBarriers = &barrier;
+
+ VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info));
+
+ for (i = 0; i < count; i++)
+ {
+ vk_opacity_micromap = vkd3d_va_map_place_opacity_micromap(
+ &list->device->memory_allocator.va_map, list->device, addresses[i]);
+ if (vk_opacity_micromap)
+ vkd3d_opacity_micromap_write_postbuild_info(list, desc, i * sizeof(uint64_t), vk_opacity_micromap);
+ else
+ ERR("Failed to query opacity micromap for VA 0x%"PRIx64".\n", addresses[i]);
+ }
+
+ vkd3d_opacity_micromap_end_barrier(list);
+}
+
+void vkd3d_opacity_micromap_emit_immediate_postbuild_info_nv(
+ struct d3d12_command_list *list, uint32_t count,
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC *desc,
+ VkMicromapEXT vk_opacity_micromap)
+{
+ /* In D3D12 we are supposed to be able to emit without an explicit barrier,
+ * but we need to emit them for Vulkan. */
+
+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+ VkDependencyInfo dep_info;
+ VkMemoryBarrier2 barrier;
+ uint32_t i;
+
+ memset(&barrier, 0, sizeof(barrier));
+ barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2;
+ barrier.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+ barrier.srcAccessMask = VK_ACCESS_2_MICROMAP_WRITE_BIT_EXT;
+ /* The query accesses STRUCTURE_READ_BIT in BUILD_BIT stage. */
+ barrier.dstStageMask = VK_PIPELINE_STAGE_2_MICROMAP_BUILD_BIT_EXT | VK_PIPELINE_STAGE_2_COPY_BIT;
+ barrier.dstAccessMask = VK_ACCESS_2_MICROMAP_READ_BIT_EXT | VK_ACCESS_2_TRANSFER_WRITE_BIT;
+
+ memset(&dep_info, 0, sizeof(dep_info));
+ dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
+ dep_info.memoryBarrierCount = 1;
+ dep_info.pMemoryBarriers = &barrier;
+
+ VK_CALL(vkCmdPipelineBarrier2(list->cmd.vk_command_buffer, &dep_info));
+
+ for (i = 0; i < count; i++)
+ vkd3d_opacity_micromap_write_postbuild_info(list, &desc[i], 0, vk_opacity_micromap);
+
+ vkd3d_opacity_micromap_end_barrier(list);
+}
diff --git a/libs/vkd3d/raytracing_pipeline.c b/libs/vkd3d/raytracing_pipeline.c
index 52202d2d..94ae4b84 100644
--- a/libs/vkd3d/raytracing_pipeline.c
+++ b/libs/vkd3d/raytracing_pipeline.c
@@ -2655,6 +2655,8 @@ static HRESULT d3d12_state_object_compile_pipeline_variant(struct d3d12_state_ob
if (object->pipeline_config.Flags & D3D12_RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES)
pipeline_create_info.flags |= VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR;
+ pipeline_create_info.flags |= object->device->global_ray_tracing_pipeline_create_flags;
+
library_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR;
library_info.pNext = NULL;
library_info.libraryCount = data->vk_libraries_count;
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c
index 6c9a0057..4bf454a1 100644
--- a/libs/vkd3d/resource.c
+++ b/libs/vkd3d/resource.c
@@ -189,6 +189,14 @@ HRESULT vkd3d_create_buffer(struct d3d12_device *device,
/* This is always allowed. Used for vertex/index buffer inputs to RTAS build. */
buffer_info.usage |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR |
VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR;
+
+ if (device->device_info.opacity_micromap_features.micromap)
+ {
+ if (heap_type == D3D12_HEAP_TYPE_DEFAULT || !is_cpu_accessible_heap(heap_properties))
+ buffer_info.usage |= VK_BUFFER_USAGE_MICROMAP_STORAGE_BIT_EXT;
+
+ buffer_info.usage |= VK_BUFFER_USAGE_MICROMAP_BUILD_INPUT_READ_ONLY_BIT_EXT;
+ }
}
if (heap_type == D3D12_HEAP_TYPE_UPLOAD)
@@ -1022,6 +1030,7 @@ static uint32_t vkd3d_view_entry_hash(const void *key)
{
case VKD3D_VIEW_TYPE_BUFFER:
case VKD3D_VIEW_TYPE_ACCELERATION_STRUCTURE:
+ case VKD3D_VIEW_TYPE_OPACITY_MICROMAP:
hash = hash_uint64((uint64_t)k->u.buffer.buffer);
hash = hash_combine(hash, hash_uint64(k->u.buffer.offset));
hash = hash_combine(hash, hash_uint64(k->u.buffer.size));
@@ -1087,6 +1096,7 @@ static bool vkd3d_view_entry_compare(const void *key, const struct hash_map_entr
{
case VKD3D_VIEW_TYPE_BUFFER:
case VKD3D_VIEW_TYPE_ACCELERATION_STRUCTURE:
+ case VKD3D_VIEW_TYPE_OPACITY_MICROMAP:
return k->u.buffer.buffer == e->key.u.buffer.buffer &&
k->u.buffer.format == e->key.u.buffer.format &&
k->u.buffer.offset == e->key.u.buffer.offset &&
@@ -1238,6 +1248,10 @@ struct vkd3d_view *vkd3d_view_map_create_view(struct vkd3d_view_map *view_map,
success = vkd3d_create_acceleration_structure_view(device, &key->u.buffer, &view);
break;
+ case VKD3D_VIEW_TYPE_OPACITY_MICROMAP:
+ success = vkd3d_create_opacity_micromap_view(device, &key->u.buffer, &view);
+ break;
+
default:
ERR("Unsupported view type %u.\n", key->view_type);
success = false;
@@ -3818,6 +3832,9 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev
case VKD3D_VIEW_TYPE_ACCELERATION_STRUCTURE:
VK_CALL(vkDestroyAccelerationStructureKHR(device->vk_device, view->vk_acceleration_structure, NULL));
break;
+ case VKD3D_VIEW_TYPE_OPACITY_MICROMAP:
+ VK_CALL(vkDestroyMicromapEXT(device->vk_device, view->vk_micromap, NULL));
+ break;
default:
WARN("Unhandled view type %d.\n", view->type);
}
@@ -4163,6 +4180,42 @@ bool vkd3d_create_acceleration_structure_view(struct d3d12_device *device, const
return true;
}
+bool vkd3d_create_opacity_micromap_view(struct d3d12_device *device, const struct vkd3d_buffer_view_desc *desc,
+ struct vkd3d_view **view)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ VkMicromapCreateInfoEXT create_info;
+ VkMicromapEXT vk_micromap;
+ struct vkd3d_view *object;
+ VkResult vr;
+
+ create_info.sType = VK_STRUCTURE_TYPE_MICROMAP_CREATE_INFO_EXT;
+ create_info.pNext = NULL;
+ create_info.type = VK_MICROMAP_TYPE_OPACITY_MICROMAP_EXT;
+ create_info.createFlags = 0;
+ create_info.deviceAddress = 0;
+ create_info.buffer = desc->buffer;
+ create_info.offset = desc->offset;
+ create_info.size = desc->size;
+
+ vr = VK_CALL(vkCreateMicromapEXT(device->vk_device, &create_info, NULL, &vk_micromap));
+ if (vr != VK_SUCCESS)
+ return false;
+
+ if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_OPACITY_MICROMAP)))
+ {
+ VK_CALL(vkDestroyMicromapEXT(device->vk_device, vk_micromap, NULL));
+ return false;
+ }
+
+ object->vk_micromap = vk_micromap;
+ object->format = desc->format;
+ object->info.buffer.offset = desc->offset;
+ object->info.buffer.size = desc->size;
+ *view = object;
+ return true;
+}
+
#define VKD3D_VIEW_RAW_BUFFER 0x1
static void vkd3d_get_metadata_buffer_view_for_resource(struct d3d12_device *device,
@@ -8486,6 +8539,13 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR |
VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
+
+ if (device->device_info.opacity_micromap_features.micromap)
+ {
+ buffer_info.usage |=
+ VK_BUFFER_USAGE_MICROMAP_STORAGE_BIT_EXT |
+ VK_BUFFER_USAGE_MICROMAP_BUILD_INPUT_READ_ONLY_BIT_EXT;
+ }
}
VK_CALL(vkGetDeviceBufferMemoryRequirements(device->vk_device, &buffer_requirement_info, &memory_requirements));
diff --git a/libs/vkd3d/va_map.c b/libs/vkd3d/va_map.c
index 56b99852..5ba71449 100644
--- a/libs/vkd3d/va_map.c
+++ b/libs/vkd3d/va_map.c
@@ -300,6 +300,55 @@ VkAccelerationStructureKHR vkd3d_va_map_place_acceleration_structure(struct vkd3
return view->vk_acceleration_structure;
}
+/* Resolves a GPU VA to a cached VkMicromapEXT placed at that address, creating
+ * the backing resource's view map on first use. Mirrors
+ * vkd3d_va_map_place_acceleration_structure. Returns VK_NULL_HANDLE if the VA
+ * does not resolve to a resource or on allocation failure. */
+VkMicromapEXT vkd3d_va_map_place_opacity_micromap(struct vkd3d_va_map *va_map,
+ struct d3d12_device *device,
+ VkDeviceAddress va)
+{
+ struct vkd3d_unique_resource *resource;
+ struct vkd3d_view_map *old_view_map;
+ struct vkd3d_view_map *view_map;
+ const struct vkd3d_view *view;
+ struct vkd3d_view_key key;
+
+ resource = vkd3d_va_map_deref_mutable(va_map, va);
+ if (!resource || !resource->va)
+ return VK_NULL_HANDLE;
+
+ /* Lazily publish the per-resource view map. Multiple threads may race to
+ * create it; the CAS below keeps exactly one and frees the loser's copy. */
+ view_map = vkd3d_atomic_ptr_load_explicit(&resource->view_map, vkd3d_memory_order_acquire);
+ if (!view_map)
+ {
+ view_map = vkd3d_malloc(sizeof(*view_map));
+ if (!view_map)
+ return VK_NULL_HANDLE;
+
+ if (FAILED(vkd3d_view_map_init(view_map)))
+ {
+ vkd3d_free(view_map);
+ return VK_NULL_HANDLE;
+ }
+
+ /* If another thread installed a map first, adopt theirs. */
+ old_view_map = vkd3d_atomic_ptr_compare_exchange(&resource->view_map, NULL, view_map,
+ vkd3d_memory_order_release, vkd3d_memory_order_acquire);
+ if (old_view_map)
+ {
+ vkd3d_view_map_destroy(view_map, device);
+ vkd3d_free(view_map);
+ view_map = old_view_map;
+ }
+ }
+
+ /* Key the cached view by buffer offset; size covers the rest of the
+ * resource since the micromap's true size is not known here. */
+ key.view_type = VKD3D_VIEW_TYPE_OPACITY_MICROMAP;
+ key.u.buffer.buffer = resource->vk_buffer;
+ key.u.buffer.offset = va - resource->va;
+ key.u.buffer.size = resource->size - key.u.buffer.offset;
+ key.u.buffer.format = NULL;
+
+ view = vkd3d_view_map_create_view(view_map, device, &key);
+ if (!view)
+ return VK_NULL_HANDLE;
+ return view->vk_micromap;
+}
+
+
void vkd3d_va_map_init(struct vkd3d_va_map *va_map)
{
memset(va_map, 0, sizeof(*va_map));
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 24c35181..c5bd8573 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -42,6 +42,7 @@
#include "vkd3d_file_utils.h"
#include "vkd3d_native_sync_handle.h"
#include "copy_utils.h"
+#include "nvapi.h"
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
@@ -153,6 +154,7 @@ struct vkd3d_vulkan_info
bool EXT_dynamic_rendering_unused_attachments;
bool EXT_line_rasterization;
bool EXT_image_compression_control;
+ bool EXT_opacity_micromap;
/* AMD device extensions */
bool AMD_buffer_marker;
bool AMD_device_coherent_memory;
@@ -288,6 +290,9 @@ const struct vkd3d_unique_resource *vkd3d_va_map_deref(struct vkd3d_va_map *va_m
VkAccelerationStructureKHR vkd3d_va_map_place_acceleration_structure(struct vkd3d_va_map *va_map,
struct d3d12_device *device,
VkDeviceAddress va);
+VkMicromapEXT vkd3d_va_map_place_opacity_micromap(struct vkd3d_va_map *va_map,
+ struct d3d12_device *device,
+ VkDeviceAddress va);
void vkd3d_va_map_init(struct vkd3d_va_map *va_map);
void vkd3d_va_map_cleanup(struct vkd3d_va_map *va_map);
@@ -1081,7 +1086,8 @@ enum vkd3d_view_type
VKD3D_VIEW_TYPE_BUFFER,
VKD3D_VIEW_TYPE_IMAGE,
VKD3D_VIEW_TYPE_SAMPLER,
- VKD3D_VIEW_TYPE_ACCELERATION_STRUCTURE
+ VKD3D_VIEW_TYPE_ACCELERATION_STRUCTURE,
+ VKD3D_VIEW_TYPE_OPACITY_MICROMAP
};
struct vkd3d_view
@@ -1096,6 +1102,7 @@ struct vkd3d_view
VkImageView vk_image_view;
VkSampler vk_sampler;
VkAccelerationStructureKHR vk_acceleration_structure;
+ VkMicromapEXT vk_micromap;
};
const struct vkd3d_format *format;
union
@@ -1152,6 +1159,8 @@ bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device,
VkBuffer vk_buffer, VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view);
bool vkd3d_create_acceleration_structure_view(struct d3d12_device *device,
const struct vkd3d_buffer_view_desc *desc, struct vkd3d_view **view);
+bool vkd3d_create_opacity_micromap_view(struct d3d12_device *device,
+ const struct vkd3d_buffer_view_desc *desc, struct vkd3d_view **view);
bool vkd3d_create_texture_view(struct d3d12_device *device,
const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view);
@@ -2553,7 +2562,7 @@ struct vkd3d_rendering_info
};
/* ID3D12CommandListExt */
-typedef ID3D12GraphicsCommandListExt1 d3d12_command_list_vkd3d_ext_iface;
+typedef ID3D12GraphicsCommandListExt2 d3d12_command_list_vkd3d_ext_iface;
struct d3d12_state_object;
@@ -2662,6 +2671,9 @@ struct d3d12_rtas_batch_state
size_t geometry_info_count;
size_t geometry_info_size;
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *omm_infos;
+ size_t omm_info_size;
+
VkAccelerationStructureBuildRangeInfoKHR *range_infos;
size_t range_info_size;
@@ -2855,6 +2867,14 @@ HRESULT d3d12_command_list_create(struct d3d12_device *device,
UINT node_mask, D3D12_COMMAND_LIST_TYPE type, struct d3d12_command_list **list);
bool d3d12_command_list_reset_query(struct d3d12_command_list *list,
VkQueryPool vk_pool, uint32_t index);
+void d3d12_command_list_end_current_render_pass(struct d3d12_command_list *list, bool suspend);
+void d3d12_command_list_end_transfer_batch(struct d3d12_command_list *list);
+bool d3d12_command_list_allocate_rtas_build_info(struct d3d12_command_list *list, uint32_t geometry_count,
+ VkAccelerationStructureBuildGeometryInfoKHR **build_info,
+ VkAccelerationStructureGeometryKHR **geometry_infos,
+ VkAccelerationStructureTrianglesOpacityMicromapEXT **omm_infos,
+ VkAccelerationStructureBuildRangeInfoKHR **range_infos);
+void d3d12_command_list_flush_rtas_batch(struct d3d12_command_list *list);
static inline struct vkd3d_pipeline_bindings *d3d12_command_list_get_bindings(
struct d3d12_command_list *list, enum vkd3d_pipeline_type pipeline_type)
@@ -3275,6 +3295,8 @@ enum vkd3d_breadcrumb_command_type
VKD3D_BREADCRUMB_COMMAND_BUILD_RTAS,
VKD3D_BREADCRUMB_COMMAND_COPY_RTAS,
VKD3D_BREADCRUMB_COMMAND_EMIT_RTAS_POSTBUILD,
+ VKD3D_BREADCRUMB_COMMAND_BUILD_OMM,
+ VKD3D_BREADCRUMB_COMMAND_EMIT_OMM_POSTBUILD,
VKD3D_BREADCRUMB_COMMAND_TRACE_RAYS,
VKD3D_BREADCRUMB_COMMAND_BARRIER,
VKD3D_BREADCRUMB_COMMAND_AUX32, /* Used to report arbitrary 32-bit words as arguments to other commands. */
@@ -4300,6 +4322,7 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceMaintenance5FeaturesKHR maintenance_5_features;
VkPhysicalDeviceLineRasterizationFeaturesEXT line_rasterization_features;
VkPhysicalDeviceImageCompressionControlFeaturesEXT image_compression_control_features;
+ VkPhysicalDeviceOpacityMicromapFeaturesEXT opacity_micromap_features;
VkPhysicalDeviceFeatures2 features2;
@@ -4376,7 +4399,7 @@ struct vkd3d_descriptor_qa_global_info;
struct vkd3d_descriptor_qa_heap_buffer_data;
/* ID3D12DeviceExt */
-typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface;
+typedef ID3D12DeviceExt1 d3d12_device_vkd3d_ext_iface;
/* ID3D12DXVKInteropDevice */
typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface;
@@ -4465,6 +4488,7 @@ struct d3d12_device
struct vkd3d_device_swapchain_info swapchain_info;
struct vkd3d_device_frame_markers frame_markers;
+ VkPipelineCreateFlags global_ray_tracing_pipeline_create_flags;
};
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
@@ -5214,12 +5238,21 @@ struct vkd3d_view *vkd3d_view_map_create_view(struct vkd3d_view_map *view_map,
uint32_t vkd3d_acceleration_structure_get_geometry_count(
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc);
+uint32_t vkd3d_acceleration_structure_get_geometry_count_nv(
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX *desc);
bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *device,
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc,
VkAccelerationStructureBuildGeometryInfoKHR *build_info,
VkAccelerationStructureGeometryKHR *geometry_infos,
VkAccelerationStructureBuildRangeInfoKHR *range_infos,
uint32_t *primitive_counts);
+bool vkd3d_acceleration_structure_convert_inputs_nv(struct d3d12_device *device,
+ const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX *desc,
+ VkAccelerationStructureBuildGeometryInfoKHR *build_info,
+ VkAccelerationStructureGeometryKHR *geometry_infos,
+ VkAccelerationStructureTrianglesOpacityMicromapEXT *omm_infos,
+ VkAccelerationStructureBuildRangeInfoKHR *range_infos,
+ uint32_t *primitive_counts);
void vkd3d_acceleration_structure_emit_postbuild_info(
struct d3d12_command_list *list,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
@@ -5233,6 +5266,29 @@ void vkd3d_acceleration_structure_copy(
D3D12_GPU_VIRTUAL_ADDRESS dst, D3D12_GPU_VIRTUAL_ADDRESS src,
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode);
+/* Opacity micromap helpers. */
+/* Converted OMM build inputs. `usages` points at `usages_stack` when the
+ * usage count fits, otherwise at a heap allocation released by
+ * vkd3d_opacity_micromap_build_info_cleanup(). */
+struct vkd3d_opacity_micromap_build_info
+{
+ VkMicromapUsageEXT usages_stack[VKD3D_BUILD_INFO_STACK_COUNT];
+ VkMicromapBuildInfoEXT build_info;
+ VkMicromapUsageEXT *usages;
+};
+
+void vkd3d_opacity_micromap_build_info_cleanup(
+ struct vkd3d_opacity_micromap_build_info *info);
+bool vkd3d_opacity_micromap_convert_inputs_nv(const struct d3d12_device *device,
+ struct vkd3d_opacity_micromap_build_info *info,
+ const NVAPI_D3D12_BUILD_RAYTRACING_OPACITY_MICROMAP_ARRAY_INPUTS *desc);
+void vkd3d_opacity_micromap_emit_postbuild_info_nv(
+ struct d3d12_command_list *list,
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC *desc,
+ uint32_t count,
+ const D3D12_GPU_VIRTUAL_ADDRESS *addresses);
+void vkd3d_opacity_micromap_emit_immediate_postbuild_info_nv(
+ struct d3d12_command_list *list, uint32_t count,
+ const NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_ARRAY_POSTBUILD_INFO_DESC *desc,
+ VkMicromapEXT vk_opacity_micromap);
+
typedef enum D3D11_USAGE
{
D3D11_USAGE_DEFAULT,
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h
index 6b32bca1..12cf7456 100644
--- a/libs/vkd3d/vulkan_procs.h
+++ b/libs/vkd3d/vulkan_procs.h
@@ -347,6 +347,12 @@ VK_DEVICE_EXT_PFN(vkSetDeviceMemoryPriorityEXT)
VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryNV)
VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryIndirectCountNV)
+/* VK_EXT_opacity_micromap */
+VK_DEVICE_EXT_PFN(vkGetMicromapBuildSizesEXT)
+VK_DEVICE_EXT_PFN(vkCreateMicromapEXT)
+VK_DEVICE_EXT_PFN(vkDestroyMicromapEXT)
+VK_DEVICE_EXT_PFN(vkCmdBuildMicromapsEXT)
+
#undef VK_INSTANCE_PFN
#undef VK_INSTANCE_EXT_PFN
#undef VK_DEVICE_PFN
From dd574726b5c94824c7f0ee847aaf677f04892503 Mon Sep 17 00:00:00 2001
From: Eric Sullivan <esullivan@nvidia.com>
Date: Thu, 7 Sep 2023 09:27:14 -0700
Subject: [PATCH 2/2] vkd3d: Add support for VK_NV_low_latency2
This commit adds support for the VK_NV_low_latency2 extension and
implements the ID3DLowLatencyDevice and ID3D12CommandQueueExt
interfaces.
---
include/meson.build | 1 +
include/vkd3d_command_queue_vkd3d_ext.idl | 30 +++
include/vkd3d_device_vkd3d_ext.idl | 15 ++
include/vkd3d_vk_includes.h | 34 ++-
libs/vkd3d/command.c | 41 ++-
libs/vkd3d/command_queue_vkd3d_ext.c | 100 +++++++
libs/vkd3d/device.c | 14 +-
libs/vkd3d/device_vkd3d_ext.c | 158 ++++++++++-
libs/vkd3d/meson.build | 1 +
libs/vkd3d/swapchain.c | 315 +++++++++++++++++++++-
libs/vkd3d/vkd3d_private.h | 68 ++++-
libs/vkd3d/vulkan_procs.h | 7 +
12 files changed, 763 insertions(+), 21 deletions(-)
create mode 100644 include/vkd3d_command_queue_vkd3d_ext.idl
create mode 100644 libs/vkd3d/command_queue_vkd3d_ext.c
diff --git a/include/meson.build b/include/meson.build
index c58579e9d2..e6ef1767df 100644
--- a/include/meson.build
+++ b/include/meson.build
@@ -12,6 +12,7 @@ vkd3d_idl = [
'vkd3d_dxgitype.idl',
'vkd3d_swapchain_factory.idl',
'vkd3d_command_list_vkd3d_ext.idl',
+ 'vkd3d_command_queue_vkd3d_ext.idl',
'vkd3d_device_vkd3d_ext.idl',
'vkd3d_core_interface.idl',
]
diff --git a/include/vkd3d_command_queue_vkd3d_ext.idl b/include/vkd3d_command_queue_vkd3d_ext.idl
new file mode 100644
index 0000000000..3c69f00a64
--- /dev/null
+++ b/include/vkd3d_command_queue_vkd3d_ext.idl
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 NVIDIA Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+import "vkd3d_d3d12.idl";
+import "vkd3d_vk_includes.h";
+
+[
+ uuid(40ed3f96-e773-e9bc-fc0c-e95560c99ad6),
+ object,
+ local,
+ pointer_default(unique)
+]
+/* Queue-level extension interface; lets the NVIDIA driver/NVAPI layer flag a
+ * queue as out-of-band for VK_NV_low_latency2 frame pacing. */
+interface ID3D12CommandQueueExt : IUnknown
+{
+ HRESULT NotifyOutOfBandCommandQueue(D3D12_OUT_OF_BAND_CQ_TYPE type);
+}
diff --git a/include/vkd3d_device_vkd3d_ext.idl b/include/vkd3d_device_vkd3d_ext.idl
index 3e615d76a1..4a21ba763e 100644
--- a/include/vkd3d_device_vkd3d_ext.idl
+++ b/include/vkd3d_device_vkd3d_ext.idl
@@ -54,3 +54,18 @@ interface ID3D12DXVKInteropDevice : IUnknown
HRESULT LockCommandQueue(ID3D12CommandQueue *queue);
HRESULT UnlockCommandQueue(ID3D12CommandQueue *queue);
}
+
+[
+ uuid(f3112584-41f9-348d-a59b-00b7e1d285d6),
+ object,
+ local,
+ pointer_default(unique)
+]
+interface ID3DLowLatencyDevice : IUnknown
+{
+ BOOL SupportsLowLatency();
+ HRESULT LatencySleep();
+ HRESULT SetLatencySleepMode(BOOL low_latency_mode, BOOL low_latency_boost, UINT32 minimum_interval_us);
+ HRESULT SetLatencyMarker(UINT64 frameID, UINT32 markerType);
+ HRESULT GetLatencyInfo(D3D12_LATENCY_RESULTS *latency_results);
+}
diff --git a/include/vkd3d_vk_includes.h b/include/vkd3d_vk_includes.h
index c43e018935..020596130a 100644
--- a/include/vkd3d_vk_includes.h
+++ b/include/vkd3d_vk_includes.h
@@ -41,9 +41,16 @@ typedef enum VkImageLayout VkImageLayout;
typedef enum D3D12_VK_EXTENSION
{
D3D12_VK_NVX_BINARY_IMPORT = 0x1,
- D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2
+ D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2,
+ D3D12_VK_NV_LOW_LATENCY_2 = 0x3
} D3D12_VK_EXTENSION;
+typedef enum D3D12_OUT_OF_BAND_CQ_TYPE
+{
+ OUT_OF_BAND_RENDER = 0,
+ OUT_OF_BAND_PRESENT = 1
+} D3D12_OUT_OF_BAND_CQ_TYPE;
+
typedef struct D3D12_CUBIN_DATA_HANDLE
{
VkCuFunctionNVX vkCuFunction;
@@ -61,5 +68,30 @@ typedef struct D3D12_UAV_INFO
UINT64 gpuVASize;
} D3D12_UAV_INFO;
+/* Per-frame latency telemetry returned by ID3DLowLatencyDevice::GetLatencyInfo.
+ * Layout must match the NVAPI Reflex definition; timestamps are in
+ * microseconds and the rsvd fields pad each report to the ABI-defined size. */
+typedef struct D3D12_LATENCY_RESULTS
+{
+ UINT32 version;
+ struct D3D12_FRAME_REPORT {
+ UINT64 frameID;
+ UINT64 inputSampleTime;
+ UINT64 simStartTime;
+ UINT64 simEndTime;
+ UINT64 renderSubmitStartTime;
+ UINT64 renderSubmitEndTime;
+ UINT64 presentStartTime;
+ UINT64 presentEndTime;
+ UINT64 driverStartTime;
+ UINT64 driverEndTime;
+ UINT64 osRenderQueueStartTime;
+ UINT64 osRenderQueueEndTime;
+ UINT64 gpuRenderStartTime;
+ UINT64 gpuRenderEndTime;
+ UINT32 gpuActiveRenderTimeUs;
+ UINT32 gpuFrameTimeUs;
+ UINT8 rsvd[120];
+ } frame_reports[64];
+ UINT8 rsvd[32];
+} D3D12_LATENCY_RESULTS;
+
#endif // __VKD3D_VK_INCLUDES_H
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 3d0ebaa6ff..171920ac2c 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -14301,12 +14301,14 @@ static struct d3d12_command_list *d3d12_command_list_from_iface(ID3D12CommandLis
}
/* ID3D12CommandQueue */
+extern ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface);
+
static inline struct d3d12_command_queue *impl_from_ID3D12CommandQueue(ID3D12CommandQueue *iface)
{
return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueue_iface);
}
-static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
+HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12CommandQueue *iface,
REFIID riid, void **object)
{
TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object);
@@ -14325,6 +14327,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman
return S_OK;
}
+ if (IsEqualGUID(riid, &IID_ID3D12CommandQueueExt))
+ {
+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+ d3d12_command_queue_vkd3d_ext_AddRef(&command_queue->ID3D12CommandQueueExt_iface);
+ *object = &command_queue->ID3D12CommandQueueExt_iface;
+ return S_OK;
+ }
+
if (IsEqualGUID(riid, &IID_IDXGIVkSwapChainFactory))
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
@@ -14339,7 +14349,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(ID3D12Comman
return E_NOINTERFACE;
}
-static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
+ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *iface)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
ULONG refcount = InterlockedIncrement(&command_queue->refcount);
@@ -14349,7 +14359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if
return refcount;
}
-static ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
+ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(ID3D12CommandQueue *iface)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
ULONG refcount = InterlockedDecrement(&command_queue->refcount);
@@ -14823,6 +14833,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm
sub.execute.cmd_count = num_command_buffers;
sub.execute.command_allocators = allocators;
sub.execute.num_command_allocators = command_list_count;
+ sub.execute.frame_id = command_queue->device->frame_markers.render;
#ifdef VKD3D_ENABLE_BREADCRUMBS
sub.execute.breadcrumb_indices = breadcrumb_indices;
sub.execute.breadcrumb_indices_count = breadcrumb_indices ? command_list_count : 0;
@@ -14985,6 +14996,8 @@ static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(
return desc;
}
+extern CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl;
+
static CONST_VTBL struct ID3D12CommandQueueVtbl d3d12_command_queue_vtbl =
{
/* IUnknown methods */
@@ -15492,13 +15505,15 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
const VkCommandBufferSubmitInfo *transition_cmd,
const VkSemaphoreSubmitInfo *transition_semaphore,
struct d3d12_command_allocator **command_allocators, size_t num_command_allocators,
- bool debug_capture, bool split_submissions)
+ uint64_t frame_id, bool debug_capture,
+ bool split_submissions)
{
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
struct vkd3d_queue *vkd3d_queue = command_queue->vkd3d_queue;
VkSemaphoreSubmitInfo signal_semaphore_info;
VkSemaphoreSubmitInfo binary_semaphore_info;
VkSubmitInfo2 submit_desc[4];
+ VkLatencySubmissionPresentIdNV latency_submit_present_info;
uint32_t num_submits;
VkQueue vk_queue;
unsigned int i;
@@ -15578,6 +15593,18 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu
num_submits += 2;
}
+ if (command_queue->device->vk_info.NV_low_latency2 &&
+ command_queue->device->swapchain_info.low_latency_swapchain &&
+ dxgi_vk_swap_chain_low_latency_enabled(command_queue->device->swapchain_info.low_latency_swapchain))
+ {
+ latency_submit_present_info.sType = VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV;
+ latency_submit_present_info.pNext = NULL;
+ latency_submit_present_info.presentID = frame_id;
+
+ for (i = 0; i < num_submits; i++)
+ submit_desc[i].pNext = &latency_submit_present_info;
+ }
+
#ifdef VKD3D_ENABLE_RENDERDOC
/* For each submission we have marked to be captured, we will first need to filter it
* based on VKD3D_AUTO_CAPTURE_COUNTS.
@@ -16078,7 +16105,9 @@ static void *d3d12_command_queue_submission_worker_main(void *userdata)
&transition_cmd, &transition_semaphore,
submission.execute.command_allocators,
submission.execute.num_command_allocators,
- submission.execute.debug_capture, submission.execute.split_submission);
+ submission.execute.frame_id,
+ submission.execute.debug_capture,
+ submission.execute.split_submission);
/* command_queue_execute takes ownership of the outstanding_submission_counters allocation.
* The atomic counters are decremented when the submission is observed to be freed.
@@ -16140,6 +16169,7 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue,
int rc;
queue->ID3D12CommandQueue_iface.lpVtbl = &d3d12_command_queue_vtbl;
+ queue->ID3D12CommandQueueExt_iface.lpVtbl = &d3d12_command_queue_vkd3d_ext_vtbl;
queue->refcount = 1;
queue->desc = *desc;
@@ -16268,6 +16298,7 @@ void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource
memset(&sub, 0, sizeof(sub));
sub.type = VKD3D_SUBMISSION_EXECUTE;
+ sub.execute.frame_id = d3d12_queue->device->frame_markers.render;
sub.execute.transition_count = 1;
sub.execute.transitions = vkd3d_malloc(sizeof(*sub.execute.transitions));
sub.execute.transitions[0].type = VKD3D_INITIAL_TRANSITION_TYPE_RESOURCE;
diff --git a/libs/vkd3d/command_queue_vkd3d_ext.c b/libs/vkd3d/command_queue_vkd3d_ext.c
new file mode 100644
index 0000000000..0fba03b058
--- /dev/null
+++ b/libs/vkd3d/command_queue_vkd3d_ext.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2023 NVIDIA Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_private.h"
+
+static inline struct d3d12_command_queue *d3d12_command_queue_from_ID3D12CommandQueueExt(d3d12_command_queue_vkd3d_ext_iface *iface)
+{
+ return CONTAINING_RECORD(iface, struct d3d12_command_queue, ID3D12CommandQueueExt_iface);
+}
+
+extern ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(d3d12_command_queue_iface *iface);
+
+ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_AddRef(d3d12_command_queue_vkd3d_ext_iface *iface)
+{
+ struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
+ return d3d12_command_queue_AddRef(&command_queue->ID3D12CommandQueue_iface);
+}
+
+extern ULONG STDMETHODCALLTYPE d3d12_command_queue_Release(d3d12_command_queue_iface *iface);
+
+static ULONG STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_Release(d3d12_command_queue_vkd3d_ext_iface *iface)
+{
+ struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
+ return d3d12_command_queue_Release(&command_queue->ID3D12CommandQueue_iface);
+}
+
+extern HRESULT STDMETHODCALLTYPE d3d12_command_queue_QueryInterface(d3d12_command_queue_iface *iface,
+ REFIID iid, void **object);
+
+static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_QueryInterface(d3d12_command_queue_vkd3d_ext_iface *iface,
+ REFIID iid, void **out)
+{
+ struct d3d12_command_queue *command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
+ TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);
+ return d3d12_command_queue_QueryInterface(&command_queue->ID3D12CommandQueue_iface, iid, out);
+}
+
+/* Tags this queue as an out-of-band render/present queue for
+ * VK_NV_low_latency2. Returns E_NOTIMPL without the extension and
+ * E_INVALIDARG for an unknown queue type. */
+static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue(d3d12_command_queue_vkd3d_ext_iface *iface, D3D12_OUT_OF_BAND_CQ_TYPE type)
+{
+ const struct vkd3d_vk_device_procs *vk_procs;
+ struct d3d12_command_queue* command_queue;
+ VkOutOfBandQueueTypeInfoNV queue_info;
+ VkOutOfBandQueueTypeNV queue_type;
+
+ command_queue = d3d12_command_queue_from_ID3D12CommandQueueExt(iface);
+
+ if (!command_queue->device->vk_info.NV_low_latency2)
+ return E_NOTIMPL;
+
+ vk_procs = &command_queue->device->vk_procs;
+
+ switch (type)
+ {
+ case OUT_OF_BAND_RENDER:
+ queue_type = VK_OUT_OF_BAND_QUEUE_TYPE_RENDER_NV;
+ break;
+ case OUT_OF_BAND_PRESENT:
+ queue_type = VK_OUT_OF_BAND_QUEUE_TYPE_PRESENT_NV;
+ break;
+ default:
+ /* Must bail out here: falling through would read queue_type
+ * uninitialized and still notify the driver. */
+ WARN("Invalid queue type %x\n", type);
+ return E_INVALIDARG;
+ }
+
+ queue_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV;
+ queue_info.pNext = NULL;
+ queue_info.queueType = queue_type;
+
+ VK_CALL(vkQueueNotifyOutOfBandNV(command_queue->vkd3d_queue->vk_queue, &queue_info));
+
+ return S_OK;
+}
+
+/* Vtable for the ID3D12CommandQueueExt interface exposed via
+ * d3d12_command_queue_QueryInterface. */
+CONST_VTBL struct ID3D12CommandQueueExtVtbl d3d12_command_queue_vkd3d_ext_vtbl =
+{
+ /* IUnknown methods */
+ d3d12_command_queue_vkd3d_ext_QueryInterface,
+ d3d12_command_queue_vkd3d_ext_AddRef,
+ d3d12_command_queue_vkd3d_ext_Release,
+
+ /* ID3D12CommandQueueExt methods */
+ d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCommandQueue
+};
+
+
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index fcf408aa2b..5d27bbaa18 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -116,6 +116,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(NV_SHADER_SUBGROUP_PARTITIONED, NV_shader_subgroup_partitioned),
VK_EXTENSION(NV_MEMORY_DECOMPRESSION, NV_memory_decompression),
VK_EXTENSION(NV_DEVICE_GENERATED_COMMANDS_COMPUTE, NV_device_generated_commands_compute),
+ VK_EXTENSION(NV_LOW_LATENCY_2, NV_low_latency2),
/* VALVE extensions */
VK_EXTENSION(VALVE_MUTABLE_DESCRIPTOR_TYPE, VALVE_mutable_descriptor_type),
VK_EXTENSION(VALVE_DESCRIPTOR_SET_HOST_MAPPING, VALVE_descriptor_set_host_mapping),
@@ -3096,8 +3097,9 @@ void d3d12_device_return_query_pool(struct d3d12_device *device, const struct vk
}
/* ID3D12Device */
-extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface);
+extern ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface);
extern ULONG STDMETHODCALLTYPE d3d12_dxvk_interop_device_AddRef(ID3D12DXVKInteropDevice *iface);
+extern ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(ID3DLowLatencyDevice *iface);
HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
REFIID riid, void **object)
@@ -3144,6 +3146,14 @@ HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
return S_OK;
}
+ if (IsEqualGUID(riid, &IID_ID3DLowLatencyDevice))
+ {
+ struct d3d12_device *device = impl_from_ID3D12Device(iface);
+ d3d12_low_latency_device_AddRef(&device->ID3DLowLatencyDevice_iface);
+ *object = &device->ID3DLowLatencyDevice_iface;
+ return S_OK;
+ }
+
WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));
*object = NULL;
@@ -8005,6 +8015,7 @@ static void d3d12_device_replace_vtable(struct d3d12_device *device)
extern CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl;
extern CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl;
+extern CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl;
static void vkd3d_scratch_pool_init(struct d3d12_device *device)
{
@@ -8075,6 +8086,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
device->ID3D12DeviceExt_iface.lpVtbl = &d3d12_device_vkd3d_ext_vtbl;
device->ID3D12DXVKInteropDevice_iface.lpVtbl = &d3d12_dxvk_interop_device_vtbl;
+ device->ID3DLowLatencyDevice_iface.lpVtbl = &d3d_low_latency_device_vtbl;
if ((rc = rwlock_init(&device->vertex_input_lock)))
{
diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c
index 5bb7eca840..cf10247488 100644
--- a/libs/vkd3d/device_vkd3d_ext.c
+++ b/libs/vkd3d/device_vkd3d_ext.c
@@ -20,18 +20,18 @@
#include "vkd3d_private.h"
-static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(ID3D12DeviceExt *iface)
+static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(d3d12_device_vkd3d_ext_iface *iface)
{
return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceExt_iface);
}
-ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface)
+ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(d3d12_device_vkd3d_ext_iface *iface)
{
struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
return d3d12_device_add_ref(device);
}
-static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *iface)
+static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(d3d12_device_vkd3d_ext_iface *iface)
{
struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
return d3d12_device_release(device);
@@ -40,7 +40,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *i
extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
REFIID riid, void **object);
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12DeviceExt *iface,
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(d3d12_device_vkd3d_ext_iface *iface,
REFIID iid, void **out)
{
struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
@@ -48,7 +48,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12Dev
return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out);
}
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12DeviceExt *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device)
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(d3d12_device_vkd3d_ext_iface *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device)
{
struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
TRACE("iface %p, vk_instance %p, vk_physical_device %p, vk_device %p \n", iface, vk_instance, vk_physical_device, vk_device);
@@ -61,7 +61,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12D
return S_OK;
}
-static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12DeviceExt *iface, D3D12_VK_EXTENSION extension)
+static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(d3d12_device_vkd3d_ext_iface *iface, D3D12_VK_EXTENSION extension)
{
const struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
bool ret_val = false;
@@ -75,6 +75,9 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D
case D3D12_VK_NVX_IMAGE_VIEW_HANDLE:
ret_val = device->vk_info.NVX_image_view_handle;
break;
+ case D3D12_VK_NV_LOW_LATENCY_2:
+ ret_val = device->vk_info.NV_low_latency2;
+ break;
default:
WARN("Invalid extension %x\n", extension);
}
@@ -82,7 +85,7 @@ static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12D
return ret_val;
}
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(ID3D12DeviceExt *iface, const void *cubin_data,
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(d3d12_device_vkd3d_ext_iface *iface, const void *cubin_data,
UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle)
{
VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX };
@@ -129,7 +132,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShader
return S_OK;
}
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(ID3D12DeviceExt *iface, D3D12_CUBIN_DATA_HANDLE *handle)
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(d3d12_device_vkd3d_ext_iface *iface, D3D12_CUBIN_DATA_HANDLE *handle)
{
const struct vkd3d_vk_device_procs *vk_procs;
struct d3d12_device *device;
@@ -149,7 +152,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShade
return S_OK;
}
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle,
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle,
D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle)
{
VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
@@ -177,7 +180,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3
return S_OK;
}
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle,
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(d3d12_device_vkd3d_ext_iface *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle,
UINT32 *cuda_surface_handle)
{
VkImageViewHandleInfoNVX imageViewHandleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
@@ -202,7 +205,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3
extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info;
-static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(ID3D12DeviceExt *iface, D3D12_UAV_INFO *uav_info)
+static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(d3d12_device_vkd3d_ext_iface *iface, D3D12_UAV_INFO *uav_info)
{
if (!uav_info)
return E_INVALIDARG;
@@ -417,3 +420,136 @@ CONST_VTBL struct ID3D12DXVKInteropDeviceVtbl d3d12_dxvk_interop_device_vtbl =
d3d12_dxvk_interop_device_LockCommandQueue,
d3d12_dxvk_interop_device_UnlockCommandQueue,
};
+
+static inline struct d3d12_device *d3d12_device_from_ID3DLowLatencyDevice(d3d_low_latency_device_iface *iface)
+{
+ return CONTAINING_RECORD(iface, struct d3d12_device, ID3DLowLatencyDevice_iface);
+}
+
+ULONG STDMETHODCALLTYPE d3d12_low_latency_device_AddRef(d3d_low_latency_device_iface *iface)
+{
+ struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+ return d3d12_device_add_ref(device);
+}
+
+static ULONG STDMETHODCALLTYPE d3d12_low_latency_device_Release(d3d_low_latency_device_iface *iface)
+{
+ struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+ return d3d12_device_release(device);
+}
+
+extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
+ REFIID riid, void **object);
+
+static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_QueryInterface(d3d_low_latency_device_iface *iface,
+ REFIID iid, void **out)
+{
+ struct d3d12_device *device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+ TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);
+ return d3d12_device_QueryInterface(&device->ID3D12Device_iface, iid, out);
+}
+
+static BOOL STDMETHODCALLTYPE d3d12_low_latency_device_SupportsLowLatency(d3d_low_latency_device_iface *iface)
+{
+ struct d3d12_device *device;
+
+ device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+
+ return device->vk_info.NV_low_latency2;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_LatencySleep(d3d_low_latency_device_iface *iface)
+{
+ struct d3d12_device *device;
+
+ device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+
+ if (!device->vk_info.NV_low_latency2)
+ return E_NOTIMPL;
+
+ if (device->swapchain_info.low_latency_swapchain)
+ return dxgi_vk_swap_chain_latency_sleep(device->swapchain_info.low_latency_swapchain);
+
+ return S_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencySleepMode(d3d_low_latency_device_iface *iface, BOOL low_latency_mode, BOOL low_latency_boost,
+ UINT32 minimum_interval_us)
+{
+ struct d3d12_device *device;
+
+ device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+
+ if (!device->vk_info.NV_low_latency2)
+ return E_NOTIMPL;
+
+ if (device->swapchain_info.low_latency_swapchain)
+ return dxgi_vk_swap_chain_set_latency_sleep_mode(device->swapchain_info.low_latency_swapchain, low_latency_mode, low_latency_boost, minimum_interval_us);
+
+ return S_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencyMarker(d3d_low_latency_device_iface *iface, UINT64 frameID, UINT32 markerType)
+{
+ struct d3d12_device *device;
+ VkLatencyMarkerNV vk_marker;
+ uint64_t internal_frame_id;
+
+ device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+ vk_marker = (VkLatencyMarkerNV)markerType;
+
+ if (!device->vk_info.NV_low_latency2)
+ return E_NOTIMPL;
+
+ // Offset the frameID by one to ensure it will always
+ // be a valid presentID
+ internal_frame_id = frameID + 1;
+
+ switch (vk_marker)
+ {
+ case VK_LATENCY_MARKER_SIMULATION_START_NV:
+ device->frame_markers.simulation = internal_frame_id;
+ break;
+ case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV:
+ device->frame_markers.render = internal_frame_id;
+ break;
+ case VK_LATENCY_MARKER_PRESENT_START_NV:
+ device->frame_markers.present = internal_frame_id;
+ break;
+ }
+
+ if (device->swapchain_info.low_latency_swapchain)
+ return dxgi_vk_swap_chain_set_latency_marker(device->swapchain_info.low_latency_swapchain, internal_frame_id, vk_marker);
+
+ return S_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_GetLatencyInfo(d3d_low_latency_device_iface *iface, D3D12_LATENCY_RESULTS *latency_results)
+{
+ struct d3d12_device *device;
+
+ device = d3d12_device_from_ID3DLowLatencyDevice(iface);
+
+ if (!device->vk_info.NV_low_latency2)
+ return E_NOTIMPL;
+
+ if (device->swapchain_info.low_latency_swapchain)
+ return dxgi_vk_swap_chain_get_latency_info(device->swapchain_info.low_latency_swapchain, latency_results);
+
+ return S_OK;
+}
+
+CONST_VTBL struct ID3DLowLatencyDeviceVtbl d3d_low_latency_device_vtbl =
+{
+ /* IUnknown methods */
+ d3d12_low_latency_device_QueryInterface,
+ d3d12_low_latency_device_AddRef,
+ d3d12_low_latency_device_Release,
+
+ /* ID3DLowLatencyDevice methods */
+ d3d12_low_latency_device_SupportsLowLatency,
+ d3d12_low_latency_device_LatencySleep,
+ d3d12_low_latency_device_SetLatencySleepMode,
+ d3d12_low_latency_device_SetLatencyMarker,
+ d3d12_low_latency_device_GetLatencyInfo
+};
diff --git a/libs/vkd3d/meson.build b/libs/vkd3d/meson.build
index 3692ceba9c..2120e7d52a 100644
--- a/libs/vkd3d/meson.build
+++ b/libs/vkd3d/meson.build
@@ -41,6 +41,7 @@ vkd3d_src = [
'cache.c',
'command.c',
'command_list_vkd3d_ext.c',
+ 'command_queue_vkd3d_ext.c',
'device.c',
'device_vkd3d_ext.c',
'heap.c',
diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c
index 27a55c8b5b..80eb558d08 100644
--- a/libs/vkd3d/swapchain.c
+++ b/libs/vkd3d/swapchain.c
@@ -58,6 +58,7 @@ struct dxgi_vk_swap_chain_present_request
DXGI_COLOR_SPACE_TYPE dxgi_color_space_type;
DXGI_VK_HDR_METADATA dxgi_hdr_metadata;
uint32_t swap_interval;
+ uint64_t frame_id;
bool modifies_hdr_metadata;
};
@@ -67,6 +68,13 @@ struct present_wait_entry
uint64_t begin_frame_time_ns;
};
+struct low_latency_state
+{
+ bool mode;
+ bool boost;
+ uint32_t minimum_interval_us;
+};
+
struct dxgi_vk_swap_chain
{
IDXGIVkSwapChain IDXGIVkSwapChain_iface;
@@ -133,6 +141,16 @@ struct dxgi_vk_swap_chain
/* State tracking in present tasks on how to deal with swapchain recreation. */
bool force_swapchain_recreation;
bool is_surface_lost;
+
+ /* Info about the current low latency state of the swapchain */
+ pthread_mutex_t low_latency_lock;
+
+ VkSemaphore low_latency_sem;
+ uint64_t low_latency_sem_value;
+
+ bool low_latency_update_requested;
+ struct low_latency_state requested_low_latency_state;
+ struct low_latency_state low_latency_state;
} present;
struct dxgi_vk_swap_chain_present_request request, request_ring[DXGI_MAX_SWAP_CHAIN_BUFFERS];
@@ -317,6 +335,13 @@ static ULONG STDMETHODCALLTYPE dxgi_vk_swap_chain_Release(IDXGIVkSwapChain *ifac
if (!refcount)
{
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ {
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+ d3d12_device_remove_swapchain(chain->queue->device, chain);
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+ }
+
dxgi_vk_swap_chain_drain_queue(chain);
dxgi_vk_swap_chain_cleanup(chain);
vkd3d_free(chain);
@@ -760,6 +785,7 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_Present(IDXGIVkSwapChain *if
request->dxgi_hdr_metadata = chain->user.dxgi_hdr_metadata;
request->modifies_hdr_metadata = chain->user.modifies_hdr_metadata;
request->begin_frame_time_ns = chain->user.begin_frame_time_ns;
+ request->frame_id = chain->queue->device->frame_markers.present;
chain->user.modifies_hdr_metadata = false;
/* Need to process this task in queue thread to deal with wait-before-signal.
@@ -1283,6 +1309,8 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk
VkDevice vk_device = chain->queue->device->vk_device;
VkCommandPoolCreateInfo command_pool_create_info;
VkSwapchainCreateInfoKHR swapchain_create_info;
+ VkSwapchainLatencyCreateInfoNV swapchain_latency_create_info;
+ VkLatencySleepModeInfoNV swapchain_latency_sleep_mode_info;
VkSurfaceCapabilitiesKHR surface_caps;
VkSurfaceFormatKHR surface_format;
VkImageViewCreateInfo view_info;
@@ -1374,6 +1402,15 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk
swapchain_create_info.imageExtent.height = max(swapchain_create_info.imageExtent.height, surface_caps.minImageExtent.height);
swapchain_create_info.imageExtent.height = min(swapchain_create_info.imageExtent.height, surface_caps.maxImageExtent.height);
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ {
+ memset(&swapchain_latency_create_info, 0, sizeof(swapchain_latency_create_info));
+ swapchain_latency_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV;
+ swapchain_latency_create_info.pNext = NULL;
+ swapchain_latency_create_info.latencyModeEnable = true;
+ swapchain_create_info.pNext = &swapchain_latency_create_info;
+ }
+
vr = VK_CALL(vkCreateSwapchainKHR(vk_device, &swapchain_create_info, NULL, &chain->present.vk_swapchain));
if (vr < 0)
{
@@ -1387,6 +1424,29 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk
INFO("Got %u swapchain images.\n", chain->present.backbuffer_count);
+ /* If low latency is supported restore the current low latency state now */
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ {
+ struct low_latency_state* low_latency_state = chain->present.low_latency_update_requested ?
+ &chain->present.requested_low_latency_state : &chain->present.low_latency_state;
+
+ memset(&swapchain_latency_sleep_mode_info, 0, sizeof(swapchain_latency_sleep_mode_info));
+ swapchain_latency_sleep_mode_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV;
+ swapchain_latency_sleep_mode_info.pNext = NULL;
+
+ swapchain_latency_sleep_mode_info.lowLatencyMode = low_latency_state->mode;
+ swapchain_latency_sleep_mode_info.lowLatencyBoost = low_latency_state->boost;
+ swapchain_latency_sleep_mode_info.minimumIntervalUs = low_latency_state->minimum_interval_us;
+
+ VK_CALL(vkSetLatencySleepModeNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &swapchain_latency_sleep_mode_info));
+
+ if (chain->present.low_latency_update_requested)
+ {
+ memcpy(&chain->present.low_latency_state, &chain->present.requested_low_latency_state, sizeof(struct low_latency_state));
+ chain->present.low_latency_update_requested = false;
+ }
+ }
+
memset(&view_info, 0, sizeof(view_info));
view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view_info.format = swapchain_create_info.imageFormat;
@@ -1795,9 +1855,12 @@ static void dxgi_vk_swap_chain_present_iteration(struct dxgi_vk_swap_chain *chai
* Non-FIFO swapchains will pump their frame latency handles through the fallback path of blit command being done.
* Especially on Xwayland, the present ID is updated when images actually hit on-screen due to MAILBOX behavior.
* This would unnecessarily stall our progress. */
- if (chain->wait_thread.active && !chain->present.present_id_valid && chain->request.swap_interval > 0)
+ if (chain->wait_thread.active && !chain->present.present_id_valid &&
+ (chain->request.swap_interval > 0 || chain->present.low_latency_state.mode))
{
- chain->present.present_id += 1;
+ chain->present.present_id = (chain->present.low_latency_state.mode) ?
+ chain->request.frame_id : chain->present.present_id + 1;
+
present_id.sType = VK_STRUCTURE_TYPE_PRESENT_ID_KHR;
present_id.pNext = NULL;
present_id.swapchainCount = 1;
@@ -1905,6 +1968,9 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_)
if (!chain->wait_thread.active)
present_count = 1;
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+
for (i = 0; i < present_count; i++)
{
/* A present iteration may or may not render to backbuffer. We'll apply best effort here.
@@ -1912,6 +1978,9 @@ static void dxgi_vk_swap_chain_present_callback(void *chain_)
dxgi_vk_swap_chain_present_iteration(chain, 0);
}
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+
/* When this is signalled, lets main thread know that it's safe to free user buffers.
* Signal this just once on the outside since we might have retries, swap_interval > 1, etc, which complicates command buffer recycling. */
dxgi_vk_swap_chain_present_signal_blit_semaphore(chain);
@@ -2041,6 +2110,52 @@ static HRESULT dxgi_vk_swap_chain_init_waiter_thread(struct dxgi_vk_swap_chain *
return S_OK;
}
+static HRESULT dxgi_vk_swap_chain_init_low_latency(struct dxgi_vk_swap_chain* chain)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs;
+
+ VkSemaphoreTypeCreateInfoKHR semaphore_type_info;
+ VkSemaphoreCreateInfo semaphore_info;
+ VkResult vr;
+
+ chain->present.low_latency_sem = VK_NULL_HANDLE;
+ chain->present.low_latency_sem_value = 0;
+
+ chain->present.low_latency_update_requested = false;
+ chain->present.requested_low_latency_state.mode = false;
+ chain->present.requested_low_latency_state.boost = false;
+ chain->present.requested_low_latency_state.minimum_interval_us = 0;
+
+ chain->present.low_latency_state.mode = false;
+ chain->present.low_latency_state.boost = false;
+ chain->present.low_latency_state.minimum_interval_us = 0;
+
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ {
+ memset(&semaphore_type_info, 0, sizeof(semaphore_type_info));
+ semaphore_type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR;
+ semaphore_type_info.pNext = NULL;
+ semaphore_type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
+ semaphore_type_info.initialValue = 0;
+
+ memset(&semaphore_info, 0, sizeof(semaphore_info));
+ semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+ semaphore_info.pNext = &semaphore_type_info;
+ semaphore_info.flags = 0;
+
+ if ((vr = VK_CALL(vkCreateSemaphore(chain->queue->device->vk_device,
+ &semaphore_info, NULL, &chain->present.low_latency_sem))) < 0)
+ {
+ ERR("Failed to create semaphore, vr %d.\n", vr);
+ return hresult_from_vk_result(vr);
+ }
+
+ pthread_mutex_init(&chain->present.low_latency_lock, NULL);
+ }
+
+ return S_OK;
+}
+
static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVkSurfaceFactory *pFactory,
const DXGI_SWAP_CHAIN_DESC1 *pDesc, struct d3d12_command_queue *queue)
{
@@ -2066,6 +2181,9 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk
if (FAILED(hr = dxgi_vk_swap_chain_init_waiter_thread(chain)))
goto err;
+ if (FAILED(hr = dxgi_vk_swap_chain_init_low_latency(chain)))
+ goto err;
+
ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface);
return S_OK;
@@ -2093,6 +2211,13 @@ static HRESULT STDMETHODCALLTYPE dxgi_vk_swap_chain_factory_CreateSwapChain(IDXG
return hr;
}
+ if (chain->queue->device->vk_info.NV_low_latency2)
+ {
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+ d3d12_device_register_swapchain(chain->queue->device, chain);
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+ }
+
*ppSwapchain = &chain->IDXGIVkSwapChain_iface;
return S_OK;
}
@@ -2108,6 +2233,192 @@ static CONST_VTBL struct IDXGIVkSwapChainFactoryVtbl dxgi_vk_swap_chain_factory_
dxgi_vk_swap_chain_factory_CreateSwapChain,
};
+bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain* chain)
+{
+ return chain->present.low_latency_state.mode;
+}
+
+HRESULT dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain* chain)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs;
+
+ VkLatencySleepInfoNV latency_sleep_info;
+ VkSemaphoreWaitInfo sem_wait_info;
+
+ if (chain->present.low_latency_state.mode)
+ {
+ // Increment the low latency sem value before the wait
+ chain->present.low_latency_sem_value++;
+
+ memset(&latency_sleep_info, 0, sizeof(latency_sleep_info));
+ latency_sleep_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV;
+ latency_sleep_info.pNext = NULL;
+ latency_sleep_info.signalSemaphore = chain->present.low_latency_sem;
+ latency_sleep_info.value = chain->present.low_latency_sem_value;
+
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+ VK_CALL(vkLatencySleepNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_sleep_info));
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+
+ memset(&sem_wait_info, 0, sizeof(sem_wait_info));
+ sem_wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
+ sem_wait_info.pNext = NULL;
+ sem_wait_info.flags = 0;
+ sem_wait_info.semaphoreCount = 1;
+ sem_wait_info.pSemaphores = &chain->present.low_latency_sem;
+ sem_wait_info.pValues = &chain->present.low_latency_sem_value;
+
+ VK_CALL(vkWaitSemaphores(chain->queue->device->vk_device, &sem_wait_info, UINT64_MAX));
+ }
+
+ return S_OK;
+}
+
+HRESULT dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain* chain, bool low_latency_mode,
+ bool low_latency_boost, uint32_t minimum_interval_us)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs;
+
+ VkLatencySleepModeInfoNV latency_sleep_mode_info;
+
+ if (low_latency_mode == chain->present.low_latency_state.mode &&
+ low_latency_boost == chain->present.low_latency_state.boost &&
+ minimum_interval_us == chain->present.low_latency_state.minimum_interval_us)
+ {
+ return S_OK;
+ }
+
+ // If we are enabling low latency mode, recreate the swapchain
+ // to sync the frameIds provided by nvapi to the presentID
+ // used at present time
+ if (low_latency_mode && !chain->present.low_latency_state.mode)
+ {
+ chain->present.requested_low_latency_state.mode = low_latency_mode;
+ chain->present.requested_low_latency_state.boost = low_latency_boost;
+ chain->present.requested_low_latency_state.minimum_interval_us = minimum_interval_us;
+
+ // In order to use the frameId provided by the application
+ // the swapchain will have to be recreated to reset the
+ // present ID
+ chain->present.low_latency_update_requested = true;
+ chain->present.force_swapchain_recreation = true;
+ }
+ else
+ {
+ memset(&latency_sleep_mode_info, 0, sizeof(latency_sleep_mode_info));
+ latency_sleep_mode_info.sType = VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV;
+ latency_sleep_mode_info.pNext = NULL;
+ latency_sleep_mode_info.lowLatencyMode = low_latency_mode;
+ latency_sleep_mode_info.lowLatencyBoost = low_latency_boost;
+ latency_sleep_mode_info.minimumIntervalUs = minimum_interval_us;
+
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+ VK_CALL(vkSetLatencySleepModeNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_sleep_mode_info));
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+
+ chain->present.low_latency_state.mode = low_latency_mode;
+ chain->present.low_latency_state.boost = low_latency_boost;
+ chain->present.low_latency_state.minimum_interval_us = minimum_interval_us;
+ }
+
+ return S_OK;
+}
+
+HRESULT dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain* chain, uint64_t frameID, VkLatencyMarkerNV marker)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs;
+
+ VkSetLatencyMarkerInfoNV latency_marker_info;
+
+ if (chain->present.low_latency_state.mode)
+ {
+ memset(&latency_marker_info, 0, sizeof(latency_marker_info));
+ latency_marker_info.sType = VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV;
+ latency_marker_info.pNext = NULL;
+ latency_marker_info.presentID = frameID;
+ latency_marker_info.marker = marker;
+
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+ VK_CALL(vkSetLatencyMarkerNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &latency_marker_info));
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+ }
+
+ return S_OK;
+}
+
+HRESULT dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, D3D12_LATENCY_RESULTS *latency_results)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs;
+
+ VkGetLatencyMarkerInfoNV marker_info;
+ VkLatencyTimingsFrameReportNV* frame_reports;
+ uint32_t report_count;
+ uint32_t i;
+
+ if (!chain->present.low_latency_state.mode)
+ {
+ memset(latency_results->frame_reports, 0, sizeof(latency_results->frame_reports));
+ return S_OK;
+ }
+
+ memset(&marker_info, 0, sizeof(marker_info));
+ marker_info.sType = VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV;
+
+ pthread_mutex_lock(&chain->present.low_latency_lock);
+
+ VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &report_count, &marker_info));
+
+ if (report_count >= 64)
+ {
+ report_count = 64;
+ frame_reports = vkd3d_calloc(report_count, sizeof(VkLatencyTimingsFrameReportNV));
+ for (i = 0; i < report_count; i++)
+ frame_reports[i].sType = VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV;
+
+ marker_info.pTimings = frame_reports;
+
+ VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &report_count, &marker_info));
+
+ for (i = 0; i < report_count; i++)
+ {
+ latency_results->frame_reports[i].frameID = frame_reports[i].presentID - 1;
+ latency_results->frame_reports[i].inputSampleTime = frame_reports[i].inputSampleTimeUs;
+ latency_results->frame_reports[i].simStartTime = frame_reports[i].simStartTimeUs;
+ latency_results->frame_reports[i].simEndTime = frame_reports[i].simEndTimeUs;
+ latency_results->frame_reports[i].renderSubmitStartTime = frame_reports[i].renderSubmitStartTimeUs;
+ latency_results->frame_reports[i].renderSubmitEndTime = frame_reports[i].renderSubmitEndTimeUs;
+ latency_results->frame_reports[i].presentStartTime = frame_reports[i].presentStartTimeUs;
+ latency_results->frame_reports[i].presentEndTime = frame_reports[i].presentEndTimeUs;
+ latency_results->frame_reports[i].driverStartTime = frame_reports[i].driverStartTimeUs;
+ latency_results->frame_reports[i].driverEndTime = frame_reports[i].driverEndTimeUs;
+ latency_results->frame_reports[i].osRenderQueueStartTime = frame_reports[i].osRenderQueueStartTimeUs;
+ latency_results->frame_reports[i].osRenderQueueEndTime = frame_reports[i].osRenderQueueEndTimeUs;
+ latency_results->frame_reports[i].gpuRenderStartTime = frame_reports[i].gpuRenderStartTimeUs;
+ latency_results->frame_reports[i].gpuRenderEndTime = frame_reports[i].gpuRenderEndTimeUs;
+ latency_results->frame_reports[i].gpuActiveRenderTimeUs =
+ frame_reports[i].gpuRenderEndTimeUs - frame_reports[i].gpuRenderStartTimeUs;
+ latency_results->frame_reports[i].gpuFrameTimeUs = 0;
+
+ if (i) {
+ latency_results->frame_reports[i].gpuFrameTimeUs =
+ frame_reports[i].gpuRenderEndTimeUs - frame_reports[i - 1].gpuRenderEndTimeUs;
+ }
+ }
+
+ vkd3d_free(frame_reports);
+ }
+ else
+ {
+ // If there are fewer than 64 frame reports, zero out the frame report
+ // buffer returned to the app.
+ memset(latency_results->frame_reports, 0, sizeof(latency_results->frame_reports));
+ }
+
+ pthread_mutex_unlock(&chain->present.low_latency_lock);
+
+ return S_OK;
+}
+
HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain)
{
chain->IDXGIVkSwapChainFactory_iface.lpVtbl = &dxgi_vk_swap_chain_factory_vtbl;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 8c6a0ffd02..0c4929a258 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -37,6 +37,7 @@
#include "vkd3d_platform.h"
#include "vkd3d_swapchain_factory.h"
#include "vkd3d_command_list_vkd3d_ext.h"
+#include "vkd3d_command_queue_vkd3d_ext.h"
#include "vkd3d_device_vkd3d_ext.h"
#include "vkd3d_string.h"
#include "vkd3d_file_utils.h"
@@ -166,6 +167,7 @@ struct vkd3d_vulkan_info
bool NV_shader_subgroup_partitioned;
bool NV_memory_decompression;
bool NV_device_generated_commands_compute;
+ bool NV_low_latency2;
/* VALVE extensions */
bool VALVE_mutable_descriptor_type;
bool VALVE_descriptor_set_host_mapping;
@@ -2991,6 +2993,7 @@ struct d3d12_command_queue_submission_execute
struct d3d12_command_allocator **command_allocators;
UINT cmd_count;
UINT num_command_allocators;
+ uint64_t frame_id;
struct vkd3d_initial_transition *transitions;
size_t transition_count;
@@ -3046,12 +3049,30 @@ struct dxgi_vk_swap_chain_factory
struct d3d12_command_queue *queue;
};
+struct dxgi_vk_swap_chain;
+
+bool dxgi_vk_swap_chain_low_latency_enabled(struct dxgi_vk_swap_chain* chain);
+HRESULT dxgi_vk_swap_chain_latency_sleep(struct dxgi_vk_swap_chain* chain);
+HRESULT dxgi_vk_swap_chain_set_latency_sleep_mode(struct dxgi_vk_swap_chain* chain,
+ bool low_latency_mode, bool low_latency_boost, uint32_t minimum_interval_us);
+HRESULT dxgi_vk_swap_chain_set_latency_marker(struct dxgi_vk_swap_chain* chain,
+ uint64_t frameID, VkLatencyMarkerNV marker);
+HRESULT dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain,
+ D3D12_LATENCY_RESULTS *latency_results);
+
HRESULT dxgi_vk_swap_chain_factory_init(struct d3d12_command_queue *queue, struct dxgi_vk_swap_chain_factory *chain);
+/* ID3D12CommandQueueExt */
+typedef ID3D12CommandQueueExt d3d12_command_queue_vkd3d_ext_iface;
+
/* ID3D12CommandQueue */
+typedef ID3D12CommandQueue d3d12_command_queue_iface;
+
struct d3d12_command_queue
{
- ID3D12CommandQueue ID3D12CommandQueue_iface;
+ d3d12_command_queue_iface ID3D12CommandQueue_iface;
+ d3d12_command_queue_vkd3d_ext_iface ID3D12CommandQueueExt_iface;
+
LONG refcount;
D3D12_COMMAND_QUEUE_DESC desc;
@@ -4203,6 +4224,19 @@ struct vkd3d_cached_command_allocator
uint32_t vk_family_index;
};
+struct vkd3d_device_swapchain_info
+{
+ struct dxgi_vk_swap_chain* low_latency_swapchain;
+ uint32_t swapchain_count;
+};
+
+struct vkd3d_device_frame_markers
+{
+ uint64_t simulation;
+ uint64_t render;
+ uint64_t present;
+};
+
/* ID3D12Device */
typedef ID3D12Device12 d3d12_device_iface;
@@ -4215,6 +4249,9 @@ typedef ID3D12DeviceExt d3d12_device_vkd3d_ext_iface;
/* ID3D12DXVKInteropDevice */
typedef ID3D12DXVKInteropDevice d3d12_dxvk_interop_device_iface;
+/* ID3DLowLatencyDevice */
+typedef ID3DLowLatencyDevice d3d_low_latency_device_iface;
+
struct d3d12_device_scratch_pool
{
struct vkd3d_scratch_buffer scratch_buffers[VKD3D_MAX_SCRATCH_BUFFER_COUNT];
@@ -4229,6 +4266,7 @@ struct d3d12_device
d3d12_device_iface ID3D12Device_iface;
d3d12_device_vkd3d_ext_iface ID3D12DeviceExt_iface;
d3d12_dxvk_interop_device_iface ID3D12DXVKInteropDevice_iface;
+ d3d_low_latency_device_iface ID3DLowLatencyDevice_iface;
LONG refcount;
VkDevice vk_device;
@@ -4299,6 +4337,9 @@ struct d3d12_device
#endif
uint64_t shader_interface_key;
uint32_t device_has_dgc_templates;
+
+ struct vkd3d_device_swapchain_info swapchain_info;
+ struct vkd3d_device_frame_markers frame_markers;
};
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
@@ -4521,6 +4562,31 @@ UINT d3d12_determine_shading_rate_image_tile_size(struct d3d12_device *device);
bool d3d12_device_supports_required_subgroup_size_for_stage(
struct d3d12_device *device, VkShaderStageFlagBits stage);
+static inline void d3d12_device_register_swapchain(struct d3d12_device* device, struct dxgi_vk_swap_chain* swapchain)
+{
+ if (!device->swapchain_info.low_latency_swapchain &&
+ device->swapchain_info.swapchain_count == 0)
+ {
+ device->swapchain_info.low_latency_swapchain = swapchain;
+ }
+ else
+ {
+ device->swapchain_info.low_latency_swapchain = NULL;
+ }
+
+ device->swapchain_info.swapchain_count++;
+}
+
+static inline void d3d12_device_remove_swapchain(struct d3d12_device* device, struct dxgi_vk_swap_chain* swapchain)
+{
+ if (device->swapchain_info.low_latency_swapchain == swapchain)
+ {
+ device->swapchain_info.low_latency_swapchain = NULL;
+ }
+
+ device->swapchain_info.swapchain_count--;
+}
+
/* ID3DBlob */
struct d3d_blob
{
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h
index 983c06f884..f4bc39d9a6 100644
--- a/libs/vkd3d/vulkan_procs.h
+++ b/libs/vkd3d/vulkan_procs.h
@@ -346,6 +346,13 @@ VK_DEVICE_EXT_PFN(vkSetDeviceMemoryPriorityEXT)
VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryNV)
VK_DEVICE_EXT_PFN(vkCmdDecompressMemoryIndirectCountNV)
+/* VK_NV_low_latency2 */
+VK_DEVICE_EXT_PFN(vkSetLatencySleepModeNV)
+VK_DEVICE_EXT_PFN(vkLatencySleepNV)
+VK_DEVICE_EXT_PFN(vkSetLatencyMarkerNV)
+VK_DEVICE_EXT_PFN(vkGetLatencyTimingsNV)
+VK_DEVICE_EXT_PFN(vkQueueNotifyOutOfBandNV)
+
#undef VK_INSTANCE_PFN
#undef VK_INSTANCE_EXT_PFN
#undef VK_DEVICE_PFN
diff --git a/libs/vkd3d/command_queue_vkd3d_ext.c b/libs/vkd3d/command_queue_vkd3d_ext.c
index 0fba03b..b995a22 100644
--- a/libs/vkd3d/command_queue_vkd3d_ext.c
+++ b/libs/vkd3d/command_queue_vkd3d_ext.c
@@ -76,6 +76,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_vkd3d_ext_NotifyOutOfBandCo
break;
default:
WARN("Invalid queue type %x\n", type);
+ return E_INVALIDARG;
}
queue_info.sType = VK_STRUCTURE_TYPE_OUT_OF_BAND_QUEUE_TYPE_INFO_NV;
diff --git a/libs/vkd3d/device_vkd3d_ext.c b/libs/vkd3d/device_vkd3d_ext.c
index cf102474..986efea3 100644
--- a/libs/vkd3d/device_vkd3d_ext.c
+++ b/libs/vkd3d/device_vkd3d_ext.c
@@ -516,6 +516,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_low_latency_device_SetLatencyMarker(d3d_l
case VK_LATENCY_MARKER_PRESENT_START_NV:
device->frame_markers.present = internal_frame_id;
break;
+ default:
+ break;
}
if (device->swapchain_info.low_latency_swapchain)
diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c
index 627a351..19ce741 100644
--- a/libs/vkd3d/swapchain.c
+++ b/libs/vkd3d/swapchain.c
@@ -2356,7 +2356,6 @@ HRESULT dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, D3
VkGetLatencyMarkerInfoNV marker_info;
VkLatencyTimingsFrameReportNV* frame_reports;
- uint32_t report_count;
uint32_t i;
if (!chain->present.low_latency_state.mode)
@@ -2370,20 +2369,20 @@ HRESULT dxgi_vk_swap_chain_get_latency_info(struct dxgi_vk_swap_chain* chain, D3
pthread_mutex_lock(&chain->present.low_latency_lock);
- VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &report_count, &marker_info));
+ VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &marker_info));
- if (report_count >= 64)
+ if (marker_info.timingCount >= 64)
{
- report_count = 64;
- frame_reports = vkd3d_calloc(report_count, sizeof(VkLatencyTimingsFrameReportNV));
- for (i = 0; i < report_count; i++)
+ marker_info.timingCount = 64;
+ frame_reports = vkd3d_calloc(marker_info.timingCount, sizeof(VkLatencyTimingsFrameReportNV));
+ for (i = 0; i < marker_info.timingCount; i++)
frame_reports[i].sType = VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV;
marker_info.pTimings = frame_reports;
- VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &report_count, &marker_info));
+ VK_CALL(vkGetLatencyTimingsNV(chain->queue->device->vk_device, chain->present.vk_swapchain, &marker_info));
- for (i = 0; i < report_count; i++)
+ for (i = 0; i < marker_info.timingCount; i++)
{
latency_results->frame_reports[i].frameID = frame_reports[i].presentID - 1;
latency_results->frame_reports[i].inputSampleTime = frame_reports[i].inputSampleTimeUs;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment