Skip to content

Instantly share code, notes, and snippets.

@Triang3l
Last active September 3, 2017 18:56
Show Gist options
  • Save Triang3l/98e08271b9fbb429da2d9cb3339f286d to your computer and use it in GitHub Desktop.
Save Triang3l/98e08271b9fbb429da2d9cb3339f286d to your computer and use it in GitHub Desktop.
#include "SRDescriptors.h"
#include "../GPU/DescriptorStorage.h"
#include "../Log/Log.h"
#include "../Math/Bit.h"
#include "../Parallel/Threading.h"
#include <algorithm>
#include <cstdint>
#include <cstring>
namespace SecretGame::GFX::SRDescriptors {
// A buddy allocator for shader resource descriptors, inspired by http://bitsquid.blogspot.com/2015/08/allocation-adventures-3-buddy-allocator.html
static GPU::DescriptorStorage *s_Storage;
// Synchronization of GFX and assets. Will be rarely actually locked.
static Parallel::Mutex *s_AllocationLock;
// The smallest block size that can be allocated.
// Must be large enough for easier allocation, but large enough for low fragmentation.
// Ideally, something equal to or larger than an average mesh needs.
static constexpr unsigned int MIN_ALLOCATION_POWER = 2;
static constexpr unsigned int MIN_ALLOCATION = 1 << MIN_ALLOCATION_POWER;
// Number of levels in the power-of-two allocation tree.
static constexpr unsigned int ALLOCATION_LEVEL_COUNT = 15;
// Maximum total number of descriptors loaded at once.
static constexpr unsigned int MAX_DESCRIPTOR_COUNT = MIN_ALLOCATION << (ALLOCATION_LEVEL_COUNT - 1);
// Assuming minimum size 4 and 65536 descriptors, 4-sized are 0-16383, 8 are 16384-24575, and so on.
static constexpr unsigned int NODE_TOTAL_COUNT = ((MAX_DESCRIPTOR_COUNT / MIN_ALLOCATION) << 1) - 1;
static unsigned int s_Nodes[NODE_TOTAL_COUNT];
// A node consists of its type and, for half-free nodes, a circular linked list (with level-relative indices) of half-free nodes on that level.
enum : unsigned int {
NODE_TYPE_FREE, // Completely unused node. 0 due to memset.
NODE_TYPE_HALF_FREE, // Split node, one child is allocated, another child is free and will be used for allocation.
NODE_TYPE_FULL, // Both children are non-free.
NODE_TYPE_DATA // Stores descriptors.
};
static constexpr unsigned int NODE_TYPE_MASK = 0x3;
static constexpr unsigned int NODE_HALF_FREE_PREVIOUS_SHIFT = 2;
static constexpr unsigned int NODE_HALF_FREE_NEXT_SHIFT = NODE_HALF_FREE_PREVIOUS_SHIFT + (ALLOCATION_LEVEL_COUNT - 1);
static constexpr unsigned int NODE_HALF_FREE_LIST_MASK = (1 << (ALLOCATION_LEVEL_COUNT - 1)) - 1;
// Lists of free nodes at each level. If no free node, try to split an upper one.
static unsigned int s_NodeHalfFreeStart[ALLOCATION_LEVEL_COUNT]; // Level-relative, like inside nodes.
static constexpr unsigned int NODE_HALF_FREE_START_NONE = UINT_MAX;
void Initialize() {
s_Storage = GPU::DescriptorStorage::Create(GPU::DescriptorStorage::TYPE_SHADER_RESOURCES, MAX_DESCRIPTOR_COUNT);
if (s_Storage == nullptr) {
// The GPU backend uses direct binding.
return;
}
s_AllocationLock = new Parallel::Mutex;
std::memset(s_Nodes, 0, sizeof(s_Nodes));
std::memset(s_NodeHalfFreeStart, 0xff, sizeof(s_NodeHalfFreeStart));
}
void Shutdown() {
if (s_Storage == nullptr) {
return;
}
delete s_AllocationLock;
s_AllocationLock = nullptr;
delete s_Storage;
s_Storage = nullptr;
}
GPU::DescriptorStorage *GetStorage() {
return s_Storage;
}
static constexpr unsigned int GetFirstNodeAtLevel(unsigned int level) {
return ((1 << ALLOCATION_LEVEL_COUNT) - 1) & ~((1 << (ALLOCATION_LEVEL_COUNT - level)) - 1);
}
static constexpr unsigned int GetLevelRelativeNodeIndexMask(unsigned int level) {
return (1 << ((ALLOCATION_LEVEL_COUNT - 1) - level)) - 1;
}
static void SwitchHalfFreeNodeType(unsigned int level, unsigned int nodeRelativeIndex, unsigned int newType) {
unsigned int levelOffset = GetFirstNodeAtLevel(level);
unsigned int oldNodeData = s_Nodes[levelOffset + nodeRelativeIndex];
LOG_ASSERT((oldNodeData & NODE_TYPE_MASK) == NODE_TYPE_HALF_FREE,
"GFX.SRDescriptors.SwitchHalfFreeNodeType: Allocation node must be half-free.");
unsigned int oldPrevious = (oldNodeData >> NODE_HALF_FREE_PREVIOUS_SHIFT) & NODE_HALF_FREE_LIST_MASK;
unsigned int oldNext = (oldNodeData >> NODE_HALF_FREE_NEXT_SHIFT) & NODE_HALF_FREE_LIST_MASK;
if (oldPrevious == oldNext) {
// Was the only half-free node at its level.
LOG_ASSERT(oldPrevious == s_NodeHalfFreeStart[level],
"GFX.SRDescriptors.Allocate: Self-linked half-free nodes must be in the beginning of the lists.");
s_NodeHalfFreeStart[level] = NODE_HALF_FREE_START_NONE;
} else {
s_Nodes[levelOffset + oldPrevious] = (s_Nodes[levelOffset + oldPrevious] &
~(NODE_HALF_FREE_LIST_MASK << NODE_HALF_FREE_NEXT_SHIFT)) | (oldNext << NODE_HALF_FREE_NEXT_SHIFT);
s_Nodes[levelOffset + oldNext] = (s_Nodes[levelOffset + oldNext] &
~(NODE_HALF_FREE_LIST_MASK << NODE_HALF_FREE_PREVIOUS_SHIFT)) | (oldPrevious << NODE_HALF_FREE_PREVIOUS_SHIFT);
if (s_NodeHalfFreeStart[level] == nodeRelativeIndex) {
s_NodeHalfFreeStart[level] = oldNext;
}
}
s_Nodes[levelOffset + nodeRelativeIndex] = newType;
}
unsigned int Allocate(unsigned int count) {
LOG_ASSERT(s_Storage != nullptr, "GFX.SRDescriptors.Allocate: Descriptor storage must exist.");
if (count == 0) {
return 0;
}
// To simplify allocations because the top level doesn't have parents.
if (count > (MAX_DESCRIPTOR_COUNT >> 1)) {
Log::FatalError("GFX.SRDescriptors.Allocate: Too many descriptors requested (%u, maximum %u).",
count, MAX_DESCRIPTOR_COUNT >> 1);
}
// Align the count to power of 2.
count = std::max(count, MIN_ALLOCATION);
unsigned int level = Math::Bit::MSOnePosition32(count);
if ((count & (count - 1)) != 0) {
++level;
}
count = 1 << level;
level -= MIN_ALLOCATION_POWER;
Parallel::Mutex::ScopedLock lock(*s_AllocationLock);
// Finding a half-free node at the closest upper level.
unsigned int halfFreeLevel;
for (halfFreeLevel = level + 1; halfFreeLevel < ALLOCATION_LEVEL_COUNT; ++halfFreeLevel) {
if (s_NodeHalfFreeStart[halfFreeLevel] != NODE_HALF_FREE_START_NONE) {
break;
}
}
// Bridging the found half-free node and the allocation level with half-free nodes,
// from the widest to the narrowest level without half-free nodes.
bool createdTopNode = false;
if (halfFreeLevel >= ALLOCATION_LEVEL_COUNT) {
// Checking if the storage is full (no half-free found and can't be created).
if ((s_Nodes[NODE_TOTAL_COUNT - 1] & NODE_TYPE_MASK) == NODE_TYPE_FULL) {
Log::FatalError("GFX.SRDescriptors.Allocate: No free space for a block of %u descriptors.", count);
}
s_Nodes[NODE_TOTAL_COUNT - 1] = NODE_TYPE_HALF_FREE; // The top level only has one node with the 0 index, no need to self-link.
s_NodeHalfFreeStart[ALLOCATION_LEVEL_COUNT - 1] = 0;
halfFreeLevel = ALLOCATION_LEVEL_COUNT - 1;
createdTopNode = true;
}
unsigned int parentRelativeNodeIndex = s_NodeHalfFreeStart[halfFreeLevel];
if (!createdTopNode) {
// One child is used already, and another one will be used either for bridging or for data.
SwitchHalfFreeNodeType(halfFreeLevel, parentRelativeNodeIndex, NODE_TYPE_FULL);
}
while (halfFreeLevel > level + 1) {
--halfFreeLevel;
parentRelativeNodeIndex <<= 1;
unsigned int parentNodeIndex = GetFirstNodeAtLevel(halfFreeLevel) + parentRelativeNodeIndex;
if ((s_Nodes[parentNodeIndex] & NODE_TYPE_MASK) != NODE_TYPE_FREE) {
parentRelativeNodeIndex ^= 1;
parentNodeIndex ^= 1;
}
LOG_ASSERT((s_Nodes[parentNodeIndex] & NODE_TYPE_MASK) == NODE_TYPE_FREE,
"GFX.SRDescriptors.Allocate: When bridging, at least one child of a half-free node must be free.");
s_Nodes[parentNodeIndex] = NODE_TYPE_HALF_FREE |
(parentRelativeNodeIndex << NODE_HALF_FREE_PREVIOUS_SHIFT) | (parentRelativeNodeIndex << NODE_HALF_FREE_NEXT_SHIFT);
s_NodeHalfFreeStart[halfFreeLevel] = parentRelativeNodeIndex;
}
// Inserting a new block.
unsigned int nodeIndex = GetFirstNodeAtLevel(level) + (parentRelativeNodeIndex << 1);
if ((s_Nodes[nodeIndex] & NODE_TYPE_MASK) != NODE_TYPE_FREE) {
nodeIndex ^= 1;
}
LOG_ASSERT((s_Nodes[nodeIndex] & NODE_TYPE_MASK) == NODE_TYPE_FREE,
"GFX.SRDescriptors.Allocate: When inserting, at least one child of a half-free node must be free.");
s_Nodes[nodeIndex] = NODE_TYPE_DATA;
return ((nodeIndex & GetLevelRelativeNodeIndexMask(level)) << level) * MIN_ALLOCATION;
}
void Free(unsigned int index) {
LOG_ASSERT(s_Storage != nullptr, "GFX.SRDescriptors.Free: Descriptor storage must exist.");
if ((index & (MIN_ALLOCATION - 1)) != 0 || index >= MAX_DESCRIPTOR_COUNT) {
Log::FatalError("GFX.SRDescriptors.Free: Index %u is illegal - not %u-aligned or out of the %u bound.",
index, MIN_ALLOCATION, MAX_DESCRIPTOR_COUNT);
}
unsigned int widestSearchLevel = (index != 0 ?
Math::Bit::LSOnePosition32(index >> MIN_ALLOCATION_POWER) : ALLOCATION_LEVEL_COUNT - 2);
Parallel::Mutex::ScopedLock lock(*s_AllocationLock);
// Searching for the allocated block at the allocation levels, up to the widest level possible with the index alignment.
unsigned int level, nodeRelativeIndex = index >> MIN_ALLOCATION_POWER;
for (level = 0; level <= widestSearchLevel; ++level) {
if ((s_Nodes[GetFirstNodeAtLevel(level) + nodeRelativeIndex] & NODE_TYPE_MASK) == NODE_TYPE_DATA) {
break;
}
nodeRelativeIndex >>= 1;
}
if (level > widestSearchLevel) {
Log::FatalError("GFX.SRDescriptors.Free: No descriptor block with index %u allocated.", index);
}
// Updating the parents: 1/2 to 0/2 recursively, stop at 2/2 to 1/2.
s_Nodes[GetFirstNodeAtLevel(level) + nodeRelativeIndex] = NODE_TYPE_FREE;
while (level < ALLOCATION_LEVEL_COUNT - 1) {
++level;
nodeRelativeIndex >>= 1;
unsigned int levelOffset = GetFirstNodeAtLevel(level);
unsigned int nodeIndex = levelOffset + nodeRelativeIndex;
unsigned int nodeType = s_Nodes[nodeIndex] & NODE_TYPE_MASK;
if (nodeType != NODE_TYPE_HALF_FREE) {
LOG_ASSERT(nodeType == NODE_TYPE_FULL, "GFX.SRDescriptors.Free: Parent can be either full or half-free.");
// Making the full parent node half-free and inserting it into the half-free list.
unsigned int previous, next;
unsigned int oldHalfFreeRelativeIndex = s_NodeHalfFreeStart[level];
if (oldHalfFreeRelativeIndex == NODE_HALF_FREE_START_NONE) {
previous = next = nodeRelativeIndex;
} else {
unsigned int oldHalfFreeStartData = s_Nodes[levelOffset + oldHalfFreeRelativeIndex];
previous = (oldHalfFreeStartData >> NODE_HALF_FREE_PREVIOUS_SHIFT) & NODE_HALF_FREE_LIST_MASK;
next = (oldHalfFreeStartData >> NODE_HALF_FREE_NEXT_SHIFT) & NODE_HALF_FREE_LIST_MASK;
s_Nodes[levelOffset + previous] = (s_Nodes[levelOffset + previous] &
~(NODE_HALF_FREE_LIST_MASK << NODE_HALF_FREE_NEXT_SHIFT)) | (nodeRelativeIndex << NODE_HALF_FREE_NEXT_SHIFT);
s_Nodes[levelOffset + next] = (s_Nodes[levelOffset + next] &
~(NODE_HALF_FREE_LIST_MASK << NODE_HALF_FREE_PREVIOUS_SHIFT)) | (nodeRelativeIndex << NODE_HALF_FREE_PREVIOUS_SHIFT);
}
s_Nodes[nodeIndex] = NODE_TYPE_HALF_FREE | (previous << NODE_HALF_FREE_PREVIOUS_SHIFT) | (next << NODE_HALF_FREE_NEXT_SHIFT);
s_NodeHalfFreeStart[level] = nodeRelativeIndex;
break;
}
// Making the half-free parent node free and going up.
SwitchHalfFreeNodeType(level, nodeRelativeIndex, NODE_TYPE_FREE);
}
}
}
namespace SecretGame::GFX {
unsigned int Assets_AllocateSRDescriptors(unsigned int count) {
return SRDescriptors::Allocate(count);
}
void Assets_FreeSRDescriptors(unsigned int index) {
SRDescriptors::Free(index);
}
GPU::DescriptorStorage *Assets_GetSRDescriptorStorage() {
return SRDescriptors::GetStorage();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment