Skip to content

Instantly share code, notes, and snippets.

@tcantenot
Forked from dwilliamson/PerfectHash.cpp
Created January 24, 2021 13:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tcantenot/f58a35b131b00017cfd0cfe33a95c437 to your computer and use it in GitHub Desktop.
Save tcantenot/f58a35b131b00017cfd0cfe33a95c437 to your computer and use it in GitHub Desktop.
Trivial perfect hash generator that uses only a single array for runtime lookup with next to no ALU. Very good for small table sizes (e.g. < 128). Very bad for larger table sizes. This has been quickly "STL-ified" to remove use of my own containers.
uint32_t NextPow2(uint32_t v)
{
v--;
v |= (v >> 1);
v |= (v >> 2);
v |= (v >> 4);
v |= (v >> 8);
v |= (v >> 16);
v++;
return v;
}
uint32_t Hash(uint32_t key, uint32_t shifter)
{
#if 1
// This version is negligible ALU but leads to bigger tables and, perhaps, worse cache performance
return key << shifter;
#else
// This hash mix version is more ALU but generates much smaller tables, leading to better cache performance
return key ^ ((key << shifter) + 0x9E3779B9 + (key << 6) + (key >> 2));
#endif
}
class clcpp_attr(reflect) PerfectHash
{
public:
// max_table_size: Maximum size of the internal array indexed by hashed keys
// max_multiplier: Max shifter applied to key before generating an array index.
//
// Increasing any of these increases the startup search time for an exact hash table.
PerfectHash(uint32_t max_table_size, uint32_t max_shifter)
: m_maxTableSize(max_table_size)
, m_maxShifter(max_shifter)
{
}
// Pack the key for the build step
void Preload(uint32_t key, void* data)
{
m_preloadEntries.push_back({data, key});
}
void Build()
{
assert(m_shifter == -1);
assert(m_tableSize == 0);
// Check power-of-two table sizes
uint32_t nb_keys = m_preloadEntries.size();
for (uint32_t table_size = NextPow2(nb_keys); table_size < m_maxTableSize; table_size *= 2)
{
std::vector<bool> taken(table_size);
// Check all values of shifter
for (uint32_t shifter = 0; shifter < m_maxShifter; shifter++)
{
// An array of flags signaling if a key has already hashed to this location
std::fill(taken.begin(), taken.end(), false);
// Generate array locations for each key while checking for collisions
bool collision = false;
for (uint32_t key_index = 0; key_index < nb_keys; key_index++)
{
uint32_t key = m_preloadEntries[key_index].key;
uint32_t index = Hash(key, shifter) & (table_size - 1);
if (taken[index])
{
collision = true;
break;
}
taken[index] = true;
}
// If there were no collisions, this is a valid table size/multiplier pair
if (!collision)
{
m_shifter = shifter;
break;
}
}
// Final early-out check for a valid pair
if (m_shifter != -1)
{
m_tableSize = table_size;
break;
}
}
assert(m_shifter >= 0);
assert(m_tableSize > 0);
// Build the exact hash tables
m_hashedDataArray.resize(m_tableSize);
for (const Entry& entry : m_preloadEntries)
{
uint32_t index = Hash(entry.key, m_shifter) & (m_tableSize - 1);
m_hashedDataArray[index] = entry.data;
}
}
void* Get(uint32_t key) const
{
uint32_t index = Hash(key, m_shifter) & (m_tableSize - 1);
return m_hashedDataArray[index];
}
private:
// Key/data pair
struct clcpp_attr(reflect) Entry
{
void* data;
uint32_t key;
};
// Construction input
const uint32_t m_maxTableSize;
const uint32_t m_maxShifter;
// Preloaded list of key/data pairs before adding to the hash
std::vector<Entry> m_preloadEntries;
// Generated key shifter and exact hash table size
uint32_t m_shifter = -1;
uint32_t m_tableSize = 0;
// Runtime lookup array
std::vector<void*> m_hashedDataArray;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment