Last active
August 29, 2015 14:09
-
-
Save markpapadakis/f985d05710d0819250ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A simple LUT backed index for faster binary search, with LUT partition encoding | |
// A bloom filter can be attached to it, for when you have many, many values - though in practice | |
// it is rarely needed, especially if the bits(resolution) is over 16 | |
// | |
// You need (1 << bits) * sizeof(T::key) for the lut, e.g if T::key is uint32_t, for a 16bits LUT, that's | |
// 262k. Increasing resolution results in higher lookup efficiency, reducing it results in lower memory requirements | |
template<typename T> | |
static __attribute__((always_inline)) constexpr T MSB(const T n, const uint8_t span) | |
{ | |
return n >> ((sizeof(T) * 8) - span); | |
} | |
template<typename T> | |
struct Index | |
{ | |
Vector<T> list; | |
using key_t = decltype(T::key); | |
static constexpr key_t keyMax = (key_t)(uint32_t)((1U << (sizeof(key_t) * 8)) - 1); | |
// TODO: provide sizeBase in constructor or default to this computed value | |
static constexpr key_t sizeBase = (key_t)(uint32_t)((1U << ((sizeof(key_t) * 8) - 1)) - 1); | |
key_t *lut; | |
const uint8_t res; | |
const uint32_t lutSize; | |
uint8_t padding = 0; | |
Index(const uint8_t r) | |
: res{r}, lutSize{1U<<r} | |
{ | |
lut = (key_t *)malloc(sizeof(key_t) * lutSize); | |
for (uint32_t i = 0; i != lutSize; ++i) | |
lut[i] = keyMax; | |
} | |
const T *Lookup(const key_t key) const | |
{ | |
const auto lutIndex = MSB(((uint32_t)key) << padding, res); | |
const auto v = lut[lutIndex]; | |
if (v >= sizeBase) | |
return NULL; | |
int32_t btm = v; | |
int32_t top; | |
if (lutIndex == lutSize - 1) | |
{ | |
top = list.size() - 1; | |
} | |
else | |
{ | |
const auto next = lut[lutIndex + 1]; | |
if (next >= sizeBase) | |
{ | |
const auto size = next - sizeBase; | |
top = (btm + size) - 1; | |
} | |
else | |
{ | |
top = next - 1; | |
} | |
} | |
const auto *const all = list.Values(); | |
// Binary search in [btm, top] | |
while (btm <= top) | |
{ | |
const int32_t mid = (btm + top) / 2; | |
const auto *const r = all + mid; | |
if (key == r->key) | |
return r; | |
else if (key < r->key) | |
top = mid - 1; | |
else | |
btm = mid + 1; | |
} | |
return NULL; | |
} | |
void Commit(void) // Make sure list[] is sorted | |
{ | |
const auto cnt = list.Size(); | |
if (!cnt) | |
return; | |
const auto maxKey = list.LastValue().key; | |
const auto *const all = list.Values(); | |
auto *const lutPartitionSize = (key_t *)calloc(lutSize, sizeof(key_t)); | |
padding = __builtin_clz(maxKey); | |
for (uint32_t i = 0; i != cnt; ) | |
{ | |
const auto k = (uint32_t)all[i].key; | |
const auto a = k << padding; | |
const uint32_t lutIndex = MSB(a, res); | |
const auto base = i; | |
for (lut[lutIndex] = i, ++i; i != cnt && MSB(all[i].key << padding, res) == lutIndex; ++i) | |
continue; | |
lutPartitionSize[lutIndex] = i - base; | |
} | |
// Patch lut[]. | |
// No value may have been mapped to lut[index + 1]; we need to account for that | |
// In those cases, encode lut[index] partition size into lut[index + 1] | |
const auto last = lutSize - 1; | |
for (uint32_t i = 0; i < last; ) | |
{ | |
if (lut[i] != keyMax && lut[i + 1] == keyMax) | |
{ | |
lut[i + 1] = lutPartitionSize[i] + sizeBase; | |
i+=2; | |
} | |
else | |
++i; | |
} | |
free(lutPartitionSize); | |
} | |
}; | |
struct document | |
{ | |
uint32_t key; | |
uint8_t type; | |
uint32_t owner; | |
}; | |
int main(int argc, char *argv[]) | |
{ | |
Index<document> index(12); | |
// Add all documents in index.list[] | |
// Make sure it's sorted when done, e.g | |
// std::sort(index.list.begin(), index.list.end(), [](const document &d1, const document &d2) { return d1.key < d2.key; }); | |
index.Commit(); | |
auto *const res = index.Lookup(8100); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment