Skip to content

Instantly share code, notes, and snippets.

@dgabriele
Created August 11, 2012 18:59
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgabriele/3326413 to your computer and use it in GitHub Desktop.
Save dgabriele/3326413 to your computer and use it in GitHub Desktop.
lock-free Bloom filter with quiescent consistency
#include <iostream>
#include <atomic>
#include <vector>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <stdint.h>
#include <limits.h>
class bloom_filter
{
typedef std::atomic_uchar atomic_byte;
static inline unsigned H1(const unsigned x) {
return (1583458089U * x);
}
static inline unsigned H2(const unsigned x) {
return (784588716U * x);
}
inline unsigned HASH(unsigned h1, unsigned h2, unsigned i) {
return ((h1 + i * h2 + ((i*(i-1)) >> 1)) % m_m);
}
const unsigned m_cap;
const unsigned m_m;
const unsigned m_k;
const unsigned m_bpe;
const unsigned m_len;
atomic_byte* m_vec;
public:
explicit bloom_filter(unsigned cap, unsigned bpe)
: m_cap(1 << (unsigned)ceil(log(cap)/log(2))),
m_m (m_cap * bpe),
m_k ((unsigned)ceil((0.693f * bpe))),
m_bpe (bpe),
m_len ((unsigned)ceil((cap * bpe) / 8.f)),
m_vec (new atomic_byte[m_len])
{
}
~bloom_filter()
{
delete[] m_vec;
}
bool contains(unsigned x)
{
unsigned h1 = H1(x);
unsigned h2 = H2(x);
for (unsigned t = 0; t < m_k; ++t)
{
unsigned addr = HASH(h1, h2, t+1);
unsigned index = addr >> 3;
unsigned offset = 1 << (addr & 7);
if ( ! (m_vec[index] & offset) )
return (false);
}
return (true);
}
void insert(unsigned x)
{
unsigned h1 = H1(x);
unsigned h2 = H2(x);
for (unsigned t = 0; t < m_k; ++t)
{
unsigned addr = HASH(h1, h2, t+1);
unsigned index = addr >> 3;
unsigned char offset = 1 << (addr & 7);
unsigned char byte = m_vec[index];
do {
if ( ! (byte & offset) ) {
if (!m_vec[index].compare_exchange_weak(byte, byte|offset)) {
byte = m_vec[index];
continue;
}
} break;
} while (true);
}
}
};
int main()
{
unsigned fp = 0; // false positive count
unsigned cap = 1 << 15; // bloom filter capacity until fp is suboptimal
unsigned bpe = 6; // bits per element
bloom_filter bf(cap, bpe);
for (unsigned i=1; i<cap; ++i)
bf.insert(i);
// query the next `cap' elements, counting the
// number of positives. these are false positives.
for (unsigned i=cap; i<(cap << 1)-1; ++i)
fp += bf.contains(i);
std::cout << "Pr[false positive] = " << ((float)fp / cap) << std::endl;
return (0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment