Skip to content

Instantly share code, notes, and snippets.

@agasiev
Created July 27, 2013 15:34
Show Gist options
  • Save agasiev/6095179 to your computer and use it in GitHub Desktop.
Save agasiev/6095179 to your computer and use it in GitHub Desktop.
Simple Bloom filter implementation.
#include <iostream>
#include <cstdlib>
#include <cmath>
#include <vector>
using namespace std;
int get_array_len(int n, float p) {
return round(-n*log(p)/(log(2)*log(2)));
}
int hash_count(int m, int n) {
return round(log(2) * (float)m/(float)n);
}
int get_hash(int v, int hash_id, int arr_len) {
return (v * hash_id) % arr_len;
}
void addv(vector<bool> & b, int v, int h_cnt) {
for (int i = 1; i <= h_cnt; i++) {
b[get_hash(v, i, b.size())] = true;
}
}
bool check(vector<bool> & b, int v, int h_cnt) {
for (int i = 1; i <= h_cnt; i++) {
if (b[get_hash(v, i, b.size())] != true) return false;
}
return true;
}
int main(int, char**) {
int elements_cnt = 10000;
float probability = 0.001; // Probability of false-positive detections
int len = get_array_len(elements_cnt, probability);
int hcnt = hash_count(len, elements_cnt);
vector<bool> bits(len, false);
int max_value = elements_cnt * 5;
vector<int> a(elements_cnt, 0);
for (int i = 0; i < a.size(); i++) {
a[i] = rand() % max_value;
addv(bits, a[i], hcnt);
}
cout << "Elements count: " << elements_cnt << endl;
cout << "Probability: " << probability << endl;
cout << "Bitset length: " << len << " (" << (len/8) << " bytes of memory)" << endl;
cout << "Hashes count: " << hcnt << endl;
int positive = 0;
int false_positive = 0;
int negative = 0;
int check_count = 1000;
for (int i = 0; i < check_count; i++) {
int v = rand() % max_value;
if (check(bits, v, hcnt)) {
bool exists = false;
for (int j = 0; j < a.size(); j++) {
if (a[j] == v) {
exists = true;
break;
}
}
if (exists) positive++;
else false_positive++;
}
else {
negative++;
}
}
cout << "Positive: " << positive << endl;
cout << "False positive: " << false_positive << endl;
cout << "Negative: " << negative << endl;
cout << "False positive detection ratio: " << ((float)false_positive/(float)(check_count - negative)) << endl;
cout << "Negative ratio: " << ((float)negative/float(check_count)) << endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment