Last active
May 9, 2018 22:15
-
-
Save standage/00aff6753d67495ec6f345294feb59c1 to your computer and use it in GitHub Desktop.
A filter class implementing a Bloom filter and Count-min sketch. Compile with `g++ -Wall -Werror -O3 -o test --std=c++11 testfilter.cpp filter.cpp`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <assert.h> | |
#include <cmath> | |
#include "filter.hpp" | |
template<typename ElementType, typename CounterType, size_t maxcount> | |
filter<ElementType, CounterType, maxcount>::filter(std::vector<size_t> array_sizes) | |
: _cells_occupied(array_sizes.size(), 0), | |
_arrays(array_sizes.size(), std::vector<CounterType>()) | |
{ | |
for (size_t i = 0; i < array_sizes.size(); i++) { | |
size_t size = array_sizes[i]; | |
_arrays[i].resize(size, 0); | |
} | |
} | |
template<typename ElementType, typename CounterType, size_t maxcount> | |
void filter<ElementType, CounterType, maxcount>::add(ElementType element) | |
{ | |
for (size_t i = 0; i < _arrays.size(); i++){ | |
size_t index = element % _arrays[i].size(); | |
if (_arrays[i][index] == 0) { | |
_cells_occupied[i] += 1; | |
} | |
if (_arrays[i][index] < maxcount) { | |
_arrays[i][index] = _arrays[i][index] + 1; | |
} | |
} | |
} | |
template<typename ElementType, typename CounterType, size_t maxcount> | |
CounterType filter<ElementType, CounterType, maxcount>::get(ElementType element) | |
{ | |
CounterType mincount = _arrays[0][element % _arrays[0].size()]; | |
for (auto array : _arrays) { | |
size_t index = element % array.size(); | |
CounterType count = array[index]; | |
if (count == 0) { | |
// No need to check other arrays if any of them contain a 0 | |
return 0; | |
} | |
if (count < mincount) { | |
mincount = count; | |
} | |
} | |
return mincount; | |
} | |
template class filter<int, bool, 1>; | |
template class filter<int, uint8_t, 255>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> | |
#include <stdint.h> | |
#include <vector> | |
template<typename ElementType, typename CounterType, size_t maxcount> | |
class filter | |
{ | |
protected: | |
std::vector<size_t> _cells_occupied; | |
std::vector<std::vector<CounterType>> _arrays; | |
public: | |
explicit filter(std::vector<size_t> array_sizes); | |
void add(ElementType element); | |
CounterType get(ElementType element); | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdlib> | |
#include <chrono> | |
#include <iostream> | |
#include <vector> | |
#include "filter.hpp" | |
typedef std::chrono::time_point<std::chrono::system_clock> timepoint; | |
int main() | |
{ | |
srand(112358); | |
std::vector<uint64_t> elements(0); | |
for (int i = 0; i < 100000; i++) { | |
int element = rand(); | |
elements.push_back(element); | |
} | |
filter<int, uint8_t, 255> counts({499979, 499973, 499969, 499957}); | |
std::cerr << "Being populating filter..."; | |
timepoint start = std::chrono::system_clock::now(); | |
for (auto element : elements) { | |
counts.add(element); | |
} | |
timepoint end = std::chrono::system_clock::now(); | |
std::chrono::duration<double> elapsed = end - start; | |
std::cerr << "done! (" << elapsed.count() << " seconds elapsed)\n"; | |
std::cerr << "Begin querying filter..."; | |
start = std::chrono::system_clock::now(); | |
for (auto element : elements) { | |
uint8_t count = counts.get(element); | |
if (count > 1) { | |
std::cout << "Element " << element << " appears " << (int)count << " times\n"; | |
} | |
} | |
end = std::chrono::system_clock::now(); | |
elapsed = end - start; | |
std::cerr << "done! (" << elapsed.count() << "seconds elapsed)\n"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment