Skip to content

Instantly share code, notes, and snippets.

@standage
Last active May 9, 2018 22:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save standage/00aff6753d67495ec6f345294feb59c1 to your computer and use it in GitHub Desktop.
Save standage/00aff6753d67495ec6f345294feb59c1 to your computer and use it in GitHub Desktop.
A filter class implementing a Bloom filter and Count-min sketch. Compile with `g++ -Wall -Werror -O3 -o test --std=c++11 testfilter.cpp filter.cpp`.
#include <iostream>
#include <assert.h>
#include <cmath>
#include "filter.hpp"
template<typename ElementType, typename CounterType, size_t maxcount>
filter<ElementType, CounterType, maxcount>::filter(std::vector<size_t> array_sizes)
: _cells_occupied(array_sizes.size(), 0),
_arrays(array_sizes.size(), std::vector<CounterType>())
{
for (size_t i = 0; i < array_sizes.size(); i++) {
size_t size = array_sizes[i];
_arrays[i].resize(size, 0);
}
}
template<typename ElementType, typename CounterType, size_t maxcount>
void filter<ElementType, CounterType, maxcount>::add(ElementType element)
{
for (size_t i = 0; i < _arrays.size(); i++){
size_t index = element % _arrays[i].size();
if (_arrays[i][index] == 0) {
_cells_occupied[i] += 1;
}
if (_arrays[i][index] < maxcount) {
_arrays[i][index] = _arrays[i][index] + 1;
}
}
}
template<typename ElementType, typename CounterType, size_t maxcount>
CounterType filter<ElementType, CounterType, maxcount>::get(ElementType element)
{
CounterType mincount = _arrays[0][element % _arrays[0].size()];
for (auto array : _arrays) {
size_t index = element % array.size();
CounterType count = array[index];
if (count == 0) {
// No need to check other arrays if any of them contain a 0
return 0;
}
if (count < mincount) {
mincount = count;
}
}
return mincount;
}
template class filter<int, bool, 1>;
template class filter<int, uint8_t, 255>;
#include <stddef.h>
#include <stdint.h>
#include <vector>
template<typename ElementType, typename CounterType, size_t maxcount>
class filter
{
protected:
std::vector<size_t> _cells_occupied;
std::vector<std::vector<CounterType>> _arrays;
public:
explicit filter(std::vector<size_t> array_sizes);
void add(ElementType element);
CounterType get(ElementType element);
};
#include <cstdlib>
#include <chrono>
#include <iostream>
#include <vector>
#include "filter.hpp"
typedef std::chrono::time_point<std::chrono::system_clock> timepoint;
int main()
{
srand(112358);
std::vector<uint64_t> elements(0);
for (int i = 0; i < 100000; i++) {
int element = rand();
elements.push_back(element);
}
filter<int, uint8_t, 255> counts({499979, 499973, 499969, 499957});
std::cerr << "Being populating filter...";
timepoint start = std::chrono::system_clock::now();
for (auto element : elements) {
counts.add(element);
}
timepoint end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed = end - start;
std::cerr << "done! (" << elapsed.count() << " seconds elapsed)\n";
std::cerr << "Begin querying filter...";
start = std::chrono::system_clock::now();
for (auto element : elements) {
uint8_t count = counts.get(element);
if (count > 1) {
std::cout << "Element " << element << " appears " << (int)count << " times\n";
}
}
end = std::chrono::system_clock::now();
elapsed = end - start;
std::cerr << "done! (" << elapsed.count() << "seconds elapsed)\n";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment