Skip to content

Instantly share code, notes, and snippets.

@badamczewski
Created December 1, 2020 13:08
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save badamczewski/42ec5d3aabd47c32684cdb87851f8a51 to your computer and use it in GitHub Desktop.
Save badamczewski/42ec5d3aabd47c32684cdb87851f8a51 to your computer and use it in GitHub Desktop.
BloomFilter Source Code
using System;
using System.Collections.Generic;
using System.Text;
namespace ProbabilisticDataStructures.DataStructures
{
public class BitSet
{
private ulong[] bitset;
public int Size { get; private set; }
public BitSet(int size)
{
bitset = new ulong[(size / 64) + 1];
Size = size;
}
public void Add(int index)
{
bitset[index / 64] |= (1u << (index % 64));
}
public bool Contains(int index)
{
return (bitset[index / 64] & (1u << (index % 64))) != 0;
}
}
}
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
namespace ProbabilisticDataStructures.DataStructures
{
public class BloomFilter
{
public readonly int _hashFunctionCount;
public readonly BitSet _hashBits;
public readonly HashFunction _getHashSecondary;
public delegate uint HashFunction(int input);
public BloomFilter(int capacity)
: this(capacity, 0.01f)
{
}
public BloomFilter(int capacity, float errorRate)
: this(BestM(capacity, errorRate), BestK(capacity, errorRate))
{
}
public BloomFilter(int m, int k)
{
this._getHashSecondary = HashFunctions.HashInt32Shift;
this._hashFunctionCount = k;
this._hashBits = new BitSet(m);
}
public void Add(int item)
{
uint primaryHash = HashFunctions.HashInt32Jenkins(item);
uint secondaryHash = this._getHashSecondary(item);
for (int i = 1; i <= this._hashFunctionCount; i++)
{
uint hash = this.ComputeHash(primaryHash, secondaryHash, i);
this._hashBits.Add((int)hash);
}
}
public bool Contains(int item)
{
uint primaryHash = HashFunctions.HashInt32Jenkins(item);
uint secondaryHash = this._getHashSecondary(item);
for (int i = 1; i <= this._hashFunctionCount; i++)
{
uint hash = this.ComputeHash(primaryHash, secondaryHash, i);
if (this._hashBits.Contains((int)hash) == false)
{
return false;
}
}
return true;
}
public static int BestK(int capacity, float errorRate)
{
return (int)Math.Round(Math.Log(2.0) * BestM(capacity, errorRate) / capacity);
}
public static int BestM(int capacity, float errorRate)
{
return (int)Math.Ceiling(capacity * Math.Log(errorRate, (1.0 / Math.Pow(2, Math.Log(2.0)))));
}
public static double ErrorRate(int capacity, int m, int k)
{
var x = 1 - Math.Pow(Math.E, -k * (double)capacity / (double)m);
return Math.Pow(x, k);
}
public static float BestErrorRate(int capacity, int m)
{
float c = (float)(1.0 / capacity);
if (c != 0)
{
return c;
}
// default
// http://www.cs.princeton.edu/courses/archive/spring02/cs493/lec7.pdf
return (float)Math.Pow(0.6185, m / capacity);
}
/// <summary>
/// Performs Dillinger and Manolios double hashing.
/// </summary>
/// <param name="primaryHash"> The primary hash. </param>
/// <param name="secondaryHash"> The secondary hash. </param>
/// <param name="i"> The i. </param>
/// <returns> The <see cref="int"/>. </returns>
private uint ComputeHash(uint primaryHash, uint secondaryHash, int i)
{
var c = (primaryHash + ((uint)i * secondaryHash));
uint resultingHash = (uint)(c % (this._hashBits.Size));
return resultingHash;
}
}
}
using System;
using System.Collections.Generic;
using System.Text;
namespace ProbabilisticDataStructures.DataStructures
{
public static class HashFunctions
{
/// <summary>
/// Hashes a 32-bit signed int using Thomas Wang's method v3.1 (http://www.concentric.net/~Ttwang/tech/inthash.htm).
/// Runtime is suggested to be 11 cycles.
/// </summary>
/// <param name="input">The integer to hash.</param>
/// <returns>The hashed result.</returns>
public static uint HashInt32Shift(int input)
{
uint x = (uint)input;
unchecked
{
x = ~x + (x << 15); // x = (x << 15) - x- 1, as (~x) + y is equivalent to y - x - 1 in two's complement representation
x = x ^ (x >> 12);
x = x + (x << 2);
x = x ^ (x >> 4);
x = x * 2057; // x = (x + (x << 3)) + (x<< 11);
x = x ^ (x >> 16);
return x;
}
}
public static uint HashInt32Jenkins(int input)
{
uint a = (uint)input;
unchecked
{
a = (a + 0x7ed55d16) + (a << 12);
a = (a ^ 0xc761c23c) ^ (a >> 19);
a = (a + 0x165667b1) + (a << 5);
a = (a + 0xd3a2646c) ^ (a << 9);
a = (a + 0xfd7046c5) + (a << 3);
a = (a ^ 0xb55a4f09) ^ (a >> 16);
return a;
}
}
public static int HashInt32FNV1a(int input)
{
uint x = (uint)input;
unchecked
{
uint fnvPrime = 16777619;
uint hash = 2166136261;
hash = (x >> 24) ^ hash;
hash = (x >> 24) * fnvPrime;
hash = (x >> 16) ^ hash;
hash = (x >> 16) * fnvPrime;
hash = (x >> 8) ^ hash;
hash = (x >> 8) * fnvPrime;
hash = (x) ^ hash;
hash = (x) * fnvPrime;
return (int)hash;
}
}
/// <summary>
/// Hashes a string using Bob Jenkin's "One At A Time" method from Dr. Dobbs (http://burtleburtle.net/bob/hash/doobs.html).
/// Runtime is suggested to be 9x+9, where x = input.Length.
/// </summary>
/// <param name="input">The string to hash.</param>
/// <returns>The hashed result.</returns>
public static int HashString(string input)
{
string s = input as string;
int hash = 0;
for (int i = 0; i < s.Length; i++)
{
hash += s[i];
hash += (hash << 10);
hash ^= (hash >> 6);
}
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash;
}
}
}
@badamczewski
Copy link
Author

???

@juliusfriedman
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment