Skip to content

Instantly share code, notes, and snippets.

@vbezhenar
Created August 23, 2022 20:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vbezhenar/6289ef718f2e14725bf33cbbc8c6d7a2 to your computer and use it in GitHub Desktop.
Save vbezhenar/6289ef718f2e14725bf33cbbc8c6d7a2 to your computer and use it in GitHub Desktop.
import murmur332 from './murmur332.js';
// https://en.wikipedia.org/wiki/Bloom_filter
// https://hur.st/bloomfilter/
export default class BloomFilter {
#data;
#dataBitMask;
#hashCount;
/**
* Total number of bits: `(2 ** dataLengthP2) * 32`
* @param dataLengthP2 log2 size of the data array.
* Must be between 0 and 25.
* @param hashCount must be > 0
*/
constructor(dataLengthP2, hashCount) {
if (!(dataLengthP2 >= 0 && dataLengthP2 <= 25)) throw new Error('dataLengthP2 must be between 0 and 25');
if (!(hashCount > 0)) throw new Error('hashCount must bet > 0');
/* eslint-disable no-bitwise */
const dataLength = 1 << dataLengthP2;
const dataBitMask = (1 << (dataLengthP2 + 5)) - 1;
/* eslint-enable no-bitwise */
this.#data = new Int32Array(dataLength);
this.#dataBitMask = dataBitMask;
this.#hashCount = hashCount;
}
add(value) {
const valueArray = BloomFilter.#valueToInt32Array(value);
const data = this.#data;
const dataBitMask = this.#dataBitMask;
const hashCount = this.#hashCount;
for (let i = 0; i < hashCount; i += 1) {
const hashValue = murmur332(valueArray, i);
/* eslint-disable no-bitwise */
const dataBitIndex = hashValue & dataBitMask;
const dataItemIndex = dataBitIndex >> 5;
const itemBitIndex = dataBitIndex & 0x1f;
data[dataItemIndex] |= (1 << itemBitIndex);
/* eslint-enable no-bitwise */
}
}
has(value) {
const valueArray = BloomFilter.#valueToInt32Array(value);
const data = this.#data;
const dataBitMask = this.#dataBitMask;
const hashCount = this.#hashCount;
for (let i = 0; i < hashCount; i += 1) {
const hashValue = murmur332(valueArray, i);
/* eslint-disable no-bitwise */
const dataBitIndex = hashValue & dataBitMask;
const dataItemIndex = dataBitIndex >> 5;
const itemBitIndex = dataBitIndex & 0x1f;
if ((data[dataItemIndex] & (1 << itemBitIndex)) === 0) return false;
/* eslint-enable no-bitwise */
}
return true;
}
static #valueToInt32Array(value) {
switch (typeof value) {
case 'string':
return BloomFilter.#stringToInt32Array(value);
default:
throw new Error(`unsupported value type: ${typeof value}`);
}
}
static #stringToInt32Array(string) {
const stringLength = string.length;
if (stringLength === 0) return new Int32Array(0);
const arrayLength = Math.trunc((stringLength - 1) / 4) + 1;
const array = new Int32Array(arrayLength);
let nextArrayItem = 0;
let nextCharShift = 0;
let nextArrayIndex = 0;
for (let charIndex = 0; charIndex < stringLength; charIndex += 1) {
const charCode = string.charCodeAt(charIndex);
if (charCode > 127) throw new Error(`unsupported string "${string}": only ASCII strings are supported`);
/* eslint-disable no-bitwise */
nextArrayItem |= charCode << nextCharShift;
/* eslint-enable no-bitwise */
if (charIndex % 4 === 3) {
array[nextArrayIndex] = nextArrayItem;
nextArrayItem = 0;
nextCharShift = 0;
nextArrayIndex += 1;
} else {
nextCharShift += 8;
}
}
if (nextArrayItem !== 0) array[nextArrayIndex] = nextArrayItem;
return array;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment