Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active Jun 18, 2020
Embed
What would you like to do?
[JavaScript]CuckooHash/Cuckoo Filter implementations
import {sha1} from "./sha1.js";
const te = new TextEncoder();
const eq = (a, b) => a && b && a.length === b.length &&
a.every((v, i) => v === b[i]);
// cuckoo filter: https://en.wikipedia.org/wiki/Cuckoo_filter
export const CuckooFilter = class {
constructor({
fingerprintSize = 4, tableSize = 17, pushLimit = 128, hash = sha1
} = {}) {
this.fingerprintSize = fingerprintSize;
this.tableSize = tableSize;
this.pushLimit = pushLimit;
this.hash = hash;
this.table = Array(tableSize);
this.size = 0;
}
add(item) {
const hash = this.hash(te.encode(String(item)));
let fp = new Uint8Array(hash).slice(0, this.fingerprintSize);
let h = new Uint32Array(hash)[0];
for (let i = 0; i <= this.pushLimit; i++) {
const idx = h % this.tableSize;
if (!this.table[idx]) {
this.table[idx] = fp;
this.size++;
return true;
}
if (eq(fp, this.table[idx])) return true;
[fp, this.table[idx]] = [this.table[idx], fp];
h = h ^ new Uint32Array(this.hash(fp))[0];
}
return false;
}
has(item) {
const hash = this.hash(te.encode(String(item)));
const fp = new Uint8Array(hash).slice(0, this.fingerprintSize);
const h0 = new Uint32Array(hash)[0];
if (eq(fp, this.table[h0 % this.tableSize])) return true;
const h1 = h0 ^ new Uint32Array(this.hash(fp))[0];
if (eq(fp, this.table[h1 % this.tableSize])) return true;
return false;
}
delete(item) {
const hash = this.hash(te.encode(String(item)));
const fp = new Uint8Array(hash).slice(0, this.fingerprintSize);
const h0 = new Uint32Array(hash)[0];
if (eq(fp, this.table[h0 % this.tableSize])) {
delete this.table[h0 % this.tableSize];
this.size--;
return true;
}
const h1 = h0 ^ new Uint32Array(this.hash(fp))[0];
if (eq(fp, this.table[h1 % this.tableSize])) {
delete this.table[h1 % this.tableSize];
this.size--;
return true;
}
return false;
}
};
import {assert, assertEquals} from "https://deno.land/std/testing/asserts.ts";
import {CuckooFilter} from "./cuckoo-filter.js";
Deno.test("basic CuckooFilter", () => {
const f = new CuckooFilter();
f.add("foo");
f.add("bar");
f.add("buzz");
f.add("buzz");
//console.log(f.table);
assertEquals(f.size, 3);
assert(f.has("foo"));
assert(f.has("bar"));
assert(f.has("buzz"));
f.delete("bar");
//console.log(f.table);
assertEquals(f.size, 2);
assert(!f.has("bar"));
});
Deno.test("small CuckooFilter", () => {
const f = new CuckooFilter({tableSize: 5});
f.add("foo");
f.add("bar");
f.add("buzz");
f.add("quux");
//console.log(f.table);
assertEquals(f.size, 4);
assert(f.has("foo"));
assert(f.has("bar"));
assert(f.has("buzz"));
assert(f.has("quux"));
});
import {sha1} from "./sha1.js";
const te = new TextEncoder();
// hasher(key) returns an integer array which has at least 2 elements
export const sha1Hasher =
key => new Uint32Array(sha1(te.encode(key)));
// CuckooHash: https://en.wikipedia.org/wiki/Cuckoo_hashing
export const CuckooHash = class {
constructor({init = 16, pushLimit = 32, hasher = sha1Hasher} = {}) {
this.init = init;
this.pushLimit = pushLimit;
this.hasher = hasher;
this.clear();
}
// ES Map methods (but not keeped inserted order)
*entries() {yield* this.table.flat().map(({key, value}) => [key, value]);}
*keys() {yield* this.table.flat().map(({key}) => key);}
*values() {yield* this.table.flat().map(({value}) => value);}
forEach(...args) {return [...this.entries()].forEach(...args);}
[Symbol.iterator]() {return this.entries();}
clear() {
this.size = 0;
this.table = [...Array(this.init)].map(_ => Array(2));
}
has(key) {
const [h1, h2] = this.hasher(String(key));
const e1 = this.table[h1 % this.table.length][0];
if (e1 && String(e1.key) === String(key)) return true;
const e2 = this.table[h2 % this.table.length][1];
if (e2 && String(e2.key) === String(key)) return true;
return false;
}
get(key) {
const [h1, h2] = this.hasher(String(key));
const e1 = this.table[h1 % this.table.length][0];
if (e1 && String(e1.key) === String(key)) return e1.value;
const e2 = this.table[h2 % this.table.length][1];
if (e2 && String(e2.key) === String(key)) return e2.value;
return undefined;
}
delete(key) {
const [h1, h2] = this.hasher(String(key));
const e1 = this.table[h1 % this.table.length][0];
if (e1 && String(e1.key) === String(key)) {
delete this.table[h1 % this.table.length][0];
this.size--;
return true;
}
const e2 = this.table[h2 % this.table.length][1];
if (e2 && String(e2.key) === String(key)) {
delete this.table[h2 % this.table.length][1];
this.size--;
return true;
}
return false;
}
set(key, value) {
if (this.size >= this.table.length) this.grow();
const [h1, h2] = this.hasher(String(key));
const old = this.setEntry(key, value, h1, h2);
if (!old) this.size++;
return this;
}
// privates: each entry holds hash values
setEntry(key, value, h1, h2) {
let entry = {key, value, h1, h2}, turn = 0;
while (true) {
for (let i = 0; i < this.pushLimit; i++) {
const idx = (turn === 0 ? entry.h1 : entry.h2) % this.table.length;
const doPush = this.table[idx][turn] &&
String(this.table[idx][turn].key) !== String(entry.key);
[entry, this.table[idx][turn]] = [this.table[idx][turn], entry];
if (doPush) {
turn = turn ^ 1;
} else {
return entry;
}
}
this.grow();
}
}
grow() {
const es = this.table.flat();
this.table = [...Array(this.table.length * 2)].map(_ => Array(2));
for (const {key, value, h1, h2} of es) {
this.setEntry(key, value, h1, h2);
}
}
}
// $ deno test
import {assert, assertEquals} from "https://deno.land/std/testing/asserts.ts";
import {CuckooHash} from "./cuckoo-hash.js";
Deno.test("basic CuckooHash", () => {
const m = new CuckooHash();
m.set("foo", 1);
m.set("bar", 2);
m.set("buzz", 3);
m.delete("bar");
m.set("buzz", 4);
assertEquals(m.size, 2);
assertEquals(m.get("foo"), 1);
assert(!m.has("bar"));
assertEquals(m.get("buzz"), 4);
});
Deno.test("growing CuckooHash", () => {
const m = new CuckooHash({init: 1});
m.set("foo", 1);
m.set("bar", 2);
//console.log(m.table);
m.set("buzz", 3);
//console.log(m.table);
m.set("quux", 4);
//console.log(m.table);
assertEquals(m.size, 4);
});
const hs = Array.from(Array(16), (_, i) => i.toString(16));
const hsr = hs.slice().reverse();
const h2s = hs.join("").match(/../g), h2sr = hsr.join("").match(/../g);
const h2mix = hs.map((h, i) => `${hsr[i]}${h}`);
const hseq = h2s.concat(h2sr, h2mix).map(hex => parseInt(hex, 16));
const H = new Uint32Array(Uint8Array.from(hseq.slice(0, 20)).buffer);
const K = Uint32Array.from(
[2, 3, 5, 10], v => Math.floor(Math.sqrt(v) * (2 ** 30)));
const F = [
(b, c, d) => ((b & c) | ((~b >>> 0) & d)) >>> 0,
(b, c, d) => b ^ c ^ d,
(b, c, d) => (b & c) | (b & d) | (c & d),
(b, c, d) => b ^ c ^ d,
];
function rotl(v, n) {
return ((v << n) | (v >>> (32 - n))) >>> 0;
}
export function sha1(buffer) {
const u8a = ArrayBuffer.isView(buffer) ?
new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength) :
new Uint8Array(buffer);
const total = Math.ceil((u8a.length + 9) / 64) * 64;
const chunks = new Uint8Array(total);
chunks.set(u8a);
chunks.fill(0, u8a.length);
chunks[u8a.length] = 0x80;
const lenbuf = new DataView(chunks.buffer, total - 8);
const low = u8a.length % (1 << 29);
const high = (u8a.length - low) / (1 << 29);
lenbuf.setUint32(0, high, false);
lenbuf.setUint32(4, low << 3, false);
const hash = H.slice();
const w = new Uint32Array(80);
for (let offs = 0; offs < total; offs += 64) {
const chunk = new DataView(chunks.buffer, offs, 64);
for (let i = 0; i < 16; i++) w[i] = chunk.getUint32(i * 4, false);
for (let i = 16; i < 80; i++) {
w[i] = rotl(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
}
let [a, b, c, d, e] = hash;
for (let s = 0; s < 4; s++) {
for (let i = s * 20, end = i + 20; i < end; i++) {
const ne = rotl(a, 5) + F[s](b, c, d) + e + K[s] + w[i];
[a, b, c, d, e] = [ne >>> 0, a, rotl(b, 30), c, d];
}
}
hash[0] += a; hash[1] += b; hash[2] += c; hash[3] += d; hash[4] += e;
}
const digest = new DataView(new ArrayBuffer(20));
hash.forEach((v, i) => digest.setUint32(i * 4, v, false));
return digest.buffer;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment