Created
September 9, 2018 15:51
-
-
Save shobhitsharma/8485be8b6bc3ae23b33c5df5f909e624 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
var Assert = {}; | |
Assert.GE = function(key, value, bound) { | |
if (!Number.isInteger(value)) throw new Error(key + ' must be an integer'); | |
if (!Number.isInteger(bound)) throw new Error(key + ' bound not an integer'); | |
if (value < bound) throw new Error(key + ' must be at least ' + bound); | |
}; | |
Assert.LE = function(key, value, bound) { | |
if (!Number.isInteger(value)) throw new Error(key + ' must be an integer'); | |
if (!Number.isInteger(bound)) throw new Error(key + ' bound not an integer'); | |
if (value > bound) throw new Error(key + ' must be at most ' + bound); | |
}; | |
Assert.P2 = function(key, value) { | |
if (!Number.isInteger(value)) throw new Error(key + ' must be an integer'); | |
if (value <= 0) throw new Error(key + ' must be greater than 0'); | |
if (value & (value - 1)) throw new Error(key + ' must be a power of 2'); | |
}; | |
// Hashes assigned by Hash() instead of using multiple return destructuring: | |
// We want to avoid allocating millions of objects just to return 2 hashes. | |
var H1 = 0; | |
var H2 = 0; | |
// Tabulation hash function: | |
function Hash(key, keyOffset, keySize) { | |
// Assigning to a local variable is faster than to a global variable: | |
var h1 = 0; | |
var h2 = 0; | |
var i = 0; | |
while (i < keySize) { | |
// Minimize cache misses by interleaving both tables into a single table: | |
// Minimize variable assignments by reusing k as an index into TABLE: | |
// Unrolled to process 4 bytes at a time: | |
h1 ^= ( | |
TABLE[(((i << 1) + 0) << 8) + key[keyOffset + i + 0]] ^ | |
TABLE[(((i << 1) + 1) << 8) + key[keyOffset + i + 1]] ^ | |
TABLE[(((i << 1) + 2) << 8) + key[keyOffset + i + 2]] ^ | |
TABLE[(((i << 1) + 3) << 8) + key[keyOffset + i + 3]] | |
); | |
h2 ^= ( | |
TABLE[(((i << 1) + 4) << 8) + key[keyOffset + i + 0]] ^ | |
TABLE[(((i << 1) + 5) << 8) + key[keyOffset + i + 1]] ^ | |
TABLE[(((i << 1) + 6) << 8) + key[keyOffset + i + 2]] ^ | |
TABLE[(((i << 1) + 7) << 8) + key[keyOffset + i + 3]] | |
); | |
i += 4; | |
} | |
H1 = h1; | |
H2 = h2; | |
} | |
// Slot lookup table, given 8-bits, return the index of an empty slot (if any): | |
// We use this to find an empty slot in a single branch. | |
var SLOT = (function() { | |
var slots = 8; | |
var table = new Uint8Array(1 << slots); | |
for (var index = 0; index < table.length; index++) { | |
for (var slot = 0; slot < slots; slot++) { | |
if ((index & (1 << slot)) === 0) break; | |
} | |
table[index] = slot; | |
} | |
return table; | |
})(); | |
// Interleaved entropy table used by tabulation hash function: | |
var TABLE = (function() { | |
var word = 4; | |
var table = new Int32Array(64 * 256 * 2); | |
var buffer = require('crypto').randomBytes(table.length * word); | |
for (var index = 0, length = table.length; index < length; index++) { | |
table[index] = buffer.readInt32LE(index * word); | |
} | |
return table; | |
})(); | |
// A fallback for when valueSize is 0 and the user does not pass a value buffer: | |
var VALUE = Buffer.alloc(0); | |
function HashTable(keySize, valueSize, buffers=8, elements=1024, size=0) { | |
Assert.GE('keySize', keySize, HashTable.KEY_MIN); | |
Assert.LE('keySize', keySize, HashTable.KEY_MAX); | |
Assert.GE('valueSize', valueSize, HashTable.VALUE_MIN); | |
Assert.LE('valueSize', valueSize, HashTable.VALUE_MAX); | |
Assert.GE('buffers', buffers, HashTable.BUFFERS_MIN); | |
Assert.LE('buffers', buffers, HashTable.BUFFERS_MAX); | |
Assert.P2('buffers', buffers); | |
Assert.GE('elements', elements, HashTable.ELEMENTS_MIN); | |
Assert.LE('elements', elements, HashTable.ELEMENTS_MAX); | |
Assert.GE('size', size, HashTable.SIZE_MIN); | |
Assert.LE('size', size, HashTable.SIZE_MAX); | |
// We can optimize the hash function significantly if key is a multiple of 4: | |
if (keySize % 4) throw new Error('keySize must be a multiple of 4'); | |
this.keySize = keySize; | |
this.valueSize = valueSize; | |
this.tables = new Array(buffers); | |
this.bucket = HashTable.bucket(keySize, valueSize); | |
var buckets = HashTable.buckets(this.bucket, buffers, elements, size); | |
if (buckets * this.bucket > HashTable.BUFFER_MAX) { | |
throw new Error(HashTable.ERROR_MAXIMUM_CAPACITY_EXCEEDED); | |
} | |
for (var offset = 0; offset < buffers; offset++) { | |
this.tables[offset] = new Table(keySize, valueSize, this.bucket, buckets); | |
} | |
this.capacity = buffers * buckets * 8; | |
this.length = 0; | |
this.mask = buffers - 1; | |
this.mode = 0; // 1 = resizing with set(), 2 = evicting with cache(). | |
} | |
HashTable.prototype.cache = function(key, keyOffset, value, valueOffset) { | |
if (this.mode === 1) throw new Error(HashTable.ERROR_MODE); | |
this.mode = 2; | |
if (this.valueSize === 0) { | |
value = VALUE; | |
valueOffset = 0; | |
} | |
Hash(key, keyOffset, this.keySize); | |
var table = this.tables[(((H1 >> 24) << 8) | (H2 >> 24)) & this.mask]; | |
var result = table.cache(H1, H2, key, keyOffset, value, valueOffset); | |
if (result === 0) this.length++; | |
return result; | |
}; | |
HashTable.prototype.exist = function(key, keyOffset) { | |
Hash(key, keyOffset, this.keySize); | |
var table = this.tables[(((H1 >> 24) << 8) | (H2 >> 24)) & this.mask]; | |
return table.exist(H1, H2, key, keyOffset); | |
}; | |
HashTable.prototype.get = function(key, keyOffset, value, valueOffset) { | |
if (this.valueSize === 0) { | |
value = VALUE; | |
valueOffset = 0; | |
} | |
Hash(key, keyOffset, this.keySize); | |
var table = this.tables[(((H1 >> 24) << 8) | (H2 >> 24)) & this.mask]; | |
return table.get(H1, H2, key, keyOffset, value, valueOffset); | |
}; | |
Object.defineProperty(HashTable.prototype, 'load', { | |
get: function() { | |
return this.length / this.capacity; | |
} | |
}); | |
HashTable.prototype.set = function(key, keyOffset, value, valueOffset) { | |
if (this.mode === 2) throw new Error(HashTable.ERROR_MODE); | |
this.mode = 1; | |
if (this.valueSize === 0) { | |
value = VALUE; | |
valueOffset = 0; | |
} | |
Hash(key, keyOffset, this.keySize); | |
var h1 = H1; | |
var h2 = H2; | |
var table = this.tables[(((h1 >> 24) << 8) | (h2 >> 24)) & this.mask]; | |
var result = table.set(h1, h2, key, keyOffset, value, valueOffset); | |
if (result === 1) return 1; | |
if (result === 0) { | |
this.length++; | |
return 0; | |
} | |
for (var resize = 1; resize <= 2; resize++) { | |
var buckets = table.buckets; | |
if (table.resize(buckets << resize)) { | |
this.capacity -= buckets * 8; | |
this.capacity += table.buckets * 8; | |
var result = table.set(h1, h2, key, keyOffset, value, valueOffset); | |
if (result === 1) return 1; | |
if (result === 0) { | |
this.length++; | |
return 0; | |
} | |
} | |
} | |
throw new Error(HashTable.ERROR_SET); | |
}; | |
Object.defineProperty(HashTable.prototype, 'size', { | |
get: function() { | |
var size = this.capacity / 8 * this.bucket; | |
Assert.GE('size', size, 0); | |
return size; | |
} | |
}); | |
HashTable.prototype.unset = function(key, keyOffset) { | |
Hash(key, keyOffset, this.keySize); | |
var table = this.tables[(((H1 >> 24) << 8) | (H2 >> 24)) & this.mask]; | |
var result = table.unset(H1, H2, key, keyOffset); | |
if (result === 1) this.length--; | |
return result; | |
}; | |
// Constants: | |
HashTable.KEY_MIN = 4; | |
HashTable.KEY_MAX = 64; | |
HashTable.VALUE_MIN = 0; | |
HashTable.VALUE_MAX = 67108864; | |
HashTable.BUFFERS_MIN = 1; | |
HashTable.BUFFERS_MAX = 8192; // Javascript Arrays degrade at 10,000 elements. | |
HashTable.ELEMENTS_MIN = 0; | |
HashTable.ELEMENTS_MAX = 68719476736; | |
HashTable.SIZE_MIN = 0; | |
HashTable.SIZE_MAX = 1024 * 1024 * 1024 * 1024; | |
HashTable.BUCKETS_MIN = 2; | |
HashTable.BUCKETS_MAX = 65536; | |
HashTable.BUFFER_MAX = require('buffer').kMaxLength; | |
Assert.GE('BUFFER_MAX', HashTable.BUFFER_MAX, 0); | |
Assert.LE('BUFFER_MAX', HashTable.BUFFER_MAX, Math.pow(2, 32) - 1); | |
Assert.GE('SLOT.length', SLOT.length, 256); | |
Assert.LE('SLOT.length', SLOT.length, 256); | |
Assert.GE('TABLE.length', TABLE.length, HashTable.KEY_MAX * 256 * 2); | |
Assert.LE('TABLE.length', TABLE.length, HashTable.KEY_MAX * 256 * 2); | |
// Too many elements or buffer allocation limit reached, add more buffers: | |
HashTable.ERROR_MAXIMUM_CAPACITY_EXCEEDED = 'maximum capacity exceeded'; | |
// cache() and set() methods are mutually exclusive: | |
// Once cache() is called, the table switches to non-resizing, caching mode. | |
// Once set() is called, the table switches to resizing, second position mode. | |
// This enables several optimizations and is safer: | |
// 1. cache() does not need to scan second position for an element. | |
// 2. cache() can assume all elements are in first position when refiltering. | |
// 3. cache() might otherwise evict an element that was inserted using set(). | |
HashTable.ERROR_MODE = 'cache() and set() methods are mutually exclusive'; | |
// This might indicate an adversarial attack, or weak tabulation hash entropy: | |
HashTable.ERROR_SET = 'set() failed despite multiple resize attempts'; | |
HashTable.bucket = function(keySize, valueSize) { | |
Assert.GE('keySize', keySize, HashTable.KEY_MIN); | |
Assert.LE('keySize', keySize, HashTable.KEY_MAX); | |
Assert.GE('valueSize', valueSize, HashTable.VALUE_MIN); | |
Assert.LE('valueSize', valueSize, HashTable.VALUE_MAX); | |
if (keySize % 4) throw new Error('keySize must be a multiple of 4'); | |
// Bucket includes padding for 64-byte cache line alignment: | |
var bucket = Math.ceil((20 + (keySize + valueSize) * 8) / 64) * 64; | |
Assert.GE('bucket', bucket, 0); | |
return bucket; | |
}; | |
HashTable.buckets = function(bucket, buffers, elements, size) { | |
Assert.GE('bucket', bucket, 20 + HashTable.KEY_MIN * 8); | |
if (bucket % 64) throw new Error('bucket must be a multiple of 64'); | |
Assert.GE('buffers', buffers, HashTable.BUFFERS_MIN); | |
Assert.LE('buffers', buffers, HashTable.BUFFERS_MAX); | |
Assert.P2('buffers', buffers); | |
Assert.GE('elements', elements, HashTable.ELEMENTS_MIN); | |
Assert.LE('elements', elements, HashTable.ELEMENTS_MAX); | |
Assert.GE('size', size, HashTable.SIZE_MIN); | |
Assert.LE('size', size, HashTable.SIZE_MAX); | |
// Allocate buckets based on the total size or expected number of elements: | |
if (size > 0) { | |
// Use floor() to avoid exceeding size: | |
var power = Math.floor(Math.log2(Math.max(1, size / buffers / bucket))); | |
} else { | |
// Use ceil() to avoid resizing: | |
var power = Math.ceil(Math.log2(Math.max(1, elements * 1.1 / buffers / 8))); | |
} | |
var buckets = Math.pow(2, power); | |
if (buckets < HashTable.BUCKETS_MIN) buckets = HashTable.BUCKETS_MIN; | |
if (buckets > HashTable.BUCKETS_MAX) buckets = HashTable.BUCKETS_MAX; | |
Assert.GE('buckets', buckets, HashTable.BUCKETS_MIN); | |
Assert.LE('buckets', buckets, HashTable.BUCKETS_MAX); | |
Assert.P2('buckets', buckets); | |
return buckets; | |
}; | |
function Table(keySize, valueSize, bucket, buckets) { | |
this.keySize = keySize; | |
this.valueSize = valueSize; | |
this.bucket = bucket; | |
this.buckets = buckets; | |
this.buffer = Buffer.alloc(this.bucket * this.buckets); | |
// Reduce branching through unrolled copy methods: | |
this.copyKey = this.copy(keySize) || this.copyKeyGeneric; | |
this.copyValue = this.copy(valueSize) || this.copyValueGeneric; | |
// Replace modulus with fast bitwise AND (buckets must be a power of 2): | |
this.mask = this.buckets - 1; | |
// Optimize global variable lookup: | |
this.SLOT = SLOT; | |
} | |
Table.prototype.assign = function( | |
bucket, | |
tag, | |
slot, | |
key, | |
keyOffset, | |
value, | |
valueOffset | |
) { | |
this.buffer[bucket + 9] |= (1 << slot); // Mark the slot as present. | |
this.buffer[bucket + 9 + 1 + slot] = tag; // Assign the element's tag. | |
this.copyKey(key, keyOffset, this.buffer, this.keyOffset(bucket, slot)); | |
this.copyValue( | |
value, | |
valueOffset, | |
this.buffer, | |
this.valueOffset(bucket, slot) | |
); | |
}; | |
Table.prototype.cache = function(h1, h2, key, keyOffset, value, valueOffset) { | |
// See comments in set(): | |
var tag = (h1 >> 16) & 255; | |
var b1 = (h1 & this.mask) * this.bucket; | |
var b2 = (h2 & this.mask) * this.bucket; | |
var f1 = (tag >> 4) & 7; | |
var f2 = 1 << (tag & 7); | |
if (this.buffer[b1 + f1] & f2) { | |
var s1 = this.scan(b1, tag, key, keyOffset); | |
if (s1 < 8) { | |
// Mark the element as recently used: | |
this.buffer[b1 + 18] |= (1 << s1); | |
this.copyValue(value, valueOffset, this.buffer, this.valueOffset(b1, s1)); | |
return 1; | |
} | |
} | |
// Evict the least recently used slot in first position: | |
var s3 = this.evict(b1); | |
var eviction = this.buffer[b1 + 9] & (1 << s3); | |
if (eviction) { | |
// Mark the slot as empty so that the element is excluded from its filter: | |
this.buffer[b1 + 9] &= ~(1 << s3); | |
// Reset the old element's filter: | |
this.filterReset(b1, this.buffer[b1 + 9 + 1 + s3] & 7); | |
} | |
// Add the new element in its place: | |
this.assign(b1, tag, s3, key, keyOffset, value, valueOffset); | |
// Add the new element to its filter (this can be a different filter): | |
this.buffer[b1 + f1] |= f2; | |
// Mark the element as recently used: | |
this.buffer[b1 + 18] |= (1 << s3); | |
return eviction ? 2 : 0; | |
}; | |
Table.prototype.copy = function(size) { | |
switch (size) { | |
case 0: return this.copy00; | |
case 4: return this.copy04; | |
case 8: return this.copy08; | |
case 16: return this.copy16; | |
case 20: return this.copy20; | |
case 32: return this.copy32; | |
case 48: return this.copy48; | |
case 64: return this.copy64; | |
case 128: return this.copy128; | |
case 256: return this.copy256; | |
} | |
return undefined; | |
}; | |
Table.prototype.copyKeyGeneric = function(s, sO, t, tO) { | |
var size = this.keySize; | |
var groups = size >>> 2; | |
while (groups--) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
tO += 4; | |
sO += 4; | |
size -= 4; | |
} | |
while (size--) t[tO++] = s[sO++]; | |
}; | |
Table.prototype.copyValueGeneric = function(s, sO, t, tO) { | |
var size = this.valueSize; | |
if (size < 128) { | |
var groups = size >>> 3; | |
while (groups--) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
t[tO + 4] = s[sO + 4]; | |
t[tO + 5] = s[sO + 5]; | |
t[tO + 6] = s[sO + 6]; | |
t[tO + 7] = s[sO + 7]; | |
tO += 8; | |
sO += 8; | |
size -= 8; | |
} | |
while (size--) t[tO++] = s[sO++]; | |
} else { | |
s.copy(t, tO, sO, sO + size); | |
} | |
}; | |
Table.prototype.copy00 = function(s, sO, t, tO) {}; | |
Table.prototype.copy04 = function(s, sO, t, tO) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
}; | |
Table.prototype.copy08 = function(s, sO, t, tO) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
t[tO + 4] = s[sO + 4]; | |
t[tO + 5] = s[sO + 5]; | |
t[tO + 6] = s[sO + 6]; | |
t[tO + 7] = s[sO + 7]; | |
}; | |
Table.prototype.copy16 = function(s, sO, t, tO) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
t[tO + 4] = s[sO + 4]; | |
t[tO + 5] = s[sO + 5]; | |
t[tO + 6] = s[sO + 6]; | |
t[tO + 7] = s[sO + 7]; | |
t[tO + 8] = s[sO + 8]; | |
t[tO + 9] = s[sO + 9]; | |
t[tO + 10] = s[sO + 10]; | |
t[tO + 11] = s[sO + 11]; | |
t[tO + 12] = s[sO + 12]; | |
t[tO + 13] = s[sO + 13]; | |
t[tO + 14] = s[sO + 14]; | |
t[tO + 15] = s[sO + 15]; | |
}; | |
Table.prototype.copy20 = function(s, sO, t, tO) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
t[tO + 4] = s[sO + 4]; | |
t[tO + 5] = s[sO + 5]; | |
t[tO + 6] = s[sO + 6]; | |
t[tO + 7] = s[sO + 7]; | |
t[tO + 8] = s[sO + 8]; | |
t[tO + 9] = s[sO + 9]; | |
t[tO + 10] = s[sO + 10]; | |
t[tO + 11] = s[sO + 11]; | |
t[tO + 12] = s[sO + 12]; | |
t[tO + 13] = s[sO + 13]; | |
t[tO + 14] = s[sO + 14]; | |
t[tO + 15] = s[sO + 15]; | |
t[tO + 16] = s[sO + 16]; | |
t[tO + 17] = s[sO + 17]; | |
t[tO + 18] = s[sO + 18]; | |
t[tO + 19] = s[sO + 19]; | |
}; | |
Table.prototype.copy32 = function(s, sO, t, tO) { | |
t[tO + 0] = s[sO + 0]; | |
t[tO + 1] = s[sO + 1]; | |
t[tO + 2] = s[sO + 2]; | |
t[tO + 3] = s[sO + 3]; | |
t[tO + 4] = s[sO + 4]; | |
t[tO + 5] = s[sO + 5]; | |
t[tO + 6] = s[sO + 6]; | |
t[tO + 7] = s[sO + 7]; | |
t[tO + 8] = s[sO + 8]; | |
t[tO + 9] = s[sO + 9]; | |
t[tO + 10] = s[sO + 10]; | |
t[tO + 11] = s[sO + 11]; | |
t[tO + 12] = s[sO + 12]; | |
t[tO + 13] = s[sO + 13]; | |
t[tO + 14] = s[sO + 14]; | |
t[tO + 15] = s[sO + 15]; | |
t[tO + 16] = s[sO + 16]; | |
t[tO + 17] = s[sO + 17]; | |
t[tO + 18] = s[sO + 18]; | |
t[tO + 19] = s[sO + 19]; | |
t[tO + 20] = s[sO + 20]; | |
t[tO + 21] = s[sO + 21]; | |
t[tO + 22] = s[sO + 22]; | |
t[tO + 23] = s[sO + 23]; | |
t[tO + 24] = s[sO + 24]; | |
t[tO + 25] = s[sO + 25]; | |
t[tO + 26] = s[sO + 26]; | |
t[tO + 27] = s[sO + 27]; | |
t[tO + 28] = s[sO + 28]; | |
t[tO + 29] = s[sO + 29]; | |
t[tO + 30] = s[sO + 30]; | |
t[tO + 31] = s[sO + 31]; | |
}; | |
Table.prototype.copy48 = function(s, sO, t, tO) { | |
this.copy32(s, sO + 0, t, tO + 0); | |
this.copy16(s, sO + 32, t, tO + 32); | |
}; | |
Table.prototype.copy64 = function(s, sO, t, tO) { | |
this.copy32(s, sO + 0, t, tO + 0); | |
this.copy32(s, sO + 32, t, tO + 32); | |
}; | |
Table.prototype.copy128 = function(s, sO, t, tO) { | |
this.copy32(s, sO + 0, t, tO + 0); | |
this.copy32(s, sO + 32, t, tO + 32); | |
this.copy32(s, sO + 64, t, tO + 64); | |
this.copy32(s, sO + 96, t, tO + 96); | |
}; | |
Table.prototype.copy256 = function(s, sO, t, tO) { | |
this.copy32(s, sO + 0, t, tO + 0); | |
this.copy32(s, sO + 32, t, tO + 32); | |
this.copy32(s, sO + 64, t, tO + 64); | |
this.copy32(s, sO + 96, t, tO + 96); | |
this.copy32(s, sO + 128, t, tO + 128); | |
this.copy32(s, sO + 160, t, tO + 160); | |
this.copy32(s, sO + 192, t, tO + 192); | |
this.copy32(s, sO + 224, t, tO + 224); | |
}; | |
Table.prototype.equal = function(a, aOffset, b, bOffset, size) { | |
while (size--) { | |
if (a[aOffset++] != b[bOffset++]) return 0; | |
} | |
return 1; | |
}; | |
// Evict an element using the CLOCK eviction policy which approximates LRU: | |
Table.prototype.evict = function(bucket) { | |
// After the CLOCK hand wraps, we are guaranteed an eviction: | |
var tick = 8 + 1; | |
while (tick--) { | |
// Find the slot pointed to by CLOCK hand: | |
var slot = this.buffer[bucket + 18 + 1]; | |
// Increment CLOCK hand regardless of whether slot was recently used: | |
this.buffer[bucket + 18 + 1] = (this.buffer[bucket + 18 + 1] + 1) & 7; | |
// Evict slot if slot was not recently used: | |
if ((this.buffer[bucket + 18] & (1 << slot)) === 0) break; | |
// Slot was recently used, clear recently used bit and keep ticking: | |
this.buffer[bucket + 18] &= ~(1 << slot); | |
} | |
return slot; | |
}; | |
Table.prototype.exist = function(h1, h2, key, keyOffset) { | |
// See comments in set(): | |
var tag = (h1 >> 16) & 255; | |
var b1 = (h1 & this.mask) * this.bucket; | |
var b2 = (h2 & this.mask) * this.bucket; | |
var f1 = (tag >> 4) & 7; | |
var f2 = 1 << (tag & 7); | |
if (this.buffer[b1 + f1] & f2) { | |
var s1 = this.scan(b1, tag, key, keyOffset); | |
if (s1 < 8) return 1; | |
var s2 = this.scan(b2, tag, key, keyOffset); | |
if (s2 < 8) return 1; | |
} | |
return 0; | |
}; | |
// Decrement a filter's count of elements in second position: | |
Table.prototype.filterDecrementSecondPosition = function(bucket) { | |
if (this.buffer[bucket + 8] === 0) throw new Error('count should not be 0'); | |
if (this.buffer[bucket + 8] < 255) { | |
this.buffer[bucket + 8]--; | |
if (this.buffer[bucket + 8] === 0) { | |
for (var filter = 0; filter < 8; filter++) { | |
this.filterReset(bucket, filter); | |
} | |
} | |
} | |
}; | |
// Increment a filter's count of elements in second position: | |
Table.prototype.filterIncrementSecondPosition = function(bucket) { | |
// Once the counter saturates, it can no longer be incremented or decremented. | |
// This is extremely unlikely, we expect at most 4 elements and can count 254. | |
// Even if it does saturate, the worst is that we never reset the filter. | |
if (this.buffer[bucket + 8] < 255) this.buffer[bucket + 8]++; | |
}; | |
// Reset a filter to remove stale entries: | |
Table.prototype.filterReset = function(bucket, filter) { | |
// Filter has elements in second position and cannot be reset: | |
if (this.buffer[bucket + 8] !== 0) return; | |
// Filter has no elements (since no bits are set): | |
if (this.buffer[bucket + filter] === 0) return; | |
// Reset filter and add elements back: | |
this.buffer[bucket + filter] = 0; | |
for (var slot = 0; slot < 8; slot++) { | |
// Slot must be present (not empty): | |
if (this.buffer[bucket + 9] & (1 << slot)) { | |
// Element must belong to the same filter (and be in first position): | |
// We do not check whether element is actually in second position. | |
// This would need special bookkeeping, is unlikely, and adds little. | |
var tag = this.buffer[bucket + 9 + 1 + slot]; | |
var f1 = (tag >> 4) & 7; | |
if (f1 === filter) { | |
var f2 = 1 << (tag & 7); | |
this.buffer[bucket + filter] |= f2; | |
} | |
} | |
} | |
}; | |
Table.prototype.get = function(h1, h2, key, keyOffset, value, valueOffset) { | |
// See comments in set(): | |
var tag = (h1 >> 16) & 255; | |
var b1 = (h1 & this.mask) * this.bucket; | |
var b2 = (h2 & this.mask) * this.bucket; | |
var f1 = (tag >> 4) & 7; | |
var f2 = 1 << (tag & 7); | |
if (this.buffer[b1 + f1] & f2) { | |
var s1 = this.scan(b1, tag, key, keyOffset); | |
if (s1 < 8) { | |
// Mark element as recently used: | |
this.buffer[b1 + 18] |= (1 << s1); | |
this.copyValue(this.buffer, this.valueOffset(b1, s1), value, valueOffset); | |
return 1; | |
} | |
var s2 = this.scan(b2, tag, key, keyOffset); | |
if (s2 < 8) { | |
this.buffer[b2 + 18] |= (1 << s2); | |
this.copyValue(this.buffer, this.valueOffset(b2, s2), value, valueOffset); | |
return 1; | |
} | |
} | |
return 0; | |
}; | |
Table.prototype.keyOffset = function(bucket, slot) { | |
// 20 = 8:Filter 1:FilterCount 1:Present 8:Tags 1:ClockUsed 1:ClockHand | |
// We keep the element's key and value together to optimize the common case of | |
// comparing the key and retrieving the value without a cache miss. | |
return bucket + 20 + (this.keySize + this.valueSize) * slot; | |
}; | |
Table.prototype.resize = function(resizeBuckets) { | |
Assert.GE('resizeBuckets', resizeBuckets, this.buckets * 2); | |
Assert.P2('resizeBuckets', resizeBuckets); | |
if ( | |
resizeBuckets > HashTable.BUCKETS_MAX || | |
this.bucket * resizeBuckets > HashTable.BUFFER_MAX | |
) { | |
throw new Error(HashTable.ERROR_MAXIMUM_CAPACITY_EXCEEDED); | |
} | |
var buckets = this.buckets; | |
var buffer = this.buffer; | |
this.buckets = resizeBuckets; | |
this.buffer = Buffer.alloc(this.bucket * resizeBuckets); | |
this.mask = resizeBuckets - 1; | |
for (var index = 0; index < buckets; index++) { | |
var bucket = index * this.bucket; | |
for (var slot = 0; slot < 8; slot++) { | |
if (buffer[bucket + 9] & (1 << slot)) { | |
// We assume keyOffset, valueOffset depend only on bucket and slot: | |
var keyOffset = this.keyOffset(bucket, slot); | |
var valueOffset = this.valueOffset(bucket, slot); | |
Hash(buffer, keyOffset, this.keySize); | |
if (this.set(H1, H2, buffer, keyOffset, buffer, valueOffset) === -1) { | |
// Fail this resize() attempt (and restore back to before resize): | |
// The caller should try again with more resizeBuckets. | |
this.buckets = buckets; | |
this.buffer = buffer; | |
this.mask = buckets - 1; | |
return 0; | |
} | |
} | |
} | |
} | |
return 1; | |
}; | |
Table.prototype.scan = function(bucket, tag, key, keyOffset) { | |
for (var slot = 0; slot < 8; slot++) { | |
if ( | |
// Check the tag before checking presence bits: | |
// The tag is a better branch predictor with more entropy. | |
(this.buffer[bucket + 9 + 1 + slot] === tag) && | |
(this.buffer[bucket + 9] & (1 << slot)) && | |
this.equal( | |
this.buffer, | |
this.keyOffset(bucket, slot), | |
key, | |
keyOffset, | |
this.keySize | |
) | |
) { | |
break; | |
} | |
} | |
return slot; | |
}; | |
Table.prototype.set = function(h1, h2, key, keyOffset, value, valueOffset) { | |
// Use the 2nd most significant byte of H1 for 1-byte tag: | |
var tag = (h1 >> 16) & 255; | |
// Use the 3rd and 4th most significant bytes of H1 and H2 for bucket offset: | |
var b1 = (h1 & this.mask) * this.bucket; | |
var b2 = (h2 & this.mask) * this.bucket; | |
// Reuse tag entropy for filter entropy (instead of using 2nd MSB from H2): | |
// This enables us to find the filter for any element without hashing its key. | |
// This increases tag-scanning false positives, but optimizes filter resets. | |
// This tradeoff is significant for cache(), where evictions reset filters. | |
// At 100% occupancy, 1 element per filter, we expect 1 in 9 false positives. | |
// See: https://hur.st/bloomfilter/?n=1&p=&m=8&k=1 | |
var f1 = (tag >> 4) & 7; // Use tag's upper 4-bits to select a 1-byte filter. | |
var f2 = 1 << (tag & 7); // Use tag's lower 4-bits to select a bit. | |
// Check the filter to see if the element might exist: | |
if (this.buffer[b1 + f1] & f2) { | |
// Search for the element and update the element's value if found: | |
var s1 = this.scan(b1, tag, key, keyOffset); | |
if (s1 < 8) { | |
this.copyValue(value, valueOffset, this.buffer, this.valueOffset(b1, s1)); | |
return 1; | |
} | |
var s2 = this.scan(b2, tag, key, keyOffset); | |
if (s2 < 8) { | |
this.copyValue(value, valueOffset, this.buffer, this.valueOffset(b2, s2)); | |
return 1; | |
} | |
} | |
// Find an empty slot in first position: | |
var s3 = this.SLOT[this.buffer[b1 + 9]]; | |
if (s3 < 8) { | |
this.assign(b1, tag, s3, key, keyOffset, value, valueOffset); | |
this.buffer[b1 + f1] |= f2; | |
return 0; | |
} | |
// Find an empty slot in second position: | |
var s4 = this.SLOT[this.buffer[b2 + 9]]; | |
if (s4 < 8) { | |
this.assign(b2, tag, s4, key, keyOffset, value, valueOffset); | |
this.buffer[b1 + f1] |= f2; | |
this.filterIncrementSecondPosition(b1); | |
return 0; | |
} | |
// Vacate a slot in first position: | |
var s5 = this.vacate(b1); | |
if (s5 < 8) { | |
this.assign(b1, tag, s5, key, keyOffset, value, valueOffset); | |
this.buffer[b1 + f1] |= f2; | |
return 0; | |
} | |
// Vacate a slot in second position: | |
var s6 = this.vacate(b2); | |
if (s6 < 8) { | |
this.assign(b2, tag, s6, key, keyOffset, value, valueOffset); | |
this.buffer[b1 + f1] |= f2; | |
this.filterIncrementSecondPosition(b1); | |
return 0; | |
} | |
return -1; | |
}; | |
Table.prototype.unset = function(h1, h2, key, keyOffset) { | |
// See comments in set(): | |
var tag = (h1 >> 16) & 255; | |
var b1 = (h1 & this.mask) * this.bucket; | |
var b2 = (h2 & this.mask) * this.bucket; | |
var f1 = (tag >> 4) & 7; | |
var f2 = 1 << (tag & 7); | |
if (this.buffer[b1 + f1] & f2) { | |
var s1 = this.scan(b1, tag, key, keyOffset); | |
if (s1 < 8) { | |
this.buffer[b1 + 9] &= ~(1 << s1); | |
this.buffer[b1 + 9 + 1 + s1] = 0; | |
this.zero(this.keyOffset(b1, s1), this.keySize); | |
this.zero(this.valueOffset(b1, s1), this.valueSize); | |
this.filterReset(b1, f1); | |
return 1; | |
} | |
var s2 = this.scan(b2, tag, key, keyOffset); | |
if (s2 < 8) { | |
this.buffer[b2 + 9] &= ~(1 << s2); | |
this.buffer[b2 + 9 + 1 + s2] = 0; | |
this.zero(this.keyOffset(b2, s2), this.keySize); | |
this.zero(this.valueOffset(b2, s2), this.valueSize); | |
this.filterDecrementSecondPosition(b1); | |
return 1; | |
} | |
} | |
return 0; | |
}; | |
Table.prototype.vacate = function(bucket) { | |
for (var slot = 0; slot < 8; slot++) { | |
var keyOffset = this.keyOffset(bucket, slot); | |
var valueOffset = this.valueOffset(bucket, slot); | |
Hash(this.buffer, keyOffset, this.keySize); | |
var tag = (H1 >> 16) & 255; | |
var b1 = (H1 & this.mask) * this.bucket; | |
var b2 = (H2 & this.mask) * this.bucket; | |
if (bucket === b1) { | |
// Move existing element to second position if there is an empty slot: | |
var s2 = this.SLOT[this.buffer[b2 + 9]]; | |
if (s2 < 8) { | |
this.assign( | |
b2, tag, s2, this.buffer, keyOffset, this.buffer, valueOffset | |
); | |
this.filterIncrementSecondPosition(b1); | |
break; | |
} | |
// First and second positions are the same, or second position is full. | |
} else if (bucket === b2) { | |
// Move existing element back to first position if there is an empty slot: | |
var s1 = this.SLOT[this.buffer[b1 + 9]]; | |
if (s1 < 8) { | |
this.assign( | |
b1, tag, s1, this.buffer, keyOffset, this.buffer, valueOffset | |
); | |
this.filterDecrementSecondPosition(b1); | |
break; | |
} | |
} else { | |
throw new Error('bucket !== b1 && bucket !== b2'); | |
} | |
} | |
return slot; | |
}; | |
Table.prototype.valueOffset = function(bucket, slot) { | |
// See comment in keyOffset(): | |
return bucket + 20 + (this.keySize + this.valueSize) * slot + this.keySize; | |
}; | |
Table.prototype.zero = function(offset, size) { | |
if (size < 64) { | |
var groups = size >>> 2; | |
while (groups--) { | |
this.buffer[offset + 0] = 0; | |
this.buffer[offset + 1] = 0; | |
this.buffer[offset + 2] = 0; | |
this.buffer[offset + 3] = 0; | |
offset += 4; | |
size -= 4; | |
} | |
while (size--) this.buffer[offset++] = 0; | |
} else { | |
this.buffer.fill(0, offset, offset + size); | |
} | |
}; | |
module.exports = HashTable; | |
// S.D.G. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment