- Fix the silent hash collision issue
- Provide more/better examples
- Create a repository for this, and translate to a YUI-compatible module
- Document all the methods &c.
- Create a 'transient' version to make Trie construction much cheaper.
Last active
August 29, 2015 14:00
-
-
Save nhusher/66fafa126cf67c37c2af to your computer and use it in GitHub Desktop.
A working HAMT implementation in JS.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// --- Utility Functions ---------------------------------------------------- | |
// | |
/* | |
Found at <http://blog.faultylabs.com/2011.php?p=jsbitfun> | |
@param bits {Number} An integer to run popCnt on. | |
@return The number of 1 bits in the number. | |
*/ | |
function popcount(bits) { | |
bits >>>= 0 /* force uint32 */ | |
for(var i = 0; bits; bits &= bits - 1) { | |
i++ | |
} | |
return i; | |
} | |
/* | |
A (bad) test for determining if an object is a hashmap-style object. | |
@param o {Object} | |
@return Whether or not the object is a hashmap-style object | |
*/ | |
function isObject(o) { | |
return o.constructor === Object; | |
} | |
/* | |
Is a function? | |
@param f {Function} | |
@return Whether or not f is a function. | |
*/ | |
function isFn(f) { | |
return (typeof f === 'function'); | |
} | |
/* | |
Based on Java's hashing function. Good enough (probably), but slow. | |
@param v {Number|String|Hashable} A value that can be hashed. Integers | |
hashes to themself. Floats are translated to strings before being hashed, | |
otherwise, the function expects the object to have a `hash` function that | |
it can call. | |
`hash` should be a function that returns that either returns a unique | |
value for every instance of an object (i.e. a GUID), or a unique value for | |
each permutation of the values within an object. | |
*/ | |
function hash(v) { | |
// coerce floating point numbers to string | |
v = (typeof v === 'number' && Math.floor(v) !== v) ? ''+v : v; | |
if(typeof v === 'number') { | |
return v >> 0; | |
} else if(typeof v === 'string') { | |
for(var h = 0, i = 0, l = v.length; i < l; i += 1) { | |
h = (((h << 5) - h) + v.charCodeAt(i)); | |
} | |
return (h >> 0); | |
} else if(typeof v.hash === 'function') { | |
// allow custom hashing functions | |
return v.hash(); | |
} else { | |
throw new Error("Value " + v + " is not hashable!"); | |
} | |
} | |
/* | |
Print a number as bits. Useful for visualizing bit masks. | |
@param num {Number} | |
@return A binary string formatted into nibbles. | |
*/ | |
function bit(num) { | |
var digits = num.toString(2).match(/(\d+)/)[0], | |
pad = ['0000','0000','0000','0000', | |
'0000','0000','0000','0000'] | |
return (pad.join('') + digits).slice(-32).match(/.{4}/g).join(' '); | |
} | |
// | |
// --- Internal Objects ----------------------------------------------------- | |
// | |
/** | |
A node in the HAMT. A Trie implementation detail that probably shouldn't be | |
interacted with directly. | |
@class Node | |
@constructor | |
@param mask {Number} The bitmask of the trie node. Usually 0 on construction. | |
@param children {Array} An array of children. Usually empty on construction. | |
**/ | |
function Node(mask, children) { | |
this._mask = mask; | |
this._children = children; | |
} | |
var NP = Node.prototype; | |
Node.prototype = { | |
constructor: Node, | |
set: function(shift, valueBox) { | |
var mask = this._mask, | |
children = this._children, | |
hash = valueBox._hash, | |
maskPos = 1 << ((hash >>> shift) & 31), // calculates bitmask pos | |
index = popcount(mask & (maskPos - 1)), | |
child; | |
if(mask & maskPos) { | |
child = children[index]; | |
if(child instanceof Node) { | |
return new Node(mask, [].concat( | |
children.slice(0,index), | |
child.set(shift + 5, valueBox), | |
children.slice(index + 1) | |
)); | |
} else if(child instanceof Box) { | |
// TODO: resolve hash collisions with smarter Box objects | |
return new Node(mask, [].concat( | |
children.slice(0, index), | |
new Node(mask + 5, []).set(shift + 5, child).set(shift + 5, valueBox), | |
children.slice(index + 1))); | |
} | |
throw new Error("Invalid state: child is neither Node or value Box"); | |
} else { | |
return new Node( ((mask | maskPos) >>> 0), [].concat( | |
children.slice(0, index), | |
valueBox, | |
children.slice(index) | |
)); | |
} | |
}, | |
unset: function(shift, hash, key) { | |
var mask = this._mask, | |
children = this._children, | |
maskPos = 1 << ((hash >>> shift) & 31), | |
index = popcount(mask & (maskPos - 1)), | |
result = this, | |
child; | |
if(mask & maskPos) { | |
child = children[index]; | |
if(child instanceof Node) { | |
child = child.unset(shift + 5, hash, key); | |
if(child === null) { | |
result = new Node(mask ^ (mask & maskPos), [].concat( | |
children.slice(0, index), | |
children.slice(index + 1) | |
)); | |
} | |
result = new Node(mask, [].concat( | |
children.slice(0, index), | |
child, | |
children.slice(index + 1) | |
)); | |
} else if(child instanceof Box) { | |
result = new Node((maskPos ^ mask) >>> 0, [].concat( | |
children.slice(0, index), | |
children.slice(index + 1) | |
)); | |
} else { | |
throw new Error("Invalid state: child is neither Node or value Box"); | |
} | |
} | |
return (result._children.length === 0) ? null : result; | |
}, | |
get: function(shift, hash, key) { | |
var mask = this._mask, | |
maskPos = 1 << ((hash >>> shift) & 31), | |
child; | |
if(mask & maskPos) { | |
child = this._children[popcount(mask & (maskPos - 1))]; | |
if(child instanceof Node) { | |
return child.get(shift + 5, hash, key); | |
} else if(child instanceof Box) { | |
return child._value; | |
} | |
} | |
return undefined; | |
}, | |
entries: function() { | |
var result = []; | |
this._children.forEach(function(c) { | |
if(c instanceof Node) { | |
result = result.concat(c.entries()); | |
} else { | |
result.push(c); | |
} | |
}); | |
return result; | |
} | |
}; | |
/** | |
A container object for leaf nodes in the Trie. | |
@class Box | |
@constructor | |
@param key {Number|String|Hashable} | |
@param value {any} | |
**/ | |
function Box(key, value) { | |
this._key = key; | |
this._value = value; | |
// Store the hash, since it's expensive to look up. | |
this._hash = hash(key); | |
} | |
// | |
// --- Public Interface ----------------------------------------------------- | |
// | |
/** | |
A persistent immutable tree-based data structure based on Phil Bagwell's | |
'Ideal Hash Trees' paper. "Modifying" the tree produces new instances of the | |
tree that sturcutrally-share information with previous instances. | |
**Features:** | |
- Immutability: Adding items to the Trie return a new instance of the Trie | |
that shares structure with the previous version. | |
- Complex keys: Keys in the Trie can be numeric, strings, floating-point, or | |
objects. Object keys must implement the function `hash`, which returns a | |
unique key for that representation of object state. For example: | |
```` | |
function Key(name) { | |
this.hash = function() { return "__KEY__" + name; }; | |
} | |
```` | |
In this case, subsequent instantiations of `Key` with the same name will | |
result in the same key. | |
**Caveats:** | |
- The Trie makes no order guarantees. Keys may be stored internally in any | |
order, or no order at all. Numeric keys will be returned in-order when | |
converting between a trie and a javascript array. String keys are always | |
orderless. | |
- The Trie uses a 32-bit hashing algorithm to store keys for fast lookup. | |
There is currently no strategy for resolving hash collisions. Currently | |
hash collisions will silently overwrite the previous instance. This is | |
on the shortlist of things to fix. | |
- For many operations, and most notably writes, the Trie is much slower than | |
a primitive JS data structure. This is the cost of strucutural sharing and | |
immutability. | |
- Immutability is not enforced, which is to say that you can reach into the | |
object and modify its members directly without error. However, if you do | |
this, the Trie cannot help you. Similarly, it's possible to put mutable | |
data into the Trie that can be modified by anyone who holds the reference. | |
The Trie has no way of preventing that, and such actions should be avoided. | |
@class Trie | |
@constructor | |
@param [a] {Object|Array} A data structure to convert into a persistent trie | |
@param [deep] {Boolean} Whether to translate children into tries as well. | |
**/ | |
function Trie(a, deep) { | |
var root = new Node(0, []), | |
i, l; | |
if(a) { | |
if(Array.isArray(a)) { | |
for(i = 0, l = a.length; i < l; i += 1) { | |
root = root.set(0, new Box(i, Trie._deepConstruct(a[i], deep))); | |
} | |
} else if(isObject(a)) { | |
for(i in a) { | |
if(a.hasOwnProperty(i)) { | |
root = root.set(0, new Box(i, Trie._deepConstruct(a[i], deep))); | |
} | |
} | |
} | |
} | |
this._root = root; | |
} | |
Trie._deepConstruct = function(val, deep) { | |
if((Array.isArray(val) || isObject(val)) && deep === true) { | |
return new Trie(val, deep); | |
} else { | |
return val; | |
} | |
}; | |
Trie.prototype = { | |
constructor: Trie, | |
/** | |
"Sets" a value on the Trie for the provided key. Returns a new version of | |
the trie with the added element or elements. | |
If the key is a raw object or array, it will be treated as a collection | |
of items to be added with the named keys or indicies. In this case, if | |
the second argument is true, it will translate child objects and arrays | |
into Tries as well. | |
@method set | |
@param key {String|Object} | |
@param value {Any|Boolean} | |
@return {Trie} | |
**/ | |
set: function(key, value) { | |
var ret = new Trie(); | |
if(isObject(key)) { | |
ret._root = Object.keys(key).reduce(function(a, k) { | |
return a.set(0, new Box(k, Trie._deepConstruct(key[k], value))); | |
}, this._root); | |
} if(Array.isArray(key)) { | |
ret._root = key.reduce(function(a, v, i) { | |
return a.set(0, new Box(i, Trie._deepConstruct(v, value))); | |
}, this._root); | |
} else { | |
ret._root = this._root.set(0, new Box(key, value)); | |
} | |
return ret; | |
}, | |
/** | |
Returns a member from the Trie. | |
@method get | |
@param key {String|Number|Hashable} | |
@return The referenced value or undefined if not found. | |
**/ | |
get: function(key) { | |
return this._root.get(0, hash(key), key); | |
}, | |
/** | |
"Removes" a member from the Trie. Returns a copy of the Trie with the | |
named property removed. | |
@method unset | |
@param key {String|Number|Hashable} | |
@return {Trie} | |
**/ | |
unset: function(key) { | |
var ret = new Trie(), | |
args = [].slice.call(arguments); | |
ret._root = args.reduce(function(a, k) { | |
return a.unset(0, hash(k), k); | |
}, this._root); | |
return ret; | |
}, | |
/** | |
Excutes `fn` against every member of the Trie, optionally in the context | |
provided by the second argument. Behaves approximately like JS `forEach` | |
noting the above caveats about ordering. | |
@method each | |
@param fn {Function} | |
@param [context] {Object} | |
**/ | |
each: function(fn, context) { | |
this.map(fn, context); | |
}, | |
/** | |
Executes `fn` against every member in the list, taking the return value | |
and passing it to the next call as the first argument. Behaves like JS | |
reduce with the above caveats about ordering. | |
@method reduce | |
@param fn {Function} | |
@param [initial] {Any} | |
**/ | |
reduce: function(fn, initial) { | |
return this._root.entries().reduce(function(a, c) { | |
return fn.call(null, a, c._value, c._key); | |
}, initial); | |
}, | |
/** | |
Returns a list of keys in the Trie. | |
@method keys | |
@return {Array} | |
**/ | |
keys: function() { | |
return this.map(function(v, k) { return k; }); | |
}, | |
/** | |
Returns a list of values in the Trie. | |
@method values | |
@return {Array} | |
**/ | |
values: function(flatten) { | |
return this.map(function(v, k) { | |
return (flatten && v instanceof Trie) ? v.flatten(true) : v; | |
}); | |
}, | |
/** | |
Translates the Trie into an object/array representation of the object, | |
optionally performing the translation recursively. | |
@method flatten | |
@param deep {Boolean} | |
@return {Object|Array} | |
**/ | |
flatten: function(deep) { | |
return this.reduce(function(a, v, k) { | |
if(deep && v instanceof Trie) { | |
v = v.flatten(deep); | |
} | |
if(Array.isArray(a)) { | |
if(typeof k === "number" && Math.floor(k) === k) { | |
a[k] = v; | |
return a; | |
} else { | |
a = a.reduce(function(a, k, v) { | |
a[k] = v; return a; | |
}, {}); | |
a[k] = v; | |
return a; | |
} | |
} else { | |
a[k] = v; | |
return a; | |
} | |
}, []); | |
} | |
}; | |
/** | |
@method map | |
**/ | |
/** | |
@method filter | |
**/ | |
/** | |
@method some | |
**/ | |
/** | |
@method every | |
**/ | |
[ 'map', 'filter', 'some', 'every' ].forEach(function(action) { | |
Trie.prototype[action] = function(fn, cx) { | |
return this._root.entries()[action](function(entry) { | |
return fn.call(cx, entry._value, entry._key); | |
}); | |
}; | |
}); | |
// exports.Trie = Trie; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var p1 = new Trie({ foo: 1, bar: 2, baz: 3 }), | |
p2 = p1.unset("baz"); | |
p1.reduce(function(a, b) { return (a || 0) + b }); // 6 | |
p2.reduce(function(a, b) { return (a || 0) + b }); // 3 | |
// Use a complex key instead of a primitive value | |
function ComplexKey(name) { | |
this.hash = function() { | |
return "COMPLEXKEY__" + name; | |
}; | |
} | |
var p3 = p1.set(new ComplexKey("world"), "hello"); | |
p3.get(new ComplexKey("world")) // "hello" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment