Skip to content

Instantly share code, notes, and snippets.

@nhusher
Last active August 29, 2015 14:00
Show Gist options
  • Save nhusher/66fafa126cf67c37c2af to your computer and use it in GitHub Desktop.
Save nhusher/66fafa126cf67c37c2af to your computer and use it in GitHub Desktop.
A working HAMT implementation in JS.
//
// --- Utility Functions ----------------------------------------------------
//
/*
Found at <http://blog.faultylabs.com/2011.php?p=jsbitfun>
@param bits {Number} An integer to run popCnt on.
@return The number of 1 bits in the number.
*/
function popcount(bits) {
bits >>>= 0 /* force uint32 */
for(var i = 0; bits; bits &= bits - 1) {
i++
}
return i;
}
/*
A (bad) test for determining if an object is a hashmap-style object.
@param o {Object}
@return Whether or not the object is a hashmap-style object
*/
function isObject(o) {
return o.constructor === Object;
}
/*
Is a function?
@param f {Function}
@return Whether or not f is a function.
*/
function isFn(f) {
return (typeof f === 'function');
}
/*
Based on Java's hashing function. Good enough (probably), but slow.
@param v {Number|String|Hashable} A value that can be hashed. Integers
hashes to themself. Floats are translated to strings before being hashed,
otherwise, the function expects the object to have a `hash` function that
it can call.
`hash` should be a function that returns that either returns a unique
value for every instance of an object (i.e. a GUID), or a unique value for
each permutation of the values within an object.
*/
function hash(v) {
// coerce floating point numbers to string
v = (typeof v === 'number' && Math.floor(v) !== v) ? ''+v : v;
if(typeof v === 'number') {
return v >> 0;
} else if(typeof v === 'string') {
for(var h = 0, i = 0, l = v.length; i < l; i += 1) {
h = (((h << 5) - h) + v.charCodeAt(i));
}
return (h >> 0);
} else if(typeof v.hash === 'function') {
// allow custom hashing functions
return v.hash();
} else {
throw new Error("Value " + v + " is not hashable!");
}
}
/*
Print a number as bits. Useful for visualizing bit masks.
@param num {Number}
@return A binary string formatted into nibbles.
*/
function bit(num) {
var digits = num.toString(2).match(/(\d+)/)[0],
pad = ['0000','0000','0000','0000',
'0000','0000','0000','0000']
return (pad.join('') + digits).slice(-32).match(/.{4}/g).join(' ');
}
//
// --- Internal Objects -----------------------------------------------------
//
/**
A node in the HAMT. A Trie implementation detail that probably shouldn't be
interacted with directly.
@class Node
@constructor
@param mask {Number} The bitmask of the trie node. Usually 0 on construction.
@param children {Array} An array of children. Usually empty on construction.
**/
function Node(mask, children) {
this._mask = mask;
this._children = children;
}
var NP = Node.prototype;
Node.prototype = {
constructor: Node,
set: function(shift, valueBox) {
var mask = this._mask,
children = this._children,
hash = valueBox._hash,
maskPos = 1 << ((hash >>> shift) & 31), // calculates bitmask pos
index = popcount(mask & (maskPos - 1)),
child;
if(mask & maskPos) {
child = children[index];
if(child instanceof Node) {
return new Node(mask, [].concat(
children.slice(0,index),
child.set(shift + 5, valueBox),
children.slice(index + 1)
));
} else if(child instanceof Box) {
// TODO: resolve hash collisions with smarter Box objects
return new Node(mask, [].concat(
children.slice(0, index),
new Node(mask + 5, []).set(shift + 5, child).set(shift + 5, valueBox),
children.slice(index + 1)));
}
throw new Error("Invalid state: child is neither Node or value Box");
} else {
return new Node( ((mask | maskPos) >>> 0), [].concat(
children.slice(0, index),
valueBox,
children.slice(index)
));
}
},
unset: function(shift, hash, key) {
var mask = this._mask,
children = this._children,
maskPos = 1 << ((hash >>> shift) & 31),
index = popcount(mask & (maskPos - 1)),
result = this,
child;
if(mask & maskPos) {
child = children[index];
if(child instanceof Node) {
child = child.unset(shift + 5, hash, key);
if(child === null) {
result = new Node(mask ^ (mask & maskPos), [].concat(
children.slice(0, index),
children.slice(index + 1)
));
}
result = new Node(mask, [].concat(
children.slice(0, index),
child,
children.slice(index + 1)
));
} else if(child instanceof Box) {
result = new Node((maskPos ^ mask) >>> 0, [].concat(
children.slice(0, index),
children.slice(index + 1)
));
} else {
throw new Error("Invalid state: child is neither Node or value Box");
}
}
return (result._children.length === 0) ? null : result;
},
get: function(shift, hash, key) {
var mask = this._mask,
maskPos = 1 << ((hash >>> shift) & 31),
child;
if(mask & maskPos) {
child = this._children[popcount(mask & (maskPos - 1))];
if(child instanceof Node) {
return child.get(shift + 5, hash, key);
} else if(child instanceof Box) {
return child._value;
}
}
return undefined;
},
entries: function() {
var result = [];
this._children.forEach(function(c) {
if(c instanceof Node) {
result = result.concat(c.entries());
} else {
result.push(c);
}
});
return result;
}
};
/**
A container object for leaf nodes in the Trie.
@class Box
@constructor
@param key {Number|String|Hashable}
@param value {any}
**/
function Box(key, value) {
this._key = key;
this._value = value;
// Store the hash, since it's expensive to look up.
this._hash = hash(key);
}
//
// --- Public Interface -----------------------------------------------------
//
/**
A persistent immutable tree-based data structure based on Phil Bagwell's
'Ideal Hash Trees' paper. "Modifying" the tree produces new instances of the
tree that sturcutrally-share information with previous instances.
**Features:**
- Immutability: Adding items to the Trie return a new instance of the Trie
that shares structure with the previous version.
- Complex keys: Keys in the Trie can be numeric, strings, floating-point, or
objects. Object keys must implement the function `hash`, which returns a
unique key for that representation of object state. For example:
````
function Key(name) {
this.hash = function() { return "__KEY__" + name; };
}
````
In this case, subsequent instantiations of `Key` with the same name will
result in the same key.
**Caveats:**
- The Trie makes no order guarantees. Keys may be stored internally in any
order, or no order at all. Numeric keys will be returned in-order when
converting between a trie and a javascript array. String keys are always
orderless.
- The Trie uses a 32-bit hashing algorithm to store keys for fast lookup.
There is currently no strategy for resolving hash collisions. Currently
hash collisions will silently overwrite the previous instance. This is
on the shortlist of things to fix.
- For many operations, and most notably writes, the Trie is much slower than
a primitive JS data structure. This is the cost of strucutural sharing and
immutability.
- Immutability is not enforced, which is to say that you can reach into the
object and modify its members directly without error. However, if you do
this, the Trie cannot help you. Similarly, it's possible to put mutable
data into the Trie that can be modified by anyone who holds the reference.
The Trie has no way of preventing that, and such actions should be avoided.
@class Trie
@constructor
@param [a] {Object|Array} A data structure to convert into a persistent trie
@param [deep] {Boolean} Whether to translate children into tries as well.
**/
function Trie(a, deep) {
var root = new Node(0, []),
i, l;
if(a) {
if(Array.isArray(a)) {
for(i = 0, l = a.length; i < l; i += 1) {
root = root.set(0, new Box(i, Trie._deepConstruct(a[i], deep)));
}
} else if(isObject(a)) {
for(i in a) {
if(a.hasOwnProperty(i)) {
root = root.set(0, new Box(i, Trie._deepConstruct(a[i], deep)));
}
}
}
}
this._root = root;
}
Trie._deepConstruct = function(val, deep) {
if((Array.isArray(val) || isObject(val)) && deep === true) {
return new Trie(val, deep);
} else {
return val;
}
};
Trie.prototype = {
constructor: Trie,
/**
"Sets" a value on the Trie for the provided key. Returns a new version of
the trie with the added element or elements.
If the key is a raw object or array, it will be treated as a collection
of items to be added with the named keys or indicies. In this case, if
the second argument is true, it will translate child objects and arrays
into Tries as well.
@method set
@param key {String|Object}
@param value {Any|Boolean}
@return {Trie}
**/
set: function(key, value) {
var ret = new Trie();
if(isObject(key)) {
ret._root = Object.keys(key).reduce(function(a, k) {
return a.set(0, new Box(k, Trie._deepConstruct(key[k], value)));
}, this._root);
} if(Array.isArray(key)) {
ret._root = key.reduce(function(a, v, i) {
return a.set(0, new Box(i, Trie._deepConstruct(v, value)));
}, this._root);
} else {
ret._root = this._root.set(0, new Box(key, value));
}
return ret;
},
/**
Returns a member from the Trie.
@method get
@param key {String|Number|Hashable}
@return The referenced value or undefined if not found.
**/
get: function(key) {
return this._root.get(0, hash(key), key);
},
/**
"Removes" a member from the Trie. Returns a copy of the Trie with the
named property removed.
@method unset
@param key {String|Number|Hashable}
@return {Trie}
**/
unset: function(key) {
var ret = new Trie(),
args = [].slice.call(arguments);
ret._root = args.reduce(function(a, k) {
return a.unset(0, hash(k), k);
}, this._root);
return ret;
},
/**
Excutes `fn` against every member of the Trie, optionally in the context
provided by the second argument. Behaves approximately like JS `forEach`
noting the above caveats about ordering.
@method each
@param fn {Function}
@param [context] {Object}
**/
each: function(fn, context) {
this.map(fn, context);
},
/**
Executes `fn` against every member in the list, taking the return value
and passing it to the next call as the first argument. Behaves like JS
reduce with the above caveats about ordering.
@method reduce
@param fn {Function}
@param [initial] {Any}
**/
reduce: function(fn, initial) {
return this._root.entries().reduce(function(a, c) {
return fn.call(null, a, c._value, c._key);
}, initial);
},
/**
Returns a list of keys in the Trie.
@method keys
@return {Array}
**/
keys: function() {
return this.map(function(v, k) { return k; });
},
/**
Returns a list of values in the Trie.
@method values
@return {Array}
**/
values: function(flatten) {
return this.map(function(v, k) {
return (flatten && v instanceof Trie) ? v.flatten(true) : v;
});
},
/**
Translates the Trie into an object/array representation of the object,
optionally performing the translation recursively.
@method flatten
@param deep {Boolean}
@return {Object|Array}
**/
flatten: function(deep) {
return this.reduce(function(a, v, k) {
if(deep && v instanceof Trie) {
v = v.flatten(deep);
}
if(Array.isArray(a)) {
if(typeof k === "number" && Math.floor(k) === k) {
a[k] = v;
return a;
} else {
a = a.reduce(function(a, k, v) {
a[k] = v; return a;
}, {});
a[k] = v;
return a;
}
} else {
a[k] = v;
return a;
}
}, []);
}
};
/**
@method map
**/
/**
@method filter
**/
/**
@method some
**/
/**
@method every
**/
[ 'map', 'filter', 'some', 'every' ].forEach(function(action) {
Trie.prototype[action] = function(fn, cx) {
return this._root.entries()[action](function(entry) {
return fn.call(cx, entry._value, entry._key);
});
};
});
// exports.Trie = Trie;
var p1 = new Trie({ foo: 1, bar: 2, baz: 3 }),
p2 = p1.unset("baz");
p1.reduce(function(a, b) { return (a || 0) + b }); // 6
p2.reduce(function(a, b) { return (a || 0) + b }); // 3
// Use a complex key instead of a primitive value
function ComplexKey(name) {
this.hash = function() {
return "COMPLEXKEY__" + name;
};
}
var p3 = p1.set(new ComplexKey("world"), "hello");
p3.get(new ComplexKey("world")) // "hello"

Hash-array Mapped Trie in Javascript

Todo:

  • Fix the silent hash collision issue
  • Provide more/better examples
  • Create a repository for this, and translate to a YUI-compatible module
  • Document all the methods &c.
  • Create a 'transient' version to make Trie construction much cheaper.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment