// This is a simple interpreter for the untyped lambda calculus
// in JavaScript, used as an example of the possible benefits of
// hash-consing. Skip to the end of the file to see the tests.
// ~ SrPeixinho
var i32size = Math.pow(2,32);
var i32sizeInv = 1/i32size;
var stats = {
  beta_reductions       : 0,
  reductions            : 0,
  unmemoized_reductions : 0,
  subs_calls            : 0,
  unmemoized_subs_calls : 0,
  max_var               : 0};
function Runtime(hashCons){
  this.subsMemo = {};
  this.left_    = [];
  this.right_   = [];
  this.normal_  = {};
  this.buckets  = 1000003;
  this.hash     = new Array(this.buckets);
  this.hashCons = hashCons;
  for (var i=0; i<this.buckets; ++i)
    this.hash[i] = [];
};
Runtime.prototype.app = function(x,y){
  // Applies term "x" to term "y".
  // This is where the memory allocation happens.
  var ptr;
  if (this.hashCons){
    // Look the (x,y) pair up in its bucket; if an identical application
    // node was already allocated, return it instead of making a new one.
    var bucketIndex = ((x>>>0)*31+(y>>>0))%this.buckets;
    for (var j=0, bucket=this.hash[bucketIndex], l=bucket.length; j<l; j+=1){
      ptr = bucket[j];
      if (x === this.left(ptr) && y === this.right(ptr))
        return ptr;
    };
  };
  // Not found (or hash consing disabled): allocate a fresh node.
  var pos = this.left_.length;
  ptr = 0x80000000|pos;
  this.left_[pos] = x;
  this.right_[pos] = y;
  if (this.hashCons)
    bucket[j] = ptr; // j === bucket.length here, so this appends
  return ptr;
};
// Node encoding: each term is a single 32-bit integer.
//   bits 0-19  : a de Bruijn index, or an index into left_/right_ for applications
//   bits 20-29 : how many lambda wrappers surround that inner node
//   bit 31     : set when the inner node is an application
Runtime.prototype.isApp = function(node){
  return (node >>> 20) === 0x800;
};
Runtime.prototype.left = function(node){
  return this.left_[(node & 0x000FFFFF)];
};
Runtime.prototype.right = function(node){
  return this.right_[(node & 0x000FFFFF)];
};
Runtime.prototype.isLam = function(node){
  return node & 0x3FF00000;
};
Runtime.prototype.lam = function(node){
  return node + 0x00100000;
};
Runtime.prototype.body = function(node){
  return (node >>> 20) === 0x800 ? this.right(node) : node - 0x00100000;
};
Runtime.prototype.isVar = function(node){
  return !(node >>> 20);
};
Runtime.prototype.normal = function(x){
  // Computes the normal form of term "x"
  // (memoized by node id when hash consing is on).
  ++stats.reductions;
  if (this.hashCons && this.normal_[x])
    return this.normal_[x];
  if (this.isApp(x)){
    var l = this.normal(this.left(x));
    var r = this.normal(this.right(x));
  };
  ++stats.unmemoized_reductions;
  var normal = (
      this.isVar(x) ? (stats.max_var < x ? (stats.max_var=x) : x)
    : this.isLam(x) ? this.lam(this.normal(this.body(x)))
    : this.isLam(l) ? this.apply(l,r)
    : this.app(l,r));
  if (this.hashCons)
    this.normal_[x] = normal;
  return normal;
};
Runtime.prototype.subs = function(t,d,w,x){
  // Substitutes term "t" for the variables of "x" that are bound to
  // the "d"th abstraction above them, and adds "w" to free variables.
  ++stats.subs_calls;
  var hash = ""+t+"_"+d+"_"+w+"_"+x; // bad, bad, todo: improve
  if (this.hashCons && this.subsMemo[hash])
    return this.subsMemo[hash];
  ++stats.unmemoized_subs_calls;
  // Don't even bother trying to read this.
  return this.subsMemo[hash]
    = this.isApp(x) ? this.app(this.subs(t,d,w,this.left(x)),this.subs(t,d,w,this.right(x)))
    : this.isLam(x) ? this.lam(this.subs(t,d+1,w,this.body(x)))
    : this.isVar(x) ? (t && x===d ? this.subs(0,-1,d,t) : x>d ? x+w : x)
    : x;
};
Runtime.prototype.apply = function(f,x){
  // Beta-reduces the redex (f x), where "f" is a lambda.
  ++stats.beta_reductions;
  return this.normal(this.subs(x,0,-1,this.body(f)));
};
Runtime.prototype.toString = function(x){
  return (this.isApp(x) ? "("+this.toString(this.left(x))+" "+this.toString(this.right(x))+")"
        : this.isLam(x) ? "(λ "+this.toString(this.body(x))+")"
        : "v"+x);
};
// Change this to "false" to disable hash consing.
var useHashConsing = true;
// Starts the runtime + helpers.
var rt = new Runtime(useHashConsing);
L = function(x){return rt.lam(x)};
A = function(a,b){return rt.app(a,b)};
// A few combinators (de Bruijn indices, 0 = the innermost binder).
id   = L(0);                        // λx. x
succ = L(L(L(A(1,A(A(2,1),0)))));   // λn.λf.λx. f (n f x)
mul  = L(L(L(A(2,A(1,0)))));        // λm.λn.λf. m (n f)
// Church numbers up to 100.
var c = [L(L(0))];                  // c[0] = λf.λx. x
for (var i=0; i<100; ++i)
  c.push(A(succ,c[c.length - 1]));
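// A quick sanity check, kept commented out so it does not change the
// stats printed below (a hedged example, not part of the original tests):
//
//   rt.toString(id)                       // "(λ v0)"
//   rt.toString(rt.normal(A(id, c[0])))   // "(λ (λ v0))" -- id zero ~> zero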
// This is a rather stupid way to take a power of two, but it
// may be a good example. It creates a Scott-encoded binary tree
// of depth n with all leaves = 1, and then sums it (e.g., for
// n = 2 the tree has 4 leaves, so the sum is 4 = 2^2).
pow2 = L(A(A(A(A(A(A(0,L(L(L(L(L(A(A(A(2,2),A(4,3)),A(4,3))))))))
,L(L(L(L(A(1,3)))))),L(L(A(1,0)))),L(L(L(A(A(A(A(A(0,2),L(0)),
L(L(0))),L(L(L(A(1,A(A(2,1),0)))))),A(A(A(1,2),L(0)),
L(L(0)))))))),L(0)),L(L(0))))
// Test = (pow2 church_9) -- 2^9
main = A(pow2,c[9]);
// Outputs results
console.log("Normal of `main`:");
console.log(rt.toString(rt.normal(main)));
console.log();
console.log("Node count :",rt.left_.length);
console.log("Beta-reductions :",stats.beta_reductions);
console.log("Reductions :",stats.reductions,"("+(stats.reductions-stats.unmemoized_reductions)+" memoized)");
console.log("Calls to subs :",stats.subs_calls,"("+(stats.subs_calls-stats.unmemoized_subs_calls)+" memoized)");
console.log("Max bruijn var :",stats.max_var);
//Result on my computer:
//
//With hash consing:
//
//Node count : 3589
//Beta-reductions : 893
//Reductions : 210516 (204195 memoized)
//Calls to subs : 15095 (5556 memoized)
//Max bruijn var : 30
//real 0m0.227s
//user 0m0.195s
//sys 0m0.036s
//
// Without hash consing:
//
//Node count : 695773
//Beta-reductions : 13616
//Reductions : 893424 (0 memoized)
//Calls to subs : 972923 (0 memoized)
//Max bruijn var : 30
//real 0m0.943s
//user 0m0.853s
//sys 0m0.093s
//
// I believe hash consing + automatic memoization is useful and **not** replicable by manually memoizing your functions, because:
//
//
// 1. Saving **a lot** of memory (think voxel worlds...)
//
// Suppose you use a balanced tree to represent an array.
//
// data Tree a = Node (Tree a) (Tree a) | Leaf a
//
// Suppose that you fill an array of length 8 with zeros.
//
// tree = Node
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0)))
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0)))
//
// Without hash consing, we have this in memory:
// a = 0 0
// b = 0 0
// c = 0 0
// d = 0 0
// e = a b
// f = c d
// tree = e f
// So, (8-1)*2*32 = 448 bytes.
//
// With hash consing, we have this:
// a = 0 0
// b = a a
// tree = b b
// So, log2(8)*2*32 = 192 bytes
//
// Similarly, for a 1024x1024 array we have
//
// No hash consing : 67043328 bytes ≈ 67 MB
// Hash consing    : 1280 bytes = 0.00128 MB
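//
// A minimal sketch of that sharing, using a hypothetical hash-consing
// `node` constructor (not part of the original code): only one object
// is allocated per distinct (left, right) pair, so the 8-leaf tree
// above costs 3 nodes instead of 7.
//
//   var nodes = {}, count = 0;
//   function node(l, r){
//     var key = l.id + "_" + r.id;
//     if (!nodes[key]) nodes[key] = {id: ++count, left: l, right: r};
//     return nodes[key];
//   }
//   var zero = {id: 0};
//   var a    = node(zero, zero);  // allocates node 1
//   var b    = node(a, a);        // allocates node 2
//   var tree = node(b, b);        // allocates node 3 -- count === 3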
//
// I actually *really* needed this for
//
//
// 2. Automatic memoization
//
// With hash consing, the naive fib is O(N) instead of O(2^N).
// How cool would it be to teach students the naive fib and have
// it actually work? But there is more to this than that. You could
// argue that this can be replicated by just appending "memoize" to
// your function definition, but that is not true (see the sketch at
// the end of this point), because:
//
// a) Unless you are memoizing functions from ints to ints and
// so on, you need to take the hash of the structure, and that
// can be expensive. You essentially can't memoize `length` in
// Haskell in a way that actually makes it faster. With hash-consing,
// `length [GIANT_LIST]` will only take a lot of time once. Then
// never again.
//
// b) It still doesn't memoize subexpressions.
//
// c) It protects the whole language from duplicated computation,
// not just the functions you specifically tag.
//
// And I really believe this will make real programs MUCH faster.
// Think of how much duplicated computation happens under the hood.
// Hash-consing is the kind of thing that has a bad rep because it
// costs a 2~3x constant factor on small benchmarks.
// But for real-life programs the wins can be stupidly large.
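//
// A minimal sketch of that point (hypothetical helpers `succOf` and
// `fib`, not part of the original code): numbers are built through a
// hash-consing constructor, so equal numbers are the *same* object and
// the memo key is just the node id -- the same trick the `normal_`
// table above applies to every term automatically. That turns the
// naive doubly recursive fib into O(N) calls:
//
//   var succMemo = {}, nextId = 1;
//   var ZERO = {id: 0};
//   function succOf(n){                 // hash-consed successor
//     if (!succMemo[n.id]) succMemo[n.id] = {id: nextId++, pred: n};
//     return succMemo[n.id];
//   }
//   var fibMemo = {};
//   function fib(n){                    // naive fib, memoized by node id
//     if (fibMemo[n.id] !== undefined) return fibMemo[n.id];
//     var r = !n.pred ? 0
//           : !n.pred.pred ? 1
//           : fib(n.pred) + fib(n.pred.pred);
//     return fibMemo[n.id] = r;
//   }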
//
//
// 3. O(1) equality
//
// With hash consing, you can compare any structure for equality in O(1),
// regardless of how deep it is. How cool would it be to be able to
// guiltlessly compare for equality in production code, without having
// to create IDs for hashing and so on?
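//
// For example, with the runtime above (useHashConsing = true), building
// the same term twice yields the very same node, so `===` already is a
// full structural comparison (a hedged check, not part of the original
// tests):
//
//   var t1 = A(succ, c[9]);
//   var t2 = A(succ, c[9]);
//   t1 === t2   // true: both are the same hash-consed node (it is c[10])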
//
//
// 4. O(1) hashing
//
// Since everything is hashed, "hashable" isn't necessary anymore.
// You can take the hash of anything in O(1) as well.
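//
// In the runtime above, the 32-bit node id itself plays that role: the
// `normal_` table keys on it directly, and user code can do the same
// (a hedged example, not part of the original tests):
//
//   var table = {};
//   table[A(succ, c[9])] = "anything";   // the hash-consed id is the key
//   table[A(succ, c[9])]                 // "anything": same term, same id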
//
//
// If it is not unbearably hard to implement, I don't see why hash-
// consing shouldn't be a compiler option.