// This is a simple interpreter for the untyped lambda calculus
// in JavaScript, used as an example of the possible benefits of
// hash-consing. Skip to the end of the file to see the tests.
// ~ SrPeixinho
var i32size = Math.pow(2,32);
var i32sizeInv = 1/i32size;
var stats = {
  beta_reductions       : 0,
  reductions            : 0,
  unmemoized_reductions : 0,
  subs_calls            : 0,
  unmemoized_subs_calls : 0,
  max_var               : 0};
function Runtime(hashCons){
  this.subsMemo = {};
  this.left_    = [];
  this.right_   = [];
  this.normal_  = {};
  this.buckets  = 1000003;
  this.hash     = new Array(this.buckets);
  this.hashCons = hashCons;
  for (var i=0; i<this.buckets; ++i)
    this.hash[i] = [];
};
Runtime.prototype.app = function(x,y){
  // Applies term "x" to term "y".
  // This is where the memory allocation happens.
  var ptr;
  if (this.hashCons){
    // Look the (x,y) pair up in its bucket; if an identical application
    // node was already allocated, return it instead of making a new one.
    var bucketIndex = ((x>>>0)*31+(y>>>0))%this.buckets;
    for (var j=0, bucket=this.hash[bucketIndex], l=bucket.length; j<l; j+=1){
      ptr = bucket[j];
      if (x === this.left(ptr) && y === this.right(ptr))
        return ptr;
    };
  };
  // Not found (or hash consing disabled): allocate a fresh node.
  var pos = this.left_.length;
  ptr = 0x80000000|pos;
  this.left_[pos] = x;
  this.right_[pos] = y;
  if (this.hashCons)
    bucket[j] = ptr; // j === bucket.length here, so this appends
  return ptr;
};
// Node encoding: each term is a single 32-bit integer.
//   bits 0-19  : a de Bruijn index, or an index into left_/right_ for applications
//   bits 20-29 : how many lambda wrappers surround that inner node
//   bit 31     : set when the inner node is an application
Runtime.prototype.isApp = function(node){
  return (node >>> 20) === 0x800;
};
Runtime.prototype.left = function(node){
  return this.left_[(node & 0x000FFFFF)];
};
Runtime.prototype.right = function(node){
  return this.right_[(node & 0x000FFFFF)];
};
Runtime.prototype.isLam = function(node){
  return node & 0x3FF00000;
};
Runtime.prototype.lam = function(node){
  return node + 0x00100000;
};
Runtime.prototype.body = function(node){
  return (node >>> 20) === 0x800 ? this.right(node) : node - 0x00100000;
};
Runtime.prototype.isVar = function(node){
  return !(node >>> 20);
};
Runtime.prototype.normal = function(x){
  // Computes the normal form of term "x"
  // (memoized by node id when hash consing is on).
  ++stats.reductions;
  if (this.hashCons && this.normal_[x])
    return this.normal_[x];
  if (this.isApp(x)){
    var l = this.normal(this.left(x));
    var r = this.normal(this.right(x));
  };
  ++stats.unmemoized_reductions;
  var normal = (
      this.isVar(x) ? (stats.max_var < x ? (stats.max_var=x) : x)
    : this.isLam(x) ? this.lam(this.normal(this.body(x)))
    : this.isLam(l) ? this.apply(l,r)
    : this.app(l,r));
  if (this.hashCons)
    this.normal_[x] = normal;
  return normal;
};
Runtime.prototype.subs = function(t,d,w,x){
  // Substitutes term "t" for the variables of "x" that are bound to
  // the "d"th abstraction above them, and adds "w" to free variables.
  ++stats.subs_calls;
  var hash = ""+t+"_"+d+"_"+w+"_"+x; // bad, bad, todo: improve
  if (this.hashCons && this.subsMemo[hash])
    return this.subsMemo[hash];
  ++stats.unmemoized_subs_calls;
  // Don't even bother trying to read this.
  return this.subsMemo[hash]
    = this.isApp(x) ? this.app(this.subs(t,d,w,this.left(x)),this.subs(t,d,w,this.right(x)))
    : this.isLam(x) ? this.lam(this.subs(t,d+1,w,this.body(x)))
    : this.isVar(x) ? (t && x===d ? this.subs(0,-1,d,t) : x>d ? x+w : x)
    : x;
};
Runtime.prototype.apply = function(f,x){
  // Beta-reduces the redex (f x), where "f" is a lambda.
  ++stats.beta_reductions;
  return this.normal(this.subs(x,0,-1,this.body(f)));
};
Runtime.prototype.toString = function(x){
  return (this.isApp(x) ? "("+this.toString(this.left(x))+" "+this.toString(this.right(x))+")"
        : this.isLam(x) ? "(λ "+this.toString(this.body(x))+")"
        : "v"+x);
};
// Change this to "false" to disable hash consing.
var useHashConsing = true;
// Starts the runtime + helpers.
var rt = new Runtime(useHashConsing);
L = function(x){return rt.lam(x)};
A = function(a,b){return rt.app(a,b)};
// A few combinators (de Bruijn indices, 0 = the innermost binder).
id   = L(0);                        // λx. x
succ = L(L(L(A(1,A(A(2,1),0)))));   // λn.λf.λx. f (n f x)
mul  = L(L(L(A(2,A(1,0)))));        // λm.λn.λf. m (n f)
// Church numbers up to 100.
var c = [L(L(0))];                  // c[0] = λf.λx. x
for (var i=0; i<100; ++i)
  c.push(A(succ,c[c.length - 1]));
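// A quick sanity check, kept commented out so it does not change the
// stats printed below (a hedged example, not part of the original tests):
//
//   rt.toString(id)                       // "(λ v0)"
//   rt.toString(rt.normal(A(id, c[0])))   // "(λ (λ v0))" -- id zero ~> zero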
// This is a rather stupid way to take a power of two, but it
// may be a good example. It creates a Scott-encoded binary tree
// of depth n with all leaves = 1, and then sums it (e.g., for
// n = 2 the tree has 4 leaves, so the sum is 4 = 2^2).
pow2 = L(A(A(A(A(A(A(0,L(L(L(L(L(A(A(A(2,2),A(4,3)),A(4,3))))))))
,L(L(L(L(A(1,3)))))),L(L(A(1,0)))),L(L(L(A(A(A(A(A(0,2),L(0)),
L(L(0))),L(L(L(A(1,A(A(2,1),0)))))),A(A(A(1,2),L(0)),
L(L(0)))))))),L(0)),L(L(0))))
// Test = (pow2 church_9) -- 2^9
main = A(pow2,c[9]);
// Outputs results
console.log("Normal of `main`:");
console.log(rt.toString(rt.normal(main)));
console.log();
console.log("Node count :",rt.left_.length);
console.log("Beta-reductions :",stats.beta_reductions);
console.log("Reductions :",stats.reductions,"("+(stats.reductions-stats.unmemoized_reductions)+" memoized)");
console.log("Calls to subs :",stats.subs_calls,"("+(stats.subs_calls-stats.unmemoized_subs_calls)+" memoized)");
console.log("Max bruijn var :",stats.max_var);
//Result on my computer:
//
//With hash consing:
//
//Node count : 3589
//Beta-reductions : 893
//Reductions : 210516 (204195 memoized)
//Calls to subs : 15095 (5556 memoized)
//Max bruijn var : 30
//real 0m0.227s
//user 0m0.195s
//sys 0m0.036s
//
// Without hash consing:
//
//Node count : 695773
//Beta-reductions : 13616
//Reductions : 893424 (0 memoized)
//Calls to subs : 972923 (0 memoized)
//Max bruijn var : 30
//real 0m0.943s
//user 0m0.853s
//sys 0m0.093s
//
// I believe hash consing + automatic memoization is useful and **not** replicable by manually memoizing your functions, because:
//
//
// 1. Saving **a lot** of memory (think voxel worlds...)
//
// Suppose you use a balanced tree to represent an array.
//
// data Tree a = Node (Tree a) (Tree a) | Leaf a
//
// Suppose that you fill an array of length 8 with zeros.
//
// tree = Node
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0)))
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0)))
//
// Without hash consing, we have this in memory:
// a = 0 0
// b = 0 0
// c = 0 0
// d = 0 0
// e = a b
// f = c d
// tree = e f
// So, (8-1)*2*32 = 448 bytes.
//
// With hash consing, we have this:
// a = 0 0
// b = a a
// tree = b b
// So, log2(8)*2*32 = 192 bytes
//
// Similarly, for a 1024x1024 array we have
//
// No hash consing : 67043328 bytes ≈ 67 MB
// Hash consing    : 1280 bytes = 0.00128 MB
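//
// A minimal sketch of that sharing, using a hypothetical hash-consing
// `node` constructor (not part of the original code): only one object
// is allocated per distinct (left, right) pair, so the 8-leaf tree
// above costs 3 nodes instead of 7.
//
//   var nodes = {}, count = 0;
//   function node(l, r){
//     var key = l.id + "_" + r.id;
//     if (!nodes[key]) nodes[key] = {id: ++count, left: l, right: r};
//     return nodes[key];
//   }
//   var zero = {id: 0};
//   var a    = node(zero, zero);  // allocates node 1
//   var b    = node(a, a);        // allocates node 2
//   var tree = node(b, b);        // allocates node 3 -- count === 3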
//
// I actually *really* needed this for
//
//
// 2. Automatic memoization
//
// With hash consing, the naive fib is O(N) instead of O(2^N).
// How cool would it be to teach students the naive fib and have
// it actually work? But there is more to this than that. You could
// argue that this can be replicated by just appending "memoize" to
// your function definition, but that is not true (see the sketch at
// the end of this point), because:
//
// a) Unless you are memoizing functions from ints to ints and
// so on, you need to take the hash of the structure, and that
// can be expensive. You essentially can't memoize `length` in
// Haskell in a way that actually makes it faster. With hash-consing,
// `length [GIANT_LIST]` will only take a lot of time once. Then
// never again.
//
// b) It still doesn't memoize subexpressions.
//
// c) It protects the whole language from duplicated computation,
// not just the functions you specifically tag.
//
// And I really believe this will make real programs MUCH faster.
// Think of how much duplicated computation happens under the hood.
// Hash-consing is the kind of thing that has a bad rep because it
// costs a 2~3x constant factor on small benchmarks.
// But for real-life programs the wins can be stupidly large.
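//
// A minimal sketch of that point (hypothetical helpers `succOf` and
// `fib`, not part of the original code): numbers are built through a
// hash-consing constructor, so equal numbers are the *same* object and
// the memo key is just the node id -- the same trick the `normal_`
// table above applies to every term automatically. That turns the
// naive doubly recursive fib into O(N) calls:
//
//   var succMemo = {}, nextId = 1;
//   var ZERO = {id: 0};
//   function succOf(n){                 // hash-consed successor
//     if (!succMemo[n.id]) succMemo[n.id] = {id: nextId++, pred: n};
//     return succMemo[n.id];
//   }
//   var fibMemo = {};
//   function fib(n){                    // naive fib, memoized by node id
//     if (fibMemo[n.id] !== undefined) return fibMemo[n.id];
//     var r = !n.pred ? 0
//           : !n.pred.pred ? 1
//           : fib(n.pred) + fib(n.pred.pred);
//     return fibMemo[n.id] = r;
//   }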
//
//
// 3. O(1) equality
//
// With hash consing, you can compare any structure for equality in O(1),
// regardless of how deep it is. How cool would it be to be able to
// guiltlessly compare for equality in production code, without having
// to create IDs for hashing and so on?
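//
// For example, with the runtime above (useHashConsing = true), building
// the same term twice yields the very same node, so `===` already is a
// full structural comparison (a hedged check, not part of the original
// tests):
//
//   var t1 = A(succ, c[9]);
//   var t2 = A(succ, c[9]);
//   t1 === t2   // true: both are the same hash-consed node (it is c[10])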
//
//
// 4. O(1) hashing
//
// Since everything is hashed, "hashable" isn't necessary anymore.
// You can take the hash of anything in O(1) as well.
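//
// In the runtime above, the 32-bit node id itself plays that role: the
// `normal_` table keys on it directly, and user code can do the same
// (a hedged example, not part of the original tests):
//
//   var table = {};
//   table[A(succ, c[9])] = "anything";   // the hash-consed id is the key
//   table[A(succ, c[9])]                 // "anything": same term, same id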
//
//
// If it is not unbearably hard to implement, I don't see why hash-
// consing shouldn't be a compiler option.