Skip to content

Instantly share code, notes, and snippets.

@chiral
Created November 22, 2013 11:45
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chiral/7598645 to your computer and use it in GitHub Desktop.
Save chiral/7598645 to your computer and use it in GitHub Desktop.
An implimentation of "Tree Edit Distance" algorithm. This is a straight forward code for the formula presented on the PFI's blog page: http://research.preferred.jp/2012/02/tree-edit-distance/
function attach(obj,methods) {
var wrap = function(a,f) {
return function() { return f.apply(a,arguments); }
};
for (var name in methods) {
if (obj[name]) throw "conflict:"+name;
obj[name] = wrap(obj,methods[name]);
}
return obj;
}
function to_key_default($,$node) {
var n = $node[0];
var a = n.attributes;
var r=[n.tagName.toLowerCase()];
for (var i=0; i<a.length; i++) {
var s=a[i].nodeName;
if (s=='id' || s=='class') {
if (s=='id') s='#';
if (s=='class') s='.';
if (a[i].nodeValue) s+=a[i].nodeValue;
}
r.push(s);
}
return '<'+r.join(' ')+'>';
}
function $children($,$node) {
var cs=[];
$node.children().each(function(){
cs.push($(this));
});
return cs;
}
function traverse($,$siblings,nodes,to_key) {
var siblings=[];
var N=$siblings.length;
for (var i=N-1; i>=0; i--) {
var $cs=$children($,$siblings[i]);
var children=traverse($,$cs,nodes,to_key);
siblings.push(nodes.length);
nodes.push({
id: nodes.length,
children: children,
key: to_key($,$siblings[i])
});
}
return siblings;
}
var init = function($,$node,to_key) {
var nodes = [];
if (!to_key) to_key=to_key_default;
var children=traverse($,$children($,$node),nodes,to_key);
nodes.push({id:nodes.length,
children:children,
key:to_key($,$node)});
return attach({
nodes:nodes, F:[{from:0,to:nodes.length-1}]
},{
index: function(F) {
return F[F.length-1].from*this.nodes.length+F[0].to;
},
max_index: function() {
return this.nodes.length*this.nodes.length;
},
R: function(F) {
return F[0].to;
},
C: function(F) {
var r=[];
var c=this.nodes[F[0].to].children;
for (var i=c.length-1; i>=0; i--){
var from=i>0?c[i-1]+1:F[0].from;
r.push({from:from,to:c[i]});
}
return r;
},
L: function(F,r) {
if (!r) r=[];
for (var i=1; i<F.length; i++) {
r.push(F[i]);
}
return r;
},
});
}
var memoized = function(T1,T2) {
var tbl={};
var index=function(F1,F2) {
var index1=T1.index(F1);
var index2=T2.index(F2);
var M1=T1.max_index();
return M1*index2+index1;
};
var size=function(F) {
return F[0].to-F[F.length-1].from+1;
};
return attach({},{
gamma: function(id1,id2) {
if (id1==null || id2==null) {
if (id1!=null) return 1;
if (id2!=null) return 1;
return 0;
}
if (T1.nodes[id1].key!=T2.nodes[id2].key) return 1;
return 0;
},
distance: function(F1,F2) {
if (F1.length==0 || F2.length==0) {
if (F1.length) return size(F1);
if (F2.length) return size(F2);
return 0;
}
var ti=index(F1,F2);
var t=tbl[ti];
if (t!=null) return t.d;
var d={};
d.edit=this.gamma(T1.R(F1),T2.R(F2))+
this.distance(T1.C(F1),T2.C(F2))+
this.distance(T1.L(F1),T2.L(F2));
d.del1=this.gamma(T1.R(F1),null)+
this.distance(T1.L(F1,T1.C(F1)),F2);
d.del2=this.gamma(null,T2.R(F2))+
this.distance(F1,T2.L(F2,T2.C(F2)));
for (var op in d) {
if (t==null || d[op]<t.d) t={op:op,d:d[op]};
}
tbl[ti]=t;
return t.d;
},
get_ops: function(F1,F2,ops) {
var key_str=function(T,F) {
var r=[];
var to=F[0].to,from=F[F.length-1].from;
for (var i=to; i>=from; i--) {
r.push(T.nodes[i].key);
}
return r.join(',');
};
if (F1.length==0 || F2.length==0) {
if (F1.length) ops.push('rest1:'+key_str(T1,F1));
if (F2.length) ops.push('rest2:'+key_str(T2,F2));
return;
}
var ti=index(F1,F2);
var t=tbl[ti];
var k1=''+T1.nodes[F1[0].to].key;
var k2=''+T2.nodes[F2[0].to].key;
switch (t.op) {
case "del1":
ops.push('del1:'+k1);
this.get_ops(T1.L(F1,T1.C(F1)),F2,ops);
break;
case "del2":
ops.push('del2:'+k2);
this.get_ops(F1,T2.L(F2,T2.C(F2)),ops);
break;
case "edit":
ops.push(k1+(k1==k2?'=':'/')+k2);
this.get_ops(T1.C(F1),T2.C(F2),ops);
this.get_ops(T1.L(F1),T2.L(F2),ops);
break;
}
}
});
}
exports.calc = function($,$node1,$node2,ops,to_key) {
var T1 = init($,$node1);
var T2 = init($,$node2);
var m=memoized(T1,T2);
var d=m.distance(T1.F,T2.F);
if (ops) m.get_ops(T1.F,T2.F,ops);
return d;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment