Skip to content

Instantly share code, notes, and snippets.

@lindenb
Last active August 29, 2015 13:58
Show Gist options
  • Save lindenb/10281068 to your computer and use it in GitHub Desktop.
Save lindenb/10281068 to your computer and use it in GitHub Desktop.
answer to biostar http://www.biostars.org/p/97409/ "Collapse clades in a Newick tree with average distance to tips < x"
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<script>
var newick= null;
/* parser generated by jison 0.4.13 */
/*
Returns a Parser object of the following structure:
Parser: {
yy: {}
}
Parser.prototype: {
yy: {},
trace: function(),
symbols_: {associative list: name ==> number},
terminals_: {associative list: number ==> name},
productions_: [...],
performAction: function anonymous(yytext, yyleng, yylineno, yy, yystate, $$, _$),
table: [...],
defaultActions: {...},
parseError: function(str, hash),
parse: function(input),
lexer: {
EOF: 1,
parseError: function(str, hash),
setInput: function(input),
input: function(),
unput: function(str),
more: function(),
less: function(n),
pastInput: function(),
upcomingInput: function(),
showPosition: function(),
test_match: function(regex_match_array, rule_index),
next: function(),
lex: function(),
begin: function(condition),
popState: function(),
_currentRules: function(),
topState: function(),
pushState: function(condition),
options: {
ranges: boolean (optional: true ==> token location info will include a .range[] member)
flex: boolean (optional: true ==> flex-like lexing behaviour where the rules are tested exhaustively to find the longest match)
backtrack_lexer: boolean (optional: true ==> lexer regexes are tested in order and for each matching regex the action code is invoked; the lexer terminates the scan when a token is returned by the action code)
},
performAction: function(yy, yy_, $avoiding_name_collisions, YY_START),
rules: [...],
conditions: {associative list: name ==> set},
}
}
token location info (@$, _$, etc.): {
first_line: n,
last_line: n,
first_column: n,
last_column: n,
range: [start_number, end_number] (where the numbers are indexes into the input string, regular zero-based)
}
the parseError function receives a 'hash' object with these members for lexer and parser errors: {
text: (matched text)
token: (the produced terminal token, if any)
line: (yylineno)
}
while parser (grammar) errors will also provide these members, i.e. parser errors deliver a superset of attributes: {
loc: (yylloc)
expected: (string describing the set of expected tokens)
recoverable: (boolean: TRUE when the parser has a error recovery rule available for this particular error)
}
*/
var parser = (function(){
var parser = {trace: function trace(){},
yy: {},
symbols_: {"error":2,"input":3,"descendant_list":4,"optional_label":5,"optional_length":6,"SEMICOLON":7,"EOF":8,"OPAR":9,"descendant_list_item":10,"CPAR":11,"subtree":12,"COMMA":13,"label":14,"COLON":15,"number":16,"STRING":17,"NUMBER":18,"$accept":0,"$end":1},
terminals_: {2:"error",7:"SEMICOLON",8:"EOF",9:"OPAR",11:"CPAR",13:"COMMA",15:"COLON",17:"STRING",18:"NUMBER"},
productions_: [0,[3,5],[4,3],[10,1],[10,3],[12,3],[12,2],[5,0],[5,1],[6,0],[6,2],[14,1],[16,1]],
performAction: function anonymous(yytext, yyleng, yylineno, yy, yystate, $$, _$) {
/* this == yyval */
var $0 = $$.length - 1;
switch (yystate) {
case 1:
var tree={next:null,child:null};
tree.child=$$[$0-4];
tree.label=$$[$0-3];
tree.length=$$[$0-2];
newick=tree;
break;
case 2:
this.$=$$[$0-1];
break;
case 3:
this.$=$$[$0];
break;
case 4:
var last=$$[$0-2];
this.$=$$[$0-2];
while(last.next!=null)
{
last=last.next;
}
last.next=$$[$0];
break;
case 5:
this.$={next:null};
this.$.child=$$[$0-2];
this.$.label=$$[$0-1];
this.$.length=$$[$0];
break;
case 6:
this.$={next:null,child:null};
this.$.label=$$[$0-1];
this.$.length=$$[$0];
break;
case 7: this.$=null;
break;
case 8: this.$=$$[$0];
break;
case 9: this.$=null;
break;
case 10: this.$=$$[$0];
break;
case 11: this.$=$$[$0];
break;
case 12: this.$=Number($$[$0]);
break;
}
},
table: [{3:1,4:2,9:[1,3]},{1:[3]},{5:4,7:[2,7],14:5,15:[2,7],17:[1,6]},{4:9,9:[1,3],10:7,12:8,14:10,17:[1,6]},{6:11,7:[2,9],15:[1,12]},{7:[2,8],11:[2,8],13:[2,8],15:[2,8]},{7:[2,11],11:[2,11],13:[2,11],15:[2,11]},{11:[1,13],13:[1,14]},{11:[2,3],13:[2,3]},{5:15,11:[2,7],13:[2,7],14:5,15:[2,7],17:[1,6]},{6:16,11:[2,9],13:[2,9],15:[1,12]},{7:[1,17]},{16:18,18:[1,19]},{7:[2,2],11:[2,2],13:[2,2],15:[2,2],17:[2,2]},{4:9,9:[1,3],12:20,14:10,17:[1,6]},{6:21,11:[2,9],13:[2,9],15:[1,12]},{11:[2,6],13:[2,6]},{8:[1,22]},{7:[2,10],11:[2,10],13:[2,10]},{7:[2,12],11:[2,12],13:[2,12]},{11:[2,4],13:[2,4]},{11:[2,5],13:[2,5]},{1:[2,1]}],
defaultActions: {22:[2,1]},
parseError: function parseError(str,hash){if(hash.recoverable){this.trace(str)}else{throw new Error(str)}},
parse: function parse(input) {
var self = this, stack = [0], vstack = [null], lstack = [], table = this.table, yytext = '', yylineno = 0, yyleng = 0, recovering = 0, TERROR = 2, EOF = 1;
var args = lstack.slice.call(arguments, 1);
this.lexer.setInput(input);
this.lexer.yy = this.yy;
this.yy.lexer = this.lexer;
this.yy.parser = this;
if (typeof this.lexer.yylloc == 'undefined') {
this.lexer.yylloc = {};
}
var yyloc = this.lexer.yylloc;
lstack.push(yyloc);
var ranges = this.lexer.options && this.lexer.options.ranges;
if (typeof this.yy.parseError === 'function') {
this.parseError = this.yy.parseError;
} else {
this.parseError = Object.getPrototypeOf(this).parseError;
}
function popStack(n) {
stack.length = stack.length - 2 * n;
vstack.length = vstack.length - n;
lstack.length = lstack.length - n;
}
function lex() {
var token;
token = self.lexer.lex() || EOF;
if (typeof token !== 'number') {
token = self.symbols_[token] || token;
}
return token;
}
var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected;
while (true) {
state = stack[stack.length - 1];
if (this.defaultActions[state]) {
action = this.defaultActions[state];
} else {
if (symbol === null || typeof symbol == 'undefined') {
symbol = lex();
}
action = table[state] && table[state][symbol];
}
if (typeof action === 'undefined' || !action.length || !action[0]) {
var errStr = '';
expected = [];
for (p in table[state]) {
if (this.terminals_[p] && p > TERROR) {
expected.push('\'' + this.terminals_[p] + '\'');
}
}
if (this.lexer.showPosition) {
errStr = 'Parse error on line ' + (yylineno + 1) + ':\n' + this.lexer.showPosition() + '\nExpecting ' + expected.join(', ') + ', got \'' + (this.terminals_[symbol] || symbol) + '\'';
} else {
errStr = 'Parse error on line ' + (yylineno + 1) + ': Unexpected ' + (symbol == EOF ? 'end of input' : '\'' + (this.terminals_[symbol] || symbol) + '\'');
}
this.parseError(errStr, {
text: this.lexer.match,
token: this.terminals_[symbol] || symbol,
line: this.lexer.yylineno,
loc: yyloc,
expected: expected
});
}
if (action[0] instanceof Array && action.length > 1) {
throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
}
switch (action[0]) {
case 1:
stack.push(symbol);
vstack.push(this.lexer.yytext);
lstack.push(this.lexer.yylloc);
stack.push(action[1]);
symbol = null;
if (!preErrorSymbol) {
yyleng = this.lexer.yyleng;
yytext = this.lexer.yytext;
yylineno = this.lexer.yylineno;
yyloc = this.lexer.yylloc;
if (recovering > 0) {
recovering--;
}
} else {
symbol = preErrorSymbol;
preErrorSymbol = null;
}
break;
case 2:
len = this.productions_[action[1]][1];
yyval.$ = vstack[vstack.length - len];
yyval._$ = {
first_line: lstack[lstack.length - (len || 1)].first_line,
last_line: lstack[lstack.length - 1].last_line,
first_column: lstack[lstack.length - (len || 1)].first_column,
last_column: lstack[lstack.length - 1].last_column
};
if (ranges) {
yyval._$.range = [
lstack[lstack.length - (len || 1)].range[0],
lstack[lstack.length - 1].range[1]
];
}
r = this.performAction.apply(yyval, [
yytext,
yyleng,
yylineno,
this.yy,
action[1],
vstack,
lstack
].concat(args));
if (typeof r !== 'undefined') {
return r;
}
if (len) {
stack = stack.slice(0, -1 * len * 2);
vstack = vstack.slice(0, -1 * len);
lstack = lstack.slice(0, -1 * len);
}
stack.push(this.productions_[action[1]][0]);
vstack.push(yyval.$);
lstack.push(yyval._$);
newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
stack.push(newState);
break;
case 3:
return true;
}
}
return true;
}};
/* generated by jison-lex 0.2.1 */
var lexer = (function(){
var lexer = {
EOF:1,
parseError:function parseError(str,hash){
"use strict";
if(this.yy.parser){this.yy.parser.parseError(str,hash)}else{throw new Error(str)}},
// resets the lexer, sets new input
setInput:function (input){
"use strict";
this._input=input;this._more=this._backtrack=this.done=false;this.yylineno=this.yyleng=0;this.yytext=this.matched=this.match="";this.conditionStack=["INITIAL"];this.yylloc={first_line:1,first_column:0,last_line:1,last_column:0};if(this.options.ranges){this.yylloc.range=[0,0]}this.offset=0;return this},
// consumes and returns one char from the input
input:function (){
"use strict";
var ch=this._input[0];this.yytext+=ch;this.yyleng++;this.offset++;this.match+=ch;this.matched+=ch;var lines=ch.match(/(?:\r\n?|\n).*/g);if(lines){this.yylineno++;this.yylloc.last_line++}else{this.yylloc.last_column++}if(this.options.ranges){this.yylloc.range[1]++}this._input=this._input.slice(1);return ch},
// unshifts one char (or a string) into the input
unput:function (ch){
"use strict";
var len=ch.length;var lines=ch.split(/(?:\r\n?|\n)/g);this._input=ch+this._input;this.yytext=this.yytext.substr(0,this.yytext.length-len-1);this.offset-=len;var oldLines=this.match.split(/(?:\r\n?|\n)/g);this.match=this.match.substr(0,this.match.length-1);this.matched=this.matched.substr(0,this.matched.length-1);if(lines.length-1){this.yylineno-=lines.length-1}var r=this.yylloc.range;this.yylloc={first_line:this.yylloc.first_line,last_line:this.yylineno+1,first_column:this.yylloc.first_column,last_column:lines?(lines.length===oldLines.length?this.yylloc.first_column:0)+oldLines[oldLines.length-lines.length].length-lines[0].length:this.yylloc.first_column-len};if(this.options.ranges){this.yylloc.range=[r[0],r[0]+this.yyleng-len]}this.yyleng=this.yytext.length;return this},
// When called from action, caches matched text and appends it on next action
more:function (){
"use strict";
this._more=true;return this},
// When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.
reject:function (){
"use strict";
if(this.options.backtrack_lexer){this._backtrack=true}else{return this.parseError("Lexical error on line "+(this.yylineno+1)+". You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n"+this.showPosition(),{text:"",token:null,line:this.yylineno})}return this},
// retain first n characters of the match
less:function (n){
"use strict";
this.unput(this.match.slice(n))},
// displays already matched input, i.e. for error messages
pastInput:function (){
"use strict";
var past=this.matched.substr(0,this.matched.length-this.match.length);return(past.length>20?"...":"")+past.substr(-20).replace(/\n/g,"")},
// displays upcoming input, i.e. for error messages
upcomingInput:function (){
"use strict";
var next=this.match;if(next.length<20){next+=this._input.substr(0,20-next.length)}return(next.substr(0,20)+(next.length>20?"...":"")).replace(/\n/g,"")},
// displays the character position where the lexing error occurred, i.e. for error messages
showPosition:function (){
"use strict";
var pre=this.pastInput();var c=new Array(pre.length+1).join("-");return pre+this.upcomingInput()+"\n"+c+"^"},
// test the lexed token: return FALSE when not a match, otherwise return token
test_match:function (match,indexed_rule){
"use strict";
var token,lines,backup;if(this.options.backtrack_lexer){backup={yylineno:this.yylineno,yylloc:{first_line:this.yylloc.first_line,last_line:this.last_line,first_column:this.yylloc.first_column,last_column:this.yylloc.last_column},yytext:this.yytext,match:this.match,matches:this.matches,matched:this.matched,yyleng:this.yyleng,offset:this.offset,_more:this._more,_input:this._input,yy:this.yy,conditionStack:this.conditionStack.slice(0),done:this.done};if(this.options.ranges){backup.yylloc.range=this.yylloc.range.slice(0)}}lines=match[0].match(/(?:\r\n?|\n).*/g);if(lines){this.yylineno+=lines.length}this.yylloc={first_line:this.yylloc.last_line,last_line:this.yylineno+1,first_column:this.yylloc.last_column,last_column:lines?lines[lines.length-1].length-lines[lines.length-1].match(/\r?\n?/)[0].length:this.yylloc.last_column+match[0].length};this.yytext+=match[0];this.match+=match[0];this.matches=match;this.yyleng=this.yytext.length;if(this.options.ranges){this.yylloc.range=[this.offset,this.offset+=this.yyleng]}this._more=false;this._backtrack=false;this._input=this._input.slice(match[0].length);this.matched+=match[0];token=this.performAction.call(this,this.yy,this,indexed_rule,this.conditionStack[this.conditionStack.length-1]);if(this.done&&this._input){this.done=false}if(token){return token}else if(this._backtrack){for(var k in backup){this[k]=backup[k]}return false}return false},
// return next match in input
next:function (){
"use strict";
if(this.done){return this.EOF}if(!this._input){this.done=true}var token,match,tempMatch,index;if(!this._more){this.yytext="";this.match=""}var rules=this._currentRules();for(var i=0;i<rules.length;i++){tempMatch=this._input.match(this.rules[rules[i]]);if(tempMatch&&(!match||tempMatch[0].length>match[0].length)){match=tempMatch;index=i;if(this.options.backtrack_lexer){token=this.test_match(tempMatch,rules[i]);if(token!==false){return token}else if(this._backtrack){match=false;continue}else{return false}}else if(!this.options.flex){break}}}if(match){token=this.test_match(match,rules[index]);if(token!==false){return token}return false}if(this._input===""){return this.EOF}else{return this.parseError("Lexical error on line "+(this.yylineno+1)+". Unrecognized text.\n"+this.showPosition(),{text:"",token:null,line:this.yylineno})}},
// return next match that has a token
lex:function lex(){
"use strict";
var r=this.next();if(r){return r}else{return this.lex()}},
// activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)
begin:function begin(condition){
"use strict";
this.conditionStack.push(condition)},
// pop the previously active lexer condition state off the condition stack
popState:function popState(){
"use strict";
var n=this.conditionStack.length-1;if(n>0){return this.conditionStack.pop()}else{return this.conditionStack[0]}},
// produce the lexer rule set which is active for the currently active lexer condition state
_currentRules:function _currentRules(){
"use strict";
if(this.conditionStack.length&&this.conditionStack[this.conditionStack.length-1]){return this.conditions[this.conditionStack[this.conditionStack.length-1]].rules}else{return this.conditions["INITIAL"].rules}},
// return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available
topState:function topState(n){
"use strict";
n=this.conditionStack.length-1-Math.abs(n||0);if(n>=0){return this.conditionStack[n]}else{return"INITIAL"}},
// alias for begin(condition)
pushState:function pushState(condition){
"use strict";
this.begin(condition)},
// return the number of states currently on the stack
stateStackSize:function stateStackSize(){
"use strict";
return this.conditionStack.length},
options: {},
performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
var YYSTATE=YY_START;
switch($avoiding_name_collisions) {
case 0:/* skip whitespace */
break;
case 1:return 18
break;
case 2:return 15;
break;
case 3:return 7;
break;
case 4:return 11;
break;
case 5:return 9;
break;
case 6:return 13;
break;
case 7: return 17;
break;
case 8:return 8
break;
case 9:return 'INVALID'
break;
}
},
rules: [/^(?:\s+)/,/^(?:[0-9]+(\.[0-9]+)?\b)/,/^(?::)/,/^(?:;)/,/^(?:\))/,/^(?:\()/,/^(?:,)/,/^(?:[a-zA-Z_][a-zA-Z_0-9]*)/,/^(?:$)/,/^(?:.)/],
conditions: {"INITIAL":{"rules":[0,1,2,3,4,5,6,7,8,9],"inclusive":true}}
};
return lexer;
})();
parser.lexer = lexer;
function Parser () {
this.yy = {};
}
Parser.prototype = parser;parser.Parser = Parser;
return new Parser;
})();
if (typeof require !== 'undefined' && typeof exports !== 'undefined') {
exports.parser = parser;
exports.Parser = parser.Parser;
exports.parse = function () { return parser.parse.apply(parser, arguments); };
exports.main = function commonjsMain(args){if(!args[1]){console.log("Usage: "+args[0]+" FILE");process.exit(1)}var source=require("fs").readFileSync(require("path").normalize(args[1]),"utf8");return exports.parser.parse(source)};
if (typeof module !== 'undefined' && require.main === module) {
exports.main(process.argv.slice(1));
}
}
function dump( t)
{
var s="";
if(t.label!=null) {s+=t.label;}
if(t.child!=null)
{
s+="(";
var c=t.child;
var i=0;
while(c!=null)
{
if(i!=0) s+=",";
s+=dump(c);
c=c.next;
++i;
}
s+=")";
}
if(t.length!=null) {s+=":"+t.length;}
return s;
}
function simplify2(c)
{
var newlabel="";
var w=0.0;
var c=(t.child);
while(c!=null)
{
var c2=simplify(c);
c=c.next;
}
print("W:"+w);
if(w<1.0)
{
return {label:newlabel,child:null,length:w,next:null};
}
else
{
return c;
}
}
function simplify(t)
{
var w=0.0;
var newlabel="";
if(t.child!=null)
{
var array=[];
var n=0;
var c=(t.child);
while(c!=null)
{
var c2=simplify(c);
w+=c2.length;
newlabel+=c2.label;
c=c.next;
++n;
}
if(w/n< Number(document.getElementById("id2").value))
{
t.child=null;
t.label=newlabel;
//t.length+=(w/n);
}
}
return t;
}
function convertNewick()
{
try
{
var tree=document.getElementById("id1").value;
parser.parse(tree);
document.getElementById("id3").value=(dump(simplify(newick)));
}
catch(err)
{
alert(err);
}
}
</script>
</head>
<body>
Answer to <a href="http://www.biostars.org/p/97409/">http://www.biostars.org/p/97409/</a>. See also: <a href="http://plindenbaum.blogspot.fr/2012/07/parsing-newick-format-in-c-using-flex.html">http://plindenbaum.blogspot.fr/2012/07/parsing-newick-format-in-c-using-flex.html</a>.<br/>
<label for="id1">Newick:</label><input id="id1" value="(A:0.556705,(B:0.251059,C:0.251059):0.305646):0.556705;"/><br/>
<label for="id2">Treshold:</label><input id="id2" type="number" value="0.26"/><br/>
<label for="id3">Convert:</label><input id="id3" value=""/><br/>
<button onclick="convertNewick();">Convert</button><br/>
<h2>Jison syntax <a href="http://zaach.github.io/jison/try/">http://zaach.github.io/jison/try/</a></h2>
<pre style="background-color:lightgray;">
/* lexical grammar */
%lex
%%
\s+ /* skip whitespace */
[0-9]+("."[0-9]+)?\b return 'NUMBER'
":" return 'COLON';
";" return 'SEMICOLON';
")" return 'CPAR';
"(" return 'OPAR';
"," return 'COMMA';
[a-zA-Z_][a-zA-Z_0-9]* { return 'STRING';}
<<EOF>> return 'EOF'
. return 'INVALID'
/lex
%start input
%% /* language grammar */
input: descendant_list optional_label optional_length SEMICOLON EOF
{
var tree={next:null,child:null};
tree.child=$1;
tree.label=$2;
tree.length=$3;
this.tree=tree;
};
descendant_list: OPAR descendant_list_item CPAR
{
$$=$2;
};
descendant_list_item: subtree
{
$$=$1;
}
|descendant_list_item COMMA subtree
{
var last=$1;
$$=$1;
while(last.next!=null)
{
last=last.next;
}
last.next=$3;
}
;
subtree : descendant_list optional_label optional_length
{
$$={next:null};
$$.child=$1;
$$.label=$2;
$$.length=$3;
}
| label optional_length
{
$$={next:null,child:null};
$$.label=$1;
$$.length=$2;
}
;
optional_label: { $$=null;} | label { $$=$1;};
optional_length: { $$=null;} | COLON number { $$=$2;};
label: STRING { $$=$1;};
number: NUMBER { $$=Number($1);};
</pre>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment