Skip to content

Instantly share code, notes, and snippets.

@darkyen
Created November 9, 2012 21:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darkyen/4048208 to your computer and use it in GitHub Desktop.
Save darkyen/4048208 to your computer and use it in GitHub Desktop.
Tokenizer that builds JavaScript objects from bracketed NLP parse trees produced by OpenNLP or the Stanford Parser
/**
 * Converts a bracketed parse-tree string such as "(S (NP (PRP I)) (VP (VBP run)))"
 * into a nested object tree.
 *
 * Each node has the shape { name, body }:
 *   - name: the label, i.e. the text up to the first whitespace character or
 *     "(" (an empty string when the node starts directly with a child).
 *   - body: an array of child nodes when at least one balanced "(...)" group
 *     follows the label; otherwise the remaining text as a string.
 *
 * Any whitespace (spaces, tabs, newlines) separates the label from its
 * content, so multi-line, indented parser output is accepted. Leading and
 * trailing whitespace around the input is ignored.
 *
 * @param {string} str - Bracketed tree text; the outer parentheses are optional.
 * @returns {Object} The root node, or an empty object when `str` is falsy.
 */
function Tokenize(str) {
  var current = {};
  if (!str) {
    return current;
  }

  var text = str.trim();
  if (text.charAt(0) === '(') {
    // Drop the wrapping parens, but only remove the trailing character when
    // it really is ")" so truncated input does not lose a payload character.
    var end = text.charAt(text.length - 1) === ')' ? text.length - 1 : text.length;
    text = text.slice(1, end).trim();
  }

  // The label ends at the first whitespace or at the first child's "(".
  var nameEnd = text.search(/[\s(]/);
  if (nameEnd === -1) {
    nameEnd = text.length;
  }
  var rest = text.slice(nameEnd).replace(/^\s+/, '');

  var children = [];
  var depth = 0;
  var childStart = 0;
  for (var i = 0; i < rest.length; i++) {
    var ch = rest.charAt(i);
    if (ch === '(') {
      if (depth === 0) {
        childStart = i;
      }
      depth++;
    } else if (ch === ')' && depth > 0) {
      // The depth > 0 guard ignores a stray ")" so it cannot push the
      // counter negative and hide the siblings that follow it.
      depth--;
      if (depth === 0) {
        children.push(Tokenize(rest.slice(childStart, i + 1)));
      }
    }
  }

  current.name = text.slice(0, nameEnd);
  // A node without any balanced child group is a leaf: keep its raw text.
  current.body = children.length > 0 ? children : rest;
  return current;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment