For DocumentJS, I'm trying to quickly get the line number in a file for a given character position. For a given source, the character positions I look up always increase from one call to the next. So I wanted to call it like:
var getLine = lineNumber("BIG SOURCE\n...");
getLine(54) //->2
getLine(3453) //->55
I created 2 different ways of doing it.
Way 1 checks whether the regexp's lastIndex is less than index; if it is, it keeps calling exec and incrementing curLine until lastIndex is past index.
// Global newline matcher. It is stateful (lastIndex), so every closure below
// sets lastIndex itself right before calling exec instead of trusting
// whatever a previous search left behind.
var newLine = /\n/g,
    /**
     * Way 1: builds a line-number lookup for `source` that scans forward
     * with the newLine regexp.
     *
     * The returned function expects to be called with non-decreasing
     * character indexes; it only ever scans forward. Calling it with a
     * smaller index than before returns the line reached so far.
     *
     * @param {String} source the text to look positions up in
     * @return {Function} getLine( index ) -> 1-based line number of the
     *     character at `index`. The "\n" character itself counts as part
     *     of the line it terminates. Indexes past the end of `source`
     *     report the last line.
     */
    lineNumber = function( source ) {
        var curLine = 1,
            // index at which line (curLine + 1) starts:
            //   undefined -> not searched yet, -1 -> no further newline
            nextStart,
            // find the first newline at or after `from` and remember where
            // the line following it begins
            seek = function( from ) {
                newLine.lastIndex = from;
                nextStart = newLine.exec( source ) ? newLine.lastIndex : -1;
            };

        return function( index ) {
            if ( nextStart === undefined ) {
                seek( 0 );
            }
            // step over every line that ends at or before index
            while ( nextStart !== -1 && index >= nextStart ) {
                curLine++;
                seek( nextStart );
            }
            return curLine;
        };
    };
Way 2 splits the source into an array of lines and uses those to increment curIndex.
/**
 * Way 2: builds a line-number lookup for `source` by splitting it into
 * lines once and walking the line lengths.
 *
 * The returned function expects to be called with non-decreasing character
 * indexes; it only ever walks forward. Calling it with a smaller index than
 * before returns the line reached so far.
 *
 * @param {String} source the text to look positions up in
 * @return {Function} getLine( index ) -> 0-based line number of the
 *     character at `index`. The "\n" character itself counts as part of the
 *     line it terminates. Indexes past the end of `source` report the last
 *     line.
 */
var lineNumber = function( source ) {
    var curLine = 0,
        // index at which line (curLine + 1) starts
        nextStart,
        lines;

    return function( index ) {
        // split lazily, on the first lookup only
        if ( !lines ) {
            lines = source.split('\n');
            nextStart = lines[0].length + 1;
        }
        // Step over every line that ends at or before index. The bound is
        // checked against lines.length rather than the truthiness of the
        // line text, because an empty line ("") is falsy but still a line.
        while ( index >= nextStart && curLine < lines.length - 1 ) {
            curLine++;
            nextStart += lines[curLine].length + 1;
        }
        return curLine;
    };
};
Way 2 is about 50x faster!
I am also very curious about the performance difference in other environments. Maybe the RegExp implementation in Rhino is extra slow for some reason. Maybe all the extra garbage collection of cleaning up temporary arrays and strings is cheaper for some reason. Inquiring minds want to know ;)