For DocumentJS, I'm trying to quickly get line numbers from a file given a character position. For a given source, my character position would always increase. So I wanted to call it like:
var getLine = lineNumber("BIG SOURCE\n...");
getLine(54) //->2
getLine(3453) //->55
I created 2 different ways of doing it.
This method essentially checks if the regexp's lastIndex is less than index, if it does, it keeps calling exec and incrementing curLine until lastIndex is past index.
newLine = /\n/g,
lineNumber = function( source ) {
// reset lastIndex
newLine.lastIndex = 0;
var curLine = 0,
curIndex = newLine.lastIndex;
return function( index ) {
// if we haven't already, split the
if ( index <= curIndex ) {
return curLine;
}
curLine++;
while ( newLine.exec(source) && newLine.lastIndex <= index ) {
curLine++;
}
return curLine;
}
},
Way 2 splits the source into an array of lines and uses those to increment curIndex.
lineNumber = function( source ) {
// reset lastIndex
newLine.lastIndex = 0;
var curLine = 0,
curIndex, lines;
return function( index ) {
if (!lines ) {
lines = source.split('\n');
curIndex = lines[0].length + 1
}
// if we haven't already, split the
if ( index <= curIndex ) {
return curLine;
}
curLine++;
while ( lines[curLine] && (curIndex += lines[curLine].length + 1) <= index ) {
curLine++;
}
return curLine;
}
};
Way 2 is about 50x faster!
Alternatively you could maybe do a single pass to get the positions of all the newline characters and then lookup the closest newline position any time you needed it instead of doing the parsing on demand. I wonder if that would be any faster.