Skip to content

Instantly share code, notes, and snippets.

@vilicvane
Last active February 19, 2020 09:58
Show Gist options
  • Save vilicvane/1353586a3f6f46102ac3 to your computer and use it in GitHub Desktop.
Save vilicvane/1353586a3f6f46102ac3 to your computer and use it in GitHub Desktop.
A lexer (behaving like the Angular expression lexer) written with the help of regular expressions.
/**
* A test using regular expression for lexing
* by vilicvane<https://vilic.github.io/>
*
* It turns out to be 50% faster than the original Angular lexer before JIT compilation, but 50% slower afterwards (in Chrome).
* It is also only about 1/3 the size of the original lexer, counting the core lines of code,
* though that is because the regular expressions are doing most of the work.
*
* Note: The regular expression and related enum declaration are managed by https://github.com/vilic/regex-tools/.
*/
/**
* A single lexical token produced by `parseExpressionLex`.
*
* Every token carries `index` and `text`; the optional flags are only set (to `true`)
* on the token kinds noted below, and are absent otherwise. Punctuation tokens carry
* no flag at all.
*/
interface Token {
/** Offset in the source string at which the token's match started. */
index: number;
/** The matched source text, exactly as it appears in the source. */
text: string;
/** Set to `true` on string-literal and number-literal tokens. */
constant?: boolean;
/** Set to `true` on identifier tokens. */
identifier?: boolean;
/** Set to `true` on operator tokens. */
operator?: boolean;
/** Evaluated value of a literal; only set on tokens that also have `constant`. */
value?: any;
}
/**
* Capture-group numbers of `expressionLexRegex`, used by `parseExpressionLex` to tell
* which alternative of the pattern matched.
*
* Group 2 (the opening quote captured for the `\2` backreference) has no member here,
* which is why `stringUnexpectedToken` is pinned to 3; the members after it auto-increment.
*/
/* /$expressionLex/ */
const enum ExpressionLexGroup {
// Group 1: the whole string literal, quotes included.
string = 1,
// Group 3: participates only when the closing quote was not found; it then holds
// either a backslash (bad escape sequence) or the empty string (unterminated string).
stringUnexpectedToken = 3,
// Group 4: the numeric literal.
number,
// Group 5: empty capture that participates only when `e`/`E` is not followed by digits.
numberInvalidExponent,
// Group 6: an identifier.
identifier,
// Group 7: a single punctuation character.
punctuation,
// Group 8: a run of whitespace.
whitespace,
// Group 9: an operator.
operator
}
/**
* Global regular expression that recognises one token per match. Its capture groups
* are numbered as described by `ExpressionLexGroup`, and it ends with an empty
* alternative so that `exec` always succeeds at `lastIndex`; an empty match means
* no token rule applied at that position.
*
* Alternation is ordered, so longer forms must precede their own prefixes:
*  - the hexadecimal literal (`0x…`) is tried before the decimal literal, otherwise
*    `0x1F` is consumed as the number `0` followed by the identifier `x1F`;
*  - `<=` / `>=` are tried before the single-character operators, otherwise they are
*    split into `<` / `>` followed by `=`.
* Capture-group numbering is unaffected by this ordering.
*
* NOTE(review): the header says this pattern is managed by regex-tools; if it is
* regenerated, the same ordering has to be applied in the regex-tools source.
*/
var expressionLexRegex = /* /$expressionLex/ */ /((["'])(?:(?!\2|[\\\r\n\u2028\u2029])[\s\S]|\\(?:['"\\bfnrtv]|[^'"\\bfnrtv\dxu\r\n\u2028\u2029]|0(?!\d)|x[\da-fA-F]{2}|u[\da-fA-F]{4})|\\(?:\r?\n|\r(?!\n)|[\u2028\u2029]))*(?:\2|(\\?)))|((?:0[xX][\da-fA-F]+|(?:(?:0|[1-9]\d*)(?:\.\d*)?|\.\d+)(?:[eE](?:[+-]?\d+|()))?))|([a-zA-Z$_][\w$]*)|([(){}[\].,;:?])|(\s+)|([<>]=|[-+*\/%<>]|[!=]={0,2}|&&|\|\|)|(?:)/g; // for poor github syntax highlight: '
/**
* Splits an expression string into tokens using `expressionLexRegex`.
*
* On every iteration the shared global regex is anchored at the current offset and
* the capture group that participated (see `ExpressionLexGroup`) decides the token
* kind. Whitespace is skipped; every other match becomes a `Token` whose `index` is
* the offset at which the match started.
*
* @param source The expression text to tokenize.
* @returns The tokens in source order, whitespace omitted.
* @throws SyntaxError on an invalid string escape, an unterminated string, an
*   exponent without digits, or a character that no token rule accepts. The message
*   has the form `<reason> at (<line>,<column>):`, followed by the offending line and
*   a `^` marker line; line and column are zero-based.
*/
function parseExpressionLex(source: string): Token[] {
    var index = 0;
    var tokens: Token[] = [];

    while (index < source.length) {
        let start = expressionLexRegex.lastIndex = index;
        let groups = expressionLexRegex.exec(source);

        // The pattern ends with an empty alternative, so a match at `start` is
        // expected every time. The guard keeps a failed or displaced match from
        // turning into a TypeError or a silently skipped stretch of input.
        if (!groups || groups.index !== start) {
            return throwSyntaxError('Unexpected token');
        }

        // From here on `index` is the end of the match; errors raised below are
        // reported at that position by default.
        index = expressionLexRegex.lastIndex;

        if (groups[ExpressionLexGroup.string]) {
            let unexpectedToken = groups[ExpressionLexGroup.stringUnexpectedToken];

            // This group only participates when the closing quote was not found.
            if (typeof unexpectedToken === 'string') {
                if (unexpectedToken === '\\') {
                    throwSyntaxError('Invalid escape');
                } else {
                    throwSyntaxError('Unterminated quote');
                }
            } else {
                let text = groups[0];
                tokens.push({
                    index: start,
                    text,
                    constant: true,
                    // SECURITY NOTE: `eval` is only reached for text the regex has
                    // accepted as a complete, quoted string literal, so it decodes
                    // the escapes rather than running arbitrary code. Keep the two
                    // in sync, or replace this with a dedicated unescape routine.
                    value: eval(text)
                });
            }
        } else if (groups[ExpressionLexGroup.number]) {
            let invalidExponent = groups[ExpressionLexGroup.numberInvalidExponent];

            // The empty capture participates only when `e`/`E` has no digits after it.
            if (invalidExponent === '') {
                throwSyntaxError('Invalid exponent');
            } else {
                let text = groups[0];
                tokens.push({
                    index: start,
                    text,
                    constant: true,
                    // Every form the number rule accepts is also a valid numeric
                    // string, so `Number` yields the same value without `eval`.
                    value: Number(text)
                });
            }
        } else if (groups[ExpressionLexGroup.identifier]) {
            tokens.push({
                index: start,
                text: groups[0],
                identifier: true
            });
        } else if (groups[ExpressionLexGroup.punctuation]) {
            tokens.push({
                index: start,
                text: groups[0]
            });
        } else if (groups[ExpressionLexGroup.whitespace]) {
            // Whitespace separates tokens but produces none.
        } else if (groups[ExpressionLexGroup.operator]) {
            tokens.push({
                index: start,
                text: groups[0],
                operator: true
            });
        } else {
            // Only the trailing empty alternative matched: nothing lexes here.
            throwSyntaxError('Unexpected token');
        }
    }

    return tokens;

    /**
    * Throws a SyntaxError that pinpoints `errorIndex` inside `source`.
    *
    * @param message Reason shown at the start of the error message.
    * @param errorIndex Offset to report; defaults to the current lexer position.
    * @param ignoreSpaces When true, the reported position is moved forward to the
    *   next non-whitespace character, unless only whitespace remains.
    */
    function throwSyntaxError(message: string, errorIndex = index, ignoreSpaces = true): never {
        if (ignoreSpaces) {
            let originalIndex = errorIndex;

            while (errorIndex < source.length && !/\S/.test(source.charAt(errorIndex))) {
                errorIndex++;
            }

            if (errorIndex === source.length) {
                errorIndex = originalIndex;
            }
        }

        // Walk back to the first character of the line holding `errorIndex`. The
        // scan looks at the characters BEFORE the position, so a position that is
        // itself a line break still belongs to the line it terminates, and the
        // first line correctly starts at offset 0.
        var lineStart = errorIndex;
        while (lineStart > 0 && !/[\r\n]/.test(source.charAt(lineStart - 1))) {
            lineStart--;
        }

        // Zero-based line number: the number of line breaks before the line start.
        var lineNumber = (source.slice(0, lineStart).match(/\r\n|\r|\n/g) || []).length;
        var columnNumber = errorIndex - lineStart;
        var line = (source.slice(lineStart).match(/.*/) || [''])[0];
        var arrowLine = Array(columnNumber + 1).join(' ') + '^';

        throw new SyntaxError(`${message} at (${lineNumber},${columnNumber}):\n${line}\n${arrowLine}`);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment