Skip to content

Instantly share code, notes, and snippets.

@YairRand
Last active June 2, 2021 12:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YairRand/e22aded969e6de8cbb283e62868153a1 to your computer and use it in GitHub Desktop.
Save YairRand/e22aded969e6de8cbb283e62868153a1 to your computer and use it in GitHub Desktop.
Script to translate Lua code between its standard English keywords and the equivalent keywords in other languages
// Keyword tables, keyed by language code. Every table maps the canonical
// (English) Lua keyword to the spelling used in that language.
var KW = ( function () {
    // The Lua reserved words handled by the translator, in table order.
    var words = [
        'true', 'false', 'nil', 'do', 'if', 'in', 'or', 'and', 'end', 'for',
        'not', 'else', 'then', 'break', 'local', 'until', 'while', 'elseif',
        'repeat', 'return', 'function'
    ];

    // Builds one table by passing each canonical keyword through `spell`.
    function buildTable( spell ) {
        return Object.fromEntries( words.map( word => [ word, spell( word ) ] ) );
    }

    return {
        en: buildTable( word => word ),
        // For testing: A "test language" where each keyword is reversed from English.
        test: buildTable( word => word.split( '' ).reverse().join( '' ) )
    };
}() );
/**
 * Translates the keywords of a piece of Lua code between English and another
 * language. Identifiers that would collide with a keyword of the target
 * language get an extra leading underscore; the reverse translation removes it.
 *
 * @param {String} code The Lua code to translate from one language to another.
 * @param {String} lang The language code of the language to translate to or from.
 * @param {Boolean} toEnglish Whether to translate from the selected language to
 *  English or from English to the selected language.
 * @return {String} The translated code.
 * @throws {TypeError} If `lang` has no keyword table in `KW`.
 *  Errors raised by the lexer for malformed input propagate unchanged.
 *
 * @example
 *     translateLua( 'local foo = true', 'test', false );
 *     // returns 'lacol foo = eurt', in the "test language" where each keyword is reversed.
 */
function translateLua( code, lang, toEnglish ) {
    if ( !Object.prototype.hasOwnProperty.call( KW, lang ) ) {
        throw new TypeError( 'translateLua: no keyword table for language "' + lang + '"' );
    }

    var parseLang = toEnglish ? lang : 'en',
        luaparse = setupLuaparse( parseLang ),
        parser = luaparse.parse( code, { wait: true } ),
        tokenTypes = luaparse.tokenTypes,
        kwTypes = [ 'BooleanLiteral', 'Keyword', 'NilLiteral' ].map( key => tokenTypes[ key ] ),
        // Maps a keyword of the source language to the target language.
        kwMap = toEnglish ?
            Object.fromEntries(
                Object.entries( KW[ lang ] ).map( ( [ english, translated ] ) => [ translated, english ] )
            ) :
            KW[ lang ],
        sourceLangKWs = Object.keys( kwMap ),
        targetLangKWs = Object.values( kwMap ),
        // Alternating untouched stretches of `code` and replacement texts.
        pieces = [],
        copiedUpTo = 0;

    // Avoid naming conflicts with keywords. Returns the new identifier, or
    // undefined when the identifier can stay as it is.
    function processVar( name ) {
        var bareName = name.replace( /^_+/, '' );
        if ( targetLangKWs.includes( bareName ) ) {
            // A word that is a keyword in both languages never needs escaping.
            return sourceLangKWs.includes( bareName ) ? undefined : '_' + name;
        }
        if ( bareName !== name && sourceLangKWs.includes( bareName ) ) {
            // Escaped on the way in: remove one initial underscore.
            return name.slice( 1 );
        }
        return undefined;
    }

    for ( var token, raw, newValue; ( token = parser.lex() ).type !== tokenTypes.EOF; ) {
        // Use the source text rather than token.value: the lexer casts boolean
        // and nil literals to true/false/null, which are not keys of kwMap.
        raw = code.slice( token.range[ 0 ], token.range[ 1 ] );
        newValue = undefined;
        if ( kwTypes.includes( token.type ) ) {
            newValue = kwMap[ raw ];
        } else if ( token.type === tokenTypes.Identifier ) {
            newValue = processVar( raw );
        }
        if ( newValue !== undefined ) {
            pieces.push( code.slice( copiedUpTo, token.range[ 0 ] ), newValue );
            copiedUpTo = token.range[ 1 ];
        }
    }

    // Whatever follows the last replaced token (or the whole input when
    // nothing was replaced) is copied verbatim.
    pieces.push( code.slice( copiedUpTo ) );
    return pieces.join( '' );
}
// ---------------------
// The following code is from https://github.com/oxyc/luaparse/blob/master/luaparse.js ,
// modified to use KW instead of hardcoded strings as tokens.
// Licensed under the MIT license:
// Copyright (c) Oskar Schöldström 2012-2014
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
var setupLuaparse = (function (factory) {
'use strict';
return function ( lang ) {
var luaparse = {};
factory( luaparse, KW[ lang ] );
return luaparse;
};
}(function (exports, LKW) {
'use strict';
// Version string exposed on the parser object.
exports.version = '0.2.1';
// State shared by the lexer functions below (assigned outside this excerpt):
// `input` is the source string being lexed, `length` the bound that `index`
// is compared against to detect end of input, `options` the active parser
// options and `features` the flags that gate optional syntax.
var input, options, length, features;
// Default value of every parser option. Options can be overridden globally
// through `exports.defaultOptions`, or per call when parsing.
var defaultOptions = {
  // Explicitly tell the parser when the input ends.
  wait: false,
  // Store comments as an array in the chunk object.
  comments: true,
  // Track identifier scopes by adding an isLocal attribute to each
  // identifier-node.
  scope: false,
  // Store location information on each syntax node as
  // `loc: { start: { line, column }, end: { line, column } }`.
  locations: false,
  // Store the start and end character locations on each syntax node as
  // `range: [start, end]`.
  ranges: false,
  // A callback which will be invoked when a syntax node has been completed.
  // The node which has been created will be passed as the only parameter.
  onCreateNode: null,
  // A callback which will be invoked when a new scope is created.
  onCreateScope: null,
  // A callback which will be invoked when the current scope is destroyed.
  onDestroyScope: null,
  // A callback which will be invoked when a local variable is declared in
  // the current scope. The variable's name will be passed as the only
  // parameter.
  onLocalDeclaration: null,
  // The version of Lua targeted by the parser (string; allowed values are
  // '5.1', '5.2', '5.3').
  luaVersion: '5.1',
  // Whether to allow code points outside the Basic Latin block in identifiers
  extendedIdentifiers: false
};
exports.defaultOptions = defaultOptions;
// The available tokens expressed as enum flags so they can be checked with
// bitwise operations.
var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8
, NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64
, NilLiteral = 128, VarargLiteral = 256;
exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral
, Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral
, Punctuator: Punctuator, BooleanLiteral: BooleanLiteral
, NilLiteral: NilLiteral, VarargLiteral: VarargLiteral
};
// Message templates for the errors raised by the lexer and parser. `%1`,
// `%2`, ... are positional placeholders filled in by `sprintf`. As this
// parser is a bit different from Lua's own, the messages will differ from
// Lua's in some situations.
var errors = {
  unexpected: 'unexpected %1 \'%2\' near \'%3\'',
  expected: '\'%1\' expected near \'%2\'',
  expectedToken: '%1 expected near \'%2\'',
  unfinishedString: 'unfinished string near \'%1\'',
  malformedNumber: 'malformed number near \'%1\'',
  invalidVar: 'invalid left-hand side of assignment near \'%1\'',
  decimalEscapeTooLarge: 'decimal escape too large near \'%1\'',
  invalidEscape: 'invalid escape sequence near \'%1\'',
  hexadecimalDigitExpected: 'hexadecimal digit expected near \'%1\'',
  braceExpected: 'missing \'%1\' near \'%2\'',
  tooLargeCodepoint: 'UTF-8 value too large near \'%1\'',
  unfinishedLongString: 'unfinished long string (starting at line %1) near \'%2\'',
  unfinishedLongComment: 'unfinished long comment (starting at line %1) near \'%2\'',
  ambiguousSyntax: 'ambiguous syntax (function call x new statement) near \'%1\''
};
exports.errors = errors;
// ### Abstract Syntax Tree
//
// Node factories, one per node type. The default AST structure is inspired by
// the Mozilla Parser API; each factory can be replaced on `exports.ast` to
// customize the tree that gets built.
var ast = {
  labelStatement(label) {
    return { type: 'LabelStatement', label };
  },

  breakStatement() {
    return { type: 'BreakStatement' };
  },

  gotoStatement(label) {
    return { type: 'GotoStatement', label };
  },

  returnStatement(args) {
    return { type: 'ReturnStatement', 'arguments': args };
  },

  ifStatement(clauses) {
    return { type: 'IfStatement', clauses };
  },

  ifClause(condition, body) {
    return { type: 'IfClause', condition, body };
  },

  elseifClause(condition, body) {
    return { type: 'ElseifClause', condition, body };
  },

  elseClause(body) {
    return { type: 'ElseClause', body };
  },

  whileStatement(condition, body) {
    return { type: 'WhileStatement', condition, body };
  },

  doStatement(body) {
    return { type: 'DoStatement', body };
  },

  repeatStatement(condition, body) {
    return { type: 'RepeatStatement', condition, body };
  },

  localStatement(variables, init) {
    return { type: 'LocalStatement', variables, init };
  },

  assignmentStatement(variables, init) {
    return { type: 'AssignmentStatement', variables, init };
  },

  callStatement(expression) {
    return { type: 'CallStatement', expression };
  },

  functionStatement(identifier, parameters, isLocal, body) {
    return { type: 'FunctionDeclaration', identifier, isLocal, parameters, body };
  },

  forNumericStatement(variable, start, end, step, body) {
    return { type: 'ForNumericStatement', variable, start, end, step, body };
  },

  forGenericStatement(variables, iterators, body) {
    return { type: 'ForGenericStatement', variables, iterators, body };
  },

  chunk(body) {
    return { type: 'Chunk', body };
  },

  identifier(name) {
    return { type: 'Identifier', name };
  },

  // `type` is one of the literal token type flags; any flag other than
  // string, numeric, boolean or nil is reported as a vararg literal.
  literal(type, value, raw) {
    var typeName;
    switch (type) {
      case StringLiteral: typeName = 'StringLiteral'; break;
      case NumericLiteral: typeName = 'NumericLiteral'; break;
      case BooleanLiteral: typeName = 'BooleanLiteral'; break;
      case NilLiteral: typeName = 'NilLiteral'; break;
      default: typeName = 'VarargLiteral';
    }
    return { type: typeName, value, raw };
  },

  tableKey(key, value) {
    return { type: 'TableKey', key, value };
  },

  tableKeyString(key, value) {
    return { type: 'TableKeyString', key, value };
  },

  tableValue(value) {
    return { type: 'TableValue', value };
  },

  tableConstructorExpression(fields) {
    return { type: 'TableConstructorExpression', fields };
  },

  // The active language's `and`/`or` keywords produce logical expressions;
  // every other operator produces a plain binary expression.
  binaryExpression(operator, left, right) {
    var isLogical = (LKW['and'] === operator || LKW['or'] === operator);
    return {
      type: isLogical ? 'LogicalExpression' : 'BinaryExpression',
      operator,
      left,
      right
    };
  },

  unaryExpression(operator, argument) {
    return { type: 'UnaryExpression', operator, argument };
  },

  memberExpression(base, indexer, identifier) {
    return { type: 'MemberExpression', indexer, identifier, base };
  },

  indexExpression(base, index) {
    return { type: 'IndexExpression', base, index };
  },

  callExpression(base, args) {
    return { type: 'CallExpression', base, 'arguments': args };
  },

  tableCallExpression(base, args) {
    return { type: 'TableCallExpression', base, 'arguments': args };
  },

  stringCallExpression(base, argument) {
    return { type: 'StringCallExpression', base, argument };
  },

  comment(value, raw) {
    return { type: 'Comment', value, raw };
  }
};
exports.ast = ast;
// Finalize a freshly built syntax node: attach location data when locations
// are being tracked, notify the `onCreateNode` callback if one is configured,
// and hand the same node back.
function finishNode(node) {
  if (trackLocations) {
    // The most recently pushed `Marker` belongs to this node.
    var marker = locations.pop();
    marker.complete();
    marker.bless(node);
  }

  if (options.onCreateNode) {
    options.onCreateNode(node);
  }

  return node;
}
// Helpers
// -------

// Borrowed built-ins, kept as locals so they can be applied to any object.
var slice = Array.prototype.slice;
var toString = Object.prototype.toString;

// Position of the first entry strictly equal to `element`, or -1 if absent.
var indexOf = function indexOf(array, element) {
  var count = array.length
    , position = 0;
  while (position < count) {
    if (element === array[position]) return position;
    ++position;
  }
  return -1;
};
// Position of the first object in `array` whose `property` is strictly equal
// to `element`, or -1 when no object matches.
function indexOfObject(array, property, element) {
  var count = array.length
    , position = 0;
  while (position < count) {
    if (element === array[position][property]) return position;
    ++position;
  }
  return -1;
}
// A sprintf implementation using %index (beginning at 1, single digit) to
// insert arguments into the format string. A placeholder without a matching
// argument is replaced by the empty string; `null` renders as "null".
//
// Example:
//
//     // Unexpected function in token
//     sprintf('Unexpected %2 in %1.', 'token', 'function');
function sprintf(format, ...args) {
  return format.replace(/%(\d)/g, function (match, position) {
    var arg = args[position - 1];
    // Test for a missing argument before stringifying: `'' + undefined` is
    // the truthy string "undefined", which would defeat an `||` fallback.
    return arg === undefined ? '' : '' + arg;
  });
}
// Returns a new object with the own enumerable properties of all objects
// passed as arguments. The last argument takes precedence. The arguments are
// not modified; null/undefined arguments are skipped.
//
// Example:
//
//     this.options = extend(options, { output: false });
function extend(...sources) {
  var dest = {};
  sources.forEach(function (src) {
    for (var prop in src) {
      // Call hasOwnProperty through Object.prototype so that prototype-less
      // objects, and objects with their own `hasOwnProperty` key, work too.
      /* istanbul ignore else */
      if (Object.prototype.hasOwnProperty.call(src, prop)) {
        dest[prop] = src[prop];
      }
    }
  });
  return dest;
}
// ### Error functions
// XXX: Eliminate this function and change the error type to be different from SyntaxError.
// This will unfortunately be a breaking change, because some downstream users depend
// on the error thrown being an instance of SyntaxError. For example, the Ace editor:
// <https://github.com/ajaxorg/ace/blob/4c7e5eb3f5d5ca9434847be51834a4e41661b852/lib/ace/mode/lua_worker.js#L55>
//
// Returns an object that inherits from the error `e` and carries its own
// writable `line`, `index` and `column` properties, initialised from `e`.
// Where `Object.create` is unavailable, `e` itself is returned.
function fixupError(e) {
  /* istanbul ignore if */
  if (!Object.create)
    return e;

  var descriptors = {};
  ['line', 'index', 'column'].forEach(function (field) {
    descriptors[field] = { 'writable': true, value: e[field] };
  });
  return Object.create(e, descriptors);
}
// #### Raise an exception.
//
// Raise an exception by passing a token, a string format and its parameters.
//
// If the token carries location data (a `line` property), that location is
// added to the error message and to the error object. Otherwise - including
// when no token is passed at all (null/undefined) - the lexer's current
// position is used. Always throws a SyntaxError-derived object with `line`,
// `index` and `column` properties.
//
// Example:
//
//     // [1:0] expected [ near (
//     raise(token, "expected %1 near %2", '[', token.value);
function raise(token) {
  var message = sprintf.apply(null, slice.call(arguments, 1))
    // Callers may pass the parser's current token, which is still unset
    // while the lexer is being driven directly.
    , hasLocation = token != null && 'undefined' !== typeof token.line
    , error, col;

  if (hasLocation) {
    col = token.range[0] - token.lineStart;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', token.line, col, message)));
    error.line = token.line;
    error.index = token.range[0];
    error.column = col;
  } else {
    col = index - lineStart + 1;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', line, col, message)));
    error.index = index;
    error.line = line;
    error.column = col;
  }
  throw error;
}
// #### Raise an "expected token" error.
//
// Reports that `type` was expected near the value of `token`, using the
// location of `token`.
//
// Example:
//
//     // <name> expected near '0'
//     raiseUnexpectedToken('<name>', token);
function raiseUnexpectedToken(type, token) {
  var nearValue = token.value;
  raise(token, errors.expectedToken, type, nearValue);
}
// #### Raise a general unexpected error
//
// Usage should pass either a token object or a symbol string which was
// found. The "near" part of the message is the value of the parser's
// lookahead token.
//
// Example:
//
//     // Unexpected symbol 'end' near '<eof>'
//     unexpected(token);
//
// The lookahead is only assigned by the parser's token-advance routine, so
// it may still be unset when the lexer is driven directly. In that case the
// offending token or symbol itself is reported as the nearby text instead of
// failing with a TypeError.
function unexpected(found) {
  var isToken = 'undefined' !== typeof found.type
    , near = lookahead ? lookahead.value
      : isToken ? found.value
      : found;
  if (isToken) {
    var type;
    switch (found.type) {
      case StringLiteral:   type = 'string';      break;
      case Keyword:         type = 'keyword';     break;
      case Identifier:      type = 'identifier';  break;
      case NumericLiteral:  type = 'number';      break;
      case Punctuator:      type = 'symbol';      break;
      case BooleanLiteral:  type = 'boolean';     break;
      case NilLiteral:
        // The value of a nil token is `null`, so spell it out.
        return raise(found, errors.unexpected, 'symbol', 'nil', near);
    }
    return raise(found, errors.unexpected, type, found.value, near);
  }
  return raise(found, errors.unexpected, 'symbol', found, near);
}
// Lexer
// -----
//
// The lexer, or the tokenizer, reads the input string character by character
// and derives tokens from left to right. To be as efficient as possible the
// lexer prioritizes the common cases such as identifiers. It also works with
// character codes instead of characters, as string comparisons were the
// biggest bottleneck of the parser.
//
// If `options.comments` is enabled, all comments encountered will be stored
// in an array which later will be appended to the chunk object. If disabled,
// they will simply be disregarded.
//
// When the lexer has derived a valid token, it will be returned as an object
// containing its value as well as its position in the input string (this
// is always enabled to provide proper debug messages).
//
// `lex()` starts lexing and returns the following token in the stream.

// Current read position in `input`.
var index
// The parser's current token, the one before it, and the token after it;
// `next()` shifts `token` into `previousToken` and `lookahead` into `token`.
, token
, previousToken
, lookahead
// Comment nodes collected by `scanComment()` when `options.comments` is set.
, comments
// Position in `input` where the token (or comment) being scanned begins.
, tokenStart
// Current line number, and the position in `input` where that line starts;
// both are advanced by `consumeEOL()`.
, line
, lineStart;
// Expose the lexer so tokens can be pulled without running the parser.
exports.lex = lex;
// Scan and return the next token of `input`, starting at `index`.
//
// Whitespace and `--` comments in front of the token are skipped. At the end
// of the input an EOF token with the value '<eof>' is returned. Otherwise the
// token kind is chosen from the first character (and, where needed, the one
// or two characters after it) and the matching scan function is called.
// Operators that only exist in some Lua versions are gated by `features`
// flags. A character that cannot start any token is reported through
// `unexpected()`.
function lex() {
skipWhiteSpace();
// Skip comments beginning with --
while (45 === input.charCodeAt(index) &&
45 === input.charCodeAt(index + 1)) {
scanComment();
skipWhiteSpace();
}
// Nothing left but whitespace and comments: report end of input.
if (index >= length) return {
type : EOF
, value: '<eof>'
, line: line
, lineStart: lineStart
, range: [index, index]
};
var charCode = input.charCodeAt(index)
, next = input.charCodeAt(index + 1);
// Memorize the range index where the token begins.
tokenStart = index;
// Identifiers and keywords are the common case, so test for them first.
if (isIdentifierStart(charCode)) return scanIdentifierOrKeyword();
switch (charCode) {
case 39: case 34: // '"
return scanStringLiteral();
case 48: case 49: case 50: case 51: case 52: case 53:
case 54: case 55: case 56: case 57: // 0-9
return scanNumericLiteral();
case 46: // .
// If the dot is followed by a digit it's a float.
if (isDecDigit(next)) return scanNumericLiteral();
if (46 === next) {
// Three dots form a vararg literal, two the concatenation operator.
if (46 === input.charCodeAt(index + 2)) return scanVarargLiteral();
return scanPunctuator('..');
}
return scanPunctuator('.');
case 61: // =
if (61 === next) return scanPunctuator('==');
return scanPunctuator('=');
case 62: // >
if (features.bitwiseOperators)
if (62 === next) return scanPunctuator('>>');
if (61 === next) return scanPunctuator('>=');
return scanPunctuator('>');
case 60: // <
if (features.bitwiseOperators)
if (60 === next) return scanPunctuator('<<');
if (61 === next) return scanPunctuator('<=');
return scanPunctuator('<');
case 126: // ~
if (61 === next) return scanPunctuator('~=');
// A lone ~ is only a token when bitwise operators are enabled; otherwise
// leave the switch and report the character as unexpected.
if (!features.bitwiseOperators)
break;
return scanPunctuator('~');
case 58: // :
if (features.labels)
if (58 === next) return scanPunctuator('::');
return scanPunctuator(':');
case 91: // [
// Check for a multiline string, they begin with [= or [[
if (91 === next || 61 === next) return scanLongStringLiteral();
return scanPunctuator('[');
case 47: // /
// Check for integer division op (//)
if (features.integerDivision)
if (47 === next) return scanPunctuator('//');
return scanPunctuator('/');
case 38: case 124: // & |
if (!features.bitwiseOperators)
break;
/* fall through */
case 42: case 94: case 37: case 44: case 123: case 125:
case 93: case 40: case 41: case 59: case 35: case 45:
case 43: // * ^ % , { } ] ( ) ; # - +
return scanPunctuator(input.charAt(index));
}
// No token can start with this character.
return unexpected(input.charAt(index));
}
// Consume one end-of-line sequence at the current position, if there is one.
// Any kind of eol sequence - including the two-character pairs \r\n and
// \n\r - counts as a single line. Updates `line` and `lineStart` and returns
// true when a line ending was consumed, false (leaving `index` untouched)
// otherwise.
function consumeEOL() {
  var current = input.charCodeAt(index);
  if (!isLineTerminator(current)) return false;

  var following = input.charCodeAt(index + 1)
    , isPair = (10 === current && 13 === following) ||
               (13 === current && 10 === following);
  // Count \n\r and \r\n as one newline.
  if (isPair) ++index;
  ++line;
  lineStart = ++index;
  return true;
}
// Whitespace has no semantic meaning in Lua, so simply skip ahead over it,
// letting `consumeEOL()` keep track of the newlines encountered. Stops at the
// first character that is neither whitespace nor a line ending, or at the end
// of the input.
function skipWhiteSpace() {
  for (;;) {
    if (index >= length) return;
    if (isWhiteSpace(input.charCodeAt(index))) {
      ++index;
      continue;
    }
    if (!consumeEOL()) return;
  }
}
// Encode a code point as UTF-8 and return the result as a string in which
// every character holds one byte of the encoding (1 to 4 characters).
// Returns null for values of 0x110000 and above.
function encodeUTF8(codepoint) {
  // One continuation byte: marker bits 10 followed by six payload bits.
  var continuation = function (shift) {
    return 0x80 | ((codepoint >> shift) & 0x3f);
  };

  if (codepoint < 0x80)
    return String.fromCharCode(codepoint);
  if (codepoint < 0x800)
    return String.fromCharCode(0xc0 | (codepoint >> 6), continuation(0));
  if (codepoint < 0x10000)
    return String.fromCharCode(0xe0 | (codepoint >> 12), continuation(6), continuation(0));
  if (codepoint < 0x110000)
    return String.fromCharCode(0xf0 | (codepoint >> 18), continuation(12), continuation(6), continuation(0));
  return null;
}
// This function takes a JavaScript string, encodes it in WTF-8 and
// reinterprets the resulting code units as code points; i.e. it encodes
// the string in what was the original meaning of WTF-8.
//
// Characters in the range 0x00-0x7f are left alone. A surrogate pair is
// combined into its code point before encoding; any other character outside
// that range (lone surrogates included) is encoded on its own.
//
// For a detailed rationale, see the README.md file, section
// "Note on character encodings".
function fixupHighCharacters(s) {
  var nonAscii = /[\ud800-\udbff][\udc00-\udfff]|[^\x00-\x7f]/g;
  return s.replace(nonAscii, function (match) {
    var lead = match.charCodeAt(0);
    if (match.length !== 1) {
      var trail = match.charCodeAt(1);
      return encodeUTF8(0x10000 + (((lead & 0x3ff) << 10) | (trail & 0x3ff)));
    }
    return encodeUTF8(lead);
  });
}
// Identifiers, keywords, booleans and nil all look the same syntax wise, so
// the whole word is read first and classified afterwards, defaulting to an
// identifier. The words for `true`, `false` and `nil` are those of the active
// keyword table (`LKW`); boolean and nil tokens carry the cast values
// true/false/null rather than their source text.
function scanIdentifierOrKeyword() {
  var text, kind, tokenValue;

  // Slicing the input string is preferred over string concatenation in a
  // loop for performance reasons.
  do {
    ++index;
  } while (isIdentifierPart(input.charCodeAt(index)));
  text = fixupHighCharacters(input.slice(tokenStart, index));

  // Decide on the token type and possibly cast the value.
  tokenValue = text;
  if (isKeyword(text)) {
    kind = Keyword;
  } else if (LKW['true'] === text || LKW['false'] === text) {
    kind = BooleanLiteral;
    tokenValue = (LKW['true'] === text);
  } else if (LKW['nil'] === text) {
    kind = NilLiteral;
    tokenValue = null;
  } else {
    kind = Identifier;
  }

  return {
    type: kind,
    value: tokenValue,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Build the token for an already validated punctuator: advance the read
// position past `value` and return a Punctuator token spanning it.
function scanPunctuator(value) {
  var start = tokenStart;
  index += value.length;
  return {
    type: Punctuator,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [start, index]
  };
}
// A vararg literal consists of three dots: step over them and return the
// corresponding token.
function scanVarargLiteral() {
  var start = tokenStart;
  index += 3;
  return {
    type: VarargLiteral,
    value: '...',
    line: line,
    lineStart: lineStart,
    range: [start, index]
  };
}
// Scan a quoted string literal. The character at `index` is the opening
// delimiter (' or "); the literal runs up to the next unescaped occurrence of
// the same delimiter. Escape sequences are translated through
// `readEscapeSequence()`.
//
// Returns a StringLiteral token whose value is the decoded string. Raises
// `unfinishedString` when a line ending or the end of the input is reached
// before the closing delimiter.
function scanStringLiteral() {
  var delimiter = input.charCodeAt(index++)
    , beginLine = line
    , beginLineStart = lineStart
    , stringStart = index
    , string = ''
    , terminated = false
    , charCode;

  while (index < length) {
    charCode = input.charCodeAt(index++);
    if (delimiter === charCode) {
      terminated = true;
      break;
    }
    if (92 === charCode) { // backslash
      string += fixupHighCharacters(input.slice(stringStart, index - 1)) + readEscapeSequence();
      stringStart = index;
    }
    // EOF or `\n` terminates a string literal. If we haven't found the
    // ending delimiter by now, raise an exception.
    else if (index >= length || isLineTerminator(charCode)) {
      string += input.slice(stringStart, index - 1);
      raise({}, errors.unfinishedString, string + String.fromCharCode(charCode));
    }
  }

  // The loop also ends without a closing delimiter when the input stops
  // right after the opening quote or right after an escape sequence; that is
  // an unfinished string too, not an empty or complete one.
  if (!terminated) {
    string += fixupHighCharacters(input.slice(stringStart, index));
    raise({}, errors.unfinishedString, string);
  }

  string += fixupHighCharacters(input.slice(stringStart, index - 1));

  return {
      type: StringLiteral
    , value: string
    , line: beginLine
    , lineStart: beginLineStart
    , lastLine: line
    , lastLineStart: lineStart
    , range: [tokenStart, index]
  };
}
// Expect a multiline (long bracket) string literal and return it as a
// regular StringLiteral token. If the text at the current position does not
// open a long bracket, a SyntaxError is raised.
function scanLongStringLiteral() {
  var beginLine = line
    , beginLineStart = lineStart
    , string = readLongString(false)
    , nearEnd;

  // Fail if it's not a multiline literal.
  if (false === string) {
    if (token) raise(token, errors.expected, '[', token.value);
    // The parser has no current token while the lexer is driven directly.
    // Report the malformed opening (`[`, any `=`, and the character that
    // should have been the second `[`) at the lexer's position instead of
    // failing with a TypeError on the missing token.
    nearEnd = index;
    while ('=' === input.charAt(nearEnd)) ++nearEnd;
    raise({}, errors.expected, '[', input.slice(tokenStart, nearEnd + 1));
  }

  return {
      type: StringLiteral
    , value: fixupHighCharacters(string)
    , line: beginLine
    , lineStart: beginLineStart
    , lastLine: line
    , lastLineStart: lineStart
    , range: [tokenStart, index]
  };
}
// Scan a numeric literal and return it as a NumericLiteral token whose value
// is a JavaScript number. The raw text can be recovered later by slicing the
// input with the token's range.
//
// A literal starting with `0x` or `0X` is read as hexadecimal, anything else
// as decimal.
function scanNumericLiteral() {
  var marker = input.charAt(index + 1)
    , isHex = '0' === input.charAt(index) && ('x' === marker || 'X' === marker)
    , value;

  if (isHex) value = readHexLiteral();
  else value = readDecLiteral();

  return {
    type: NumericLiteral,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Read a hexadecimal literal starting at its `0x` prefix and return its
// numeric value. Lua hexadecimals have an optional fraction part and an
// optional binary exponent part. These are not available in JavaScript, so
// the three parts are computed separately and combined at the end:
//
//     Digit := toDec(digit)
//     Fraction := toDec(fraction) / 16 ^ fractionCount
//     BinaryExp := 2 ^ binaryExp
//     Number := ( Digit + Fraction ) * BinaryExp
//
// Raises `malformedNumber` when no hex digit follows the prefix, or when the
// exponent marker is not followed by a decimal digit.
function readHexLiteral() {
  var mantissa
    , fraction = 0   // summed with the mantissa, so it defaults to 0
    , scale = 1      // multiplied with the sum, so it defaults to 1
    , sign = 1       // sign of the binary exponent
    , digitsStart, fractionStart, exponentStart, marker;

  index += 2; // Skip 0x part
  digitsStart = index;

  // A minimum of one hex digit is required.
  if (!isHexDigit(input.charCodeAt(index)))
    raise({}, errors.malformedNumber, input.slice(tokenStart, index));
  while (isHexDigit(input.charCodeAt(index))) ++index;
  mantissa = parseInt(input.slice(digitsStart, index), 16);

  // Fraction part is optional; an empty one counts as 0.
  if ('.' === input.charAt(index)) {
    fractionStart = ++index;
    while (isHexDigit(input.charCodeAt(index))) ++index;
    if (fractionStart !== index) {
      // Scale the digits down to 0.x form so they can simply be added.
      fraction = parseInt(input.slice(fractionStart, index), 16) /
        Math.pow(16, index - fractionStart);
    }
  }

  // Binary exponent is optional.
  marker = input.charAt(index);
  if ('p' === marker || 'P' === marker) {
    ++index;
    // Sign part is optional and defaults to positive.
    marker = input.charAt(index);
    if ('+' === marker || '-' === marker) {
      if ('-' === marker) sign = -1;
      ++index;
    }
    exponentStart = index;
    // The binary exponent requires at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise({}, errors.malformedNumber, input.slice(tokenStart, index));
    while (isDecDigit(input.charCodeAt(index))) ++index;
    scale = Math.pow(2, input.slice(exponentStart, index) * sign);
  }

  return (mantissa + fraction) * scale;
}
// Read a decimal literal and return its numeric value. Decimal numbers look
// the same in Lua and in JavaScript, so this only finds where the literal
// ends and then lets `parseFloat` convert the text from `tokenStart`.
//
// Raises `malformedNumber` when an exponent marker is not followed by at
// least one decimal digit.
function readDecLiteral() {
  var marker;
  var skipDigits = function () {
    while (isDecDigit(input.charCodeAt(index))) ++index;
  };

  skipDigits();

  // Fraction part is optional and may be empty.
  if ('.' === input.charAt(index)) {
    ++index;
    skipDigits();
  }

  // Exponent part is optional.
  marker = input.charAt(index);
  if ('e' === marker || 'E' === marker) {
    ++index;
    // Sign part is optional.
    marker = input.charAt(index);
    if ('+' === marker || '-' === marker) ++index;
    // An exponent is required to contain at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise({}, errors.malformedNumber, input.slice(tokenStart, index));
    skipDigits();
  }

  return parseFloat(input.slice(tokenStart, index));
}
// Read a `\u{XXX}` escape sequence. On entry `index` points at the `u`; on
// success it is left just past the closing brace and the UTF-8 encoding of
// the code point (as produced by `encodeUTF8`) is returned.
//
// Raises `braceExpected` for a missing `{` or `}`, `hexadecimalDigitExpected`
// when the braces do not contain hex digits only, and `tooLargeCodepoint`
// when the value is out of range.
function readUnicodeEscapeSequence() {
  var sequenceStart = index++;

  if (input.charAt(index++) !== '{')
    raise({}, errors.braceExpected, '{', '\\' + input.slice(sequenceStart, index));
  if (!isHexDigit(input.charCodeAt(index)))
    raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));

  // Leading zeros do not count towards the digit limit below.
  while (input.charCodeAt(index) === 0x30) ++index;
  var escStart = index;

  while (isHexDigit(input.charCodeAt(index))) {
    ++index;
    // More than six significant digits can never be a valid code point.
    if (index - escStart > 6)
      raise({}, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
  }

  var b = input.charAt(index++);
  if (b !== '}') {
    if ((b === '"') || (b === "'"))
      raise({}, errors.braceExpected, '}', '\\' + input.slice(sequenceStart, index--));
    else
      raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));
  }

  // When the escape consists of zeros only (e.g. `\u{0}`), nothing is left
  // after skipping them; fall back to '0' so it parses as code point 0
  // instead of NaN.
  var codepoint = parseInt(input.slice(escStart, index - 1) || '0', 16);

  codepoint = encodeUTF8(codepoint);
  if (codepoint === null) {
    raise({}, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
  }
  return codepoint;
}
// Translate the escape sequence whose first character (the one after the
// backslash) is at `index` into the character(s) it stands for, and advance
// `index` past the sequence.
//
// `\z`, `\x` and `\u` are only recognised when the corresponding `features`
// flag is set. Any other sequence is an error when `features.strictEscapes`
// is set; otherwise the character after the backslash is returned as is.
function readEscapeSequence() {
  var sequenceStart = index
    , escapeChar = input.charAt(index)
    // Lua allows the following single-character escape sequences.
    , simpleEscapes = {
        'a': '\x07', 'n': '\n', 'r': '\r', 't': '\t',
        'v': '\x0b', 'b': '\b', 'f': '\f'
      };

  if (Object.prototype.hasOwnProperty.call(simpleEscapes, escapeChar)) {
    ++index;
    return simpleEscapes[escapeChar];
  }

  // Backslash at the end of the line. We treat all line endings as equivalent,
  // and as representing the [LF] character (code 10). Lua 5.1 through 5.3
  // have been verified to behave the same way.
  if ('\r' === escapeChar || '\n' === escapeChar) {
    consumeEOL();
    return '\n';
  }

  // \ddd, where ddd is a sequence of up to three decimal digits.
  if (escapeChar >= '0' && escapeChar <= '9') {
    while (isDecDigit(input.charCodeAt(index)) && index - sequenceStart < 3) ++index;
    var ddd = parseInt(input.slice(sequenceStart, index), 10);
    if (ddd > 255) {
      raise({}, errors.decimalEscapeTooLarge, '\\' + ddd);
    }
    return String.fromCharCode(ddd);
  }

  // An escaped backslash or quote stands for itself.
  if ('\\' === escapeChar || '"' === escapeChar || "'" === escapeChar) {
    return input.charAt(index++);
  }

  // \z swallows the whitespace that follows it.
  if ('z' === escapeChar && features.skipWhitespaceEscape) {
    ++index;
    skipWhiteSpace();
    return '';
  }

  if ('x' === escapeChar && features.hexEscapes) {
    // \xXX, where XX is a sequence of exactly two hexadecimal digits
    if (isHexDigit(input.charCodeAt(index + 1)) &&
        isHexDigit(input.charCodeAt(index + 2))) {
      index += 3;
      return String.fromCharCode(parseInt(input.slice(sequenceStart + 1, index), 16));
    }
    raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index + 2));
  }

  if ('u' === escapeChar && features.unicodeEscapes) {
    return readUnicodeEscapeSequence();
  }

  if (features.strictEscapes)
    raise({}, errors.invalidEscape, '\\' + input.slice(sequenceStart, index + 1));
  return input.charAt(index++);
}
// Scan one comment starting at the `--` under `index` and move `index` past
// it. It is only decided after the `--` whether the comment is a multiline
// (long bracket) comment or a single-line one.
//
// The multiline functionality works the exact same way as with string
// literals so we reuse the functionality.
//
// When `options.comments` is enabled, a comment node is created - with `loc`
// and `range` attached if those options are enabled - passed to
// `options.onCreateNode` if configured, and appended to `comments`. Nothing
// is returned.
function scanComment() {
tokenStart = index;
index += 2; // --
var character = input.charAt(index)
, content = ''
, isLong = false
, commentStart = index
// Remember where the comment starts; reading a multiline comment advances
// `line` and `lineStart`.
, lineStartComment = lineStart
, lineComment = line;
if ('[' === character) {
content = readLongString(true);
// This wasn't a multiline comment after all.
if (false === content) content = character;
else isLong = true;
}
// Scan until next line as long as it's not a multiline comment.
if (!isLong) {
while (index < length) {
if (isLineTerminator(input.charCodeAt(index))) break;
++index;
}
// The content of a single-line comment is everything after the `--`.
if (options.comments) content = input.slice(commentStart, index);
}
if (options.comments) {
// The raw text includes the leading `--`.
var node = ast.comment(content, input.slice(tokenStart, index));
// `Marker`s depend on tokens available in the parser and as comments are
// intercepted in the lexer all location data is set manually.
if (options.locations) {
node.loc = {
start: { line: lineComment, column: tokenStart - lineStartComment }
, end: { line: line, column: index - lineStart }
};
}
if (options.ranges) {
node.range = [tokenStart, index];
}
if (options.onCreateNode) options.onCreateNode(node);
comments.push(node);
}
}
// Read a multiline string by calculating the depth of `=` characters and
// then appending until an equal depth is found.
//
// `isComment` only selects which "unfinished" error is raised at end of input.
//
// Returns the text between the opening and closing long brackets, or `false`
// when the text at `index` is not a long-bracket opener. Note that in the
// `false` case `index` has already been advanced past the first `[`.
function readLongString(isComment) {
var level = 0
, content = ''
, terminator = false
, character, stringStart, firstLine = line;
++index; // [
// Calculate the depth of the comment.
while ('=' === input.charAt(index + level)) ++level;
// Exit, this is not a long string after all.
if ('[' !== input.charAt(index + level)) return false;
index += level + 1;
// If the first character is a newline, ignore it and begin on next line.
if (isLineTerminator(input.charCodeAt(index))) consumeEOL();
stringStart = index;
while (index < length) {
// To keep track of line numbers run the `consumeEOL()` which increments
// its counter.
while (isLineTerminator(input.charCodeAt(index))) consumeEOL();
character = input.charAt(index++);
// Once the delimiter is found, iterate through the depth count and see
// if it matches.
if (']' === character) {
terminator = true;
for (var i = 0; i < level; ++i) {
if ('=' !== input.charAt(index + i)) terminator = false;
}
if (']' !== input.charAt(index + level)) terminator = false;
}
// We reached the end of the multiline string. Get out now.
if (terminator) {
// `index - 1` excludes the `]` that was just read.
content += input.slice(stringStart, index - 1);
// Skip the remaining `=` characters and the final `]`.
index += level + 1;
return content;
}
}
// Input ended before a matching closing bracket was found.
raise({}, isComment ?
errors.unfinishedLongComment :
errors.unfinishedLongString,
firstLine, '<eof>');
}
// ## Lex functions and helpers.
// Advance the parser by one token.
//
// The token window is shifted: the current token becomes `previousToken`, the
// lookahead becomes the current token and a fresh lookahead is lexed.
function next() {
  var upcoming = lookahead;
  previousToken = token;
  token = upcoming;
  lookahead = lex();
}
// Advance past the current token when its value equals `value`.
// Returns true if the token was consumed, false if it was left in place.
function consume(value) {
  if (value !== token.value) return false;
  next();
  return true;
}
// Require the current token's value to equal `value`. On a match the token is
// consumed; otherwise `raise()` is called with the `errors.expected` template.
function expect(value) {
  if (value !== token.value) {
    raise(token, errors.expected, value, token.value);
    return;
  }
  next();
}
// ### Validation functions
// True for horizontal tab (9), space (32), vertical tab (0xB) and form feed
// (0xC). Line terminators are handled separately by isLineTerminator().
function isWhiteSpace(charCode) {
  switch (charCode) {
    case 9:
    case 32:
    case 0xB:
    case 0xC:
      return true;
    default:
      return false;
  }
}
// True for line feed (10) and carriage return (13).
function isLineTerminator(charCode) {
  switch (charCode) {
    case 10:
    case 13:
      return true;
    default:
      return false;
  }
}
// True when `charCode` is an ASCII decimal digit, '0' (48) through '9' (57).
// NaN (what charCodeAt yields past the end of input) is not a digit.
function isDecDigit(charCode) {
  var atLeastZero = 48 <= charCode;
  return atLeastZero && charCode <= 57;
}
// True when `charCode` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
function isHexDigit(charCode) {
  var isDecimal = 48 <= charCode && charCode <= 57
    , isLowerHex = 97 <= charCode && charCode <= 102
    , isUpperHex = 65 <= charCode && charCode <= 70;
  return isDecimal || isLowerHex || isUpperHex;
}
// From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards
// identifiers cannot use 'locale-dependent' letters (i.e. dependent on the C locale).
// On the other hand, LuaJIT allows arbitrary octets ≥ 128 in identifiers.
//
// An identifier may start with an ASCII letter or underscore; code units of
// 128 and above are accepted only when `options.extendedIdentifiers` is set.
function isIdentifierStart(charCode) {
  var isUpper = 65 <= charCode && charCode <= 90
    , isLower = 97 <= charCode && charCode <= 122;
  if (isUpper || isLower || 95 === charCode) return true;
  return Boolean(options.extendedIdentifiers && charCode >= 128);
}
// Same rule as isIdentifierStart(), except that ASCII decimal digits are also
// allowed after the first character.
function isIdentifierPart(charCode) {
  var isUpper = 65 <= charCode && charCode <= 90
    , isLower = 97 <= charCode && charCode <= 122
    , isDigit = 48 <= charCode && charCode <= 57;
  if (isUpper || isLower || 95 === charCode || isDigit) return true;
  return Boolean(options.extendedIdentifiers && charCode >= 128);
}
// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// `true`, `false` and `nil` will not be considered keywords, but literals.
//
// `id` is compared against the spellings in the active keyword table `LKW`.
// `goto` is matched by its literal spelling, and only when labels are enabled
// and goto is not contextual.
function isKeyword(id) {
  switch (id) {
    case LKW['do']: case LKW['if']: case LKW['in']: case LKW['or']:
    case LKW['and']: case LKW['end']: case LKW['for']: case LKW['not']:
    case LKW['else']: case LKW['then']:
    case LKW['break']: case LKW['local']: case LKW['until']: case LKW['while']:
    case LKW['elseif']: case LKW['repeat']: case LKW['return']:
    case LKW['function']:
      return true;
  }
  if (features.labels && !features.contextualGoto) {
    return 'goto' === id;
  }
  return false;
}
// True when `token` can start a unary expression: the punctuators `#`, `-`
// and `~`, or the `not` keyword as spelled in the active keyword table.
function isUnary(token) {
  switch (token.type) {
    case Punctuator:
      return '#-~'.indexOf(token.value) >= 0;
    case Keyword:
      return LKW['not'] === token.value;
    default:
      return false;
  }
}
// @TODO this needs to be rethought.
//
// True when the node's `type` is one of the three call expression kinds.
function isCallExpression(expression) {
  var type = expression.type;
  return 'CallExpression' === type ||
    'TableCallExpression' === type ||
    'StringCallExpression' === type;
}
// Does `token` syntactically close a block? That is the case for end of input
// and for the keywords `else`, `elseif`, `end` and `until` (compared using
// their spelling in the active keyword table).
function isBlockFollow(token) {
  if (EOF === token.type) return true;
  if (Keyword !== token.type) return false;
  var value = token.value;
  return LKW['else'] === value || LKW['elseif'] === value ||
    LKW['end'] === value || LKW['until'] === value;
}
// Scope
// -----
// Each block scope is stored as an array of identifier names. The scopes
// themselves are kept in a FILO array (a stack); parse() resets it to a
// single empty scope.
var scopes
// The index of the current scope within `scopes`.
, scopeDepth
// A list of all global identifier nodes.
, globals;
// Push a new scope that starts out as a copy of the current scope's names,
// make it the current scope and notify `options.onCreateScope` if provided.
function createScope() {
  var parentNames = scopes[scopeDepth];
  scopeDepth += 1;
  scopes.push(Array.apply(null, parentNames));
  if (options.onCreateScope) options.onCreateScope();
}
// Pop the current scope off the stack, step back to the enclosing one and
// notify `options.onDestroyScope` if provided.
function destroyScope() {
  scopes.pop();
  scopeDepth -= 1;
  if (options.onDestroyScope) {
    options.onDestroyScope();
  }
}
// Record `name` in the current scope unless it is already listed there.
// `options.onLocalDeclaration`, when provided, is called with the name first,
// even if the name turns out to be a duplicate.
function scopeIdentifierName(name) {
  if (options.onLocalDeclaration) options.onLocalDeclaration(name);
  var currentScope = scopes[scopeDepth];
  if (indexOf(currentScope, name) === -1) currentScope.push(name);
}
// Declare an identifier node in the current scope: its name is recorded and
// the node itself is flagged as local.
function scopeIdentifier(node) {
  var declaredName = node.name;
  scopeIdentifierName(declaredName);
  attachScope(node, true);
}
// Stamp `node.isLocal` with the given flag. A non-local node is additionally
// appended to `globals`, unless a node with the same name is already there, so
// the list of globals can be returned to the user.
function attachScope(node, isLocal) {
  var skipGlobals = isLocal || indexOfObject(globals, 'name', node.name) !== -1;
  if (!skipGlobals) globals.push(node);
  node.isLocal = isLocal;
}
// True when `name` is listed in the current scope.
function scopeHasName(name) {
  var position = indexOf(scopes[scopeDepth], name);
  return position !== -1;
}
// Location tracking
// -----------------
//
// Locations are stored in a FILO array (a stack) of `Marker` objects, each
// holding `loc` and/or `range` data. Once a `Marker` is popped off the list an
// end location is added and the data is attached to a syntax node.
var locations = []
// Set in end(): truthy when `options.locations` or `options.ranges` is enabled.
, trackLocations;
// Build a `Marker` that starts at the current token. The marker is returned
// to the caller and is not pushed onto `locations`.
function createLocationMarker() {
  var marker = new Marker(token);
  return marker;
}
// A `Marker` remembers where a syntax node begins. Depending on the enabled
// options it carries a `loc` object (line/column), a `range` pair (character
// offsets), both, or neither. End positions start out as zero and are filled
// in by `complete()`.
function Marker(token) {
  if (options.locations) {
    var startPosition = {
      line: token.line
      , column: token.range[0] - token.lineStart
    };
    this.loc = {
      start: startPosition
      , end: { line: 0, column: 0 }
    };
  }
  if (options.ranges) {
    this.range = [token.range[0], 0];
  }
}
// Fill in the end position from the *previous token*, i.e. the last token
// that was consumed. A token's `lastLine`/`lastLineStart` take precedence over
// `line`/`lineStart` when they are set.
Marker.prototype.complete = function() {
  var last = previousToken;
  if (options.locations) {
    var endLine = last.lastLine || last.line
      , endLineStart = last.lastLineStart || last.lineStart;
    this.loc.end.line = endLine;
    this.loc.end.column = last.range[1] - endLineStart;
  }
  if (options.ranges) {
    this.range[1] = last.range[1];
  }
};
// Copy this marker's location data onto `node`. Fresh objects/arrays are
// created so that the node never shares them with the marker.
Marker.prototype.bless = function (node) {
  var loc = this.loc
    , range = this.range;
  if (loc) {
    node.loc = {
      start: { line: loc.start.line, column: loc.start.column },
      end: { line: loc.end.line, column: loc.end.column }
    };
  }
  if (range) {
    node.range = [range[0], range[1]];
  }
};
// When location tracking is on, push a `Marker` for the current token onto
// the `locations` stack. Does nothing otherwise.
function markLocation() {
  if (!trackLocations) return;
  locations.push(createLocationMarker());
}
// When location tracking is on, push the given, already created `Marker`
// onto the `locations` stack. Does nothing otherwise.
function pushLocation(marker) {
  if (!trackLocations) return;
  locations.push(marker);
}
// Parse functions
// ---------------
// A chunk is the whole program. Syntactically it is just a block that must
// be followed by the end of input.
//
//     chunk ::= block
function parseChunk() {
  next();
  markLocation();
  if (options.scope) {
    createScope();
  }
  var statements = parseBlock();
  if (options.scope) {
    destroyScope();
  }
  if (EOF !== token.type) {
    unexpected(token);
  }
  // With an empty body no token has been consumed, so there would be no
  // previousToken for finishNode to read the end location from.
  if (trackLocations && !statements.length) {
    previousToken = token;
  }
  return finishNode(ast.chunk(statements));
}
// A block is a list of statements, optionally ending in a return statement.
//
//     block ::= {stat} [retstat]
//
// Returns a plain array of statement nodes; a block has no AST node of its
// own. The `terminator` parameter is not used by the body.
function parseBlock(terminator) {
  var statements = [];
  while (!isBlockFollow(token)) {
    // `return` must be the last statement of a block, and so must `break`
    // unless the Lua version relaxes that rule (5.2 and later).
    var endsBlock = LKW['return'] === token.value ||
      (!features.relaxedBreak && LKW['break'] === token.value);
    if (endsBlock) {
      statements.push(parseStatement());
      break;
    }
    var statement = parseStatement();
    consume(';');
    // parseStatement() yields nothing for statements that are ignored, such
    // as the empty statement; those are left out of the list.
    if (statement) statements.push(statement);
  }
  return statements;
}
// There are two types of statements, simple and compound.
//
//     statement ::= break | goto | do | while | repeat | return
//          | if | for | function | local | label | assignment
//          | functioncall | ';'
//
// Dispatches on the current token and returns the parsed statement node. For
// an empty `;` statement (when the Lua version allows it) nothing is returned.
function parseStatement() {
  markLocation();
  if (Keyword === token.type) {
    switch (token.value) {
      case LKW['local']: next(); return parseLocalStatement();
      case LKW['if']: next(); return parseIfStatement();
      case LKW['return']: next(); return parseReturnStatement();
      case LKW['function']: next();
        var name = parseFunctionName();
        return parseFunctionDeclaration(name);
      case LKW['while']: next(); return parseWhileStatement();
      case LKW['for']: next(); return parseForStatement();
      case LKW['repeat']: next(); return parseRepeatStatement();
      case LKW['break']: next(); return parseBreakStatement();
      case LKW['do']: next(); return parseDoStatement();
      // isKeyword() recognises `goto` by its literal spelling only, so the
      // literal is matched here as well. Looking it up in the keyword table
      // would yield `undefined` for tables without a `goto` entry, and the
      // statement would never be dispatched.
      case 'goto': next(); return parseGotoStatement();
    }
  }
  // With contextual goto, `goto` is an ordinary identifier that only acts as
  // a statement keyword when followed by another identifier.
  if (features.contextualGoto &&
      token.type === Identifier && token.value === 'goto' &&
      lookahead.type === Identifier && lookahead.value !== 'goto') {
    next(); return parseGotoStatement();
  }
  if (Punctuator === token.type) {
    if (consume('::')) return parseLabelStatement();
  }
  // Assignments memorize the location and push it manually for wrapper
  // nodes. Additionally empty `;` statements should not mark a location.
  if (trackLocations) locations.pop();
  // When a `;` is encountered, simply eat it without storing it.
  if (features.emptyStatement) {
    if (consume(';')) return;
  }
  return parseAssignmentOrCallStatement();
}
// ## Statements
//     label ::= '::' Name '::'
//
// The opening `::` has already been consumed by the caller. When scope
// tracking is on, the label is recorded in the current scope under the name
// `::Name::` and its identifier node is flagged as local.
function parseLabelStatement() {
  var labelName = token.value;
  var label = parseIdentifier();
  if (options.scope) {
    scopeIdentifierName('::' + labelName + '::');
    attachScope(label, true);
  }
  expect('::');
  return finishNode(ast.labelStatement(label));
}
//     break ::= 'break'
//
// The keyword itself has already been consumed by the caller, so all that is
// left is to build and finish the node.
function parseBreakStatement() {
  var node = ast.breakStatement();
  return finishNode(node);
}
//     goto ::= 'goto' Name
//
// `goto` has already been consumed by the caller; the current token is
// expected to be the label name.
function parseGotoStatement() {
  var label = parseIdentifier();
  return finishNode(ast.gotoStatement(label));
}
//     do ::= 'do' block 'end'
//
// `do` has already been consumed by the caller. The body gets its own scope
// when scope tracking is on.
function parseDoStatement() {
  var body;
  if (options.scope) {
    createScope();
  }
  body = parseBlock();
  if (options.scope) {
    destroyScope();
  }
  expect(LKW['end']);
  return finishNode(ast.doStatement(body));
}
//     while ::= 'while' exp 'do' block 'end'
//
// `while` has already been consumed by the caller. The condition is parsed
// before the body's scope is opened.
function parseWhileStatement() {
  var condition = parseExpectedExpression()
    , body;
  expect(LKW['do']);
  if (options.scope) {
    createScope();
  }
  body = parseBlock();
  if (options.scope) {
    destroyScope();
  }
  expect(LKW['end']);
  return finishNode(ast.whileStatement(condition, body));
}
//     repeat ::= 'repeat' block 'until' exp
//
// `repeat` has already been consumed by the caller. The body's scope is kept
// open until after the `until` condition has been parsed.
function parseRepeatStatement() {
  var body, condition;
  if (options.scope) {
    createScope();
  }
  body = parseBlock();
  expect(LKW['until']);
  condition = parseExpectedExpression();
  if (options.scope) {
    destroyScope();
  }
  return finishNode(ast.repeatStatement(condition, body));
}
//     retstat ::= 'return' [exp {',' exp}] [';']
//
// `return` has already been consumed by the caller. Returns a return
// statement node holding the (possibly empty) list of returned expressions.
function parseReturnStatement() {
  var expressions = [];
  // Only the `end` *keyword* means there are no return values. Comparing the
  // value alone would also match a string literal whose content happens to
  // equal the keyword (e.g. `return "end"`) and drop that expression.
  var atEndKeyword = Keyword === token.type && LKW['end'] === token.value;
  if (!atEndKeyword) {
    var expression = parseExpression();
    if (null != expression) expressions.push(expression);
    while (consume(',')) {
      expression = parseExpectedExpression();
      expressions.push(expression);
    }
    consume(';'); // grammar tells us ; is optional here.
  }
  return finishNode(ast.returnStatement(expressions));
}
// if ::= 'if' exp 'then' block {elif} ['else' block] 'end'
// elif ::= 'elseif' exp 'then' block
//
// `if` has already been consumed by the caller. Returns an if statement node
// whose clauses are, in order: one if clause, any number of elseif clauses and
// at most one else clause. Each clause body gets its own scope when scope
// tracking is on.
function parseIfStatement() {
var clauses = []
, condition
, body
, marker;
// IfClauses begin at the same location as the parent IfStatement.
// It ends at the start of `end`, `else`, or `elseif`.
if (trackLocations) {
// Re-push the statement's own marker so the first clause shares its start.
marker = locations[locations.length - 1];
locations.push(marker);
}
condition = parseExpectedExpression();
expect(LKW['then']);
if (options.scope) createScope();
body = parseBlock();
if (options.scope) destroyScope();
clauses.push(finishNode(ast.ifClause(condition, body)));
// Capture the position of the upcoming token before `consume()` moves past
// it; the marker is pushed below only if that token is `elseif`.
if (trackLocations) marker = createLocationMarker();
while (consume(LKW['elseif'])) {
pushLocation(marker);
condition = parseExpectedExpression();
expect(LKW['then']);
if (options.scope) createScope();
body = parseBlock();
if (options.scope) destroyScope();
clauses.push(finishNode(ast.elseifClause(condition, body)));
if (trackLocations) marker = createLocationMarker();
}
if (consume(LKW['else'])) {
// Include the `else` in the location of ElseClause.
if (trackLocations) {
// `else` was just consumed, so it is now the previous token.
marker = new Marker(previousToken);
locations.push(marker);
}
if (options.scope) createScope();
body = parseBlock();
if (options.scope) destroyScope();
clauses.push(finishNode(ast.elseClause(body)));
}
expect(LKW['end']);
return finishNode(ast.ifStatement(clauses));
}
// There are two types of for statements, generic and numeric.
//
// for ::= Name '=' exp ',' exp [',' exp] 'do' block 'end'
// for ::= namelist 'in' explist 'do' block 'end'
// namelist ::= Name {',' Name}
// explist ::= exp {',' exp}
//
// `for` has already been consumed by the caller. Returns either a numeric or
// a generic for statement node, decided by whether `=` follows the first name.
function parseForStatement() {
var variable = parseIdentifier()
, body;
// The start-identifier is local.
// The scope is opened here, before the kind of loop is known, and closed
// after the closing `end` in either branch.
if (options.scope) {
createScope();
scopeIdentifier(variable);
}
// If the first expression is followed by a `=` punctuator, this is a
// Numeric For Statement.
if (consume('=')) {
// Start expression
var start = parseExpectedExpression();
expect(',');
// End expression
var end = parseExpectedExpression();
// Optional step expression
var step = consume(',') ? parseExpectedExpression() : null;
expect(LKW['do']);
body = parseBlock();
expect(LKW['end']);
if (options.scope) destroyScope();
return finishNode(ast.forNumericStatement(variable, start, end, step, body));
}
// If not, it's a Generic For Statement
else {
// The namelist can contain one or more identifiers.
var variables = [variable];
while (consume(',')) {
variable = parseIdentifier();
// Each variable in the namelist is locally scoped.
if (options.scope) scopeIdentifier(variable);
variables.push(variable);
}
expect(LKW['in']);
var iterators = [];
// One or more expressions in the explist.
do {
var expression = parseExpectedExpression();
iterators.push(expression);
} while (consume(','));
expect(LKW['do']);
body = parseBlock();
expect(LKW['end']);
if (options.scope) destroyScope();
return finishNode(ast.forGenericStatement(variables, iterators, body));
}
}
// Local statements can either be variable assignments or function
// definitions. If a function definition is found, it will be delegated to
// `parseFunctionDeclaration()` with the isLocal flag.
//
// This AST structure might change into a local assignment with a function
// child.
//
// local ::= 'local' 'function' Name funcdecl
// | 'local' Name {',' Name} ['=' exp {',' exp}]
//
// `local` has already been consumed by the caller. If the current token is
// neither a name nor the `function` keyword, an unexpected-token error is
// raised.
function parseLocalStatement() {
var name;
if (Identifier === token.type) {
var variables = []
, init = [];
do {
name = parseIdentifier();
variables.push(name);
} while (consume(','));
if (consume('=')) {
do {
var expression = parseExpectedExpression();
init.push(expression);
} while (consume(','));
}
// Declarations don't exist before the statement has been evaluated.
// Therefore assignments can't use their declarator. And the identifiers
// shouldn't be added to the scope until the statement is complete.
if (options.scope) {
for (var i = 0, l = variables.length; i < l; ++i) {
scopeIdentifier(variables[i]);
}
}
return finishNode(ast.localStatement(variables, init));
}
if (consume(LKW['function'])) {
name = parseIdentifier();
if (options.scope) {
// The function's name is declared in the enclosing scope before the
// function's own scope is opened; parseFunctionDeclaration() closes the
// scope opened here.
scopeIdentifier(name);
createScope();
}
// MemberExpressions are not allowed in local function statements.
return parseFunctionDeclaration(name, true);
} else {
raiseUnexpectedToken('<name>', token);
}
}
// Raise an `invalidVar` error unless `node` may appear on the left-hand side
// of an assignment: an identifier, member expression or index expression that
// was not wrapped in parentheses.
function validateVar(node) {
  // @TODO we need something not dependent on the exact AST used. see also isCallExpression()
  var isAssignable = !node.inParens && (
    'Identifier' === node.type ||
    'MemberExpression' === node.type ||
    'IndexExpression' === node.type);
  if (!isAssignable) {
    raise(token, errors.invalidVar, token.value);
  }
}
// assignment ::= varlist '=' explist
// var ::= Name | prefixexp '[' exp ']' | prefixexp '.' Name
// varlist ::= var {',' var}
// explist ::= exp {',' exp}
//
// call ::= callexp
// callexp ::= prefixexp args | prefixexp ':' Name args
//
// Parses a statement that starts with a prefix expression. Returns an
// assignment statement node when the expression is followed by `,` or `=`, a
// call statement node when the expression is a call, and reports an
// unexpected-token error otherwise.
function parseAssignmentOrCallStatement() {
// Keep a reference to the previous token for better error messages in case
// of invalid statement
var previous = token
, expression, marker;
// The start marker is created up front and only pushed once the kind of
// statement is known (parseStatement() popped its own marker beforehand).
if (trackLocations) marker = createLocationMarker();
expression = parsePrefixExpression();
if (null == expression) return unexpected(token);
if (',='.indexOf(token.value) >= 0) {
var variables = [expression]
, init = []
, exp;
// Every assignment target must be a valid `var`.
validateVar(expression);
while (consume(',')) {
exp = parsePrefixExpression();
if (null == exp) raiseUnexpectedToken('<expression>', token);
validateVar(exp);
variables.push(exp);
}
expect('=');
do {
exp = parseExpectedExpression();
init.push(exp);
} while (consume(','));
pushLocation(marker);
return finishNode(ast.assignmentStatement(variables, init));
}
if (isCallExpression(expression)) {
pushLocation(marker);
return finishNode(ast.callStatement(expression));
}
// The prefix expression was neither part of an assignment or a
// callstatement, however as it was valid it's been consumed, so raise
// the exception on the previous token to provide a helpful message.
return unexpected(previous);
}
// ### Non-statements
//     Identifier ::= Name
//
// Consumes the current token, which must be an identifier, and returns an
// identifier node for it. Any other token type raises an unexpected-token
// error.
function parseIdentifier() {
  markLocation();
  var nameToken = token;
  if (Identifier !== nameToken.type) {
    raiseUnexpectedToken('<name>', nameToken);
  }
  next();
  return finishNode(ast.identifier(nameToken.value));
}
// Parse the function's parameters and body block. The name should already
// have been parsed and passed to this declaration function. By separating
// this we allow for anonymous functions in expressions.
//
// For local functions there's a boolean parameter which needs to be set
// when parsing the declaration.
//
// funcdecl ::= '(' [parlist] ')' block 'end'
// parlist ::= Name {',' Name} | [',' '...'] | '...'
//
// `name` is the already parsed name node, or null for an anonymous function.
// `isLocal` is normalised to `false` when omitted.
// The function's scope is expected to have been opened by the caller; this
// function only closes it.
function parseFunctionDeclaration(name, isLocal) {
var parameters = [];
expect('(');
// The declaration has arguments
if (!consume(')')) {
// Arguments are a comma separated list of identifiers, optionally ending
// with a vararg.
while (true) {
if (Identifier === token.type) {
var parameter = parseIdentifier();
// Function parameters are local.
if (options.scope) scopeIdentifier(parameter);
parameters.push(parameter);
if (consume(',')) continue;
}
// No arguments are allowed after a vararg.
else if (VarargLiteral === token.type) {
parameters.push(parsePrimaryExpression());
} else {
raiseUnexpectedToken('<name> or \'...\'', token);
}
// Reached after the last name or after `...`: the list must close here.
expect(')');
break;
}
}
var body = parseBlock();
expect(LKW['end']);
if (options.scope) destroyScope();
isLocal = isLocal || false;
return finishNode(ast.functionStatement(name, parameters, isLocal, body));
}
// Parse the function name as identifiers and member expressions.
//
// Name {'.' Name} [':' Name]
//
// Returns the identifier node, or the outermost member expression node when
// the name is dotted or uses `:`. When scope tracking is on, this also opens
// the function's scope and, for `:` names, declares `self` in it.
function parseFunctionName() {
var base, name, marker;
// One marker for the start of the whole name; it is pushed again for every
// member expression so that each of them starts at the first identifier.
if (trackLocations) marker = createLocationMarker();
base = parseIdentifier();
if (options.scope) {
attachScope(base, scopeHasName(base.name));
createScope();
}
while (consume('.')) {
pushLocation(marker);
name = parseIdentifier();
base = finishNode(ast.memberExpression(base, '.', name));
}
if (consume(':')) {
pushLocation(marker);
name = parseIdentifier();
base = finishNode(ast.memberExpression(base, ':', name));
if (options.scope) scopeIdentifierName('self');
}
return base;
}
//     tableconstructor ::= '{' [fieldlist] '}'
//     fieldlist ::= field {fieldsep field} fieldsep
//     field ::= '[' exp ']' '=' exp | Name = 'exp' | exp
//
//     fieldsep ::= ',' | ';'
//
// The opening `{` has already been consumed by the caller. Returns a table
// constructor node holding the parsed fields.
function parseTableConstructor() {
  var fields = []
    , key, value;
  while (true) {
    markLocation();
    if (Punctuator === token.type && consume('[')) {
      key = parseExpectedExpression();
      expect(']');
      expect('=');
      value = parseExpectedExpression();
      fields.push(finishNode(ast.tableKey(key, value)));
    } else if (Identifier === token.type) {
      // Only a real `=` punctuator turns `Name` into a key; a string literal
      // whose content is "=" must not be taken for the separator.
      if (Punctuator === lookahead.type && '=' === lookahead.value) {
        key = parseIdentifier();
        next();
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableKeyString(key, value)));
      } else {
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableValue(value)));
      }
    } else {
      if (null == (value = parseExpression())) {
        // No field here (empty table or trailing separator): drop the marker
        // that was pushed for it at the top of the loop.
        locations.pop();
        break;
      }
      fields.push(finishNode(ast.tableValue(value)));
    }
    // A field separator is the punctuator `,` or `;`. The token type is
    // checked as well because a substring search on the value alone also
    // matches an empty string literal (''.indexOf-style searches succeed for
    // the empty string), which let `{1 "" 2}` through.
    if (Punctuator === token.type && (',' === token.value || ';' === token.value)) {
      next();
      continue;
    }
    break;
  }
  expect('}');
  return finishNode(ast.tableConstructorExpression(fields));
}
// Expression parser
// -----------------
//
// Expressions are evaluated and always return a value. If nothing is
// matched null will be returned.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
//     primary ::= nil | false | true | Number | String | '...'
//          | functiondef | tableconstructor
//
//     prefixexp ::= (Name | '(' exp ')' ) { '[' exp ']'
//          | '.' Name | ':' Name args | args }
//
// Entry point of the expression parser: parses a full expression starting at
// the lowest precedence level.
function parseExpression() {
  return parseSubExpression(0);
}
// Parse an expression that is required at this position. If no expression
// can be parsed, an unexpected-token error is raised for the current token.
function parseExpectedExpression() {
  var expression = parseExpression();
  if (null != expression) return expression;
  raiseUnexpectedToken('<expression>', token);
}
// Return the precedence priority of the operator, or 0 when `operator` is not
// a binary operator.
//
// As unary `-` can't be distinguished from binary `-`, unary precedence
// isn't described in this table but in `parseSubExpression()` itself.
//
// As this function gets hit on every expression it's been optimized due to
// the expensive CompareICStub which took ~8% of the parse time. Punctuators
// are therefore dispatched on length and first character code.
//
// The keyword operators `or` and `and` are compared against their spelling in
// the active keyword table. Their length and first letter differ between
// languages, so they cannot take part in the character-code dispatch: a
// translated `or` would go unrecognised, and an unrelated two-letter keyword
// starting with `o` would be mistaken for it.
function binaryPrecedence(operator) {
  var charCode = operator.charCodeAt(0)
    , length = operator.length;
  if (1 === length) {
    switch (charCode) {
      case 94: return 12; // ^
      case 42: case 47: case 37: return 10; // * / %
      case 43: case 45: return 9; // + -
      case 38: return 6; // &
      case 126: return 5; // ~
      case 124: return 4; // |
      case 60: case 62: return 3; // < >
    }
  } else if (2 === length) {
    switch (charCode) {
      case 47: return 10; // //
      case 46: return 8; // ..
      case 60: case 62:
        if ('<<' === operator || '>>' === operator) return 7; // << >>
        return 3; // <= >=
      case 61: case 126: return 3; // == ~=
    }
  }
  if (LKW['or'] === operator) return 1;
  if (LKW['and'] === operator) return 2;
  return 0;
}
// Implement an operator-precedence parser to handle binary operator
// precedence.
//
// We use this algorithm because it's compact, it's fast and Lua core uses
// the same so we can be sure our expressions are parsed in the same manner
// without excessive amounts of tests.
//
// exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
// `minPrecedence` is the binding power an operator must exceed to be consumed
// at this level. Returns the expression node, or null when the current token
// cannot start an expression.
function parseSubExpression(minPrecedence) {
var operator = token.value
// The left-hand side in binary operations.
, expression, marker;
// Marks the start of the left-hand side; it is pushed again for every binary
// expression built in the loop below.
if (trackLocations) marker = createLocationMarker();
// UnaryExpression
if (isUnary(token)) {
markLocation();
next();
// The operand of a unary operator is parsed with precedence 10.
var argument = parseSubExpression(10);
if (argument == null) raiseUnexpectedToken('<expression>', token);
expression = finishNode(ast.unaryExpression(operator, argument));
}
if (null == expression) {
// PrimaryExpression
expression = parsePrimaryExpression();
// PrefixExpression
if (null == expression) {
expression = parsePrefixExpression();
}
}
// This is not a valid left hand expression.
if (null == expression) return null;
var precedence;
while (true) {
operator = token.value;
// Only punctuators and keywords can be binary operators; anything else gets
// precedence 0 and ends the loop.
precedence = (Punctuator === token.type || Keyword === token.type) ?
binaryPrecedence(operator) : 0;
if (precedence === 0 || precedence <= minPrecedence) break;
// Right-hand precedence operators
// Lowering the precedence by one lets the same operator be consumed again
// by the recursive call, which makes it group to the right.
if ('^' === operator || '..' === operator) precedence--;
next();
var right = parseSubExpression(precedence);
if (null == right) raiseUnexpectedToken('<expression>', token);
// Push in the marker created before the loop to wrap its entirety.
if (trackLocations) locations.push(marker);
expression = finishNode(ast.binaryExpression(operator, expression, right));
}
return expression;
}
// prefixexp ::= prefix {suffix}
// prefix ::= Name | '(' exp ')'
// suffix ::= '[' exp ']' | '.' Name | ':' Name args | args
//
// args ::= '(' [explist] ')' | tableconstructor | String
//
// Returns the node for the prefix with all of its suffixes applied, or null
// when the current token is neither a name nor `(`.
function parsePrefixExpression() {
var base, name, marker;
// Start of the whole prefix expression; pushed again before each suffix so
// that every wrapping node starts at the prefix.
if (trackLocations) marker = createLocationMarker();
// The prefix
if (Identifier === token.type) {
name = token.value;
base = parseIdentifier();
// Set the parent scope.
if (options.scope) attachScope(base, scopeHasName(name));
} else if (consume('(')) {
base = parseExpectedExpression();
expect(')');
base.inParens = true; // XXX: quick and dirty. needed for validateVar
} else {
return null;
}
// The suffix
var expression, identifier;
while (true) {
if (Punctuator === token.type) {
switch (token.value) {
case '[':
pushLocation(marker);
next();
expression = parseExpectedExpression();
expect(']');
base = finishNode(ast.indexExpression(base, expression));
break;
case '.':
pushLocation(marker);
next();
identifier = parseIdentifier();
base = finishNode(ast.memberExpression(base, '.', identifier));
break;
case ':':
pushLocation(marker);
next();
identifier = parseIdentifier();
base = finishNode(ast.memberExpression(base, ':', identifier));
// Once a : is found, this has to be a CallExpression, otherwise
// throw an error.
// The marker is pushed a second time, for the call node that wraps the
// member expression just built.
pushLocation(marker);
base = parseCallExpression(base);
break;
case '(': case '{': // args
pushLocation(marker);
base = parseCallExpression(base);
break;
default:
// Any other punctuator ends the prefix expression.
return base;
}
} else if (StringLiteral === token.type) {
// A string literal directly after a prefix expression is a call argument.
pushLocation(marker);
base = parseCallExpression(base);
} else {
break;
}
}
return base;
}
// args ::= '(' [explist] ')' | tableconstructor | String
//
// Parses the arguments that follow `base` and returns the matching call node:
// a call expression for `(...)`, a table call expression for `{...}` or a
// string call expression for a string literal. Any other token raises an
// unexpected-token error.
function parseCallExpression(base) {
if (Punctuator === token.type) {
switch (token.value) {
case '(':
// Without the empty statement feature, a `(` on a different line than the
// previous token is reported as ambiguous syntax.
if (!features.emptyStatement) {
if (token.line !== previousToken.line)
raise({}, errors.ambiguousSyntax, token.value);
}
next();
// List of expressions
var expressions = [];
var expression = parseExpression();
if (null != expression) expressions.push(expression);
while (consume(',')) {
expression = parseExpectedExpression();
expressions.push(expression);
}
expect(')');
return finishNode(ast.callExpression(base, expressions));
case '{':
// Mark the start of the table argument before its `{` is consumed.
markLocation();
next();
var table = parseTableConstructor();
return finishNode(ast.tableCallExpression(base, table));
}
} else if (StringLiteral === token.type) {
return finishNode(ast.stringCallExpression(base, parsePrimaryExpression()));
}
raiseUnexpectedToken('function arguments', token);
}
// primary ::= String | Numeric | nil | true | false
// | functiondef | tableconstructor | '...'
//
// Returns a literal node, a function node or a table constructor node. When
// the current token starts none of these, the function falls off the end and
// returns undefined; callers test the result with `null ==`.
function parsePrimaryExpression() {
// Bitmask of all literal token types, tested with `&` below.
var literals = StringLiteral | NumericLiteral | BooleanLiteral | NilLiteral | VarargLiteral
, value = token.value
, type = token.type
, marker;
// The marker is created unconditionally but only pushed in the branches that
// actually produce a node.
if (trackLocations) marker = createLocationMarker();
if (type & literals) {
pushLocation(marker);
// The raw source text of the literal, taken before the token is consumed.
var raw = input.slice(token.range[0], token.range[1]);
next();
return finishNode(ast.literal(type, value, raw));
} else if (Keyword === type && LKW['function'] === value) {
pushLocation(marker);
next();
// Scope for the anonymous function; parseFunctionDeclaration() closes it.
if (options.scope) createScope();
return parseFunctionDeclaration(null);
} else if (consume('{')) {
pushLocation(marker);
return parseTableConstructor();
}
}
// Parser
// ------
// Export the main parser.
//
// - `wait` Hold parsing until end() is called. Defaults to false
// - `comments` Store comments. Defaults to true.
// - `scope` Track identifier scope. Defaults to false.
// - `locations` Store location information. Defaults to false.
// - `ranges` Store the start and end character locations. Defaults to
// false.
// - `luaVersion` Selects the set of language features to parse with. Must be
// a key of `versionFeatures` ('5.1', '5.2', '5.3' or 'LuaJIT'); any other
// value makes parse() throw.
// - `extendedIdentifiers` Allow characters with a code of 128 or above in
// identifiers.
// - `onCreateNode` Callback which will be invoked when a syntax node is
// created.
// - `onCreateScope` Callback which will be invoked when a new scope is
// created.
// - `onDestroyScope` Callback which will be invoked when the current scope
// is destroyed.
// - `onLocalDeclaration` Callback which will be invoked with the name each
// time an identifier is declared in the current scope.
//
// Example:
//
// var parser = require('luaparser');
// parser.parse('i = 0');
exports.parse = parse;
// Feature flags per supported Lua version, keyed by the `luaVersion` option.
// parse() stores the selected entry in `features`; the lexer and parser then
// test individual flags (e.g. `features.labels`). A flag that is absent from
// an entry reads as undefined, i.e. the feature is off.
var versionFeatures = {
// Lua 5.1 enables none of the optional features.
'5.1': {
},
'5.2': {
labels: true,
emptyStatement: true,
hexEscapes: true,
skipWhitespaceEscape: true,
strictEscapes: true,
relaxedBreak: true
},
'5.3': {
labels: true,
emptyStatement: true,
hexEscapes: true,
skipWhitespaceEscape: true,
strictEscapes: true,
unicodeEscapes: true,
bitwiseOperators: true,
integerDivision: true,
relaxedBreak: true
},
'LuaJIT': {
// XXX: LuaJIT language features may depend on compilation options; may need to
// rethink how to handle this. Specifically, there is a LUAJIT_ENABLE_LUA52COMPAT
// that removes contextual goto. Maybe add 'LuaJIT-5.2compat' as well?
labels: true,
contextualGoto: true,
hexEscapes: true,
skipWhitespaceEscape: true,
strictEscapes: true,
unicodeEscapes: true
}
};
// Set up the parser state for a new input and, unless `options.wait` is set,
// parse it right away.
//
// May be called as parse(input, options), parse(input) or parse(options).
// Returns the parsed chunk, or `exports` when `options.wait` is set so that
// write()/end() can be chained.
// Throws an Error when `options.luaVersion` is not a key of `versionFeatures`.
function parse(_input, _options) {
// A single object argument is taken to be the options.
if ('undefined' === typeof _options && 'object' === typeof _input) {
_options = _input;
_input = undefined;
}
if (!_options) _options = {};
input = _input || '';
options = extend(defaultOptions, _options);
// Rewind the lexer
index = 0;
line = 1;
lineStart = 0;
length = input.length;
// When tracking identifier scope, initialize with an empty scope.
scopes = [[]];
scopeDepth = 0;
globals = [];
locations = [];
// Select the feature flags for the requested Lua version.
if (!(features = versionFeatures[options.luaVersion])) {
throw new Error(sprintf("Lua version '%1' not supported", options.luaVersion));
}
if (options.comments) comments = [];
if (!options.wait) return end();
return exports;
}
// Append text to the source buffer without starting the parse. The argument
// is converted with String() first. Returns `exports` so calls can be chained.
exports.write = write;
function write(_input) {
  input = input + String(_input);
  length = input.length;
  return exports;
}
// Send an EOF and begin parsing.
//
// `_input`, when given, is appended to the buffer first. Returns the parsed
// chunk node; `comments` and `globals` are attached to it when the
// corresponding options are enabled.
exports.end = end;
function end(_input) {
if ('undefined' !== typeof _input) write(_input);
// Ignore shebangs.
// The first line is blanked out with spaces rather than removed, so the
// length of the input and all later character offsets stay the same.
if (input && input.substr(0, 2) === '#!') input = input.replace(/^.*/, function (line) {
return line.replace(/./g, ' ');
});
length = input.length;
trackLocations = options.locations || options.ranges;
// Initialize with a lookahead token.
lookahead = lex();
var chunk = parseChunk();
if (options.comments) chunk.comments = comments;
if (options.scope) chunk.globals = globals;
// Every marker pushed during parsing should have been popped again.
/* istanbul ignore if */
if (locations.length > 0)
throw new Error('Location tracking failed. This is most likely a bug in luaparse');
return chunk;
}
}));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment