Created
October 14, 2019 01:29
-
-
Save KFlash/f8812201b16c2643cb0d83e39a657f3e to your computer and use it in GitHub Desktop.
lawrence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**** COMMON.TS ******/
/**
 * Consumes one line-terminator code unit and updates the position bookkeeping.
 *
 * Effects on `parser`:
 * - `index` is advanced by one;
 * - `offset` is set to the new `index` (`scan` computes columns as `index - offset`);
 * - `newLine` is set to 1;
 * - `lineStartOffset` (read by `scan` as the current line) is incremented,
 *   unless `lastIsCR` is 1.
 *
 * @param parser - Scanner state; mutated in place.
 * @param lastIsCR - Pass 1 when the code unit consumed immediately before was a
 *   carriage return, so that the LF of a CRLF pair is not counted as a second line.
 */
export function consumeLineBreak(parser: ParserState, lastIsCR: 0 | 1): void {
  parser.index++;
  parser.offset = parser.index;
  parser.newLine = 1;
  if (lastIsCR === 0) parser.lineStartOffset++;
}
/**
 * Reports whether `cp` is one of the four line terminators named in `Chars`:
 * carriage return, line feed, line separator or paragraph separator.
 *
 * @param cp - A code unit / code point. `NaN` (what `charCodeAt` yields past the
 *   end of a string) is reported as not a line terminator.
 * @returns `true` when `cp` is a line terminator, otherwise `false`.
 */
export function isLineTerminator(cp: number): boolean {
  // Fast reject: a value with any bit set outside the union of the four
  // terminators' bits cannot be one of them.
  if (cp & ~(Chars.CarriageReturn | Chars.LineFeed | Chars.ParagraphSeparator | Chars.LineSeparator)) {
    return false;
  }
  // `(cp ^ LineSeparator) <= 1` accepts LineSeparator and the value that differs
  // from it only in the lowest bit (ParagraphSeparator, assuming the usual
  // U+2028 / U+2029 values for these constants).
  return cp === Chars.CarriageReturn || cp === Chars.LineFeed || (cp ^ Chars.LineSeparator) <= 1;
}
/**** COMMENTS ******/
/**
 * Skips the remainder of a single-line comment.
 *
 * Starting at `parser.index`, advances until a line terminator or the end of
 * the source is reached. The line terminator itself is NOT consumed, so the
 * caller's normal line-break handling still sees it.
 *
 * The code unit at `parser.index` is tested before advancing. Pre-incrementing
 * first would step over a line break that directly follows the comment opener
 * (e.g. an empty `//` comment) and swallow the following line.
 *
 * @param parser - Scanner state; only `index` is modified.
 * @returns Always `Token.Comment`.
 */
export function skipSingleLineComment(parser: ParserState): Token {
  while (parser.index < parser.length && !isLineTerminator(parser.source.charCodeAt(parser.index))) {
    parser.index++;
  }
  return Token.Comment;
}
/**
 * Skips the body of a multi-line comment, up to and including the closing
 * asterisk-slash pair.
 *
 * The caller is expected to have consumed the opening slash-asterisk already.
 * Line breaks inside the comment go through `consumeLineBreak`, so line and
 * column bookkeeping and `parser.newLine` stay current; a CR immediately
 * followed by LF counts as one line break.
 *
 * Every iteration consumes exactly one code unit (two for the terminator).
 * Advancing a second time after `consumeLineBreak` would skip the code unit
 * following each line break, which could hide a terminator placed at the start
 * of a line and would lose the LF of a CRLF pair.
 *
 * If the input ends before the terminator is found, the function stops at the
 * end of the source and still returns `Token.Comment`; no error is reported here.
 *
 * @param parser - Scanner state; mutated in place.
 * @returns Always `Token.Comment`.
 */
export function skipMultiLineComment(parser: ParserState): Token {
  // 1 only while the code unit consumed immediately before was a carriage return.
  let lastIsCR: 0 | 1 = 0;

  while (parser.index < parser.length) {
    const char = parser.source.charCodeAt(parser.index);

    if (char === Chars.Asterisk) {
      parser.index++;
      lastIsCR = 0;
      if (parser.source.charCodeAt(parser.index) === Chars.Slash) {
        parser.index++;
        return Token.Comment;
      }
      // Not a terminator: the unit after the asterisk is examined on the next
      // iteration (it may itself be an asterisk or a line break).
    } else if (char === Chars.CarriageReturn) {
      consumeLineBreak(parser, 0);
      lastIsCR = 1;
    } else if (char === Chars.LineFeed) {
      // The LF of a CRLF pair must not be counted as a second line.
      consumeLineBreak(parser, lastIsCR);
      lastIsCR = 0;
    } else if (char > 127 && (char ^ Chars.LineSeparator) <= 1) {
      // Line separator, or the value differing from it in the lowest bit.
      consumeLineBreak(parser, 0);
      lastIsCR = 0;
    } else {
      parser.index++;
      lastIsCR = 0;
    }
  }

  return Token.Comment;
}
/**** SCAN.TS *****/
import { ParserState, Context } from '../common'; | |
import { Token } from '../token'; | |
import { Chars } from '../chars'; | |
import { consumeLineBreak, isUnicodeOnlySpace } from './common'; | |
import { skipSingleLineComment, skipMultiLineComment } from './comments'; | |
import { scanMaybeIdentifier, scanIdentifier, scanUnicodeEscapeIdStart } from './identifier'; | |
import { scanNumeric, scanFloatingNumeric, scanLeadingZero } from './numeric'; | |
import { scanStringLiteral } from './string'; | |
import { scanRegularExpression } from './regexp'; | |
import { CharTypes, CharFlags } from './charClassifier'; | |
import { scanTemplate } from './template'; | |
import { report, Errors } from '../errors'; | |
/**
 * Dispatch table used by `scan`: maps an ASCII code unit (0-127) to the token
 * kind that selects how scanning continues from that character.
 *
 * For single-character punctuators the entry is the final token. For the other
 * entries it is only a dispatch tag: for example, `Token.StringLiteral` marks
 * both quote characters, `Token.TemplateTail` marks the backtick, and
 * `Token.IdentifierOrKeyword` marks the lowercase letters that `scan` passes
 * to `scanIdentifier` with `canBeKeyword` set.
 *
 * Note: `scan` only consults this table for code units up to 0x7e, so the last
 * entry (127, Delete) is not reached through that path.
 */
export const firstCharKinds = [
  /*   0 - Null               */ Token.Error,
  /*   1 - Start of Heading   */ Token.Error,
  /*   2 - Start of Text      */ Token.Error,
  /*   3 - End of Text        */ Token.Error,
  /*   4 - End of Transmission */ Token.Error,
  /*   5 - Enquiry            */ Token.Error,
  /*   6 - Acknowledgment     */ Token.Error,
  /*   7 - Bell               */ Token.Error,
  /*   8 - Backspace          */ Token.Error,
  /*   9 - Horizontal Tab     */ Token.WhiteSpace,
  /*  10 - Line Feed          */ Token.LineFeed,
  /*  11 - Vertical Tab       */ Token.WhiteSpace,
  /*  12 - Form Feed          */ Token.WhiteSpace,
  /*  13 - Carriage Return    */ Token.CarriageReturn,
  /*  14 - Shift Out          */ Token.Error,
  /*  15 - Shift In           */ Token.Error,
  /*  16 - Data Link Escape   */ Token.Error,
  /*  17 - Device Control 1   */ Token.Error,
  /*  18 - Device Control 2   */ Token.Error,
  /*  19 - Device Control 3   */ Token.Error,
  /*  20 - Device Control 4   */ Token.Error,
  /*  21 - Negative Ack.      */ Token.Error,
  /*  22 - Synchronous Idle   */ Token.Error,
  /*  23 - End of Transmission Block */ Token.Error,
  /*  24 - Cancel             */ Token.Error,
  /*  25 - End of Medium      */ Token.Error,
  /*  26 - Substitute         */ Token.Error,
  /*  27 - Escape             */ Token.Error,
  /*  28 - File Separator     */ Token.Error,
  /*  29 - Group Separator    */ Token.Error,
  /*  30 - Record Separator   */ Token.Error,
  /*  31 - Unit Separator     */ Token.Error,
  /*  32 - Space              */ Token.WhiteSpace,
  /*  33 - !                  */ Token.Negate,
  /*  34 - "                  */ Token.StringLiteral,
  /*  35 - #                  */ Token.Error,
  /*  36 - $                  */ Token.Identifier,
  /*  37 - %                  */ Token.Modulo,
  /*  38 - &                  */ Token.BitwiseAnd,
  /*  39 - '                  */ Token.StringLiteral,
  /*  40 - (                  */ Token.LeftParen,
  /*  41 - )                  */ Token.RightParen,
  /*  42 - *                  */ Token.Multiply,
  /*  43 - +                  */ Token.Add,
  /*  44 - ,                  */ Token.Comma,
  /*  45 - -                  */ Token.Subtract,
  /*  46 - .                  */ Token.Period,
  /*  47 - /                  */ Token.Divide,
  /*  48 - 0                  */ Token.LeadingZero,
  /*  49 - 1                  */ Token.NumericLiteral,
  /*  50 - 2                  */ Token.NumericLiteral,
  /*  51 - 3                  */ Token.NumericLiteral,
  /*  52 - 4                  */ Token.NumericLiteral,
  /*  53 - 5                  */ Token.NumericLiteral,
  /*  54 - 6                  */ Token.NumericLiteral,
  /*  55 - 7                  */ Token.NumericLiteral,
  /*  56 - 8                  */ Token.NumericLiteral,
  /*  57 - 9                  */ Token.NumericLiteral,
  /*  58 - :                  */ Token.Colon,
  /*  59 - ;                  */ Token.Semicolon,
  /*  60 - <                  */ Token.LessThan,
  /*  61 - =                  */ Token.Assign,
  /*  62 - >                  */ Token.GreaterThan,
  /*  63 - ?                  */ Token.QuestionMark,
  /*  64 - @                  */ Token.Error,
  /*  65 - A                  */ Token.Identifier,
  /*  66 - B                  */ Token.Identifier,
  /*  67 - C                  */ Token.Identifier,
  /*  68 - D                  */ Token.Identifier,
  /*  69 - E                  */ Token.Identifier,
  /*  70 - F                  */ Token.Identifier,
  /*  71 - G                  */ Token.Identifier,
  /*  72 - H                  */ Token.Identifier,
  /*  73 - I                  */ Token.Identifier,
  /*  74 - J                  */ Token.Identifier,
  /*  75 - K                  */ Token.Identifier,
  /*  76 - L                  */ Token.Identifier,
  /*  77 - M                  */ Token.Identifier,
  /*  78 - N                  */ Token.Identifier,
  /*  79 - O                  */ Token.Identifier,
  /*  80 - P                  */ Token.Identifier,
  /*  81 - Q                  */ Token.Identifier,
  /*  82 - R                  */ Token.Identifier,
  /*  83 - S                  */ Token.Identifier,
  /*  84 - T                  */ Token.Identifier,
  /*  85 - U                  */ Token.Identifier,
  /*  86 - V                  */ Token.Identifier,
  /*  87 - W                  */ Token.Identifier,
  /*  88 - X                  */ Token.Identifier,
  /*  89 - Y                  */ Token.Identifier,
  /*  90 - Z                  */ Token.Identifier,
  /*  91 - [                  */ Token.LeftBracket,
  /*  92 - \                  */ Token.EscapedIdentifier,
  /*  93 - ]                  */ Token.RightBracket,
  /*  94 - ^                  */ Token.BitwiseXor,
  /*  95 - _                  */ Token.Identifier,
  /*  96 - `                  */ Token.TemplateTail,
  /*  97 - a                  */ Token.IdentifierOrKeyword,
  /*  98 - b                  */ Token.IdentifierOrKeyword,
  /*  99 - c                  */ Token.IdentifierOrKeyword,
  /* 100 - d                  */ Token.IdentifierOrKeyword,
  /* 101 - e                  */ Token.IdentifierOrKeyword,
  /* 102 - f                  */ Token.IdentifierOrKeyword,
  /* 103 - g                  */ Token.IdentifierOrKeyword,
  /* 104 - h                  */ Token.Identifier,
  /* 105 - i                  */ Token.IdentifierOrKeyword,
  /* 106 - j                  */ Token.Identifier,
  /* 107 - k                  */ Token.IdentifierOrKeyword,
  /* 108 - l                  */ Token.IdentifierOrKeyword,
  /* 109 - m                  */ Token.IdentifierOrKeyword,
  /* 110 - n                  */ Token.IdentifierOrKeyword,
  /* 111 - o                  */ Token.IdentifierOrKeyword,
  /* 112 - p                  */ Token.IdentifierOrKeyword,
  /* 113 - q                  */ Token.Identifier,
  /* 114 - r                  */ Token.IdentifierOrKeyword,
  /* 115 - s                  */ Token.IdentifierOrKeyword,
  /* 116 - t                  */ Token.IdentifierOrKeyword,
  /* 117 - u                  */ Token.IdentifierOrKeyword,
  /* 118 - v                  */ Token.IdentifierOrKeyword,
  /* 119 - w                  */ Token.IdentifierOrKeyword,
  /* 120 - x                  */ Token.Identifier,
  /* 121 - y                  */ Token.IdentifierOrKeyword,
  /* 122 - z                  */ Token.IdentifierOrKeyword,
  /* 123 - {                  */ Token.LeftBrace,
  /* 124 - |                  */ Token.BitwiseOr,
  /* 125 - }                  */ Token.RightBrace,
  /* 126 - ~                  */ Token.Complement,
  /* 127 - Delete             */ Token.Error
];
/**
 * Scans the next token starting at `parser.index`.
 *
 * Whitespace, line breaks and comments are skipped in a loop; the function
 * returns as soon as a real token has been recognised, or `Token.EOF` when the
 * end of the source is reached. Before each candidate character is examined,
 * `parser.start`, `parser.column` and `parser.line` are updated, so on return
 * they describe where the returned token begins.
 *
 * Behavioural notes:
 * - `//` and multi-line comments are recognised BEFORE the `Context.AllowRegExp`
 *   check, so a comment in a position where a regular expression is allowed is
 *   still treated as a comment.
 * - The `-->` sequence is treated as an HTML close comment only outside module
 *   code and only at the very start of the input or after a line break
 *   (`parser.newLine`). Otherwise it is scanned as `--` (the `>` is left for
 *   the next call).
 * - `<!--` is treated as a single-line comment outside module code.
 * - `??` and `?.` are only recognised when `Context.OptionsNext` is set; `?.`
 *   followed by a digit is scanned as a plain `?`.
 * - A CR immediately followed by LF counts as one line break; any other
 *   skipped input between them makes them two.
 *
 * @param parser - Scanner state; mutated in place.
 * @param context - Bit flags controlling context-sensitive scanning.
 * @returns The token that was recognised.
 */
export function scan(parser: ParserState, context: Context): Token {
  // 1 only while the code unit consumed immediately before was a carriage return.
  let lastIsCR: 0 | 1 = 0;
  // Only trivia can precede the token this call returns, so being at the start
  // of the input is decided once, on entry.
  const lineStart: 0 | 1 = parser.index === 0 ? 1 : 0;

  while (parser.index < parser.length) {
    const char = parser.source.charCodeAt(parser.index);

    parser.start = parser.index;
    parser.column = parser.index - parser.offset;
    parser.line = parser.lineStartOffset;

    if (char > 0x7e) {
      // Line separator, or the value differing from it in the lowest bit.
      if ((char ^ Chars.LineSeparator) <= 1) {
        lastIsCR = 0;
        consumeLineBreak(parser, 0);
        continue;
      }

      if (isUnicodeOnlySpace(char)) {
        parser.index++;
        lastIsCR = 0;
        continue;
      }

      return scanMaybeIdentifier(parser, context, char);
    }

    const token = firstCharKinds[char];

    switch (token) {
      // Single-character tokens: the table entry is the final token.
      case Token.RightBrace:
      case Token.LeftBrace:
      case Token.Comma:
      case Token.Colon:
      case Token.Complement:
      case Token.LeftParen:
      case Token.RightParen:
      case Token.Semicolon:
      case Token.LeftBracket:
      case Token.RightBracket:
      case Token.Error:
        parser.index++;
        return token;

      // general whitespace
      case Token.WhiteSpace:
        // Spaces frequently come in groups, so use a tight inner loop to skip
        while (CharTypes[parser.source.charCodeAt(++parser.index)] & CharFlags.WhiteSpace) {}
        lastIsCR = 0;
        break;

      // line terminators
      case Token.CarriageReturn:
        consumeLineBreak(parser, 0);
        lastIsCR = 1;
        break;

      case Token.LineFeed:
        // The LF of a CRLF pair must not be counted as a second line.
        consumeLineBreak(parser, lastIsCR);
        lastIsCR = 0;
        break;

      // `a`...`z`
      case Token.IdentifierOrKeyword:
        return scanIdentifier(parser, context, char, /* canBeKeyword */ 1);

      // `A`...`Z`, `_var`, `$var`
      case Token.Identifier:
        return scanIdentifier(parser, context, char, /* canBeKeyword */ 0);

      // `1`...`9`
      case Token.NumericLiteral:
        return scanNumeric(parser, context, char);

      // `0`
      case Token.LeadingZero:
        return scanLeadingZero(parser, context);

      // `string`
      case Token.StringLiteral:
        return scanStringLiteral(parser, context, char) as Token;

      // template literal (the table tags the backtick with `TemplateTail`)
      case Token.TemplateTail:
        return scanTemplate(parser, context);

      // `\\u{N}var`
      case Token.EscapedIdentifier:
        return scanUnicodeEscapeIdStart(parser, context);

      // `?`, `??`, `?.`
      case Token.QuestionMark: {
        parser.index++;
        if (parser.index >= parser.length) return Token.QuestionMark;
        if ((context & Context.OptionsNext) < 1) return Token.QuestionMark;

        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.QuestionMark) {
          parser.index++;
          return Token.Coalesce;
        }

        if (next === Chars.Period) {
          const afterPeriod = parser.index + 1;
          const following = parser.source.charCodeAt(afterPeriod);
          // `a?.5:1` is a conditional with a decimal literal, not optional chaining.
          if (following >= Chars.Zero && following <= Chars.Nine) {
            return Token.QuestionMark;
          }
          parser.index = afterPeriod;
          return Token.QuestionMarkPeriod;
        }

        return Token.QuestionMark;
      }

      // `/`, `/=`, `//...`, `/*...*/`, regular expression
      case Token.Divide: {
        parser.index++;
        if (parser.index < parser.length) {
          const next = parser.source.charCodeAt(parser.index);

          if (next === Chars.Slash) {
            // Stay on the second slash so the helper cannot step over a line
            // break that immediately follows `//`; the slash itself is never a
            // line terminator.
            skipSingleLineComment(parser);
            lastIsCR = 0;
            continue;
          }

          if (next === Chars.Asterisk) {
            parser.index++;
            skipMultiLineComment(parser);
            lastIsCR = 0;
            continue;
          }

          // Checked only after the comment forms, so comments are never
          // handed to the regular-expression scanner.
          if (context & Context.AllowRegExp) {
            return scanRegularExpression(parser, context);
          }

          if (next === Chars.EqualSign) {
            parser.index++;
            return Token.DivideAssign;
          }
        }
        return Token.Divide;
      }

      // `<`, `<=`, `<<`, `<<=`, `<!--`
      case Token.LessThan: {
        parser.index++; // skip `<`
        if (parser.index < parser.length) {
          const next = parser.source.charCodeAt(parser.index);

          if (next === Chars.LessThan) {
            if (parser.source.charCodeAt(++parser.index) === Chars.EqualSign) {
              parser.index++;
              return Token.ShiftLeftAssign;
            }
            return Token.ShiftLeft;
          }

          if (next === Chars.EqualSign) {
            parser.index++;
            return Token.LessThanOrEqual;
          }

          if (
            next === Chars.Exclamation &&
            (context & Context.Module) === 0 &&
            parser.source.charCodeAt(parser.index + 1) === Chars.Hyphen &&
            parser.source.charCodeAt(parser.index + 2) === Chars.Hyphen
          ) {
            // Move onto the second hyphen; the rest of the line is a comment.
            parser.index += 2;
            skipSingleLineComment(parser);
            lastIsCR = 0;
            continue;
          }
        }
        return Token.LessThan;
      }

      // `-`, `--`, `-=`, `-->`
      case Token.Subtract: {
        const index = parser.index + 1;
        if (index < parser.length) {
          const next = parser.source.charCodeAt(index);

          if (next === Chars.Hyphen) {
            if (
              parser.source.charCodeAt(index + 1) === Chars.GreaterThan &&
              (context & Context.Module) === 0 &&
              (lineStart || parser.newLine)
            ) {
              // HTML close comment: move onto `>` and skip the rest of the line.
              parser.index = index + 1;
              skipSingleLineComment(parser);
              lastIsCR = 0;
              continue;
            }
            // Anywhere else `-->` is `--` followed by `>`; the `>` is scanned
            // by the next call.
            parser.index = index + 1;
            return Token.Decrement;
          }

          if (next === Chars.EqualSign) {
            parser.index = index + 1;
            return Token.SubtractAssign;
          }
        }
        parser.index++;
        return Token.Subtract;
      }

      // `.`, `...`, `.123` (numeric literal)
      case Token.Period: {
        const index = parser.index + 1;
        if (index < parser.length) {
          const next = parser.source.charCodeAt(index);

          if (next === Chars.Period) {
            if (parser.source.charCodeAt(index + 1) === Chars.Period) {
              parser.index = index + 2;
              return Token.Ellipsis;
            }
          } else if (next >= Chars.Zero && next <= Chars.Nine) {
            // `parser.index` is left on the `.` for the numeric scanner.
            return scanFloatingNumeric(parser, context);
          }
        }
        parser.index++;
        return Token.Period;
      }

      // `=`, `==`, `===`, `=>`
      case Token.Assign: {
        parser.index++;
        if (parser.index >= parser.length) return Token.Assign;

        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.EqualSign) {
          if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.LooseEqual;
          parser.index++;
          return Token.StrictEqual;
        }

        if (next === Chars.GreaterThan) {
          parser.index++;
          return Token.Arrow;
        }

        return Token.Assign;
      }

      // `!`, `!=`, `!==`
      case Token.Negate: {
        const index = parser.index + 1;
        if (parser.source.charCodeAt(index) === Chars.EqualSign) {
          if (parser.source.charCodeAt(index + 1) === Chars.EqualSign) {
            parser.index = index + 2;
            return Token.StrictNotEqual;
          }
          parser.index = index + 1;
          return Token.LooseNotEqual;
        }
        parser.index = index;
        return Token.Negate;
      }

      // `%`, `%=`
      case Token.Modulo:
        if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.Modulo;
        parser.index++;
        return Token.ModuloAssign;

      // `*`, `**`, `*=`, `**=`
      case Token.Multiply: {
        parser.index++;
        if (parser.index >= parser.length) return Token.Multiply;

        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.MultiplyAssign;
        }

        if (next !== Chars.Asterisk) return Token.Multiply;
        if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.Exponentiate;
        parser.index++;
        return Token.ExponentiateAssign;
      }

      // `^`, `^=`
      case Token.BitwiseXor:
        if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.BitwiseXor;
        parser.index++;
        return Token.BitwiseXorAssign;

      // `+`, `++`, `+=`
      case Token.Add: {
        parser.index++;
        if (parser.index >= parser.length) return Token.Add;

        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.Plus) {
          parser.index++;
          return Token.Increment;
        }

        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.AddAssign;
        }

        return Token.Add;
      }

      // `|`, `||`, `|=`
      case Token.BitwiseOr: {
        parser.index++;
        if (parser.index >= parser.length) return Token.BitwiseOr;

        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.VerticalBar) {
          parser.index++;
          return Token.LogicalOr;
        }

        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.BitwiseOrAssign;
        }

        return Token.BitwiseOr;
      }

      // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=`
      case Token.GreaterThan: {
        let next = parser.source.charCodeAt(++parser.index);
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.GreaterThanOrEqual;
        }

        if (next !== Chars.GreaterThan) return Token.GreaterThan;

        next = parser.source.charCodeAt(++parser.index);
        if (next === Chars.GreaterThan) {
          if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.LogicalShiftRight;
          parser.index++;
          return Token.LogicalShiftRightAssign;
        }

        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.ShiftRightAssign;
        }

        return Token.ShiftRight;
      }

      // `&`, `&&`, `&=`
      case Token.BitwiseAnd: {
        parser.index++;
        if (parser.index >= parser.length) return Token.BitwiseAnd;

        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.Ampersand) {
          parser.index++;
          return Token.LogicalAnd;
        }

        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.BitwiseAndAssign;
        }

        return Token.BitwiseAnd;
      }

      default:
        // Invalid ASCII code point/unit.
        // NOTE(review): nothing is consumed here, so this relies on `report`
        // not returning normally (presumably it throws) — confirm.
        report(parser, Errors.Unexpected);
    }
  }

  return Token.EOF;
}
/**
 * Advances the parser to the next token.
 *
 * Clears `parser.newLine`, snapshots the current position into
 * `endIndex` / `endColumn` / `endLine` (taken before scanning, so presumably
 * the end of the token being left behind), then stores the result of `scan`
 * in `parser.token`.
 *
 * @param parser - Scanner state; mutated in place.
 * @param context - Bit flags forwarded unchanged to `scan`.
 */
export function nextToken(parser: ParserState, context: Context): void {
  // Reset first: `scan` sets it again (via `consumeLineBreak`) if it crosses a line break.
  parser.newLine = 0;

  parser.endIndex = parser.index;
  parser.endColumn = parser.index - parser.offset;
  parser.endLine = parser.lineStartOffset;

  parser.token = scan(parser, context);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment