Skip to content

Instantly share code, notes, and snippets.

@KFlash
Created October 14, 2019 01:29
Show Gist options
  • Save KFlash/f8812201b16c2643cb0d83e39a657f3e to your computer and use it in GitHub Desktop.
Save KFlash/f8812201b16c2643cb0d83e39a657f3e to your computer and use it in GitHub Desktop.
lawrence
/**** COMMON.TS ******/
/**
 * Steps over one line-terminator code unit and records the new line start.
 *
 * After the call `parser.index` and `parser.offset` both point just past the
 * terminator and `parser.newLine` is set to 1. The line counter
 * (`parser.lineStartOffset`) is bumped unless `lastIsCR` is 1, i.e. unless the
 * caller says this terminator is the LF half of a CR LF pair that was already
 * counted.
 *
 * @param parser   Parser state, mutated in place.
 * @param lastIsCR 1 when the previously consumed terminator was a carriage return.
 */
export function consumeLineBreak(parser: ParserState, lastIsCR: 0 | 1): void {
  const afterBreak = parser.index + 1;
  parser.index = afterBreak;
  parser.offset = afterBreak;
  parser.newLine = 1;
  if (lastIsCR === 0) {
    parser.lineStartOffset += 1;
  }
}
/**
 * Tells whether `cp` is one of the four line terminators compared against
 * here: carriage return, line feed, line separator or paragraph separator.
 *
 * @param cp Code unit / code point to classify.
 * @returns `true` only for one of those four values.
 */
export function isLineTerminator(cp: number): boolean {
  // Union of every bit that appears in any of the four terminators. A value
  // with a bit outside this set cannot be a terminator, which rejects almost
  // all input with a single AND.
  const terminatorBits =
    Chars.CarriageReturn | Chars.LineFeed | Chars.ParagraphSeparator | Chars.LineSeparator;
  if ((cp & ~terminatorBits) !== 0) {
    return false;
  }
  if (cp === Chars.CarriageReturn || cp === Chars.LineFeed) {
    return true;
  }
  // Matches `Chars.LineSeparator` and the value that differs from it only in
  // the lowest bit (the same XOR trick the scanner uses for LS / PS).
  return (cp ^ Chars.LineSeparator) <= 1;
}
/**** COMMENTS ******/
/**
 * Skips the remainder of a single-line comment.
 *
 * Advances `parser.index` until it rests on a line terminator or reaches
 * `parser.length`. The terminator itself is NOT consumed, so the caller's
 * main loop still sees it and can do the line bookkeeping.
 *
 * Callers may leave `parser.index` either on the last character of the
 * comment opener (as the `<!--` and `-->` paths in `scan` do) or on the first
 * character of the comment body (as the `//` path does); both work because
 * an opener character is never a line terminator.
 *
 * @param parser Parser state; only `index` is modified.
 * @returns Always `Token.Comment`.
 */
export function skipSingleLineComment(parser: ParserState): Token {
  // Test the current character BEFORE advancing. The previous pre-increment
  // form stepped over the first character unchecked, so an empty comment
  // (`//` directly followed by a newline) swallowed the newline and then the
  // whole next line.
  while (parser.index < parser.length && !isLineTerminator(parser.source.charCodeAt(parser.index))) {
    parser.index++;
  }
  return Token.Comment;
}
/**
 * Skips the body of a multi-line comment. `parser.index` must point just past
 * the opening delimiter.
 *
 * Every character is consumed exactly once:
 *  - `*` followed by `/` ends the comment (index ends up just past the `/`);
 *  - CR, LF, LS and PS go through `consumeLineBreak`, so line/column
 *    bookkeeping and `parser.newLine` stay correct; a CR immediately followed
 *    by LF counts as a single line break;
 *  - anything else is simply stepped over.
 *
 * An unterminated comment is not reported here: the loop just stops at the
 * end of the input and `Token.Comment` is returned, as before.
 *
 * @param parser Parser state, mutated in place.
 * @returns Always `Token.Comment`.
 */
export function skipMultiLineComment(parser: ParserState): Token {
  // 1 only while the character consumed immediately before was a CR.
  let lastIsCR: 0 | 1 = 0;

  while (parser.index < parser.length) {
    const char = parser.source.charCodeAt(parser.index);

    if (char === Chars.CarriageReturn) {
      // consumeLineBreak advances the index itself; do not advance again.
      consumeLineBreak(parser, 0);
      lastIsCR = 1;
      continue;
    }

    if (char === Chars.LineFeed) {
      // The LF of a CR LF pair moves the line start but is not counted twice.
      consumeLineBreak(parser, lastIsCR);
      lastIsCR = 0;
      continue;
    }

    // Any other character breaks a pending CR LF pairing.
    lastIsCR = 0;

    if (char > 127 && (char ^ Chars.LineSeparator) <= 1) {
      // Line separator / paragraph separator.
      consumeLineBreak(parser, 0);
      continue;
    }

    parser.index++;

    // Reading at `parser.length` yields NaN, which never equals `/`, so no
    // extra bounds check is needed and the index never runs past the end.
    if (char === Chars.Asterisk && parser.source.charCodeAt(parser.index) === Chars.Slash) {
      parser.index++;
      return Token.Comment;
    }
  }

  return Token.Comment;
}
/**** SCAN.TS *****/
import { ParserState, Context } from '../common';
import { Token } from '../token';
import { Chars } from '../chars';
import { consumeLineBreak, isUnicodeOnlySpace } from './common';
import { skipSingleLineComment, skipMultiLineComment } from './comments';
import { scanMaybeIdentifier, scanIdentifier, scanUnicodeEscapeIdStart } from './identifier';
import { scanNumeric, scanFloatingNumeric, scanLeadingZero } from './numeric';
import { scanStringLiteral } from './string';
import { scanRegularExpression } from './regexp';
import { CharTypes, CharFlags } from './charClassifier';
import { scanTemplate } from './template';
import { report, Errors } from '../errors';
/**
 * Lookup table indexed by ASCII code unit (0-127). `scan` reads
 * `firstCharKinds[char]` for the first character of a token and switches on
 * the result, so the position of every entry matters: entry N describes the
 * character with code N.
 *
 * The value is a dispatch kind, not necessarily the final token: for example
 * letters map to `Token.Identifier` / `Token.IdentifierOrKeyword` (the latter
 * makes `scan` call `scanIdentifier` with `canBeKeyword` = 1), both quote
 * characters map to `Token.StringLiteral`, and the backtick maps to
 * `Token.TemplateTail`, which `scan` routes to `scanTemplate`.
 */
export const firstCharKinds = [
/* 0 - Null */ Token.Error,
/* 1 - Start of Heading */ Token.Error,
/* 2 - Start of Text */ Token.Error,
/* 3 - End of Text */ Token.Error,
/* 4 - End of Transm. */ Token.Error,
/* 5 - Enquiry */ Token.Error,
/* 6 - Acknowledgment */ Token.Error,
/* 7 - Bell */ Token.Error,
/* 8 - Backspace */ Token.Error,
/* 9 - Horizontal Tab */ Token.WhiteSpace,
/* 10 - Line Feed */ Token.LineFeed,
/* 11 - Vertical Tab */ Token.WhiteSpace,
/* 12 - Form Feed */ Token.WhiteSpace,
/* 13 - Carriage Return */ Token.CarriageReturn,
/* 14 - Shift Out */ Token.Error,
/* 15 - Shift In */ Token.Error,
/* 16 - Data Link Escape */ Token.Error,
/* 17 - Device Control 1 */ Token.Error,
/* 18 - Device Control 2 */ Token.Error,
/* 19 - Device Control 3 */ Token.Error,
/* 20 - Device Control 4 */ Token.Error,
/* 21 - Negative Ack. */ Token.Error,
/* 22 - Synchronous Idle */ Token.Error,
/* 23 - End of Transm. Block */ Token.Error,
/* 24 - Cancel */ Token.Error,
/* 25 - End of Medium */ Token.Error,
/* 26 - Substitute */ Token.Error,
/* 27 - Escape */ Token.Error,
/* 28 - File Separator */ Token.Error,
/* 29 - Group Separator */ Token.Error,
/* 30 - Record Separator */ Token.Error,
/* 31 - Unit Separator */ Token.Error,
/* 32 - Space */ Token.WhiteSpace,
/* 33 - ! */ Token.Negate,
/* 34 - " */ Token.StringLiteral,
/* 35 - # */ Token.Error,
/* 36 - $ */ Token.Identifier,
/* 37 - % */ Token.Modulo,
/* 38 - & */ Token.BitwiseAnd,
/* 39 - ' */ Token.StringLiteral,
/* 40 - ( */ Token.LeftParen,
/* 41 - ) */ Token.RightParen,
/* 42 - * */ Token.Multiply,
/* 43 - + */ Token.Add,
/* 44 - , */ Token.Comma,
/* 45 - - */ Token.Subtract,
/* 46 - . */ Token.Period,
/* 47 - / */ Token.Divide,
/* 48 - 0 */ Token.LeadingZero,
/* 49 - 1 */ Token.NumericLiteral,
/* 50 - 2 */ Token.NumericLiteral,
/* 51 - 3 */ Token.NumericLiteral,
/* 52 - 4 */ Token.NumericLiteral,
/* 53 - 5 */ Token.NumericLiteral,
/* 54 - 6 */ Token.NumericLiteral,
/* 55 - 7 */ Token.NumericLiteral,
/* 56 - 8 */ Token.NumericLiteral,
/* 57 - 9 */ Token.NumericLiteral,
/* 58 - : */ Token.Colon,
/* 59 - ; */ Token.Semicolon,
/* 60 - < */ Token.LessThan,
/* 61 - = */ Token.Assign,
/* 62 - > */ Token.GreaterThan,
/* 63 - ? */ Token.QuestionMark,
/* 64 - @ */ Token.Error,
/* 65 - A */ Token.Identifier,
/* 66 - B */ Token.Identifier,
/* 67 - C */ Token.Identifier,
/* 68 - D */ Token.Identifier,
/* 69 - E */ Token.Identifier,
/* 70 - F */ Token.Identifier,
/* 71 - G */ Token.Identifier,
/* 72 - H */ Token.Identifier,
/* 73 - I */ Token.Identifier,
/* 74 - J */ Token.Identifier,
/* 75 - K */ Token.Identifier,
/* 76 - L */ Token.Identifier,
/* 77 - M */ Token.Identifier,
/* 78 - N */ Token.Identifier,
/* 79 - O */ Token.Identifier,
/* 80 - P */ Token.Identifier,
/* 81 - Q */ Token.Identifier,
/* 82 - R */ Token.Identifier,
/* 83 - S */ Token.Identifier,
/* 84 - T */ Token.Identifier,
/* 85 - U */ Token.Identifier,
/* 86 - V */ Token.Identifier,
/* 87 - W */ Token.Identifier,
/* 88 - X */ Token.Identifier,
/* 89 - Y */ Token.Identifier,
/* 90 - Z */ Token.Identifier,
/* 91 - [ */ Token.LeftBracket,
/* 92 - \ */ Token.EscapedIdentifier,
/* 93 - ] */ Token.RightBracket,
/* 94 - ^ */ Token.BitwiseXor,
/* 95 - _ */ Token.Identifier,
/* 96 - ` */ Token.TemplateTail,
/* 97 - a */ Token.IdentifierOrKeyword,
/* 98 - b */ Token.IdentifierOrKeyword,
/* 99 - c */ Token.IdentifierOrKeyword,
/* 100 - d */ Token.IdentifierOrKeyword,
/* 101 - e */ Token.IdentifierOrKeyword,
/* 102 - f */ Token.IdentifierOrKeyword,
/* 103 - g */ Token.IdentifierOrKeyword,
/* 104 - h */ Token.Identifier,
/* 105 - i */ Token.IdentifierOrKeyword,
/* 106 - j */ Token.Identifier,
/* 107 - k */ Token.IdentifierOrKeyword,
/* 108 - l */ Token.IdentifierOrKeyword,
/* 109 - m */ Token.IdentifierOrKeyword,
/* 110 - n */ Token.IdentifierOrKeyword,
/* 111 - o */ Token.IdentifierOrKeyword,
/* 112 - p */ Token.IdentifierOrKeyword,
/* 113 - q */ Token.Identifier,
/* 114 - r */ Token.IdentifierOrKeyword,
/* 115 - s */ Token.IdentifierOrKeyword,
/* 116 - t */ Token.IdentifierOrKeyword,
/* 117 - u */ Token.IdentifierOrKeyword,
/* 118 - v */ Token.IdentifierOrKeyword,
/* 119 - w */ Token.IdentifierOrKeyword,
/* 120 - x */ Token.Identifier,
/* 121 - y */ Token.IdentifierOrKeyword,
/* 122 - z */ Token.IdentifierOrKeyword,
/* 123 - { */ Token.LeftBrace,
/* 124 - | */ Token.BitwiseOr,
/* 125 - } */ Token.RightBrace,
/* 126 - ~ */ Token.Complement,
/* 127 - Delete */ Token.Error
];
/**
 * Scans and returns the next token, starting at `parser.index`.
 *
 * Whitespace, line terminators and comments are skipped inside the loop;
 * every other input returns a token from within the `switch`. Before each
 * candidate token `parser.start`, `parser.column` and `parser.line` are
 * refreshed, so on return they describe the start of the returned token.
 *
 * Code units above 0x7e take the slow path (line/paragraph separator,
 * Unicode-only whitespace, otherwise `scanMaybeIdentifier`); everything else
 * is dispatched through the `firstCharKinds` table.
 *
 * @param parser  Parser state, mutated in place.
 * @param context Context bit flags; `OptionsNext`, `AllowRegExp` and `Module`
 *                are consulted here, the rest is forwarded to the sub-scanners.
 * @returns The scanned token, or `Token.EOF` when the input is exhausted.
 */
export function scan(parser: ParserState, context: Context): Token {
  // 1 only while the character consumed immediately before was a CR, so that
  // the LF of a CR LF pair is not counted as a second line.
  let lastIsCR: 0 | 1 = 0;

  // `-->` is an HTML close comment only at the very start of the input or
  // after a line break (`parser.newLine`). `scan` runs once per token, so the
  // start-of-input condition is decided from the entry position; whitespace
  // and comments skipped below must not clear it.
  const startOfInput = parser.index === 0;

  while (parser.index < parser.length) {
    const char = parser.source.charCodeAt(parser.index);
    parser.start = parser.index;
    parser.column = parser.index - parser.offset;
    parser.line = parser.lineStartOffset;

    // Latch the CR flag for this iteration and clear it: anything between a
    // CR and a later LF (whitespace, a comment, ...) breaks the pairing.
    const afterCR: 0 | 1 = lastIsCR;
    lastIsCR = 0;

    if (char > 0x7e) {
      // Line separator / paragraph separator.
      if ((char ^ Chars.LineSeparator) <= 1) {
        consumeLineBreak(parser, 0);
        continue;
      }
      if (isUnicodeOnlySpace(char)) {
        parser.index++;
        continue;
      }
      return scanMaybeIdentifier(parser, context, char);
    }

    const token = firstCharKinds[char];

    switch (token) {
      // Single-character tokens (and invalid ASCII, returned as `Token.Error`)
      case Token.RightBrace:
      case Token.LeftBrace:
      case Token.Comma:
      case Token.Colon:
      case Token.Complement:
      case Token.LeftParen:
      case Token.RightParen:
      case Token.Semicolon:
      case Token.LeftBracket:
      case Token.RightBracket:
      case Token.Error:
        parser.index++;
        return token;

      // general whitespace
      case Token.WhiteSpace:
        // Spaces frequently come in groups, so use a tight inner loop to skip
        while (CharTypes[parser.source.charCodeAt(++parser.index)] & CharFlags.WhiteSpace) {}
        break;

      // line terminators
      case Token.CarriageReturn:
        consumeLineBreak(parser, 0);
        lastIsCR = 1;
        break;

      case Token.LineFeed:
        consumeLineBreak(parser, afterCR);
        break;

      // `a`...`z`
      case Token.IdentifierOrKeyword:
        return scanIdentifier(parser, context, char, /* canBeKeyword */ 1);

      // `A`...`Z`, `_var`, `$var`
      case Token.Identifier:
        return scanIdentifier(parser, context, char, /* canBeKeyword */ 0);

      // `1`...`9`
      case Token.NumericLiteral:
        return scanNumeric(parser, context, char);

      // `0`
      case Token.LeadingZero:
        return scanLeadingZero(parser, context);

      // `string`
      case Token.StringLiteral:
        return scanStringLiteral(parser, context, char) as Token;

      // Backtick: start of a template
      case Token.TemplateTail:
        return scanTemplate(parser, context);

      // `\\u{N}var`
      case Token.EscapedIdentifier:
        return scanUnicodeEscapeIdStart(parser, context);

      // `?`, `??`, `?.`
      case Token.QuestionMark: {
        parser.index++;
        if (parser.index >= parser.length) return Token.QuestionMark;
        // `??` and `?.` are only recognised when the `OptionsNext` flag is set.
        if ((context & Context.OptionsNext) < 1) return Token.QuestionMark;
        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.QuestionMark) {
          parser.index++;
          return Token.Coalesce;
        }
        if (next === Chars.Period) {
          const afterPeriod = parser.index + 1;
          const following = parser.source.charCodeAt(afterPeriod);
          // `a?.5:b` is a conditional with a decimal literal, not `?.`
          if (following >= Chars.Zero && following <= Chars.Nine) {
            return Token.QuestionMark;
          }
          parser.index = afterPeriod;
          return Token.QuestionMarkPeriod;
        }
        return Token.QuestionMark;
      }

      // `/`, `/=`, `//...`, `/*...*/`, regular expression
      case Token.Divide: {
        parser.index++;
        if (parser.index < parser.length) {
          const next = parser.source.charCodeAt(parser.index);
          // Comments are checked first: `//` and `/*` always open a comment,
          // even where a regular expression would be allowed.
          if (next === Chars.Slash) {
            parser.index++;
            skipSingleLineComment(parser);
            continue;
          }
          if (next === Chars.Asterisk) {
            parser.index++;
            skipMultiLineComment(parser);
            continue;
          }
          // Must precede the `/=` check: with `AllowRegExp` set, `/=` opens a
          // regular expression rather than forming a divide-assign.
          if (context & Context.AllowRegExp) {
            return scanRegularExpression(parser, context);
          }
          if (next === Chars.EqualSign) {
            parser.index++;
            return Token.DivideAssign;
          }
        }
        return Token.Divide;
      }

      // `<`, `<=`, `<<`, `<<=`, `<!--`
      case Token.LessThan: {
        parser.index++; // skip `<`
        if (parser.index < parser.length) {
          const next = parser.source.charCodeAt(parser.index);
          if (next === Chars.LessThan) {
            if (parser.source.charCodeAt(++parser.index) === Chars.EqualSign) {
              parser.index++;
              return Token.ShiftLeftAssign;
            }
            return Token.ShiftLeft;
          }
          if (next === Chars.EqualSign) {
            parser.index++;
            return Token.LessThanOrEqual;
          }
          // HTML open comment; not recognised when the `Module` flag is set.
          if (
            next === Chars.Exclamation &&
            (context & Context.Module) === 0 &&
            parser.source.charCodeAt(parser.index + 2) === Chars.Hyphen &&
            parser.source.charCodeAt(parser.index + 1) === Chars.Hyphen
          ) {
            // Leave the index on the last `-`; the comment skipper moves on from there.
            parser.index += 2;
            skipSingleLineComment(parser);
            break;
          }
        }
        return Token.LessThan;
      }

      // `-`, `--`, `-=`, `-->`
      case Token.Subtract: {
        const index = parser.index + 1;
        if (index < parser.length) {
          const next = parser.source.charCodeAt(index);
          if (next === Chars.Hyphen) {
            // `-->` is an HTML close comment only when the `Module` flag is
            // clear and we are at the start of the input or after a line break.
            if (
              parser.source.charCodeAt(index + 1) === Chars.GreaterThan &&
              (context & Context.Module) === 0 &&
              (startOfInput || parser.newLine)
            ) {
              // Leave the index on `>`; the comment skipper moves on from there.
              parser.index = index + 1;
              skipSingleLineComment(parser);
              continue;
            }
            // Otherwise it is a plain `--`; a following `>` is scanned as its
            // own token by the next call.
            parser.index = index + 1;
            return Token.Decrement;
          }
          if (next === Chars.EqualSign) {
            parser.index = index + 1;
            return Token.SubtractAssign;
          }
        }
        parser.index++;
        return Token.Subtract;
      }

      // `.`, `...`, `.123` (numeric literal)
      case Token.Period: {
        let index = parser.index + 1;
        if (index < parser.length) {
          const next = parser.source.charCodeAt(index);
          if (next === Chars.Period) {
            if (parser.source.charCodeAt(++index) === Chars.Period) {
              parser.index = index + 1;
              return Token.Ellipsis;
            }
          }
          if (next >= Chars.Zero && next <= Chars.Nine) {
            return scanFloatingNumeric(parser, context);
          }
        }
        parser.index++;
        return Token.Period;
      }

      // `=`, `==`, `===`, `=>`
      case Token.Assign: {
        parser.index++;
        if (parser.index >= parser.length) return Token.Assign;
        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.EqualSign) {
          if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.LooseEqual;
          parser.index++;
          return Token.StrictEqual;
        }
        if (next === Chars.GreaterThan) {
          parser.index++;
          return Token.Arrow;
        }
        return Token.Assign;
      }

      // `!`, `!=`, `!==`
      case Token.Negate: {
        const index = parser.index + 1;
        if (parser.source.charCodeAt(index) === Chars.EqualSign) {
          if (parser.source.charCodeAt(index + 1) === Chars.EqualSign) {
            parser.index = index + 2;
            return Token.StrictNotEqual;
          }
          parser.index = index + 1;
          return Token.LooseNotEqual;
        }
        parser.index = index;
        return Token.Negate;
      }

      // `%`, `%=`
      case Token.Modulo:
        if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.Modulo;
        parser.index++;
        return Token.ModuloAssign;

      // `*`, `**`, `*=`, `**=`
      case Token.Multiply: {
        parser.index++;
        if (parser.index >= parser.length) return Token.Multiply;
        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.MultiplyAssign;
        }
        if (next !== Chars.Asterisk) return Token.Multiply;
        if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.Exponentiate;
        parser.index++;
        return Token.ExponentiateAssign;
      }

      // `^`, `^=`
      case Token.BitwiseXor:
        if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.BitwiseXor;
        parser.index++;
        return Token.BitwiseXorAssign;

      // `+`, `++`, `+=`
      case Token.Add: {
        parser.index++;
        if (parser.index >= parser.length) return Token.Add;
        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.Plus) {
          parser.index++;
          return Token.Increment;
        }
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.AddAssign;
        }
        return Token.Add;
      }

      // `|`, `||`, `|=`
      case Token.BitwiseOr: {
        parser.index++;
        if (parser.index >= parser.length) return Token.BitwiseOr;
        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.VerticalBar) {
          parser.index++;
          return Token.LogicalOr;
        }
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.BitwiseOrAssign;
        }
        return Token.BitwiseOr;
      }

      // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=`
      case Token.GreaterThan: {
        let next = parser.source.charCodeAt(++parser.index);
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.GreaterThanOrEqual;
        }
        if (next !== Chars.GreaterThan) return Token.GreaterThan;
        next = parser.source.charCodeAt(++parser.index);
        if (next === Chars.GreaterThan) {
          if (parser.source.charCodeAt(++parser.index) !== Chars.EqualSign) return Token.LogicalShiftRight;
          parser.index++;
          return Token.LogicalShiftRightAssign;
        }
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.ShiftRightAssign;
        }
        return Token.ShiftRight;
      }

      // `&`, `&&`, `&=`
      case Token.BitwiseAnd: {
        parser.index++;
        if (parser.index >= parser.length) return Token.BitwiseAnd;
        const next = parser.source.charCodeAt(parser.index);
        if (next === Chars.Ampersand) {
          parser.index++;
          return Token.LogicalAnd;
        }
        if (next === Chars.EqualSign) {
          parser.index++;
          return Token.BitwiseAndAssign;
        }
        return Token.BitwiseAnd;
      }

      default:
        // Invalid ASCII code point/unit.
        // NOTE(review): the index is not advanced here, so this relies on
        // `report` not returning normally - confirm against '../errors'.
        report(parser, Errors.Unexpected);
    }
  }

  return Token.EOF;
}
/**
 * Advances the parser to the next token.
 *
 * Snapshots the current position as the end of the previous token
 * (`endIndex`, `endColumn`, `endLine`), clears the `newLine` flag so that it
 * only reflects line breaks seen while scanning the upcoming token, and then
 * stores the result of `scan` in `parser.token`.
 *
 * @param parser  Parser state, mutated in place.
 * @param context Context flags forwarded to `scan`.
 */
export function nextToken(parser: ParserState, context: Context): void {
  const previousEnd = parser.index;

  parser.endLine = parser.lineStartOffset;
  parser.endColumn = previousEnd - parser.offset;
  parser.endIndex = previousEnd;
  parser.newLine = 0;

  const scanned = scan(parser, context);
  parser.token = scanned;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment