Created
September 23, 2021 14:05
-
-
Save kaby76/b659eb0c1067e082e846f5f87e89fd79 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function PlaygroundExample() { | |
// Pull the pieces of the chevrotain API used by this example into local names.
const {
  createToken,
  Lexer,
  EmbeddedActionsParser,
  tokenMatcher,
} = chevrotain;
/**
 * An example of implementing a calculator with embedded actions (semantics).
 *
 * Embedded actions mean that the semantics of the grammar (in our case, the calculation of the numerical result)
 * are written as part of (inside) the grammar rules.
 *
 * This can be useful for simple use cases, and it is also very fast.
 * It can become very verbose for complex use cases; see the same grammar with separated semantics
 * for an alternative:
 * https://github.com/chevrotain/chevrotain/blob/master/examples/grammars/calculator/calculator_pure_grammar.js
 */
// ----------------- lexer -----------------
// Using the `Lexer.NA` pattern marks a token type as 'irrelevant' for the lexer:
// it is never matched against the input text. AdditionOperator and
// MultiplicationOperator are such token categories; the parser can match
// against a category as a convenience to reduce verbosity.
const AdditionOperator = createToken({
  name: "AdditionOperator",
  pattern: Lexer.NA,
});
const Plus = createToken({
  name: "Plus",
  pattern: /\+/,
  categories: AdditionOperator,
});
const Minus = createToken({
  name: "Minus",
  pattern: /-/,
  categories: AdditionOperator,
});

const MultiplicationOperator = createToken({
  name: "MultiplicationOperator",
  pattern: Lexer.NA,
});
const Multi = createToken({
  name: "Multi",
  pattern: /\*/,
  categories: MultiplicationOperator,
});
const Div = createToken({
  name: "Div",
  pattern: /\//,
  categories: MultiplicationOperator,
});

const LParen = createToken({ name: "LParen", pattern: /\(/ });
const RParen = createToken({ name: "RParen", pattern: /\)/ });

// Decimal integer literal without leading zeros.
// The explicit `0` alternative is required: `[1-9]\d*` on its own cannot match
// a lone zero, so inputs such as `0` or `power(2, 0)` would fail to lex.
const NumberLiteral = createToken({
  name: "NumberLiteral",
  pattern: /0|[1-9]\d*/,
});

const PowerFunc = createToken({ name: "PowerFunc", pattern: /power/ });
const Comma = createToken({ name: "Comma", pattern: /,/ });

// Marking WhiteSpace as 'SKIPPED' makes the lexer skip it.
const WhiteSpace = createToken({
  name: "WhiteSpace",
  pattern: /\s+/,
  group: Lexer.SKIPPED,
});

// The full token vocabulary, shared by the lexer and the parser.
const allTokens = [
  // Whitespace is normally very common, so it is placed first to speed up the lexer.
  WhiteSpace,
  Plus,
  Minus,
  Multi,
  Div,
  LParen,
  RParen,
  NumberLiteral,
  AdditionOperator,
  MultiplicationOperator,
  PowerFunc,
  Comma,
];

const CalculatorLexer = new Lexer(allTokens);
// ----------------- parser -----------------
/**
 * Calculator parser that evaluates the expression while parsing it.
 *
 * It extends `EmbeddedActionsParser`, which is what enables output based on
 * the actions embedded in the grammar rules: every rule below returns the
 * numeric value of the text it matched.
 */
class CalculatorParser extends EmbeddedActionsParser {
  // Each parsing rule must be initialized by invoking RULE(...). ES2015 has no
  // class fields with initializers, so the rules are declared in the constructor.
  // see: https://github.com/jeffmo/es-class-fields-and-static-properties
  constructor() {
    super(allTokens);
    const $ = this;

    // Entry rule: delegates to the lowest-precedence binary expression.
    $.RULE("expression", () => $.SUBRULE($.additionExpression));

    // Lowest precedence, hence first in the rule chain. The precedence of a
    // binary expression is determined by how far down the parse tree it appears.
    $.RULE("additionExpression", () => {
      // parsing part
      let total = $.SUBRULE($.multiplicationExpression);
      $.MANY(() => {
        // AdditionOperator is a category: this consumes either Plus or Minus.
        const operator = $.CONSUME(AdditionOperator);
        // The index "2" in SUBRULE2 identifies this unique position in the grammar at runtime.
        const operand = $.SUBRULE2($.multiplicationExpression);
        // interpreter part: anything that is not Plus is Minus.
        total = tokenMatcher(operator, Plus) ? total + operand : total - operand;
      });
      return total;
    });

    $.RULE("multiplicationExpression", () => {
      // parsing part
      let product = $.SUBRULE($.atomicExpression);
      $.MANY(() => {
        const operator = $.CONSUME(MultiplicationOperator);
        // The index "2" in SUBRULE2 identifies this unique position in the grammar at runtime.
        const operand = $.SUBRULE2($.atomicExpression);
        // interpreter part: anything that is not Multi is Div.
        product = tokenMatcher(operator, Multi) ? product * operand : product / operand;
      });
      return product;
    });

    $.RULE("atomicExpression", () => {
      const alternatives = [
        // parenthesisExpression has the highest precedence and therefore
        // appears in the "lowest" leaf of the expression parse tree.
        {
          ALT: () => {
            return $.SUBRULE($.parenthesisExpression);
          },
        },
        {
          ALT: () => {
            const literal = $.CONSUME(NumberLiteral);
            return parseInt(literal.image, 10);
          },
        },
        {
          ALT: () => {
            return $.SUBRULE($.powerFunction);
          },
        },
      ];
      return $.OR(alternatives);
    });

    // "(" expression ")" evaluates to the value of the inner expression.
    $.RULE("parenthesisExpression", () => {
      $.CONSUME(LParen);
      const inner = $.SUBRULE($.expression);
      $.CONSUME(RParen);
      return inner;
    });

    // "power" "(" expression "," expression ")" evaluates to base raised to exponent.
    $.RULE("powerFunction", () => {
      $.CONSUME(PowerFunc);
      $.CONSUME(LParen);
      const base = $.SUBRULE($.expression);
      $.CONSUME(Comma);
      const exponent = $.SUBRULE2($.expression);
      $.CONSUME(RParen);
      return Math.pow(base, exponent);
    });

    // Must be called after all the rules have been defined; otherwise the
    // parser may not work correctly, as it will lack the information derived
    // during the self-analysis phase.
    $.performSelfAnalysis();
  }
}
// for the playground to work the returned object must contain these fields | |
return { | |
lexer: CalculatorLexer, | |
parser: CalculatorParser, | |
defaultRule: "expression" | |
}; | |
}()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment