Skip to content

Instantly share code, notes, and snippets.

@jwtd
Last active December 22, 2019 02:03
Show Gist options
  • Save jwtd/63e8572500469abce8aaef3a1a776f7f to your computer and use it in GitHub Desktop.
Save jwtd/63e8572500469abce8aaef3a1a776f7f to your computer and use it in GitHub Desktop.
Trying to parse a bar delimited string that has multiple variations.
/*
Trying to parse a bar delimited string that has multiple variations.
Load this code into https://sap.github.io/chevrotain/playground/
Test the strings...
A|B|C
A|B
A
|A|B|C|
|A|B|
|A|
The first four work, but the fifth, "|A|B|", breaks because the trailing bar
is partially matched as the start of the optional "|C" (GdtTertiaryDatum) part of the gdtDatums rule.
This does not seem like it should happen, because the top-level notation rule has an OPTION for the trailing bar.
*/
/**
 * Chevrotain playground example: lexer + parser for a bar-delimited datum
 * notation. The accepted inputs are `A`, `A|B` and `A|B|C`, each optionally
 * wrapped in a leading and/or trailing bar (e.g. `|A|B|`).
 *
 * Relies on the global `chevrotain` object provided by the playground.
 *
 * @returns {{lexer: object, parser: Function, defaultRule: string}} the
 *   lexer instance, the parser class and the name of the start rule, in the
 *   shape the playground expects.
 */
(function datumGrammarOnlyExample() {
  // ----------------- Lexer -----------------
  const createToken = chevrotain.createToken;
  const Lexer = chevrotain.Lexer;
  // Used by the GATEs below; bound explicitly like the other chevrotain APIs
  // instead of relying on an undeclared identifier.
  const tokenMatcher = chevrotain.tokenMatcher;

  // Whitespace is skipped by the lexer: no grammar rule consumes it, so
  // forwarding it to the parser would make any input containing a blank fail.
  const WhiteSpace = createToken({
    name: 'WhiteSpace',
    pattern: /( |\t)+/,
    group: Lexer.SKIPPED
  });
  const VPipe = createToken({ name: 'VPipe', pattern: '|' });
  const GdtPrimaryDatum = createToken({ name: 'GdtPrimaryDatum', pattern: 'A' });
  const GdtSecondDatum = createToken({ name: 'GdtSecondDatum', pattern: 'B' });
  const GdtTertiaryDatum = createToken({ name: 'GdtTertiaryDatum', pattern: 'C' });

  const tokens = [WhiteSpace, VPipe, GdtPrimaryDatum, GdtSecondDatum, GdtTertiaryDatum];

  const datumLexer = new Lexer(tokens, {
    // Less position info tracked, reduces verbosity of the playground output.
    positionTracking: "onlyStart"
  });

  // Labels only affect error messages and Diagrams.
  WhiteSpace.LABEL = "' '";
  VPipe.LABEL = "'|'";
  GdtPrimaryDatum.LABEL = "'A'";
  GdtSecondDatum.LABEL = "'B'";
  GdtTertiaryDatum.LABEL = "'C'";

  // ----------------- parser -----------------
  // NOTE(review): newer chevrotain releases deprecate the base `Parser` in
  // favour of `CstParser`; prefer it when present and fall back to `Parser`
  // on older versions. Confirm against the playground's chevrotain version.
  const Parser = chevrotain.CstParser || chevrotain.Parser;

  class datumParser extends Parser {
    constructor() {
      super(tokens, {
        recoveryEnabled: true
      });

      const $ = this;

      // notation := '|'? gdtDatums '|'?
      $.RULE('notation', () => {
        $.OPTION(() => {
          $.CONSUME(VPipe);
        });
        $.SUBRULE($.gdtDatums);
        $.OPTION2(() => {
          $.CONSUME2(VPipe);
        });
      });

      // gdtDatums := 'A' ( '|' 'B' ( '|' 'C' )? )?
      // i.e. exactly:  A   A|B   A|B|C
      //
      // OPTION (not MANY) is used so that repeated tails such as `A|B|B` or
      // `A|B|C|C` are rejected.
      //
      // The GATEs look at the token *after* the bar: the bar on its own cannot
      // tell "next datum" apart from the optional trailing bar consumed by the
      // calling `notation` rule, so each optional tail is only entered when
      // the bar is actually followed by the expected datum.
      $.RULE('gdtDatums', () => {
        $.CONSUME(GdtPrimaryDatum);
        $.OPTION({
          GATE: () => tokenMatcher($.LA(2), GdtSecondDatum),
          DEF: () => {
            $.CONSUME(VPipe);
            $.CONSUME(GdtSecondDatum);
            $.OPTION2({
              GATE: () => tokenMatcher($.LA(2), GdtTertiaryDatum),
              DEF: () => {
                $.CONSUME2(VPipe);
                $.CONSUME(GdtTertiaryDatum);
              }
            });
          }
        });
      });

      // very important to call this after all the rules have been setup.
      // otherwise the parser may not work correctly as it will lack information
      // derived from the self analysis.
      this.performSelfAnalysis();
    }
  }

  // for the playground to work the returned object must contain these fields
  return {
    lexer: datumLexer,
    parser: datumParser,
    defaultRule: "notation"
  };
}())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment