Last active
December 22, 2019 02:03
-
-
Save jwtd/63e8572500469abce8aaef3a1a776f7f to your computer and use it in GitHub Desktop.
Trying to parse a bar delimited string that has multiple variations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Trying to parse a bar-delimited string that has multiple variations.
Load this code into https://sap.github.io/chevrotain/playground/
Test the strings...
A|B|C
A|B
A
|A|B|C|
|A|B|
|A|
The first 4 work, but the fifth, "|A|B|", is breaking, because the last bar
is partially matching the GdtTertiaryDatum part of the grammar.
It doesn't seem like this should happen, because the trailing bar is optional
in the top-level notation rule.
*/
// Chevrotain playground example: lexer + parser for bar-delimited datum
// notations such as "A", "A|B", "A|B|C", optionally wrapped in a leading
// and/or trailing bar ("|A|B|"). Evaluates to the descriptor object the
// playground consumes ({ lexer, parser, defaultRule }).
(function datumGrammarOnlyExample() {
  // ----------------- Lexer -----------------
  const createToken = chevrotain.createToken;
  const Lexer = chevrotain.Lexer;
  // tokenMatcher lives on the chevrotain namespace; it is not a global.
  // The GATE predicates in the parser below depend on it.
  const tokenMatcher = chevrotain.tokenMatcher;

  const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /( |\t)+/ });
  const VPipe = createToken({ name: 'VPipe', pattern: '|' });
  const GdtPrimaryDatum = createToken({ name: 'GdtPrimaryDatum', pattern: 'A' });
  const GdtSecondDatum = createToken({ name: 'GdtSecondDatum', pattern: 'B' });
  const GdtTertiaryDatum = createToken({ name: 'GdtTertiaryDatum', pattern: 'C' });

  const tokens = [WhiteSpace, VPipe, GdtPrimaryDatum, GdtSecondDatum, GdtTertiaryDatum];

  const datumLexer = new Lexer(tokens, {
    // Less position info tracked, reduces verbosity of the playground output.
    positionTracking: "onlyStart"
  });

  // Labels only affect error messages and Diagrams.
  WhiteSpace.LABEL = "' '";
  VPipe.LABEL = "'|'";
  GdtPrimaryDatum.LABEL = "'A'";
  GdtSecondDatum.LABEL = "'B'";
  GdtTertiaryDatum.LABEL = "'C'";

  // ----------------- parser -----------------
  const Parser = chevrotain.Parser;

  class datumParser extends Parser {
    constructor() {
      super(tokens, {
        recoveryEnabled: true
      });

      const $ = this;

      // notation := '|'? gdtDatums '|'?
      $.RULE('notation', () => {
        // Optional leading bar.
        $.OPTION(() => {
          $.CONSUME(VPipe);
        });
        $.SUBRULE($.gdtDatums);
        // Optional trailing bar.
        $.OPTION2(() => {
          $.CONSUME4(VPipe);
        });
      });

      // A    A|B    A|B|C
      $.RULE('gdtDatums', () => {
        $.CONSUME(GdtPrimaryDatum);
        $.MANY({
          // A bar alone is ambiguous (it may be the optional trailing bar of
          // `notation`), so only enter when the token after the bar is 'B'.
          GATE: () => tokenMatcher($.LA(2), GdtSecondDatum),
          DEF: () => {
            $.CONSUME2(VPipe);
            $.CONSUME2(GdtSecondDatum);
            $.MANY2({
              // Same disambiguation: only enter when the token after the
              // bar is 'C', leaving a lone trailing bar for `notation`.
              GATE: () => tokenMatcher($.LA(2), GdtTertiaryDatum),
              DEF: () => {
                $.CONSUME3(VPipe);
                $.CONSUME3(GdtTertiaryDatum);
              }
            });
          }
        });
      });

      // very important to call this after all the rules have been setup.
      // otherwise the parser may not work correctly as it will lack information
      // derived from the self analysis.
      this.performSelfAnalysis();
    }
  }

  // for the playground to work the returned object must contain these fields
  return {
    lexer: datumLexer,
    parser: datumParser,
    defaultRule: "notation"
  };
}())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment