jwtd/dataum_lexer.js

## dataum_lexer.js
/*
Trying to parse a bar delimited string that has multiple variations.

Load this code into https://sap.github.io/chevrotain/playground/

Test the strings...

A|B|C
A|B
A
|A|B|C|
|A|B|
|A|

The first four parse correctly, but the fifth, "|A|B|", fails: the trailing bar
is partially matched by the GdtTertiaryDatum iteration (the CONSUME3(VPipe) on line 83).
That should not happen, given the option in the top-level notation rule (line 64).


*/
/**
 * Chevrotain playground example: builds the lexer and the parser class for a
 * bar-delimited datum notation (`A`, `A|B`, `A|B|C`), optionally wrapped in a
 * leading and/or trailing bar.
 *
 * Expects a global `chevrotain` object to be in scope.
 *
 * @returns {{lexer: object, parser: Function, defaultRule: string}} The lexer
 *   instance, the parser class and the name of the start rule.
 */
(function datumGrammarOnlyExample() {
  // ----------------- Lexer -----------------
  const createToken = chevrotain.createToken;
  const Lexer = chevrotain.Lexer;
  // Needed by the GATE predicates further down. It has to be bound here
  // explicitly: nothing else in this function defines a bare `tokenMatcher`.
  const tokenMatcher = chevrotain.tokenMatcher;

  const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /( |\t)+/ });
  const VPipe = createToken({ name: 'VPipe', pattern: '|' });
  const GdtPrimaryDatum = createToken({ name: 'GdtPrimaryDatum', pattern: 'A' });
  const GdtSecondDatum = createToken({ name: 'GdtSecondDatum', pattern: 'B' });
  const GdtTertiaryDatum = createToken({ name: 'GdtTertiaryDatum', pattern: 'C' });

  // The same array is handed to both the lexer and the parser.
  const tokens = [WhiteSpace, VPipe, GdtPrimaryDatum, GdtSecondDatum, GdtTertiaryDatum];

  const datumLexer = new Lexer(tokens, {
    // Less position info tracked, reduces verbosity of the playground output.
    positionTracking: "onlyStart"
  });

  // Labels only affect error messages and Diagrams.
  WhiteSpace.LABEL = "' '";
  VPipe.LABEL = "'|'";
  GdtPrimaryDatum.LABEL = "'A'";
  GdtSecondDatum.LABEL = "'B'";
  GdtTertiaryDatum.LABEL = "'C'";

  // ----------------- parser -----------------
  const Parser = chevrotain.Parser;

  /**
   * Parser for the datum notation. Defines two rules, `notation` (the start
   * rule) and `gdtDatums`.
   */
  class datumParser extends Parser {
    constructor() {
      super(tokens, {
        recoveryEnabled: true
      });

      const $ = this;

      // notation: optional bar, the datum list, optional bar.
      $.RULE('notation', () => {
        $.OPTION(() => {
          $.CONSUME(VPipe);
        });
        $.SUBRULE($.gdtDatums);
        $.OPTION2(() => {
          $.CONSUME4(VPipe);
        });
      });

      // A  A|B  A|B|C
      $.RULE('gdtDatums', () => {
        $.CONSUME(GdtPrimaryDatum);
        $.MANY({
          // Only start an iteration when the token after the upcoming one is
          // a GdtSecondDatum.
          GATE: () => tokenMatcher($.LA(2), GdtSecondDatum),
          DEF: () => {
            $.CONSUME2(VPipe);
            $.CONSUME2(GdtSecondDatum);
            $.MANY2({
              // Same idea: the token after the upcoming one must be a
              // GdtTertiaryDatum.
              GATE: () => tokenMatcher($.LA(2), GdtTertiaryDatum),
              DEF: () => {
                $.CONSUME3(VPipe);
                $.CONSUME3(GdtTertiaryDatum);
              }
            });
          }
        });
      });

      // very important to call this after all the rules have been setup.
      // otherwise the parser may not work correctly as it will lack information
      // derived from the self analysis.
      this.performSelfAnalysis();
    }
  }

  // for the playground to work the returned object must contain these fields
  return {
    lexer: datumLexer,
    parser: datumParser,
    defaultRule: "notation"
  };
}());
	/*
	Trying to parse a bar delimited string that has multiple variations.

	Load this code into https://sap.github.io/chevrotain/playground/

	Test the strings...

	A|B|C
	A|B
	A
	|A|B|C|
	|A|B|
	|A|

	The first four parse correctly, but the fifth, "|A|B|", fails: the trailing bar
	is partially matched by the GdtTertiaryDatum iteration (the CONSUME3(VPipe) on line 83).
	That should not happen, given the option in the top-level notation rule (line 64).


	*/
	/**
	 * Chevrotain playground example: builds the lexer and the parser class for a
	 * bar-delimited datum notation (`A`, `A|B`, `A|B|C`), optionally wrapped in a
	 * leading and/or trailing bar.
	 *
	 * Expects a global `chevrotain` object to be in scope.
	 *
	 * @returns {{lexer: object, parser: Function, defaultRule: string}} The lexer
	 *   instance, the parser class and the name of the start rule.
	 */
	// Defensive leading semicolon: keeps this parenthesised expression from being
	// parsed as a call on whatever unterminated expression might precede it.
	;(function datumGrammarOnlyExample() {
	  // ----------------- Lexer -----------------
	  const createToken = chevrotain.createToken;
	  const Lexer = chevrotain.Lexer;
	  // Needed by the GATE predicates further down. It has to be bound here
	  // explicitly: nothing else in this function defines a bare `tokenMatcher`.
	  const tokenMatcher = chevrotain.tokenMatcher;

	  // One or more spaces or tabs. The bar inside the group is the regex
	  // alternation operator and must stay unescaped.
	  const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /( |\t)+/ });
	  const VPipe = createToken({ name: 'VPipe', pattern: '|' });
	  const GdtPrimaryDatum = createToken({ name: 'GdtPrimaryDatum', pattern: 'A' });
	  const GdtSecondDatum = createToken({ name: 'GdtSecondDatum', pattern: 'B' });
	  const GdtTertiaryDatum = createToken({ name: 'GdtTertiaryDatum', pattern: 'C' });

	  // The same array is handed to both the lexer and the parser.
	  const tokens = [WhiteSpace, VPipe, GdtPrimaryDatum, GdtSecondDatum, GdtTertiaryDatum];

	  const datumLexer = new Lexer(tokens, {
	    // Less position info tracked, reduces verbosity of the playground output.
	    positionTracking: "onlyStart"
	  });

	  // Labels only affect error messages and Diagrams.
	  WhiteSpace.LABEL = "' '";
	  VPipe.LABEL = "'|'";
	  GdtPrimaryDatum.LABEL = "'A'";
	  GdtSecondDatum.LABEL = "'B'";
	  GdtTertiaryDatum.LABEL = "'C'";

	  // ----------------- parser -----------------
	  const Parser = chevrotain.Parser;

	  /**
	   * Parser for the datum notation. Defines two rules, `notation` (the start
	   * rule) and `gdtDatums`.
	   */
	  class datumParser extends Parser {
	    constructor() {
	      super(tokens, {
	        recoveryEnabled: true
	      });

	      const $ = this;

	      // notation: optional bar, the datum list, optional bar.
	      $.RULE('notation', () => {
	        $.OPTION(() => {
	          $.CONSUME(VPipe);
	        });
	        $.SUBRULE($.gdtDatums);
	        $.OPTION2(() => {
	          $.CONSUME4(VPipe);
	        });
	      });

	      // A  A|B  A|B|C
	      $.RULE('gdtDatums', () => {
	        $.CONSUME(GdtPrimaryDatum);
	        $.MANY({
	          // Only start an iteration when the token after the upcoming one is
	          // a GdtSecondDatum.
	          GATE: () => tokenMatcher($.LA(2), GdtSecondDatum),
	          DEF: () => {
	            $.CONSUME2(VPipe);
	            $.CONSUME2(GdtSecondDatum);
	            $.MANY2({
	              // Same idea: the token after the upcoming one must be a
	              // GdtTertiaryDatum.
	              GATE: () => tokenMatcher($.LA(2), GdtTertiaryDatum),
	              DEF: () => {
	                $.CONSUME3(VPipe);
	                $.CONSUME3(GdtTertiaryDatum);
	              }
	            });
	          }
	        });
	      });

	      // very important to call this after all the rules have been setup.
	      // otherwise the parser may not work correctly as it will lack information
	      // derived from the self analysis.
	      this.performSelfAnalysis();
	    }
	  }

	  // for the playground to work the returned object must contain these fields
	  return {
	    lexer: datumLexer,
	    parser: datumParser,
	    defaultRule: "notation"
	  };
	}());