Skip to content

Instantly share code, notes, and snippets.

@lorefnon
Last active May 19, 2019 10:51
Show Gist options
  • Save lorefnon/39386d06f75f1e4a8ee5455af36540aa to your computer and use it in GitHub Desktop.
Save lorefnon/39386d06f75f1e4a8ee5455af36540aa to your computer and use it in GitHub Desktop.
Chevrotain parser for a subset of jq syntax

About

Parses a jq-inspired, concise syntax for object access and transformation, using Chevrotain.

Supported

  • Object member access: .bar.baz
  • Array member access: .[0]
  • Slices: .[0:9]
  • Construction of objects and arrays: {a: .b.c}, {b: .[0].foo}, [.a.b, .c.d]

Unsupported

Assignment, arithmetic, functions, I/O, modules


Railroad diagram

// Try it out in the chevrotain playground:
// https://sap.github.io/chevrotain/playground/
(function() {
// ----------------- Lexer -----------------
const createToken = chevrotain.createToken;
const Lexer = chevrotain.Lexer;

// Identifier is declared ahead of the keyword tokens so that they can name it
// as their `longer_alt`.
const Identifier = createToken({
  name: "Identifier",
  pattern: /[a-z_][a-z0-9_]*/i
});

// Keyword literals. Without `longer_alt`, an identifier that merely starts
// with a keyword (e.g. `trueValue`, `nullable`) would be split into a keyword
// token followed by an Identifier. With it, the lexer prefers the longer
// Identifier match whenever one exists.
const True = createToken({
  name: "True",
  pattern: /true/,
  longer_alt: Identifier
});
const False = createToken({
  name: "False",
  pattern: /false/,
  longer_alt: Identifier
});
const Null = createToken({
  name: "Null",
  pattern: /null/,
  longer_alt: Identifier
});

// Punctuation.
const LParen = createToken({ name: "LParen", pattern: /\(/ });
const RParen = createToken({ name: "RParen", pattern: /\)/ });
const LCurly = createToken({ name: "LCurly", pattern: /{/ });
const RCurly = createToken({ name: "RCurly", pattern: /}/ });
const LSquare = createToken({ name: "LSquare", pattern: /\[/ });
const RSquare = createToken({ name: "RSquare", pattern: /]/ });
const Comma = createToken({ name: "Comma", pattern: /,/ });
const Colon = createToken({ name: "Colon", pattern: /:/ });
const Pipe = createToken({ name: "Pipe", pattern: /\|/ });
const Dot = createToken({ name: "Dot", pattern: /\./ });
const QuestionMark = createToken({ name: "QuestionMark", pattern: /\?/ });

// Double-quoted string: runs of ordinary characters, or a backslash followed
// by a single-character escape or a \uXXXX escape. The groups are
// non-capturing `(?:...)`; the previous `(:?...)` spelling was a capturing
// group with an optional ':' and therefore also accepted escapes like `\:n`.
const StringLiteral = createToken({
  name: "StringLiteral",
  pattern: /"(?:[^\\"\n\r]+|\\(?:[bfnrtv"\\/]|u[0-9a-fA-F]{4}))*"/
});

// Optional sign, integer part without leading zeros, optional fraction and
// optional exponent.
const NumberLiteral = createToken({
  name: "NumberLiteral",
  pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/
});

// Whitespace is matched but dropped from the token stream.
const WhiteSpace = createToken({
  name: "WhiteSpace",
  pattern: /\s+/,
  group: Lexer.SKIPPED
});
// Token vocabulary shared by the lexer and the parser. The order is
// significant: it is the order in which the lexer tries the patterns, so the
// keyword tokens stay ahead of the catch-all Identifier.
const jsonTokens = [
  WhiteSpace,
  NumberLiteral,
  StringLiteral,
  RCurly,
  LCurly,
  LSquare,
  RSquare,
  LParen,
  RParen,
  Comma,
  Colon,
  True,
  False,
  Null,
  Dot,
  Pipe,
  QuestionMark,
  Identifier
];

// Less position info tracked, reduces verbosity of the playground output.
const lexerOptions = { positionTracking: "onlyStart" };
const PipelineLexer = new Lexer(jsonTokens, lexerOptions);

// Labels only affect error messages and Diagrams.
const punctuationLabels = [
  [LCurly, "'{'"],
  [RCurly, "'}'"],
  [LSquare, "'['"],
  [RSquare, "']'"],
  [Comma, "','"],
  [Colon, "':'"]
];
for (const [tokenType, label] of punctuationLabels) {
  tokenType.LABEL = label;
}
// ----------------- parser -----------------
// Newer Chevrotain releases provide `CstParser` as the base class for
// CST-building parsers and deprecate the generic `Parser`. Prefer `CstParser`
// when it exists and fall back to `Parser` so older releases keep working.
const Parser = chevrotain.CstParser || chevrotain.Parser;

/**
 * Parser for a subset of jq syntax: member access, array indexing, slices,
 * object/array construction, object spread, parenthesised expressions and
 * `|` pipelines.
 *
 * The rules contain no embedded actions; they only describe the grammar.
 * Error recovery is enabled.
 */
class PipelineParser extends Parser {
  constructor() {
    super(jsonTokens, {
      recoveryEnabled: true
    });
    const $ = this;

    // pipeline := [ expression ( "|" expression )* ]
    $.RULE("pipeline", () => {
      $.MANY_SEP({
        SEP: Pipe,
        DEF: () => {
          $.SUBRULE($.expression);
        }
      });
    });

    // slice := [ Number ] ":" [ Number ]
    $.RULE("slice", () => {
      $.OPTION(() => {
        $.CONSUME(NumberLiteral);
      });
      $.CONSUME2(Colon);
      $.OPTION2(() => {
        $.CONSUME3(NumberLiteral);
      });
    });

    // memberAccessor :=
    //   ( "." ( "[" ( Identifier | String | slice | Number ) "]" | Identifier ) [ "?" ] )+
    $.RULE("memberAccessor", () => {
      $.AT_LEAST_ONE(() => {
        $.CONSUME(Dot);
        $.OR([
          {
            ALT: () => {
              $.CONSUME(LSquare);
              $.OR2([
                { ALT: () => $.CONSUME2(Identifier) },
                { ALT: () => $.CONSUME2(StringLiteral) },
                { ALT: () => $.SUBRULE($.slice) },
                { ALT: () => $.CONSUME2(NumberLiteral) }
              ]);
              $.CONSUME(RSquare);
            }
          },
          { ALT: () => $.CONSUME(Identifier) }
        ]);
        $.OPTION(() => {
          $.CONSUME(QuestionMark);
        });
      });
    });

    // objectConstructor := "{" [ objectItemConstructor ( "," objectItemConstructor )* ] "}"
    $.RULE("objectConstructor", () => {
      $.CONSUME(LCurly);
      $.MANY_SEP({
        SEP: Comma,
        DEF: () => {
          $.SUBRULE($.objectItemConstructor);
        }
      });
      $.CONSUME(RCurly);
    });

    // objectKey := String | Identifier | "[" expression "]"
    $.RULE("objectKey", () => {
      $.OR([
        { ALT: () => $.CONSUME(StringLiteral) },
        { ALT: () => $.CONSUME(Identifier) },
        {
          ALT: () => {
            $.CONSUME(LSquare);
            $.SUBRULE($.expression);
            $.CONSUME(RSquare);
          }
        }
      ]);
    });

    // objectItemConstructor := objectKey ":" expression | objectSpread
    $.RULE("objectItemConstructor", () => {
      $.OR([
        {
          ALT: () => {
            $.SUBRULE($.objectKey);
            $.CONSUME2(Colon);
            $.SUBRULE2($.expression);
          }
        },
        { ALT: () => $.SUBRULE($.objectSpread) }
      ]);
    });

    // arrayConstructor := "[" arrayMemberConstructor "]"
    $.RULE("arrayConstructor", () => {
      $.CONSUME(LSquare);
      $.SUBRULE($.arrayMemberConstructor);
      $.CONSUME(RSquare);
    });

    // objectSpread := "." "." "." ( Identifier | wrappedExpression )
    // The lexer has no dedicated spread token, so the three dots are consumed
    // as individual Dot tokens.
    $.RULE("objectSpread", () => {
      $.CONSUME(Dot);
      $.CONSUME2(Dot);
      $.CONSUME3(Dot);
      $.OR([
        { ALT: () => $.CONSUME(Identifier) },
        { ALT: () => $.SUBRULE($.wrappedExpression) }
      ]);
    });

    // arrayMemberConstructor := [ expression ( "," expression )* ]
    $.RULE("arrayMemberConstructor", () => {
      $.MANY_SEP({
        SEP: Comma,
        DEF: () => {
          $.SUBRULE($.expression);
        }
      });
    });

    // value := String | Number | objectConstructor | arrayConstructor
    //        | "true" | "false" | "null"
    $.RULE("value", () => {
      $.OR([
        { ALT: () => $.CONSUME(StringLiteral) },
        { ALT: () => $.CONSUME(NumberLiteral) },
        { ALT: () => $.SUBRULE($.objectConstructor) },
        { ALT: () => $.SUBRULE($.arrayConstructor) },
        { ALT: () => $.CONSUME(True) },
        { ALT: () => $.CONSUME(False) },
        { ALT: () => $.CONSUME(Null) }
      ]);
    });

    // rawExpression := memberAccessor | value
    $.RULE("rawExpression", () => {
      $.OR([
        { ALT: () => $.SUBRULE($.memberAccessor) },
        { ALT: () => $.SUBRULE($.value) }
      ]);
    });

    // wrappedExpression := "(" expression ")"
    $.RULE("wrappedExpression", () => {
      $.CONSUME(LParen);
      $.SUBRULE($.expression);
      $.CONSUME(RParen);
    });

    // expression := rawExpression | wrappedExpression
    $.RULE("expression", () => {
      $.OR([
        { ALT: () => $.SUBRULE($.rawExpression) },
        { ALT: () => $.SUBRULE($.wrappedExpression) }
      ]);
    });

    // very important to call this after all the rules have been setup.
    // otherwise the parser may not work correctly as it will lack information
    // derived from the self analysis.
    this.performSelfAnalysis();
  }
}
// For the playground to work the returned object must contain these fields:
// the lexer instance, the parser class (not an instance), and the name of the
// grammar rule to use by default.
return {
lexer: PipelineLexer,
parser: PipelineParser,
defaultRule: "pipeline"
};
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment