Created
September 25, 2021 01:28
-
-
Save chronos-tachyon/241b481825373b4323bfb2166a951604 to your computer and use it in GitHub Desktop.
A self-describing PEG grammar (WIP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package peggysue; | |
option go:import_path = "github.com/chronos-tachyon/peggysue/grammar"; | |
private rule WS ::= [\t\n\v\f\r\x85\p{Z}]+ ; | |
private rule OWS ::= <WS>? ; | |
private rule OctDigit ::= [0-7] ; | |
private rule DecDigit ::= [0-9] ; | |
private rule HexDigit ::= [0-9A-Fa-f] ; | |
private rule Alphabetic ::= [A-Za-z] ; | |
private rule Alphanumeric ::= [0-9A-Za-z] ; | |
private rule Identifier ::= <Alphabetic> <Alphanumeric>* ( '_' <Alphanumeric>+ )* ; | |
private rule DottedIdentifier ::= ( <Identifier> '.' )* <Identifier> ; | |
private rule CharEscape ::= ( | |
'\\' [\x21-\x2f\x3a-\x3f\x40\x5b-\x5e\x60\x7b-\x7e] | |
/ | |
'\\' [0abtnvfre] | |
/ | |
'\\o{' <OctDigit>+ '}' | |
/ | |
'\\o' <OctDigit>{1,3} | |
/ | |
'\\x{' <HexDigit>+ '}' | |
/ | |
'\\x' <HexDigit>{1,2} | |
/ | |
'\\u' <HexDigit>{1,4} | |
/ | |
'\\U' <HexDigit>{1,8} | |
); | |
private rule SetEndpoint ::= ( | |
[\x21-\x5a\x5e-\x7e] | |
/ | |
<CharEscape> | |
); | |
private rule SetEscape ::= ( | |
'\\' [dwshvDWSHV] | |
/ | |
'\\p{' <Identifier> '}' | |
/ | |
'\\P{' <Identifier> '}' | |
); | |
private rule SetInversion ::= ( | |
'^' | |
\mark(IsInverted:True) | |
/ | |
\pass | |
\mark(IsInverted:False) | |
); | |
private rule SetItem ::= ( | |
$Lo( <SetEndpoint> ) | |
'-' | |
$Hi( <SetEndpoint> ) | |
\mark(Type:LoHi) | |
/ | |
$Lo( <SetEscape> ) | |
\mark(Type:Lo) | |
/ | |
$Lo( <SetEndpoint> ) | |
\mark(Type:Lo) | |
); | |
private rule NonTerminal ::= ( | |
'<' | |
<OWS> | |
$Name( <DottedIdentifier> ) | |
<OWS> | |
'>' | |
); | |
private rule CharSet ::= ( | |
'[' | |
%Inversion( <SetInversion> ) | |
%Items( <SetItem> )* | |
']' | |
); | |
private rule CharAny ::= '.' ; | |
private rule Macro ::= ( | |
'\\pass' | |
\mark(Type:Pass) | |
/ | |
'\\fail' | |
\mark(Type:Fail) | |
/ | |
'\\eof' | |
\mark(Type:EOF) | |
/ | |
'\\grapheme' | |
\mark(Type:Grapheme) | |
/ | |
'\\X' | |
\mark(Type:Grapheme) | |
/ | |
'\\mark(' | |
<OWS> | |
$MarkField( '_' / <Identifier> ) | |
<OWS> | |
':' | |
<OWS> | |
$MarkValue( <Identifier> ) | |
<OWS> | |
')' | |
\mark(Type:Mark) | |
/ | |
'\\commit(' | |
<OWS> | |
$CommitSkip( <DecDigit>* ) | |
<OWS> | |
')' | |
\mark(Type:Commit) | |
); | |
private rule RepeatSpec ::= ( | |
$Lo( <DecDigit>+ ) | |
<OWS> | |
',' | |
<OWS> | |
$Hi( <DecDigit>+ ) | |
\mark(Type:LoAndHi) | |
/ | |
$Lo( <DecDigit>+ ) | |
<OWS> | |
',' | |
\mark(Type:JustLo) | |
/ | |
$Lo( <DecDigit>+ ) | |
\mark(Type:LoSameAsHi) | |
/ | |
',' | |
<OWS> | |
$Hi( <DecDigit>+ ) | |
\mark(Type:JustHi) | |
); | |
private rule SQItem ::= ( | |
$Normal( [^\\']+ ) | |
\mark(Type:Normal) | |
/ | |
$Escape( <CharEscape> ) | |
\mark(Type:Escape) | |
); | |
private rule DQItem ::= ( | |
$Normal( [^\\"]+ ) | |
\mark(Type:Normal) | |
/ | |
$Escape( <CharEscape> ) | |
\mark(Type:Escape) | |
); | |
private rule NumSign ::= ( | |
'-' | |
\mark(IsNegative:True) | |
/ | |
'+' | |
\mark(IsNegative:False) | |
/ | |
\pass | |
\mark(IsNegative:False) | |
); | |
private rule SQLiteral ::= "'" %Item( <SQItem> )* "'" ; | |
private rule DQLiteral ::= '"' %Item( <DQItem> )* '"' ; | |
private rule NullLiteral ::= ( | |
'null' | |
); | |
private rule BoolLiteral ::= ( | |
'false' | |
\mark(Value:False) | |
/ | |
'true' | |
\mark(Value:True) | |
); | |
private rule IntLiteral ::= ( | |
%Sign( <NumSign> ) | |
( | |
'0' | |
[Xx] | |
$Digits( <HexDigit>+ ) | |
\mark(Base:Hex) | |
/ | |
'0' | |
[Oo] | |
$Digits( <OctDigit>+ ) | |
\mark(Base:Octal) | |
/ | |
$Digits( <DecDigit>+ ) | |
\mark(Base:Decimal) | |
) | |
); | |
private rule StringLiteral ::= ( | |
%SQ( <SQLiteral> ) | |
\mark(Type:SQ) | |
/ | |
%DQ( <DQLiteral> ) | |
\mark(Type:DQ) | |
); | |
private rule Value ::= ( | |
%NullValue( <NullLiteral> ) | |
\mark(Type:Null) | |
/ | |
%BoolValue( <BoolLiteral> ) | |
\mark(Type:Bool) | |
/ | |
%IntValue( <IntLiteral> ) | |
\mark(Type:Int) | |
/ | |
%StringValue( <StringLiteral> ) | |
\mark(Type:String) | |
); | |
private rule RuleVisibility ::= ( | |
'public' | |
\mark(Public:True) | |
/ | |
'private' | |
\mark(Public:False) | |
); | |
private rule Expr4 ::= ( | |
'$' | |
$Field( <Identifier> ) | |
'(' | |
<OWS> | |
%Child( <Expr> ) | |
<OWS> | |
')' | |
\mark(Type:StringCaptureGroup) | |
/ | |
'%' | |
$Field( <Identifier> ) | |
'(' | |
<OWS> | |
%Child( <NonTerminal> ) | |
<OWS> | |
')' | |
\mark(Type:StructCaptureGroup) | |
/ | |
'%' | |
'_' | |
'(' | |
<OWS> | |
%Child( <NonTerminal> ) | |
<OWS> | |
')' | |
\mark(Type:StructEmbedCaptureGroup) | |
/ | |
'(' | |
<OWS> | |
%Child( <Expr> ) | |
<OWS> | |
')' | |
\mark(Type:NonCaptureGroup) | |
/ | |
%NonTerminal( <NonTerminal> ) | |
\mark(Type:NonTerminal) | |
/ | |
%StringLiteral( <StringLiteral> ) | |
\mark(Type:StringLiteral) | |
/ | |
%CharSet( <CharSet> ) | |
\mark(Type:CharSet) | |
/ | |
%CharAny( <CharAny> ) | |
\mark(Type:CharAny) | |
/ | |
%Macro( <Macro> ) | |
\mark(Type:Macro) | |
); | |
private rule Expr3 ::= ( | |
%Child( <Expr4> ) | |
( | |
<OWS> | |
'?' | |
\mark(Type:ZeroOne) | |
/ | |
<OWS> | |
'*' | |
\mark(Type:ZeroInf) | |
/ | |
<OWS> | |
'+' | |
\mark(Type:OneInf) | |
/ | |
<OWS> | |
'{' | |
<OWS> | |
%Spec( <RepeatSpec> ) | |
<OWS> | |
'}' | |
\mark(Type:Custom) | |
/ | |
\pass | |
\mark(Type:None) | |
) | |
); | |
private rule Expr2 ::= ( | |
( | |
'&' <OWS> \mark(Type:And) | |
/ | |
'!' <OWS> \mark(Type:Not) | |
/ | |
\pass \mark(Type:None) | |
) | |
%Child( <Expr3> ) | |
); | |
private rule Expr1 ::= ( | |
%Children( <Expr2> ) | |
( | |
<OWS> | |
%Children( <Expr2> ) | |
)* | |
); | |
public rule Expr ::= ( | |
%Children( <Expr1> ) | |
( | |
<OWS> | |
'/' | |
<OWS> | |
%Children( <Expr1> ) | |
)* | |
); | |
private rule PackageStmt ::= ( | |
<OWS> | |
'package' | |
<WS> | |
$Name( <DottedIdentifier> ) | |
<OWS> | |
';' | |
); | |
private rule RequireStmt ::= ( | |
<OWS> | |
'require' | |
<WS> | |
$Name( <DottedIdentifier> ) | |
<OWS> | |
';' | |
); | |
private rule OptionStmt ::= ( | |
<OWS> | |
'option' | |
<WS> | |
$Language( <Identifier> ) | |
<OWS> | |
':' | |
<OWS> | |
$Name( <Identifier> ) | |
<OWS> | |
'=' | |
<OWS> | |
%Value( <Value> ) | |
<OWS> | |
';' | |
); | |
private rule RuleStmt ::= (
    <OWS>
    %Visibility( <RuleVisibility> )
    <WS>
    'rule'
    <WS>
    $Name( <Identifier> )
    <OWS>
    '::='
    <OWS>
    %Body( <Expr> )
    <OWS>
    ';'
);
public rule File ::= ( | |
%PackageStmt( <PackageStmt> ) | |
%RequireStmts( <RequireStmt> )* | |
%OptionStmts( <OptionStmt> )* | |
%RuleStmts( <RuleStmt> )* | |
<OWS> | |
\eof | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment