Last active
May 9, 2018 13:23
-
-
Save duangsuse/ca3aae7d3a44999d72acd0c0e2d7750d to your computer and use it in GitHub Desktop.
Lite ohm.js syntax definition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Complete Lite desugared syntax (Ohm PEG)
// Lite parser by duangsuse, no rights reserved (for the lexical rules, see https://ohmlang.github.io/editor)
Lite {
  // ===================================================================
  // Lexical rules (adapted from the JavaScript lexical rules)
  // §A.1 Lexical Grammar -- https://es5.github.io/#A.1
  // ===================================================================

  // Start rule: a whole program is a sequence of statements.
  Program = CompStmt

  sourceCharacter = any

  // Override Ohm's built-in definition of space.
  // Line terminators are not part of `space`, so they are never skipped
  // implicitly inside syntactic rules; they are matched explicitly by `sc`,
  // by the `nop` case of Statement and by KvList.
  space := whitespace | comment

  whitespace = "\t"
             | "\x0B"    -- verticalTab
             | "\x0C"    -- formFeed
             | " "
             | "\u00A0"  -- noBreakSpace
             | "\uFEFF"  -- byteOrderMark
             | unicodeSpaceSeparator

  lineTerminator = "\n" | "\r" | "\u2028" | "\u2029"
  lineTerminatorSequence = "\n" | "\r" ~"\n" | "\u2028" | "\u2029" | "\r\n"

  // Comments: `# ...` up to the end of the line, or `>####< ... <####>`.
  comment = multiLineComment | singleLineComment
  multiLineComment = ">####<" (~"<####>" sourceCharacter)* "<####>"
  singleLineComment = "#" (~lineTerminator sourceCharacter)*

  // An identifier may carry an optional "@" prefix and must not be a reserved word.
  identifier (an identifier) = "@"? ~reservedWord identifierName
  identifierName = identifierStart identifierPart*

  identifierStart = letter | "$" | "_"
                  | "\\" unicodeEscapeSequence -- escaped
  identifierPart = identifierStart | unicodeCombiningMark
                 | unicodeDigit | unicodeConnectorPunctuation
                 | "\u200C" | "\u200D"

  // Extend Ohm's built-in `letter` with the ranges listed in unicodeCategoryNl.
  letter += unicodeCategoryNl
  unicodeCategoryNl
    = "\u2160".."\u2182" | "\u3007" | "\u3021".."\u3029"
  unicodeDigit (a digit)
    = "\u0030".."\u0039" | "\u0660".."\u0669" | "\u06F0".."\u06F9" | "\u0966".."\u096F" | "\u09E6".."\u09EF" | "\u0A66".."\u0A6F" | "\u0AE6".."\u0AEF" | "\u0B66".."\u0B6F" | "\u0BE7".."\u0BEF" | "\u0C66".."\u0C6F" | "\u0CE6".."\u0CEF" | "\u0D66".."\u0D6F" | "\u0E50".."\u0E59" | "\u0ED0".."\u0ED9" | "\u0F20".."\u0F29" | "\uFF10".."\uFF19"
  unicodeCombiningMark (a Unicode combining mark)
    = "\u0300".."\u0345" | "\u0360".."\u0361" | "\u0483".."\u0486" | "\u0591".."\u05A1" | "\u05A3".."\u05B9" | "\u05BB".."\u05BD" | "\u05BF".."\u05BF" | "\u05C1".."\u05C2" | "\u05C4".."\u05C4" | "\u064B".."\u0652" | "\u0670".."\u0670" | "\u06D6".."\u06DC" | "\u06DF".."\u06E4" | "\u06E7".."\u06E8" | "\u06EA".."\u06ED" | "\u0901".."\u0902" | "\u093C".."\u093C" | "\u0941".."\u0948" | "\u094D".."\u094D" | "\u0951".."\u0954" | "\u0962".."\u0963" | "\u0981".."\u0981" | "\u09BC".."\u09BC" | "\u09C1".."\u09C4" | "\u09CD".."\u09CD" | "\u09E2".."\u09E3" | "\u0A02".."\u0A02" | "\u0A3C".."\u0A3C" | "\u0A41".."\u0A42" | "\u0A47".."\u0A48" | "\u0A4B".."\u0A4D" | "\u0A70".."\u0A71" | "\u0A81".."\u0A82" | "\u0ABC".."\u0ABC" | "\u0AC1".."\u0AC5" | "\u0AC7".."\u0AC8" | "\u0ACD".."\u0ACD" | "\u0B01".."\u0B01" | "\u0B3C".."\u0B3C" | "\u0B3F".."\u0B3F" | "\u0B41".."\u0B43" | "\u0B4D".."\u0B4D" | "\u0B56".."\u0B56" | "\u0B82".."\u0B82" | "\u0BC0".."\u0BC0" | "\u0BCD".."\u0BCD" | "\u0C3E".."\u0C40" | "\u0C46".."\u0C48" | "\u0C4A".."\u0C4D" | "\u0C55".."\u0C56" | "\u0CBF".."\u0CBF" | "\u0CC6".."\u0CC6" | "\u0CCC".."\u0CCD" | "\u0D41".."\u0D43" | "\u0D4D".."\u0D4D" | "\u0E31".."\u0E31" | "\u0E34".."\u0E3A" | "\u0E47".."\u0E4E" | "\u0EB1".."\u0EB1" | "\u0EB4".."\u0EB9" | "\u0EBB".."\u0EBC" | "\u0EC8".."\u0ECD" | "\u0F18".."\u0F19" | "\u0F35".."\u0F35" | "\u0F37".."\u0F37" | "\u0F39".."\u0F39" | "\u0F71".."\u0F7E" | "\u0F80".."\u0F84" | "\u0F86".."\u0F87" | "\u0F90".."\u0F95" | "\u0F97".."\u0F97" | "\u0F99".."\u0FAD" | "\u0FB1".."\u0FB7" | "\u0FB9".."\u0FB9" | "\u20D0".."\u20DC" | "\u20E1".."\u20E1" | "\u302A".."\u302F" | "\u3099".."\u309A" | "\uFB1E".."\uFB1E" | "\uFE20".."\uFE23"
  unicodeConnectorPunctuation = "\u005F" | "\u203F".."\u2040" | "\u30FB" | "\uFE33".."\uFE34" | "\uFE4D".."\uFE4F" | "\uFF3F" | "\uFF65"
  unicodeSpaceSeparator = "\u2000".."\u200B" | "\u3000"

  reservedWord = keyword | nullLiteral | booleanLiteral

  // Every keyword rule below ends with `~identifierPart`, so a keyword never
  // matches a mere prefix of a longer word and the order of the alternatives
  // here does not affect which keyword is recognised.
  keyword = break  | do     | scope  | in
          | to     | else   | elif   | if
          | as     | next   | return | endKeyword
          | or     | for    | and    | while
          | require | def   | import

  /*
    Note: Punctuator and DivPunctuator (see https://es5.github.io/x7.html#x7.7) are
    not currently used by this grammar.
  */

  literal = nullLiteral | booleanLiteral | numericLiteral | stringLiteral
  nullLiteral = "nil" ~identifierPart
  booleanLiteral = ("true" | "false") ~identifierPart

  // For semantics on how decimal literals are constructed, see section 7.8.3
  // Note that the ordering of hexIntegerLiteral and decimalLiteral is reversed w.r.t. the spec
  // This is intentional: the order decimalLiteral | hexIntegerLiteral will parse
  // "0x..." as a decimal literal "0" followed by "x..."
  numericLiteral = octalIntegerLiteral | hexIntegerLiteral | decimalLiteral

  decimalLiteral = decimalIntegerLiteral "." decimalDigit* exponentPart -- bothParts
                 |                       "." decimalDigit+ exponentPart -- decimalsOnly
                 | decimalIntegerLiteral                   exponentPart -- integerOnly

  decimalIntegerLiteral = nonZeroDigit decimalDigit*  -- nonZero
                        | "0"                         -- zero
  decimalDigit = "0".."9"
  nonZeroDigit = "1".."9"

  exponentPart = exponentIndicator signedInteger -- present
               |                                 -- absent

  exponentIndicator = "e" | "E"

  // A sign must be followed by at least one digit (ES5 §7.8.3 SignedInteger);
  // otherwise an input such as "1e+" would be accepted as a number.
  signedInteger = "+" decimalDigit+ -- positive
                | "-" decimalDigit+ -- negative
                |     decimalDigit+ -- noSign

  hexIntegerLiteral = "0x" hexDigit+
                    | "0X" hexDigit+

  // hexDigit defined in Ohm's built-in rules (otherwise: hexDigit = "0".."9" | "a".."f" | "A".."F")

  octalIntegerLiteral = "0" octalDigit+

  octalDigit = "0".."7"

  // For semantics on how string literals are constructed, see section 7.8.4
  stringLiteral = "\"" doubleStringCharacter* "\""
                | "'" singleStringCharacter* "'"
  doubleStringCharacter = ~("\"" | "\\" | lineTerminator) sourceCharacter -- nonEscaped
                        | "\\" escapeSequence                             -- escaped
                        | lineContinuation                                -- lineContinuation
  singleStringCharacter = ~("'" | "\\" | lineTerminator) sourceCharacter -- nonEscaped
                        | "\\" escapeSequence                            -- escaped
                        | lineContinuation                               -- lineContinuation
  lineContinuation = "\\" lineTerminatorSequence
  escapeSequence = unicodeEscapeSequence
                 | hexEscapeSequence
                 | octalEscapeSequence
                 | characterEscapeSequence  // Must come last.
  characterEscapeSequence = singleEscapeCharacter
                          | nonEscapeCharacter
  singleEscapeCharacter = "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v"
  nonEscapeCharacter = ~(escapeCharacter | lineTerminator) sourceCharacter
  escapeCharacter = singleEscapeCharacter | decimalDigit | "x" | "u"
  octalEscapeSequence = zeroToThree octalDigit octalDigit    -- whole
                      | fourToSeven octalDigit               -- eightTimesfourToSeven
                      | zeroToThree octalDigit ~decimalDigit -- eightTimesZeroToThree
                      | octalDigit ~decimalDigit             -- octal
  hexEscapeSequence = "x" hexDigit hexDigit
  unicodeEscapeSequence = "u" hexDigit hexDigit hexDigit hexDigit

  zeroToThree = "0".."3"
  fourToSeven = "4".."7"

  // === Implementation-level rules (not part of the spec) ===

  // Statement terminator: an explicit ";", the end of the input (Ohm's built-in
  // `end`), a line terminator, or a comment.
  // Unlike the ES5 rule it is modelled on (https://es5.github.io/#x7.9), this rule
  // does not treat a following "}" as a terminator.
  // When `sc` is applied from a syntactic rule, `space` (which includes `comment`)
  // is skipped first, so the `comment` alternative can only match in a lexical context.
  sc = ";" | end | lineTerminator | comment

  // Convenience rules for parsing keyword tokens.
  // The "end" keyword is named `endKeyword` because `end` is Ohm's built-in
  // end-of-input rule.
  break = "break" ~identifierPart
  do = "do" ~identifierPart
  scope = "scope" ~identifierPart
  in = "in" ~identifierPart
  else = "else" ~identifierPart
  elif = "elif" ~identifierPart
  if = "if" ~identifierPart
  as = "as" ~identifierPart
  next = "next" ~identifierPart
  return = "return" ~identifierPart
  endKeyword = "end" ~identifierPart
  or = "or" ~identifierPart
  for = "for" ~identifierPart
  and = "and" ~identifierPart
  while = "while" ~identifierPart
  require = "require" ~identifierPart
  def = "def" ~identifierPart
  import = "import" ~identifierPart
  to = "to" ~identifierPart

  // end of modified javascript lexical rules

  // ===================================================================
  // Expressions
  // ===================================================================

  // lite operator precedence (lowest to highest)
  // | or in
  // & and
  // < > <= >= != == !== ===
  // <<
  // to
  // + -
  // * / %
  // ** :: as
  // Unary- ! ++ -- .

  // Binary operators are expressed with left recursion, one rule per
  // precedence level.
  Exp
    = OrExp

  OrExp
    = OrExp "|" AndExp  -- or
    | OrExp or AndExp   -- orKeyword
    | OrExp in AndExp   -- in
    | AndExp

  AndExp
    = AndExp "&" RelationExp  -- and
    | AndExp and RelationExp  -- andKeyword
    | RelationExp

  // Operators that share a prefix are listed longest first, so that the
  // intended alternative is tried before any shorter operator that is a
  // prefix of it.
  RelationExp
    = RelationExp "!==" ShiftExp  -- notFullEqual
    | RelationExp "===" ShiftExp  -- fullEqual
    | RelationExp "<=" ShiftExp   -- lessEqual
    | RelationExp ">=" ShiftExp   -- greaterEqual
    | RelationExp "!=" ShiftExp   -- notEqual
    | RelationExp "==" ShiftExp   -- equal
    | RelationExp "<" ShiftExp    -- lessThan
    | RelationExp ">" ShiftExp    -- greaterThan
    | ShiftExp

  ShiftExp
    = ShiftExp "<<" RangeExp  -- shift
    | RangeExp

  RangeExp
    = RangeExp to AddExp  -- range
    | AddExp

  AddExp
    = AddExp "+" MulExp  -- plus
    | AddExp "-" MulExp  -- minus
    | MulExp

  MulExp
    = MulExp "*" ExpExp  -- times
    | MulExp "/" ExpExp  -- divide
    | MulExp "%" ExpExp  -- remainder
    | ExpExp

  ExpExp
    = ExpExp "**" ExpExp      -- power
    | ExpExp "::" identifier  -- square
    | ExpExp as identifier    -- as
    | PriExp

  PriExp
    = "(" Exp ")"      -- paren
    | "-" PriExp       -- neg
    | "!" PriExp       -- not
    | identifier "++"  -- inc
    | identifier "--"  -- dec
    | literal          -- literal
    | Call             -- callExp
    | LiteExpr         -- liteExp

  LiteExpr
    = List | Table | BraceBlock | DoBlock

  // Optional separator between list items and names.
  // Divider is a syntactic rule, so whitespace is skipped implicitly before
  // each terminal; a bare " " alternative could therefore never match and has
  // been dropped. Space-separated items still work because every use of
  // Divider is optional.
  Divider
    = ", " | ","

  List
    = "[" ExpList "]"                     -- simpleList
    | ":[" (~"]" sourceCharacter)* "]"    -- wordList

  // Zero or more expressions, each optionally followed by a Divider.
  ExpList
    = (Exp Divider?)*

  Table
    = "{" KvList "}"

  // `name: value` pairs, each optionally followed by "," or a newline.
  KvList
    = (identifier ":" Exp ("," | "\n")?)*

  // callEasy has a bug, help wanted. see https://github.com/duangsuse/Lite/issues/4
  // NOTE(review): one visible consequence of callEasy is that `a - b` is consumed
  // here as the call `a` applied to `-b` before AddExp can see the "-" -- confirm
  // whether this is the behaviour reported in the issue.
  //
  // justIdentifier rejects an identifier that is followed by a single "="
  // (that form is an Assign), but must still accept one followed by "==" or
  // "===", otherwise `x == 1` cannot be parsed as a comparison.
  Call
    = Call "(" ExpList ")"          -- call
    | Call "." identifier           -- callIndex
    | Call "[" Exp "]"              -- justIndex
    | Call ExpList                  -- callEasy
    | identifier ~("=" ~"=")        -- justIdentifier

  // `{ |params| stmt stmt ... }`
  // NOTE(review): the optional ":" after each statement may have been intended
  // to be ";" -- confirm with the language author.
  BraceBlock
    = "{" NameListB? (SimpleStatement ":"?)* "}"

  // Parameter names with optional surrounding parentheses.
  NameList
    = "("? (identifier Divider?)* ")"?

  // Parameter names between vertical bars, as used by blocks.
  NameListB
    = "|" (identifier Divider?)* "|"

  DoBlock
    = do NameListB? Block

  // end Exp part

  // ===================================================================
  // Statements
  // ===================================================================

  // PEG choice is ordered. Assign, IndexEq and Arrow are tried before the bare
  // expression statement because IndexEq and Arrow both begin with an Exp: if
  // Exp were tried first it would always succeed on that prefix and the longer
  // forms could never be selected.
  SimpleStatement
    = Assign   -- assignment
    | IndexEq  -- indexLet
    | Arrow    -- arrowLet
    | Exp      -- expressionStatement
    | Break    -- break
    | Next     -- continue
    | Import   -- import
    | Require  -- require
    | Return   -- return

  Break
    = break

  Next
    = next

  // The rest of the line after the keyword is taken verbatim.
  Import
    = import (~lineTerminator sourceCharacter)*

  Require
    = require (~lineTerminator sourceCharacter)*

  Return
    = return Exp?

  Assign
    = identifier "=" Exp

  // NOTE(review): for a plain `a[1] = 2` the leading Exp already consumes
  // `a[1]` (via Call_justIndex), so the following "[" cannot match; as written
  // this rule only matches when the target expression does not itself absorb
  // the index, e.g. `(a)[1] = 2`. A proper fix changes the rule's shape and
  // therefore its semantic actions -- left for the grammar author.
  IndexEq
    = Exp "[" Exp "]" "=" Exp

  Arrow
    = Exp "->" identifier Exp

  Statement
    = SimpleStatement  -- simpleStatement
    | Def              -- defineMethod
    | For              -- forLoop
    | While            -- whileLoop
    | Scope            -- scope
    | If               -- controlFlow
    | "\n"             -- nop

  Def
    = def identifier sc Block           -- defEasy
    | def identifier sc Exp sc          -- defExpr
    | def identifier NameList sc Block  -- def

  For
    = for identifier in Exp sc Block

  While
    = while Exp sc Block

  Scope
    = scope identifier? sc Block

  If
    = if Exp sc Block                                                             -- simpleEnd
    | if Exp sc CompStmt else Block                                               -- ifElse
    | if Exp sc CompStmt (elif Exp sc CompStmt)* (else CompStmt)? endKeyword      -- ifElif

  // A statement sequence closed by the `end` keyword.
  Block
    = CompStmt endKeyword

  // Zero or more statements, each optionally followed by a terminator.
  CompStmt
    = (Statement sc?)*
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment