Created
May 10, 2018 04:36
-
-
Save duangsuse/c971b8a13a741472b15b65585688797f to your computer and use it in GitHub Desktop.
Lite Lexical Rules and Syntax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | |
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> * | |
* All rights reserved. * | |
* * | |
* License: BSD * | |
* * | |
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | |
/* Java 1.2 language lexer specification */ | |
/* Modified by duangsuse to compat Lite lexical rules */ | |
/* Use together with unicode.flex for Unicode preprocessing */ | |
/* and java12.cup for a Java 1.2 parser */ | |
/* Note that this lexer specification is not tuned for speed. | |
It is in fact quite slow on integer and floating point literals, | |
because the input is read twice and the methods used to parse | |
the numbers are not very fast. | |
For a production quality application (e.g. a Java compiler) | |
this could be optimized */ | |
import beaver.Symbol; | |
import beaver.Scanner; | |
%% | |
%public | |
%class Lexer | |
%extends Scanner | |
%function nextToken | |
%type Symbol | |
%yylexthrow Scanner.Exception | |
%eofval{ | |
return new Symbol(Terminals.EOF, "end of file"); | |
%eofval} | |
%unicode | |
%line | |
%column | |
%debug | |
%{ | |
StringBuilder string = new StringBuilder(); | |
private Symbol symbol(int type) { | |
return new Symbol(type, yyline + 1, yycolumn + 1); | |
} | |
private Symbol symbol(int type, Object value) { | |
return new Symbol(type, yyline + 1, yycolumn + 1, value); | |
} | |
/** | |
* assumes correct representation of a long value for | |
* specified radix in scanner buffer from <code>start</code> | |
* to <code>end</code> | |
*/ | |
private long parseLong(int start, int end, int radix) { | |
long result = 0; | |
long digit; | |
for (int i = start; i < end; i++) { | |
digit = Character.digit(yycharat(i), radix); | |
result *= radix; | |
result += digit; | |
} | |
return result; | |
} | |
%} | |
/* main character classes */ | |
LineTerminator = \r|\n|\r\n | |
InputCharacter = [^\r\n] | |
WhiteSpace = [ \t\f] | |
/* comments */ | |
Comment = {TraditionalComment} | {EndOfLineComment} | | |
{DocumentationComment} | |
TraditionalComment = ">#" [^*] ~"#<" | ">#" "*"+ "#<" | |
EndOfLineComment = "#" {InputCharacter}* {LineTerminator}? | |
DocumentationComment = ">#" "*"+ [^>#] ~"#<" | |
/* identifiers */ | |
Identifier = [:jletter:][:jletterdigit:]* | |
/* integer literals */ | |
DecIntegerLiteral = 0 | [1-9][0-9]* | |
DecLongLiteral = {DecIntegerLiteral} [lL] | |
HexIntegerLiteral = 0 [xX] 0* {HexDigit} {1,8} | |
HexLongLiteral = 0 [xX] 0* {HexDigit} {1,16} [lL] | |
HexDigit = [0-9a-fA-F] | |
OctIntegerLiteral = 0+ [1-3]? {OctDigit} {1,15} | |
OctLongLiteral = 0+ 1? {OctDigit} {1,21} [lL] | |
OctDigit = [0-7] | |
/* floating point literals */ | |
FloatLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF] | |
DoubleLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? | |
FLit1 = [0-9]+ \. [0-9]* | |
FLit2 = \. [0-9]+ | |
FLit3 = [0-9]+ | |
Exponent = [eE] [+-]? [0-9]+ | |
/* string and character literals */ | |
StringCharacter = [^\r\n\"\\] | |
SingleCharacter = [^\r\n\'\\] | |
%state STRING, STRING_SINGLE | |
%% | |
<YYINITIAL> { | |
/* keywords */ | |
"def" { return symbol(DEFINE); } | |
"do" { return symbol(DO); } | |
"break" { return symbol(BREAK); } | |
"next" { return symbol(NEXT); } | |
"return" { return symbol(RETURN); } | |
"scope" { return symbol(SCOPE); } | |
"while" { return symbol(WHILE); } | |
"for" { return symbol(FOR); } | |
"in" { return symbol(IN); } | |
"as" { return symbol(AS); } | |
"to" { return symbol(TO); } | |
"if" { return symbol(IF); } | |
"elif" { return symbol(ELIF); } | |
"else" { return symbol(ELSE); } | |
"import" { return symbol(IMPORT); } | |
"require" { return symbol(REQUIRE); } | |
"end" { return symbol(END); } | |
"and" { return symbol(ANDK); } | |
"or" { return symbol(ORK); } | |
/* boolean literals */ | |
"true" { return symbol(BOOLEAN_LITERAL, true); } | |
"false" { return symbol(BOOLEAN_LITERAL, false); } | |
/* null literal */ | |
"nil" { return symbol(NULL_LITERAL); } | |
/* separators */ | |
"(" { return symbol(LPAREN); } | |
")" { return symbol(RPAREN); } | |
"{" { return symbol(LBRACE); } | |
"}" { return symbol(RBRACE); } | |
"[" { return symbol(LBRACK); } | |
"]" { return symbol(RBRACK); } | |
";" { return symbol(SEMICOLON); } | |
"," { return symbol(COMMA); } | |
"." { return symbol(DOT); } | |
"@" { return symbol(AT); } | |
/* operators */ | |
"=" { return symbol(EQ); } | |
">" { return symbol(GT); } | |
"<" { return symbol(LT); } | |
"!" { return symbol(NOT); } | |
":" { return symbol(COLON); } | |
"==" { return symbol(EQUAL); } | |
">=" { return symbol(GE); } | |
"<=" { return symbol(LE); } | |
"!=" { return symbol(NOTEQ); } | |
"===" { return symbol(FULLEQ); } | |
"!==" { return symbol(NOTFULLEQ); } | |
"++" { return symbol(INC); } | |
"--" { return symbol(DEC); } | |
"+" { return symbol(PLUS); } | |
"-" { return symbol(SUB); } | |
"*" { return symbol(MULT); } | |
"/" { return symbol(DIV); } | |
"&" { return symbol(AND); } | |
"|" { return symbol(OR); } | |
"^" { return symbol(XOR); } | |
"%" { return symbol(MOD); } | |
"**" { return symbol(PWR); } | |
"<<" { return symbol(LSHIFT); } | |
">>" { return symbol(RSHIFT); } | |
"+=" { return symbol(PLUSEQ); } | |
"-=" { return symbol(SUBEQ); } | |
"*=" { return symbol(MULTEQ); } | |
"::" { return symbol(SQUARE); } | |
/* string literal */ | |
\" { yybegin(STRING); string.setLength(0); } | |
\' { yybegin(STRING_SINGLE); string.setLength(0); } | |
/* numeric literals */
/* Values are boxed through the valueOf factories: the Integer/Long/Float/Double
   constructors have been deprecated since Java 9. Parsing is unchanged; in
   particular a decimal literal above Integer.MAX_VALUE still makes
   Integer.valueOf(String) throw NumberFormatException. */
/* This is matched together with the minus, because the number is too big to
   be represented by a positive integer. */
"-2147483648" { return symbol(INTEGER_LITERAL, Integer.valueOf(Integer.MIN_VALUE)); }
{DecIntegerLiteral} { return symbol(INTEGER_LITERAL, Integer.valueOf(yytext())); }
/* long literals: the trailing l/L suffix is dropped before parsing */
{DecLongLiteral} { return symbol(INTEGER_LITERAL, Long.valueOf(yytext().substring(0, yylength() - 1))); }
/* hex literals: offset 2 skips the 0x/0X prefix */
{HexIntegerLiteral} { return symbol(INTEGER_LITERAL, Integer.valueOf((int) parseLong(2, yylength(), 16))); }
{HexLongLiteral} { return symbol(INTEGER_LITERAL, Long.valueOf(parseLong(2, yylength() - 1, 16))); }
{OctIntegerLiteral} { return symbol(INTEGER_LITERAL, Integer.valueOf((int) parseLong(0, yylength(), 8))); }
{OctLongLiteral} { return symbol(INTEGER_LITERAL, Long.valueOf(parseLong(0, yylength() - 1, 8))); }
/* floating point literals: the f/F or d/D suffix is dropped before parsing */
{FloatLiteral} { return symbol(FLOATING_POINT_LITERAL, Float.valueOf(yytext().substring(0, yylength() - 1))); }
{DoubleLiteral} { return symbol(FLOATING_POINT_LITERAL, Double.valueOf(yytext())); }
{DoubleLiteral}[dD] { return symbol(FLOATING_POINT_LITERAL, Double.valueOf(yytext().substring(0, yylength() - 1))); }
/* comments */ | |
{Comment} { /* ignore */ } | |
/* whitespace */ | |
{WhiteSpace} { /* ignore */ } | |
/* newline */ | |
{LineTerminator} { return symbol(NEWLINE); } | |
/* identifiers */ | |
{Identifier} { return symbol(IDENTIFIER, yytext()); } | |
} | |
<STRING> { | |
\" { yybegin(YYINITIAL); return symbol(STRING_LITERAL, string.toString()); } | |
{StringCharacter}+ { string.append(yytext()); } | |
/* escape sequences */ | |
"\\b" { string.append('\b'); } | |
"\\t" { string.append('\t'); } | |
"\\n" { string.append('\n'); } | |
"\\f" { string.append('\f'); } | |
"\\r" { string.append('\r'); } | |
"\\\"" { string.append('\"'); } | |
"\\'" { string.append('\''); } | |
"\\\\" { string.append('\\'); } | |
\\[0-3]?{OctDigit}?{OctDigit} { char val = (char) Integer.parseInt(yytext().substring(1), 8); | |
string.append(val); } | |
/* error cases */ | |
\\. { throw new RuntimeException("Illegal escape sequence \"" + yytext() + "\""); } | |
{LineTerminator} { throw new RuntimeException("Unterminated string at end of line"); } | |
} | |
<STRING_SINGLE> { | |
\' { yybegin(YYINITIAL); return symbol(STRING_LITERAL_SINGLE, string.toString()); } | |
{SingleCharacter}+ { string.append(yytext()); } | |
/* escape sequences */
"\\b" { string.append('\b'); }
"\\t" { string.append('\t'); }
"\\n" { string.append('\n'); }
"\\'" { string.append('\''); }
/* SingleCharacter excludes the backslash, so without this rule a single-quoted
   string could never contain one: "\\" would fall through to the
   illegal-escape error below. */
"\\\\" { string.append('\\'); }
/* error cases */ | |
\\. { throw new RuntimeException("Illegal escape sequence \'" + yytext() + "\'"); } | |
{LineTerminator} { throw new RuntimeException("Unterminated single-quoted string at end of line"); } | |
} | |
/* error fallback */
/* yyline/yycolumn are 0-based; report them 1-based, matching the positions
   stored in the tokens built by symbol(). */
[^] { throw new RuntimeException("Illegal character \"" + yytext() + "\" at line " + (yyline + 1) + ", column " + (yycolumn + 1)); }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Complete Lite Desugared Syntax (Ohm PEG) | |
// Lite parser by duangsuse, no rights reserved (lexical rules see https://ohmlang.github.io/editor) | |
Lite { | |
// The JavaScript lexical rules | |
// §A.1 Lexical Grammar -- https://es5.github.io/#A.1 | |
Program = CompStmt | |
sourceCharacter = any | |
// Override Ohm's built-in definition of space. | |
space := whitespace | comment | |
whitespace = "\t" | |
| "\x0B" -- verticalTab | |
| "\x0C" -- formFeed | |
| " " | |
| "\u00A0" -- noBreakSpace | |
| "\uFEFF" -- byteOrderMark | |
| unicodeSpaceSeparator | |
lineTerminator = "\n" | "\r" | "\u2028" | "\u2029" | |
lineTerminatorSequence = "\n" | "\r" ~"\n" | "\u2028" | "\u2029" | "\r\n" | |
comment = multiLineComment | singleLineComment | |
multiLineComment = ">####<" (~"<####>" sourceCharacter)* "<####>" | |
singleLineComment = "#" (~lineTerminator sourceCharacter)* | |
identifier (an identifier) = "@"? ~reservedWord identifierName | |
identifierName = identifierStart identifierPart* | |
identifierStart = letter | "$" | "_" | |
| "\\" unicodeEscapeSequence -- escaped | |
identifierPart = identifierStart | unicodeCombiningMark | |
| unicodeDigit | unicodeConnectorPunctuation | |
| "\u200C" | "\u200D" | |
letter += unicodeCategoryNl | |
unicodeCategoryNl | |
= "\u2160".."\u2182" | "\u3007" | "\u3021".."\u3029" | |
unicodeDigit (a digit) | |
= "\u0030".."\u0039" | "\u0660".."\u0669" | "\u06F0".."\u06F9" | "\u0966".."\u096F" | "\u09E6".."\u09EF" | "\u0A66".."\u0A6F" | "\u0AE6".."\u0AEF" | "\u0B66".."\u0B6F" | "\u0BE7".."\u0BEF" | "\u0C66".."\u0C6F" | "\u0CE6".."\u0CEF" | "\u0D66".."\u0D6F" | "\u0E50".."\u0E59" | "\u0ED0".."\u0ED9" | "\u0F20".."\u0F29" | "\uFF10".."\uFF19" | |
unicodeCombiningMark (a Unicode combining mark) | |
= "\u0300".."\u0345" | "\u0360".."\u0361" | "\u0483".."\u0486" | "\u0591".."\u05A1" | "\u05A3".."\u05B9" | "\u05BB".."\u05BD" | "\u05BF".."\u05BF" | "\u05C1".."\u05C2" | "\u05C4".."\u05C4" | "\u064B".."\u0652" | "\u0670".."\u0670" | "\u06D6".."\u06DC" | "\u06DF".."\u06E4" | "\u06E7".."\u06E8" | "\u06EA".."\u06ED" | "\u0901".."\u0902" | "\u093C".."\u093C" | "\u0941".."\u0948" | "\u094D".."\u094D" | "\u0951".."\u0954" | "\u0962".."\u0963" | "\u0981".."\u0981" | "\u09BC".."\u09BC" | "\u09C1".."\u09C4" | "\u09CD".."\u09CD" | "\u09E2".."\u09E3" | "\u0A02".."\u0A02" | "\u0A3C".."\u0A3C" | "\u0A41".."\u0A42" | "\u0A47".."\u0A48" | "\u0A4B".."\u0A4D" | "\u0A70".."\u0A71" | "\u0A81".."\u0A82" | "\u0ABC".."\u0ABC" | "\u0AC1".."\u0AC5" | "\u0AC7".."\u0AC8" | "\u0ACD".."\u0ACD" | "\u0B01".."\u0B01" | "\u0B3C".."\u0B3C" | "\u0B3F".."\u0B3F" | "\u0B41".."\u0B43" | "\u0B4D".."\u0B4D" | "\u0B56".."\u0B56" | "\u0B82".."\u0B82" | "\u0BC0".."\u0BC0" | "\u0BCD".."\u0BCD" | "\u0C3E".."\u0C40" | "\u0C46".."\u0C48" | "\u0C4A".."\u0C4D" | "\u0C55".."\u0C56" | "\u0CBF".."\u0CBF" | "\u0CC6".."\u0CC6" | "\u0CCC".."\u0CCD" | "\u0D41".."\u0D43" | "\u0D4D".."\u0D4D" | "\u0E31".."\u0E31" | "\u0E34".."\u0E3A" | "\u0E47".."\u0E4E" | "\u0EB1".."\u0EB1" | "\u0EB4".."\u0EB9" | "\u0EBB".."\u0EBC" | "\u0EC8".."\u0ECD" | "\u0F18".."\u0F19" | "\u0F35".."\u0F35" | "\u0F37".."\u0F37" | "\u0F39".."\u0F39" | "\u0F71".."\u0F7E" | "\u0F80".."\u0F84" | "\u0F86".."\u0F87" | "\u0F90".."\u0F95" | "\u0F97".."\u0F97" | "\u0F99".."\u0FAD" | "\u0FB1".."\u0FB7" | "\u0FB9".."\u0FB9" | "\u20D0".."\u20DC" | "\u20E1".."\u20E1" | "\u302A".."\u302F" | "\u3099".."\u309A" | "\uFB1E".."\uFB1E" | "\uFE20".."\uFE23" | |
unicodeConnectorPunctuation = "\u005F" | "\u203F".."\u2040" | "\u30FB" | "\uFE33".."\uFE34" | "\uFE4D".."\uFE4F" | "\uFF3F" | "\uFF65" | |
unicodeSpaceSeparator = "\u2000".."\u200B" | "\u3000" | |
reservedWord = keyword | nullLiteral | booleanLiteral | |
// Note: keywords that are the complete prefix of another keyword should | |
// be prioritized (e.g. 'in' should come before 'instanceof') | |
keyword = break | do | scope | in | |
| to | else | elif | if | |
| as | next | return | endKeyword | |
| or | for | and | while | |
| require | def | import | |
/* | |
Note: Punctuator and DivPunctuator (see https://es5.github.io/x7.html#x7.7) are | |
not currently used by this grammar. | |
*/ | |
literal = nullLiteral | booleanLiteral | numericLiteral | stringLiteral | |
nullLiteral = "nil" ~identifierPart | |
booleanLiteral = ("true" | "false") ~identifierPart | |
// For semantics on how decimal literals are constructed, see section 7.8.3 | |
// Note that the ordering of hexIntegerLiteral and decimalLiteral is reversed w.r.t. the spec | |
// This is intentional: the order decimalLiteral | hexIntegerLiteral will parse | |
// "0x..." as a decimal literal "0" followed by "x..." | |
numericLiteral = octalIntegerLiteral | hexIntegerLiteral | decimalLiteral | |
decimalLiteral = decimalIntegerLiteral "." decimalDigit* exponentPart -- bothParts | |
| "." decimalDigit+ exponentPart -- decimalsOnly | |
| decimalIntegerLiteral exponentPart -- integerOnly | |
decimalIntegerLiteral = nonZeroDigit decimalDigit* -- nonZero | |
| "0" -- zero | |
decimalDigit = "0".."9" | |
nonZeroDigit = "1".."9" | |
exponentPart = exponentIndicator signedInteger -- present | |
| -- absent | |
exponentIndicator = "e" | "E" | |
signedInteger = "+" decimalDigit* -- positive | |
| "-" decimalDigit* -- negative | |
| decimalDigit+ -- noSign | |
hexIntegerLiteral = "0x" hexDigit+ | |
| "0X" hexDigit+ | |
// hexDigit defined in Ohm's built-in rules (otherwise: hexDigit = "0".."9" | "a".."f" | "A".."F") | |
octalIntegerLiteral = "0" octalDigit+ | |
octalDigit = "0".."7" | |
// For semantics on how string literals are constructed, see section 7.8.4 | |
stringLiteral = "\"" doubleStringCharacter* "\"" | |
| "'" singleStringCharacter* "'" | |
doubleStringCharacter = ~("\"" | "\\" | lineTerminator) sourceCharacter -- nonEscaped | |
| "\\" escapeSequence -- escaped | |
| lineContinuation -- lineContinuation | |
singleStringCharacter = ~("'" | "\\" | lineTerminator) sourceCharacter -- nonEscaped | |
| "\\" escapeSequence -- escaped | |
| lineContinuation -- lineContinuation | |
lineContinuation = "\\" lineTerminatorSequence | |
escapeSequence = unicodeEscapeSequence | |
| hexEscapeSequence | |
| octalEscapeSequence | |
| characterEscapeSequence // Must come last. | |
characterEscapeSequence = singleEscapeCharacter | |
| nonEscapeCharacter | |
singleEscapeCharacter = "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v" | |
nonEscapeCharacter = ~(escapeCharacter | lineTerminator) sourceCharacter | |
escapeCharacter = singleEscapeCharacter | decimalDigit | "x" | "u" | |
octalEscapeSequence = zeroToThree octalDigit octalDigit -- whole | |
| fourToSeven octalDigit -- eightTimesfourToSeven | |
| zeroToThree octalDigit ~decimalDigit -- eightTimesZeroToThree | |
| octalDigit ~decimalDigit -- octal | |
hexEscapeSequence = "x" hexDigit hexDigit | |
unicodeEscapeSequence = "u" hexDigit hexDigit hexDigit hexDigit | |
zeroToThree = "0".."3" | |
fourToSeven = "4".."7" | |
// === Implementation-level rules (not part of the spec) === | |
// A semicolon is "automatically inserted" if a newline or the end of the input stream is | |
// reached, or the offending token is "}". | |
// See https://es5.github.io/#x7.9 for more information. | |
// NOTE: Applications of this rule *must* appear in a lexical context -- either in the body of a | |
// lexical rule, or inside `#()`. | |
sc = ";" | end | lineTerminator | comment | |
// Convenience rules for parsing keyword tokens. | |
break = "break" ~identifierPart | |
do = "do" ~identifierPart | |
scope = "scope" ~identifierPart | |
in = "in" ~identifierPart | |
else = "else" ~identifierPart | |
elif = "elif" ~identifierPart | |
if = "if" ~identifierPart | |
as = "as" ~identifierPart | |
next = "next" ~identifierPart | |
return = "return" ~identifierPart | |
endKeyword = "end" ~identifierPart | |
or = "or" ~identifierPart | |
for = "for" ~identifierPart | |
and = "and" ~identifierPart | |
while = "while" ~identifierPart | |
require = "require" ~identifierPart | |
def = "def" ~identifierPart | |
import = "import" ~identifierPart | |
to = "to" ~identifierPart | |
// end of modified javascript lexical rules | |
// start of expressions | |
// lite operator precedence | |
// | or in | |
// & and | |
// < > <= >= != == !== === | |
// << | |
// to | |
// + - | |
// * / % | |
// ** :: as | |
// Unary- ! ++ -- . | |
// left recursion | |
Exp | |
= OrExp | |
OrExp | |
= OrExp "|" AndExp -- or | |
| OrExp or AndExp -- orKeyword | |
| OrExp in AndExp -- in | |
| AndExp | |
AndExp | |
= AndExp "&" RelationExp -- and | |
| AndExp and RelationExp -- andKeyword | |
| RelationExp | |
RelationExp | |
= RelationExp "<" ShiftExp -- lessThan | |
| RelationExp ">" ShiftExp -- greaterThan | |
| RelationExp "<=" ShiftExp -- lessEqual | |
| RelationExp ">=" ShiftExp -- greaterEqual | |
| RelationExp "!=" ShiftExp -- notEqual | |
| RelationExp "==" ShiftExp -- equal | |
| RelationExp "!==" ShiftExp -- notFullEqual | |
| RelationExp "===" ShiftExp -- fullEqual | |
| ShiftExp | |
ShiftExp | |
= ShiftExp "<<" RangeExp -- shift | |
| RangeExp | |
RangeExp | |
= RangeExp to AddExp -- range | |
| AddExp | |
AddExp | |
= AddExp "+" MulExp -- plus | |
| AddExp "-" MulExp -- minus | |
| MulExp | |
MulExp | |
= MulExp "*" ExpExp -- times | |
| MulExp "/" ExpExp -- divide | |
| MulExp "%" ExpExp -- remainder | |
| ExpExp | |
ExpExp | |
= ExpExp "**" ExpExp -- power | |
| ExpExp "::" identifier -- square | |
| ExpExp as identifier -- as | |
| PriExp | |
PriExp | |
= "(" Exp ")" -- paren | |
| "-" PriExp -- neg | |
| "!" PriExp -- not | |
| identifier "++" -- inc | |
| identifier "--" -- dec | |
| literal -- literal | |
| Call -- callExp | |
| LiteExpr -- liteExp | |
LiteExpr | |
= List | Table | BraceBlock | DoBlock | |
Divider | |
= (", " | " " | ",") | |
List | |
= "[" ExpList "]" -- simpleList | |
| ":[" (~"]" sourceCharacter)* "]" -- wordList | |
ExpList | |
= (Exp Divider?)* | |
Table | |
= "{" KvList "}" | |
KvList | |
= (identifier ":" Exp ("," | "\n")?)* | |
// callEasy has a bug, help wanted. see https://github.com/duangsuse/Lite/issues/4 | |
Call | |
= Call "(" ExpList ")" -- call | |
| Call "." identifier -- callIndex | |
| Call "[" Exp "]" -- justIndex | |
| Call ExpList -- callEasy | |
| identifier ~"=" -- justIdentifier | |
BraceBlock | |
= "{" NameListB? (SimpleStatement ":"?)* "}" | |
NameList | |
= "("? (identifier Divider?)* ")"? | |
NameListB | |
= "|" (identifier Divider?)* "|" | |
DoBlock | |
= do NameListB? Block | |
// end Exp part | |
SimpleStatement | |
= Exp -- expressionStatement | |
| Break -- break | |
| Next -- continue | |
| Import -- import | |
| Require -- require | |
| Return -- return | |
| Assign -- assignment | |
| IndexEq -- indexLet | |
| Arrow -- arrowLet | |
Break | |
= break | |
Next | |
= next | |
Import | |
= import (~lineTerminator sourceCharacter)* | |
Require | |
= require (~lineTerminator sourceCharacter)* | |
Return | |
= return Exp? | |
Assign | |
= identifier "=" Exp | |
IndexEq | |
= Exp "[" Exp "]" "=" Exp | |
Arrow | |
= Exp "->" identifier Exp | |
Statement | |
= SimpleStatement -- simpleStatement | |
| Def -- defineMethod | |
| For -- forLoop | |
| While -- whileLoop | |
| Scope -- scope | |
| If -- controlFlow | |
| "\n" -- nop | |
Def | |
= def identifier sc Block -- defEasy | |
| def identifier sc Exp sc -- defExpr | |
| def identifier NameList sc Block -- def | |
For | |
= for identifier in Exp sc Block | |
While | |
= while Exp sc Block | |
Scope | |
= scope identifier? sc Block | |
If | |
= if Exp sc Block -- simpleEnd | |
| if Exp sc CompStmt else Block -- ifElse | |
| if Exp sc CompStmt (elif Exp sc CompStmt)* (else CompStmt)? endKeyword -- ifElif | |
Block | |
= CompStmt endKeyword | |
CompStmt | |
= (Statement sc?)* | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | |
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> * | |
* All rights reserved. * | |
* * | |
* License: BSD * | |
* * | |
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | |
/* Section 3.3 of the Java Language Specification : | |
UnicodeInputCharacter: | |
UnicodeEscape | |
RawInputCharacter | |
UnicodeEscape: | |
\ UnicodeMarker HexDigit HexDigit HexDigit HexDigit | |
UnicodeMarker: | |
u | |
UnicodeMarker u | |
RawInputCharacter: | |
any Unicode character | |
HexDigit: one of | |
0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F | |
only an even number of '\' is eligible to start a Unicode escape sequence | |
*/ | |
import java.io.*; | |
%% | |
%public | |
%final | |
%class UnicodeEscapes | |
%extends FilterReader | |
%int | |
%function read | |
%16bit | |
UnicodeEscape = {UnicodeMarker} {HexDigit} {4} | |
UnicodeMarker = "u"+ | |
HexDigit = [0-9a-fA-F] | |
%state DIGITS | |
%init{ | |
super(in); | |
%init} | |
%{ | |
private boolean even; | |
private int value() { | |
int r = 0; | |
for (int k = zzMarkedPos - 4; k < zzMarkedPos; k++) { | |
int c = zzBuffer[k]; | |
if (c >= 'a') | |
c -= 'a' - 10; | |
else if (c >= 'A') | |
c -= 'A' - 10; | |
else | |
c -= '0'; | |
r <<= 4; | |
r += c; | |
} | |
return r; | |
} | |
public int read(char cbuf[], int off, int len) throws IOException { | |
if (!ready()) return -1; | |
len += off; | |
for (int i=off; i<len; i++) { | |
int c = read(); | |
if (c < 0) | |
return i - off; | |
else | |
cbuf[i] = (char) c; | |
} | |
return len - off; | |
} | |
public boolean markSupported() { | |
return false; | |
} | |
public boolean ready() throws IOException { | |
return !zzAtEOF && (zzCurrentPos < zzEndRead || zzReader.ready()); | |
} | |
%} | |
%% | |
<YYINITIAL> { | |
\\ { even = false; return '\\'; } | |
\\ / \\ { even = !even; return '\\'; } | |
\\ / "u" { | |
if (even) { | |
even = false; | |
return '\\'; | |
} | |
else | |
yybegin(DIGITS); | |
} | |
[^] { return zzBuffer[zzStartRead]; } | |
<<EOF>> { return -1; } | |
} | |
<DIGITS> { | |
{UnicodeEscape} { yybegin(YYINITIAL); return value(); } | |
[^] { throw new Error("Incorrect Unicode escape"); } | |
<<EOF>> { throw new Error("EOF in Unicode escape"); } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment