Skip to content

Instantly share code, notes, and snippets.

@duangsuse
Created May 10, 2018 04:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save duangsuse/c971b8a13a741472b15b65585688797f to your computer and use it in GitHub Desktop.
Save duangsuse/c971b8a13a741472b15b65585688797f to your computer and use it in GitHub Desktop.
Lite Lexical Rules and Syntax
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* License: BSD *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* Java 1.2 language lexer specification */
/* Modified by duangsuse for compatibility with the Lite lexical rules */
/* Use together with unicode.flex for Unicode preprocessing */
/* and java12.cup for a Java 1.2 parser */
/* Note that this lexer specification is not tuned for speed.
It is in fact quite slow on integer and floating point literals,
because the input is read twice and the methods used to parse
the numbers are not very fast.
For a production quality application (e.g. a Java compiler)
this could be optimized */
import beaver.Symbol;
import beaver.Scanner;
%%
/* The generated scanner is the public class Lexer, a subclass of the imported beaver.Scanner. */
%public
%class Lexer
%extends Scanner
/* The scanning method is named nextToken, returns a Symbol and is declared to throw Scanner.Exception. */
%function nextToken
%type Symbol
%yylexthrow Scanner.Exception
/* Token handed out when the end of the input is reached. */
%eofval{
return new Symbol(Terminals.EOF, "end of file");
%eofval}
/* Unicode input; %line and %column maintain the yyline/yycolumn counters that symbol() reads. */
%unicode
%line
%column
/* NOTE(review): %debug switches on JFlex's debugging scanner output; confirm it is meant to stay enabled outside development. */
%debug
%{
/* Accumulates the decoded contents of the string literal currently being scanned. */
StringBuilder string = new StringBuilder();

/**
 * Creates a token without a semantic value for the text just matched.
 *
 * Beaver's three-int constructor is (id, left, right) with packed positions,
 * so the (id, line, column, length) form is used here to record the location.
 * Line and column are converted from JFlex's zero-based counters to one-based.
 *
 * @param type terminal id (one of the parser's terminal constants)
 * @return the new token, positioned at the current match
 */
private Symbol symbol(int type) {
  return new Symbol((short) type, yyline + 1, yycolumn + 1, yylength());
}

/**
 * Creates a token carrying a semantic value for the text just matched.
 *
 * Uses Beaver's (id, line, column, length, value) constructor; see the
 * value-less overload for why the position is passed this way.
 *
 * @param type  terminal id (one of the parser's terminal constants)
 * @param value semantic value attached to the token
 * @return the new token, positioned at the current match
 */
private Symbol symbol(int type, Object value) {
  return new Symbol((short) type, yyline + 1, yycolumn + 1, yylength(), value);
}
/**
 * Folds the characters at offsets <code>start</code> (inclusive) to
 * <code>end</code> (exclusive), as returned by <code>yycharat</code>,
 * into a long using the given radix.
 *
 * The caller must make sure every character in that range is a valid
 * digit for <code>radix</code>; nothing is validated here and overflow
 * is not detected.
 */
private long parseLong(int start, int end, int radix) {
  long value = 0;
  int pos = start;
  while (pos < end) {
    // shift the accumulated value one digit to the left, then add the new digit
    value = value * radix + Character.digit(yycharat(pos), radix);
    pos++;
  }
  return value;
}
%}
/* main character classes */
/* A line end is a lone CR, a lone LF, or the CR LF pair. */
LineTerminator = \r|\n|\r\n
/* Any character that is not part of a line end. */
InputCharacter = [^\r\n]
/* Horizontal whitespace only (space, tab, form feed); line ends are handled separately. */
WhiteSpace = [ \t\f]
/* comments */
/* Any of the three comment forms; the YYINITIAL rule for {Comment} discards the match. */
Comment = {TraditionalComment} | {EndOfLineComment} | {DocumentationComment}
/* Block comment: opened by ">#" and closed by the next "#<". */
/* NOTE(review): the Ohm grammar for Lite delimits block comments with ">####<" ... "<####>"; confirm which syntax is intended. */
TraditionalComment = ">#" [^*] ~"#<" | ">#" "*"+ "#<"
/* Line comment: "#" up to, but not including, the line terminator.
   The terminator is left in the input on purpose: YYINITIAL returns it as a
   NEWLINE token, so a trailing comment must not swallow the statement separator. */
EndOfLineComment = "#" {InputCharacter}*
/* Block comment whose opener is followed by one or more "*". */
DocumentationComment = ">#" "*"+ [^>#] ~"#<"
/* identifiers */
/* Built from JFlex's predefined jletter / jletterdigit character classes. */
Identifier = [:jletter:][:jletterdigit:]*
/* integer literals */
/* Decimal: a single 0, or a non-zero digit followed by more digits; the long form adds an l/L suffix. */
DecIntegerLiteral = 0 | [1-9][0-9]*
DecLongLiteral = {DecIntegerLiteral} [lL]
/* Hexadecimal: 0x/0X, optional leading zeros, then 1-8 digits (1-16 plus l/L for the long form). */
HexIntegerLiteral = 0 [xX] 0* {HexDigit} {1,8}
HexLongLiteral = 0 [xX] 0* {HexDigit} {1,16} [lL]
HexDigit = [0-9a-fA-F]
/* Octal: one or more leading zeros followed by octal digits; the long form adds an l/L suffix. */
OctIntegerLiteral = 0+ [1-3]? {OctDigit} {1,15}
OctLongLiteral = 0+ 1? {OctDigit} {1,21} [lL]
OctDigit = [0-7]
/* floating point literals */
/* FloatLiteral requires an f/F suffix; DoubleLiteral has none (a d/D suffix is added by a rule in YYINITIAL). */
FloatLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF]
DoubleLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}?
/* FLit1: digits, a dot, optional digits.  FLit2: a dot then digits.  FLit3: digits only. */
FLit1 = [0-9]+ \. [0-9]*
FLit2 = \. [0-9]+
FLit3 = [0-9]+
Exponent = [eE] [+-]? [0-9]+
/* string and character literals */
/* Characters allowed unescaped inside double-quoted / single-quoted strings:
   anything except a line end, the closing quote and the backslash. */
StringCharacter = [^\r\n\"\\]
SingleCharacter = [^\r\n\'\\]
/* Lexical states entered after an opening double quote / single quote. */
%state STRING, STRING_SINGLE
%%
/* Default state: everything outside string literals.
   JFlex picks the longest match; among equally long matches the rule listed
   first wins, so the order of the rules below is significant. */
<YYINITIAL> {
/* keywords */
/* Listed before the {Identifier} rule so that a keyword wins the tie against an identifier of the same length. */
/* NOTE(review): the terminal constants are used unqualified here while %eofval uses Terminals.EOF; confirm how they are brought into scope. */
"def" { return symbol(DEFINE); }
"do" { return symbol(DO); }
"break" { return symbol(BREAK); }
"next" { return symbol(NEXT); }
"return" { return symbol(RETURN); }
"scope" { return symbol(SCOPE); }
"while" { return symbol(WHILE); }
"for" { return symbol(FOR); }
"in" { return symbol(IN); }
"as" { return symbol(AS); }
"to" { return symbol(TO); }
"if" { return symbol(IF); }
"elif" { return symbol(ELIF); }
"else" { return symbol(ELSE); }
"import" { return symbol(IMPORT); }
"require" { return symbol(REQUIRE); }
"end" { return symbol(END); }
"and" { return symbol(ANDK); }
"or" { return symbol(ORK); }
/* boolean literals */
"true" { return symbol(BOOLEAN_LITERAL, true); }
"false" { return symbol(BOOLEAN_LITERAL, false); }
/* null literal */
"nil" { return symbol(NULL_LITERAL); }
/* separators */
"(" { return symbol(LPAREN); }
")" { return symbol(RPAREN); }
"{" { return symbol(LBRACE); }
"}" { return symbol(RBRACE); }
"[" { return symbol(LBRACK); }
"]" { return symbol(RBRACK); }
";" { return symbol(SEMICOLON); }
"," { return symbol(COMMA); }
"." { return symbol(DOT); }
"@" { return symbol(AT); }
/* operators */
/* Multi-character operators win over their one-character prefixes through the longest-match rule. */
"=" { return symbol(EQ); }
">" { return symbol(GT); }
"<" { return symbol(LT); }
"!" { return symbol(NOT); }
":" { return symbol(COLON); }
"==" { return symbol(EQUAL); }
">=" { return symbol(GE); }
"<=" { return symbol(LE); }
"!=" { return symbol(NOTEQ); }
"===" { return symbol(FULLEQ); }
"!==" { return symbol(NOTFULLEQ); }
"++" { return symbol(INC); }
"--" { return symbol(DEC); }
"+" { return symbol(PLUS); }
"-" { return symbol(SUB); }
"*" { return symbol(MULT); }
"/" { return symbol(DIV); }
"&" { return symbol(AND); }
"|" { return symbol(OR); }
"^" { return symbol(XOR); }
"%" { return symbol(MOD); }
"**" { return symbol(PWR); }
"<<" { return symbol(LSHIFT); }
">>" { return symbol(RSHIFT); }
"+=" { return symbol(PLUSEQ); }
"-=" { return symbol(SUBEQ); }
"*=" { return symbol(MULTEQ); }
"::" { return symbol(SQUARE); }
/* string literal */
/* An opening quote switches to the matching string state and clears the shared buffer. */
\" { yybegin(STRING); string.setLength(0); }
\' { yybegin(STRING_SINGLE); string.setLength(0); }
/* numeric literals */
/* This is matched together with the minus, because the number is too big to
be represented by a positive integer. */
"-2147483648" { return symbol(INTEGER_LITERAL, new Integer(Integer.MIN_VALUE)); }
/* The integer rules come before {DoubleLiteral}: FLit3 also matches a plain digit
   sequence, so for text such as "123" both match the same length and the earlier rule wins. */
{DecIntegerLiteral} { return symbol(INTEGER_LITERAL, new Integer(yytext())); }
/* Long forms drop the trailing l/L before conversion. */
{DecLongLiteral} { return symbol(INTEGER_LITERAL, new Long(yytext().substring(0, yylength() - 1))); }
/* Hex forms skip the two-character 0x prefix; octal forms are parsed from offset 0. */
{HexIntegerLiteral} { return symbol(INTEGER_LITERAL, new Integer((int) parseLong(2, yylength(), 16))); }
{HexLongLiteral} { return symbol(INTEGER_LITERAL, new Long(parseLong(2, yylength() - 1, 16))); }
{OctIntegerLiteral} { return symbol(INTEGER_LITERAL, new Integer((int) parseLong(0, yylength(), 8))); }
{OctLongLiteral} { return symbol(INTEGER_LITERAL, new Long(parseLong(0, yylength() - 1, 8))); }
/* Float and d/D-suffixed double literals drop their one-character suffix before conversion. */
{FloatLiteral} { return symbol(FLOATING_POINT_LITERAL, new Float(yytext().substring(0, yylength() - 1))); }
{DoubleLiteral} { return symbol(FLOATING_POINT_LITERAL, new Double(yytext())); }
{DoubleLiteral}[dD] { return symbol(FLOATING_POINT_LITERAL, new Double(yytext().substring(0, yylength() - 1))); }
/* comments */
{Comment} { /* ignore */ }
/* whitespace */
{WhiteSpace} { /* ignore */ }
/* newline */
/* Unlike horizontal whitespace, a line end is returned to the parser as a token. */
{LineTerminator} { return symbol(NEWLINE); }
/* identifiers */
{Identifier} { return symbol(IDENTIFIER, yytext()); }
}
/* Inside a double-quoted string literal: decoded text is collected in the `string` buffer. */
<STRING> {
/* Closing quote: return to the default state and emit the collected text as one token. */
\" { yybegin(YYINITIAL); return symbol(STRING_LITERAL, string.toString()); }
/* A run of ordinary characters is copied verbatim. */
{StringCharacter}+ { string.append(yytext()); }
/* escape sequences */
"\\b" { string.append('\b'); }
"\\t" { string.append('\t'); }
"\\n" { string.append('\n'); }
"\\f" { string.append('\f'); }
"\\r" { string.append('\r'); }
"\\\"" { string.append('\"'); }
"\\'" { string.append('\''); }
"\\\\" { string.append('\\'); }
/* Octal escape: a backslash and one to three octal digits (the first at most 3 when there are three);
   the digits after the backslash are parsed in radix 8 and appended as one char. */
\\[0-3]?{OctDigit}?{OctDigit} { char val = (char) Integer.parseInt(yytext().substring(1), 8);
string.append(val); }
/* error cases */
/* Any other backslash pair is rejected; a line end before the closing quote is an error. */
\\. { throw new RuntimeException("Illegal escape sequence \"" + yytext() + "\""); }
{LineTerminator} { throw new RuntimeException("Unterminated string at end of line"); }
}
/* Inside a single-quoted string literal: decoded text is collected in the `string` buffer.
   The escape sequences mirror those of <STRING>, so a backslash (and \f, \r, \",
   octal escapes) can be written in either quote style. */
<STRING_SINGLE> {
/* Closing quote: return to the default state and emit the collected text as one token. */
\' { yybegin(YYINITIAL); return symbol(STRING_LITERAL_SINGLE, string.toString()); }
/* A run of ordinary characters is copied verbatim. */
{SingleCharacter}+ { string.append(yytext()); }
/* escape sequences */
"\\b" { string.append('\b'); }
"\\t" { string.append('\t'); }
"\\n" { string.append('\n'); }
"\\f" { string.append('\f'); }
"\\r" { string.append('\r'); }
"\\\"" { string.append('\"'); }
"\\'" { string.append('\''); }
"\\\\" { string.append('\\'); }
/* Octal escape: the digits after the backslash are parsed in radix 8 and appended as one char. */
\\[0-3]?{OctDigit}?{OctDigit} { char val = (char) Integer.parseInt(yytext().substring(1), 8);
string.append(val); }
/* error cases */
/* Any other backslash pair is rejected; a line end before the closing quote is an error. */
\\. { throw new RuntimeException("Illegal escape sequence \'" + yytext() + "\'"); }
{LineTerminator} { throw new RuntimeException("Unterminated single-quoted string at end of line"); }
}
/* error fallback */
/* Catches any character no other rule accepts. The position is reported one-based,
   the same convention symbol() uses for token positions (yyline/yycolumn are zero-based). */
[^] { throw new RuntimeException("Illegal character \"" + yytext() + "\" at line " + (yyline + 1) + ", column " + (yycolumn + 1)); }
// Complete Lite Desugared Syntax (Ohm PEG)
// Lite parser by duangsuse, no rights reserved (lexical rules see https://ohmlang.github.io/editor)
Lite {
// Lexical rules, adapted from the JavaScript (ES5) lexical grammar
// §A.1 Lexical Grammar -- https://es5.github.io/#A.1
// Start rule: a program is a (possibly empty) sequence of statements.
Program = CompStmt
sourceCharacter = any
// Override Ohm's built-in definition of space.
// Line terminators are not listed under `whitespace`, so they are not part of `space`.
space := whitespace | comment
whitespace = "\t"
| "\x0B" -- verticalTab
| "\x0C" -- formFeed
| " "
| "\u00A0" -- noBreakSpace
| "\uFEFF" -- byteOrderMark
| unicodeSpaceSeparator
lineTerminator = "\n" | "\r" | "\u2028" | "\u2029"
// Same characters as lineTerminator, except that "\r\n" counts as a single sequence.
lineTerminatorSequence = "\n" | "\r" ~"\n" | "\u2028" | "\u2029" | "\r\n"
comment = multiLineComment | singleLineComment
// Block comment: everything between ">####<" and the next "<####>".
multiLineComment = ">####<" (~"<####>" sourceCharacter)* "<####>"
// Line comment: "#" up to, but not including, the line terminator.
singleLineComment = "#" (~lineTerminator sourceCharacter)*
// An identifier is an identifierName that is not a reserved word, optionally prefixed with "@".
identifier (an identifier) = "@"? ~reservedWord identifierName
identifierName = identifierStart identifierPart*
identifierStart = letter | "$" | "_"
| "\\" unicodeEscapeSequence -- escaped
identifierPart = identifierStart | unicodeCombiningMark
| unicodeDigit | unicodeConnectorPunctuation
| "\u200C" | "\u200D"
// Extend Ohm's built-in `letter` with the ranges listed in unicodeCategoryNl.
letter += unicodeCategoryNl
unicodeCategoryNl
= "\u2160".."\u2182" | "\u3007" | "\u3021".."\u3029"
// The tables below are literal code point ranges.
// NOTE(review): presumably copied from Ohm's ES5 example grammar -- confirm before editing them by hand.
unicodeDigit (a digit)
= "\u0030".."\u0039" | "\u0660".."\u0669" | "\u06F0".."\u06F9" | "\u0966".."\u096F" | "\u09E6".."\u09EF" | "\u0A66".."\u0A6F" | "\u0AE6".."\u0AEF" | "\u0B66".."\u0B6F" | "\u0BE7".."\u0BEF" | "\u0C66".."\u0C6F" | "\u0CE6".."\u0CEF" | "\u0D66".."\u0D6F" | "\u0E50".."\u0E59" | "\u0ED0".."\u0ED9" | "\u0F20".."\u0F29" | "\uFF10".."\uFF19"
unicodeCombiningMark (a Unicode combining mark)
= "\u0300".."\u0345" | "\u0360".."\u0361" | "\u0483".."\u0486" | "\u0591".."\u05A1" | "\u05A3".."\u05B9" | "\u05BB".."\u05BD" | "\u05BF".."\u05BF" | "\u05C1".."\u05C2" | "\u05C4".."\u05C4" | "\u064B".."\u0652" | "\u0670".."\u0670" | "\u06D6".."\u06DC" | "\u06DF".."\u06E4" | "\u06E7".."\u06E8" | "\u06EA".."\u06ED" | "\u0901".."\u0902" | "\u093C".."\u093C" | "\u0941".."\u0948" | "\u094D".."\u094D" | "\u0951".."\u0954" | "\u0962".."\u0963" | "\u0981".."\u0981" | "\u09BC".."\u09BC" | "\u09C1".."\u09C4" | "\u09CD".."\u09CD" | "\u09E2".."\u09E3" | "\u0A02".."\u0A02" | "\u0A3C".."\u0A3C" | "\u0A41".."\u0A42" | "\u0A47".."\u0A48" | "\u0A4B".."\u0A4D" | "\u0A70".."\u0A71" | "\u0A81".."\u0A82" | "\u0ABC".."\u0ABC" | "\u0AC1".."\u0AC5" | "\u0AC7".."\u0AC8" | "\u0ACD".."\u0ACD" | "\u0B01".."\u0B01" | "\u0B3C".."\u0B3C" | "\u0B3F".."\u0B3F" | "\u0B41".."\u0B43" | "\u0B4D".."\u0B4D" | "\u0B56".."\u0B56" | "\u0B82".."\u0B82" | "\u0BC0".."\u0BC0" | "\u0BCD".."\u0BCD" | "\u0C3E".."\u0C40" | "\u0C46".."\u0C48" | "\u0C4A".."\u0C4D" | "\u0C55".."\u0C56" | "\u0CBF".."\u0CBF" | "\u0CC6".."\u0CC6" | "\u0CCC".."\u0CCD" | "\u0D41".."\u0D43" | "\u0D4D".."\u0D4D" | "\u0E31".."\u0E31" | "\u0E34".."\u0E3A" | "\u0E47".."\u0E4E" | "\u0EB1".."\u0EB1" | "\u0EB4".."\u0EB9" | "\u0EBB".."\u0EBC" | "\u0EC8".."\u0ECD" | "\u0F18".."\u0F19" | "\u0F35".."\u0F35" | "\u0F37".."\u0F37" | "\u0F39".."\u0F39" | "\u0F71".."\u0F7E" | "\u0F80".."\u0F84" | "\u0F86".."\u0F87" | "\u0F90".."\u0F95" | "\u0F97".."\u0F97" | "\u0F99".."\u0FAD" | "\u0FB1".."\u0FB7" | "\u0FB9".."\u0FB9" | "\u20D0".."\u20DC" | "\u20E1".."\u20E1" | "\u302A".."\u302F" | "\u3099".."\u309A" | "\uFB1E".."\uFB1E" | "\uFE20".."\uFE23"
unicodeConnectorPunctuation = "\u005F" | "\u203F".."\u2040" | "\u30FB" | "\uFE33".."\uFE34" | "\uFE4D".."\uFE4F" | "\uFF3F" | "\uFF65"
unicodeSpaceSeparator = "\u2000".."\u200B" | "\u3000"
// Words that `identifier` refuses to match.
reservedWord = keyword | nullLiteral | booleanLiteral
// Note: keywords that are the complete prefix of another keyword should
// be prioritized (e.g. 'in' should come before 'import')
keyword = break | do | scope | in
| to | else | elif | if
| as | next | return | endKeyword
| or | for | and | while
| require | def | import
/*
Note: Punctuator and DivPunctuator (see https://es5.github.io/x7.html#x7.7) are
not currently used by this grammar.
*/
literal = nullLiteral | booleanLiteral | numericLiteral | stringLiteral
// The trailing ~identifierPart keeps "nil"/"true"/"false" from matching a prefix of a longer name.
nullLiteral = "nil" ~identifierPart
booleanLiteral = ("true" | "false") ~identifierPart
// For semantics on how decimal literals are constructed, see section 7.8.3
// Note that the ordering of hexIntegerLiteral and decimalLiteral is reversed w.r.t. the spec
// This is intentional: the order decimalLiteral | hexIntegerLiteral will parse
// "0x..." as a decimal literal "0" followed by "x..."
// octalIntegerLiteral is tried first as well: it also starts with "0".
numericLiteral = octalIntegerLiteral | hexIntegerLiteral | decimalLiteral
decimalLiteral = decimalIntegerLiteral "." decimalDigit* exponentPart -- bothParts
| "." decimalDigit+ exponentPart -- decimalsOnly
| decimalIntegerLiteral exponentPart -- integerOnly
decimalIntegerLiteral = nonZeroDigit decimalDigit* -- nonZero
| "0" -- zero
decimalDigit = "0".."9"
nonZeroDigit = "1".."9"
// The exponent is optional: the `absent` case matches the empty string.
exponentPart = exponentIndicator signedInteger -- present
| -- absent
exponentIndicator = "e" | "E"
signedInteger = "+" decimalDigit* -- positive
| "-" decimalDigit* -- negative
| decimalDigit+ -- noSign
hexIntegerLiteral = "0x" hexDigit+
| "0X" hexDigit+
// hexDigit defined in Ohm's built-in rules (otherwise: hexDigit = "0".."9" | "a".."f" | "A".."F")
// Octal: a leading "0" followed by at least one octal digit.
octalIntegerLiteral = "0" octalDigit+
octalDigit = "0".."7"
// For semantics on how string literals are constructed, see section 7.8.4
// Double- and single-quoted strings accept the same escape sequences; they differ
// only in which quote character must be escaped.
stringLiteral = "\"" doubleStringCharacter* "\""
| "'" singleStringCharacter* "'"
doubleStringCharacter = ~("\"" | "\\" | lineTerminator) sourceCharacter -- nonEscaped
| "\\" escapeSequence -- escaped
| lineContinuation -- lineContinuation
singleStringCharacter = ~("'" | "\\" | lineTerminator) sourceCharacter -- nonEscaped
| "\\" escapeSequence -- escaped
| lineContinuation -- lineContinuation
// A backslash directly before a line terminator sequence.
lineContinuation = "\\" lineTerminatorSequence
escapeSequence = unicodeEscapeSequence
| hexEscapeSequence
| octalEscapeSequence
| characterEscapeSequence // Must come last.
characterEscapeSequence = singleEscapeCharacter
| nonEscapeCharacter
singleEscapeCharacter = "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v"
// Any other character after a backslash, as long as it could not start one of the escapes above.
nonEscapeCharacter = ~(escapeCharacter | lineTerminator) sourceCharacter
escapeCharacter = singleEscapeCharacter | decimalDigit | "x" | "u"
octalEscapeSequence = zeroToThree octalDigit octalDigit -- whole
| fourToSeven octalDigit -- eightTimesfourToSeven
| zeroToThree octalDigit ~decimalDigit -- eightTimesZeroToThree
| octalDigit ~decimalDigit -- octal
hexEscapeSequence = "x" hexDigit hexDigit
unicodeEscapeSequence = "u" hexDigit hexDigit hexDigit hexDigit
zeroToThree = "0".."3"
fourToSeven = "4".."7"
// === Implementation-level rules (not part of the spec) ===
// Statement terminator: an explicit ";", the end of the input, a line terminator, or a comment.
// `end` here is Ohm's built-in end-of-input rule; the Lite keyword "end" is `endKeyword` below.
// (Derived from JavaScript's automatic semicolon insertion, https://es5.github.io/#x7.9.)
// NOTE: Applications of this rule *must* appear in a lexical context -- either in the body of a
// lexical rule, or inside `#()`.
sc = ";" | end | lineTerminator | comment
// Convenience rules for parsing keyword tokens.
// Each one rejects a match that is immediately followed by an identifier character,
// so e.g. "do" does not match the start of a longer name.
break = "break" ~identifierPart
do = "do" ~identifierPart
scope = "scope" ~identifierPart
in = "in" ~identifierPart
else = "else" ~identifierPart
elif = "elif" ~identifierPart
if = "if" ~identifierPart
as = "as" ~identifierPart
next = "next" ~identifierPart
return = "return" ~identifierPart
endKeyword = "end" ~identifierPart
or = "or" ~identifierPart
for = "for" ~identifierPart
and = "and" ~identifierPart
while = "while" ~identifierPart
require = "require" ~identifierPart
def = "def" ~identifierPart
import = "import" ~identifierPart
to = "to" ~identifierPart
// end of modified javascript lexical rules
// start of expressions
// lite operator precedence (lowest first; each level below is one rule that
// falls through to the next, tighter-binding rule)
// | or in
// & and
// < > <= >= != == !== ===
// <<
// to
// + -
// * / %
// ** :: as
// Unary- ! ++ -- .
// left recursion
Exp
= OrExp
OrExp
= OrExp "|" AndExp -- or
| OrExp or AndExp -- orKeyword
| OrExp in AndExp -- in
| AndExp
AndExp
= AndExp "&" RelationExp -- and
| AndExp and RelationExp -- andKeyword
| RelationExp
RelationExp
= RelationExp "<" ShiftExp -- lessThan
| RelationExp ">" ShiftExp -- greaterThan
| RelationExp "<=" ShiftExp -- lessEqual
| RelationExp ">=" ShiftExp -- greaterEqual
| RelationExp "!=" ShiftExp -- notEqual
| RelationExp "==" ShiftExp -- equal
| RelationExp "!==" ShiftExp -- notFullEqual
| RelationExp "===" ShiftExp -- fullEqual
| ShiftExp
// NOTE(review): only "<<" is accepted here, although the JFlex lexer also defines a ">>" token -- confirm.
ShiftExp
= ShiftExp "<<" RangeExp -- shift
| RangeExp
RangeExp
= RangeExp to AddExp -- range
| AddExp
AddExp
= AddExp "+" MulExp -- plus
| AddExp "-" MulExp -- minus
| MulExp
MulExp
= MulExp "*" ExpExp -- times
| MulExp "/" ExpExp -- divide
| MulExp "%" ExpExp -- remainder
| ExpExp
ExpExp
= ExpExp "**" ExpExp -- power
| ExpExp "::" identifier -- square
| ExpExp as identifier -- as
| PriExp
// Primary expressions: parenthesised and prefix forms, postfix ++/-- on an identifier,
// literals, calls/indexing, and the Lite-specific literal forms.
PriExp
= "(" Exp ")" -- paren
| "-" PriExp -- neg
| "!" PriExp -- not
| identifier "++" -- inc
| identifier "--" -- dec
| literal -- literal
| Call -- callExp
| LiteExpr -- liteExp
// Lite-specific expression forms: list, table, brace block and do block.
LiteExpr
= List | Table | BraceBlock | DoBlock
// Separator between list elements / parameter names; every use below makes it optional.
Divider
= (", " | " " | ",")
// "[...]" holds expressions; ":[...]" takes the raw text up to the closing "]".
List
= "[" ExpList "]" -- simpleList
| ":[" (~"]" sourceCharacter)* "]" -- wordList
ExpList
= (Exp Divider?)*
// Table: "{" name ":" value ... "}", entries optionally separated by "," or a newline.
Table
= "{" KvList "}"
KvList
= (identifier ":" Exp ("," | "\n")?)*
// callEasy has a bug, help wanted. see https://github.com/duangsuse/Lite/issues/4
// A call chain starts from a bare identifier (not followed by "=") and is extended by
// parenthesised arguments, ".name", "[index]" or a bare argument list.
Call
= Call "(" ExpList ")" -- call
| Call "." identifier -- callIndex
| Call "[" Exp "]" -- justIndex
| Call ExpList -- callEasy
| identifier ~"=" -- justIdentifier
// "{ |params| statements }"
BraceBlock
= "{" NameListB? (SimpleStatement ":"?)* "}"
// Parameter names with optional parentheses (used by Def).
NameList
= "("? (identifier Divider?)* ")"?
// Parameter names between "|" bars (used by BraceBlock and DoBlock).
NameListB
= "|" (identifier Divider?)* "|"
// "do |params| ... end"
DoBlock
= do NameListB? Block
// end Exp part
// Statements that do not contain a nested block of their own.
SimpleStatement
= Exp -- expressionStatement
| Break -- break
| Next -- continue
| Import -- import
| Require -- require
| Return -- return
| Assign -- assignment
| IndexEq -- indexLet
| Arrow -- arrowLet
Break
= break
Next
= next
// import / require take the rest of the line as raw text.
Import
= import (~lineTerminator sourceCharacter)*
Require
= require (~lineTerminator sourceCharacter)*
// The returned expression is optional.
Return
= return Exp?
Assign
= identifier "=" Exp
// target[index] = value
IndexEq
= Exp "[" Exp "]" "=" Exp
Arrow
= Exp "->" identifier Exp
// Any statement: a simple statement, one of the block-structured forms, or an empty line.
Statement
= SimpleStatement -- simpleStatement
| Def -- defineMethod
| For -- forLoop
| While -- whileLoop
| Scope -- scope
| If -- controlFlow
| "\n" -- nop
// Method definition: without parameters, as a single expression, or with a parameter list.
Def
= def identifier sc Block -- defEasy
| def identifier sc Exp sc -- defExpr
| def identifier NameList sc Block -- def
For
= for identifier in Exp sc Block
While
= while Exp sc Block
// The scope name is optional.
Scope
= scope identifier? sc Block
// if ... end, if ... else ... end, and the general form with any number of elif branches.
If
= if Exp sc Block -- simpleEnd
| if Exp sc CompStmt else Block -- ifElse
| if Exp sc CompStmt (elif Exp sc CompStmt)* (else CompStmt)? endKeyword -- ifElif
// A block is a statement sequence closed by the "end" keyword.
Block
= CompStmt endKeyword
// Zero or more statements, each optionally followed by a terminator.
CompStmt
= (Statement sc?)*
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* License: BSD *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* Section 3.3 of the Java Language Specification :
UnicodeInputCharacter:
UnicodeEscape
RawInputCharacter
UnicodeEscape:
\ UnicodeMarker HexDigit HexDigit HexDigit HexDigit
UnicodeMarker:
u
UnicodeMarker u
RawInputCharacter:
any Unicode character
HexDigit: one of
0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
only an even number of '\' is eligible to start a Unicode escape sequence
*/
import java.io.*;
%%
/* The generated scanner is the public final class UnicodeEscapes, a subclass of FilterReader.
   Its scanning method is named read and returns int. */
%public
%final
%class UnicodeEscapes
%extends FilterReader
%int
%function read
%16bit
/* The part of a Unicode escape after the backslash: one or more "u" followed by exactly four hex digits. */
UnicodeEscape = {UnicodeMarker} {HexDigit} {4}
UnicodeMarker = "u"+
HexDigit = [0-9a-fA-F]
/* State entered after a backslash that starts a Unicode escape. */
%state DIGITS
/* Constructor code: passes `in` on to the FilterReader superclass constructor. */
%init{
super(in);
%init}
%{
private boolean even;
/**
 * Decodes the four characters that end the current match
 * (buffer positions zzMarkedPos - 4 up to zzMarkedPos) as a hexadecimal number.
 * The characters are expected to be hex digits; this is not checked here.
 *
 * @return the numeric value of those four hex digits
 */
private int value() {
  int result = 0;
  int pos = zzMarkedPos - 4;
  while (pos < zzMarkedPos) {
    final int ch = zzBuffer[pos];
    final int nibble;
    if (ch >= 'a') {
      nibble = ch - ('a' - 10);   // 'a'..'f' -> 10..15
    } else if (ch >= 'A') {
      nibble = ch - ('A' - 10);   // 'A'..'F' -> 10..15
    } else {
      nibble = ch - '0';          // '0'..'9' -> 0..9
    }
    result = (result << 4) + nibble;
    pos++;
  }
  return result;
}
/**
 * Reads preprocessed characters into part of an array, pulling them one at a
 * time from the single-character read().
 *
 * End of input is detected from read() returning a negative value. A reader
 * that is merely not ready yet is no longer reported as end of input: the first
 * character is waited for, and only the following ones are limited to what is
 * available according to ready().
 *
 * @param cbuf destination buffer
 * @param off  index in cbuf at which to store the first character
 * @param len  maximum number of characters to read
 * @return the number of characters stored, 0 if len is 0, or -1 if the end of
 *         the input was reached before any character could be read
 * @throws IOException if the underlying reader fails
 */
public int read(char[] cbuf, int off, int len) throws IOException {
  if (len == 0) {
    return 0;
  }
  int count = 0;
  while (count < len) {
    // Once something has been read, hand it back rather than wait for more input.
    if (count > 0 && !ready()) {
      break;
    }
    int c = read();
    if (c < 0) {
      break;  // end of input
    }
    cbuf[off + count] = (char) c;
    count++;
  }
  // Nothing stored means the very first read() hit the end of input.
  return count == 0 ? -1 : count;
}
/**
 * Reports whether mark() is supported by this reader.
 *
 * @return always false
 */
public boolean markSupported() {
  final boolean supported = false;
  return supported;
}
/**
 * Tells whether a character can be produced without consulting a reader that is not ready.
 *
 * @return false once the scanner has seen the end of input; otherwise true if
 *         the scanner buffer still holds unread characters or the underlying
 *         reader reports that it is ready
 * @throws IOException if the underlying reader's ready() fails
 */
public boolean ready() throws IOException {
  if (zzAtEOF) {
    return false;
  }
  if (zzCurrentPos < zzEndRead) {
    return true;  // unread characters are still buffered
  }
  return zzReader.ready();
}
%}
%%
/* Default state: characters are passed through one per call to read().
   `even` is true when the backslash about to be read is itself escaped by the
   backslash before it; it starts out false. */
<YYINITIAL> {
/* A backslash followed by neither a backslash nor "u": pass it through and reset the flag. */
\\ { even = false; return '\\'; }
/* A backslash followed by another backslash: pass it through and flip the flag for the next one. */
\\ / \\ { even = !even; return '\\'; }
/* A backslash followed by "u": if it is escaped it is an ordinary character; otherwise it
   starts a Unicode escape, and scanning continues in DIGITS without returning a character yet. */
\\ / "u" {
if (even) {
even = false;
return '\\';
}
else
yybegin(DIGITS);
}
/* Every other character is returned unchanged. */
[^] { return zzBuffer[zzStartRead]; }
/* End of input is reported as -1. */
<<EOF>> { return -1; }
}
/* After the backslash of a Unicode escape: expects "u"+ and four hex digits. */
<DIGITS> {
/* A complete escape: return to the default state and return the decoded value. */
{UnicodeEscape} { yybegin(YYINITIAL); return value(); }
/* Anything else, or the end of input, is a malformed escape. */
[^] { throw new Error("Incorrect Unicode escape"); }
<<EOF>> { throw new Error("EOF in Unicode escape"); }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment