Skip to content

Instantly share code, notes, and snippets.

@sorear
Created June 12, 2012 21:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sorear/2920292 to your computer and use it in GitHub Desktop.
Save sorear/2920292 to your computer and use it in GitHub Desktop.
Partial Go grammar with correct semicolon insertion
grammar Go {
    # Partial, lexer-level grammar for Go with simulated semicolon insertion.
    #
    # this is a funny grammar because we're trying to simulate a separate lexer
    # so in the main "grammar" part don't parse characters yourself, use only
    # lexeme tokens.
    #
    # Normally the 'real' cursor always points after whitespace.  If the
    # 'logical' cursor is to point to an inserted semicolon, the 'real' cursor
    # will be placed ONE CHARACTER BEFORE the next token start (and the
    # location will be marked in @*ADDSEMI).
    #
    # @*ADDSEMI is a dynamic variable and is not declared in this grammar;
    # the caller is expected to provide it before parsing.

    # No automatic whitespace processing.  <?> is the explicit "always
    # succeeds, consumes nothing" assertion; a null regex body is not legal.
    token ws() { <?> }

    # Raw run of whitespace and comments (possibly empty).
    token l_space_raw() {
        [ <[\x20\x09\x0D\x0A]>
        | '//' \N*              # line comment: up to, not including, the newline
        | '/*' .*? '*/'         # block comment, may span lines
        ]*
    }

    token l_letter        { <:Letter> | _ }
    token l_decimal_digit { <[ 0 .. 9 ]> }
    token l_octal_digit   { <[ 0 .. 7 ]> }
    token l_hex_digit     { <[ 0..9 A..F a..f ]> }

    # Because we can't actually modify the incoming character stream, we
    # have to simulate semicolon insertion; in particular, non-semicolon
    # lexemes cannot match at an inserted semicolon point.

    # Skip trailing whitespace/comments after a lexeme.
    #   $semi - true when the lexeme just matched is one after which a
    #           newline implies a semicolon.
    # When $semi is true and the skipped text contains a CR or LF, the
    # position one character before the end of the whitespace is flagged in
    # @*ADDSEMI and a cursor at that position is returned; otherwise the
    # whitespace match itself is returned.
    method l_space($semi) {
        # Must call the raw matcher here; calling l_space itself (as the
        # code previously did) recurses forever and omits the argument.
        my ($ws) = self.l_space_raw;
        if $semi && $ws ~~ /<[\x0D\x0A]>/ {
            @*ADDSEMI[$ws.to-1] := True;
            return self.cursor($ws.to-1);
        } else {
            return $ws;
        }
    }

    # Zero-width guard: succeeds only when the current position is NOT
    # flagged as an inserted semicolon.
    token l_notbeforesemi() { <?{ !@*ADDSEMI[$¢.pos] }> }

    # Reserved words.
    my %kw = <break case chan const continue default defer else fallthrough
        for func go goto if import interface map package range return select
        struct switch type var> X=> True;

    # Keywords and operators after which a newline inserts a semicolon.
    # (X=> True turns the word list into a set-like hash, as for %kw.)
    my %space = <break continue fallthrough return ++ -- ) ] }> X=> True;

    # Every multi-character operator; l_op uses this to refuse to match an
    # operator that is merely a prefix of a longer one.
    my %opextend = « << >> &^ += -= *= /= %= &= |= ^= <<= >>= &^= && || <-
        ++ -- == != <= >= := ... » X=> True;

    # An identifier or keyword, followed by trailing whitespace.
    token l_idorkw() {
        <.l_notbeforesemi>
        # A Go identifier may start with an underscore as well as a letter.
        $<chars> = [<.l_letter> \w*]
        <.l_space(!%kw{~$<chars>} || %space{~$<chars>})>
    }

    # An identifier that is not a keyword.
    token l_id() { <l_idorkw> <?{ !%kw{~$<l_idorkw><chars>} }> }

    # The specific keyword $str.
    token l_kw($str) { <l_idorkw> <?{ $<l_idorkw><chars> eq $str }> }

    # A semicolon: either an inserted one (flagged in @*ADDSEMI at the
    # current position, consumed by stepping one character forward) or a
    # literal ';' followed by whitespace.
    token l_semi {
        { return self.cursor(self.pos+1) if @*ADDSEMI[self.pos] }
        ';'
        <.l_space(False)>
    }

    # use l_semi for ';'
    # The operator or punctuation $str, provided that $str plus the next
    # character is not itself a longer operator.
    token l_op($str) {
        <.l_notbeforesemi>
        $str
        <!before $<next>=[.] <?{ %opextend{$str ~ $<next>} }> >
        <.l_space(%space{$str})>
    }

    # Decimal, octal or hexadecimal integer literal.  The trailing guard
    # rejects text that continues as a float or imaginary literal.
    token l_int_lit {
        <.l_notbeforesemi>
        [ <[1..9]> <[0..9]>*
        | 0 <[0..7]>*
        | 0 <[xX]> <l_hex_digit>+
        ]
        <![ e E . i ]>
        <.l_space(True)>
    }

    token l_exp { <[eE]> <[+-]>? <[0..9]>+ }

    # The digits/point/exponent part shared by float and imaginary literals.
    token l_float_guts {
        [ <[0..9]>+ '.' <[0..9]>* <l_exp>?
        | <[0..9]>+ <l_exp>
        | '.' <[0..9]>+ <l_exp>?
        ]
    }

    token l_float_lit {
        <.l_notbeforesemi>
        <l_float_guts>
        <![ i ]>                # "1.5i" is an imaginary literal, not a float
        <.l_space(True)>
    }

    token l_imag_lit {
        <.l_notbeforesemi>
        [ <[0..9]>+ | <l_float_guts> ]
        i
        <.l_space(True)>
    }
    # ... you get the idea.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment