sorear/go.pl6

## go.pl6
grammar Go {
    # Lexer layer for Go source text.
    #
    # This is a funny grammar because we're trying to simulate a separate
    # lexer: in the main "grammar" part don't parse characters yourself, use
    # only the l_* lexeme tokens defined here.
    #
    # Normally the 'real' cursor always points after whitespace.  If the
    # 'logical' cursor is to point to an inserted semicolon, the 'real' cursor
    # is placed ONE CHARACTER BEFORE the next token start, and that position
    # is marked in @*ADDSEMI.
    # NOTE(review): @*ADDSEMI is not declared in this grammar; presumably the
    # caller declares it before parsing -- confirm.

    # No automatic whitespace processing.  <?> is the explicit zero-width
    # "always succeeds" assertion (an empty regex body is not portable).
    token ws() { <?> }

    # Zero or more whitespace characters and comments, with no semicolon
    # bookkeeping.  A line comment runs to the end of the line (\N*), so the
    # newline that ends it is consumed by the first alternative and remains
    # visible to l_space.
    token l_space_raw() {
        [ <[\x20\x09\x0D\x0A]>
        | '//' \N*
        | '/*' .*? '*/'
        ]*
    }

    # Character classes shared by the lexeme tokens below.
    token l_letter { <:Letter> | _ }
    token l_decimal_digit { <[ 0 .. 9 ]> }
    token l_octal_digit { <[ 0 .. 7 ]> }
    token l_hex_digit { <[ 0..9 A..F a..f ]> }

    # Because we can't actually modify the incoming character stream, we
    # have to simulate semicolon insertion; in particular, non-semicolon
    # lexemes cannot match at an inserted semicolon point.
    #
    # l_space($semi): skip whitespace/comments after a lexeme.  When $semi is
    # true and the skipped text contains a CR or LF, record an inserted
    # semicolon at the last skipped character and return a cursor positioned
    # there; otherwise return the cursor positioned after the whitespace.
    method l_space($semi) {
        # Must call the raw matcher: calling l_space here would recurse into
        # this very method (and without its required argument).
        my ($ws) = self.l_space_raw;
        if $semi && $ws ~~ /<[\x0D\x0A]>/ {
            @*ADDSEMI[$ws.to-1] := True;
            return self.cursor($ws.to-1);
        } else {
            return $ws;
        }
    }

    # Zero-width assertion: succeeds only when no semicolon has been inserted
    # at the current position.
    token l_notbeforesemi() { <?{ !@*ADDSEMI[$¢.pos] }> }

    # Reserved words; these never lex as identifiers.
    my %kw = <break case chan const continue default defer else fallthrough
        for func go goto if import interface map package range return select
        struct switch type var> X=> True;

    # Keywords and operators after which a newline inserts a semicolon.
    my %space = <break continue fallthrough return ++ -- ) ] }> X=> True;

    # Every multi-character operator.  l_op consults this to refuse matching
    # an operator that is only the prefix of a longer one.
    my %opextend = « << >> &^ += -= *= /= %= &= |= ^= <<= >>= &^= && || <-
        ++ -- == != <= >= := ... » X=> True;

    # An identifier or keyword plus trailing whitespace.  A following newline
    # inserts a semicolon after identifiers and after the keywords listed in
    # %space.
    token l_idorkw() {
        <?l_notbeforesemi>
        $<chars> = [<.l_letter> \w*]
        <.l_space(!%kw{$<chars>} || %space{$<chars>})>
    }

    # An identifier (anything l_idorkw accepts that is not a keyword).
    token l_id() { <l_idorkw> <?{ !%kw{~$<l_idorkw><chars>} }> }
    # The specific keyword $str.
    token l_kw($str) { <l_idorkw> <?{ $<l_idorkw><chars> eq $str }> }

    # A semicolon: either one recorded in @*ADDSEMI (step over the single
    # character reserved for it) or a literal ';' followed by whitespace.
    token l_semi {
        { return self.cursor(self.pos+1) if @*ADDSEMI[self.pos] }
        ';'
        <.l_space(False)>
    }

    # The operator/punctuation $str, provided it is not the start of a longer
    # operator from %opextend.  Use l_semi for ';'.
    token l_op($str) {
        <?l_notbeforesemi>
        $str
        <!before $<next>=[.] <?{ %opextend{$str ~ $<next>} }> >
        <.l_space(%space{$str})>
    }

    # Integer literal: decimal, octal or hexadecimal.  The trailing negative
    # lookahead rejects a digit run that is really the start of a float or
    # imaginary literal (including cases like "09.5", where the octal
    # alternative stops at an 8 or 9).
    token l_int_lit {
        <?l_notbeforesemi>
        [ <[1..9]> <[0..9]>*
        | 0 <[0..7]>*
        | 0 <[xX]> <l_hex_digit>+
        ]
        <![ 0..9 e E . i ]>
        <.l_space(True)>
    }

    # Exponent part of a floating-point literal.
    token l_exp { <[eE]> <[+-]>? <[0..9]>+ }

    # The characters of a floating-point literal, without trailing whitespace
    # handling, so l_imag_lit can reuse them.
    token l_float_guts {
        [ <[0..9]>+ '.' <[0..9]>* <l_exp>?
        | <[0..9]>+ <l_exp>
        | '.' <[0..9]>+ <l_exp>?
        ]
    }

    # Floating-point literal lexeme.
    token l_float_lit {
        <?l_notbeforesemi>
        <l_float_guts>
        <.l_space(True)>
    }

    # Imaginary literal lexeme: digits or float characters followed by 'i'.
    token l_imag_lit {
        <?l_notbeforesemi>
        [ <[0..9]>+ | <l_float_guts> ]
        i
        <.l_space(True)>
    }

    # ... you get the idea.
}
	grammar Go {
	    # Lexer layer for Go source text.
	    #
	    # This is a funny grammar because we're trying to simulate a separate
	    # lexer: in the main "grammar" part don't parse characters yourself, use
	    # only the l_* lexeme tokens defined here.
	    #
	    # Normally the 'real' cursor always points after whitespace.  If the
	    # 'logical' cursor is to point to an inserted semicolon, the 'real' cursor
	    # is placed ONE CHARACTER BEFORE the next token start, and that position
	    # is marked in @*ADDSEMI.
	    # NOTE(review): @*ADDSEMI is not declared in this grammar; presumably the
	    # caller declares it before parsing -- confirm.

	    # No automatic whitespace processing.  <?> is the explicit zero-width
	    # "always succeeds" assertion (an empty regex body is not portable).
	    token ws() { <?> }

	    # Zero or more whitespace characters and comments, with no semicolon
	    # bookkeeping.  A line comment runs to the end of the line (\N*), so the
	    # newline that ends it is consumed by the first alternative and remains
	    # visible to l_space.
	    token l_space_raw() {
	        [ <[\x20\x09\x0D\x0A]>
	        | '//' \N*
	        | '/*' .*? '*/'
	        ]*
	    }

	    # Character classes shared by the lexeme tokens below.
	    token l_letter { <:Letter> | _ }
	    token l_decimal_digit { <[ 0 .. 9 ]> }
	    token l_octal_digit { <[ 0 .. 7 ]> }
	    token l_hex_digit { <[ 0..9 A..F a..f ]> }

	    # Because we can't actually modify the incoming character stream, we
	    # have to simulate semicolon insertion; in particular, non-semicolon
	    # lexemes cannot match at an inserted semicolon point.
	    #
	    # l_space($semi): skip whitespace/comments after a lexeme.  When $semi is
	    # true and the skipped text contains a CR or LF, record an inserted
	    # semicolon at the last skipped character and return a cursor positioned
	    # there; otherwise return the cursor positioned after the whitespace.
	    method l_space($semi) {
	        # Must call the raw matcher: calling l_space here would recurse into
	        # this very method (and without its required argument).
	        my ($ws) = self.l_space_raw;
	        if $semi && $ws ~~ /<[\x0D\x0A]>/ {
	            @*ADDSEMI[$ws.to-1] := True;
	            return self.cursor($ws.to-1);
	        } else {
	            return $ws;
	        }
	    }

	    # Zero-width assertion: succeeds only when no semicolon has been inserted
	    # at the current position.
	    token l_notbeforesemi() { <?{ !@*ADDSEMI[$¢.pos] }> }

	    # Reserved words; these never lex as identifiers.
	    my %kw = <break case chan const continue default defer else fallthrough
	        for func go goto if import interface map package range return select
	        struct switch type var> X=> True;

	    # Keywords and operators after which a newline inserts a semicolon.
	    my %space = <break continue fallthrough return ++ -- ) ] }> X=> True;

	    # Every multi-character operator.  l_op consults this to refuse matching
	    # an operator that is only the prefix of a longer one.
	    my %opextend = « << >> &^ += -= *= /= %= &= |= ^= <<= >>= &^= && || <-
	        ++ -- == != <= >= := ... » X=> True;

	    # An identifier or keyword plus trailing whitespace.  A following newline
	    # inserts a semicolon after identifiers and after the keywords listed in
	    # %space.
	    token l_idorkw() {
	        <?l_notbeforesemi>
	        $<chars> = [<.l_letter> \w*]
	        <.l_space(!%kw{$<chars>} || %space{$<chars>})>
	    }

	    # An identifier (anything l_idorkw accepts that is not a keyword).
	    token l_id() { <l_idorkw> <?{ !%kw{~$<l_idorkw><chars>} }> }
	    # The specific keyword $str.
	    token l_kw($str) { <l_idorkw> <?{ $<l_idorkw><chars> eq $str }> }

	    # A semicolon: either one recorded in @*ADDSEMI (step over the single
	    # character reserved for it) or a literal ';' followed by whitespace.
	    token l_semi {
	        { return self.cursor(self.pos+1) if @*ADDSEMI[self.pos] }
	        ';'
	        <.l_space(False)>
	    }

	    # The operator/punctuation $str, provided it is not the start of a longer
	    # operator from %opextend.  Use l_semi for ';'.
	    token l_op($str) {
	        <?l_notbeforesemi>
	        $str
	        <!before $<next>=[.] <?{ %opextend{$str ~ $<next>} }> >
	        <.l_space(%space{$str})>
	    }

	    # Integer literal: decimal, octal or hexadecimal.  The trailing negative
	    # lookahead rejects a digit run that is really the start of a float or
	    # imaginary literal (including cases like "09.5", where the octal
	    # alternative stops at an 8 or 9).
	    token l_int_lit {
	        <?l_notbeforesemi>
	        [ <[1..9]> <[0..9]>*
	        | 0 <[0..7]>*
	        | 0 <[xX]> <l_hex_digit>+
	        ]
	        <![ 0..9 e E . i ]>
	        <.l_space(True)>
	    }

	    # Exponent part of a floating-point literal.
	    token l_exp { <[eE]> <[+-]>? <[0..9]>+ }

	    # The characters of a floating-point literal, without trailing whitespace
	    # handling, so l_imag_lit can reuse them.
	    token l_float_guts {
	        [ <[0..9]>+ '.' <[0..9]>* <l_exp>?
	        | <[0..9]>+ <l_exp>
	        | '.' <[0..9]>+ <l_exp>?
	        ]
	    }

	    # Floating-point literal lexeme.
	    token l_float_lit {
	        <?l_notbeforesemi>
	        <l_float_guts>
	        <.l_space(True)>
	    }

	    # Imaginary literal lexeme: digits or float characters followed by 'i'.
	    token l_imag_lit {
	        <?l_notbeforesemi>
	        [ <[0..9]>+ | <l_float_guts> ]
	        i
	        <.l_space(True)>
	    }

	    # ... you get the idea.
	}