// Web-page residue from the GitHub gist this grammar was copied from (not part of the grammar):
//   gist ajinkyakulkarni/ada5ec1792d25fc1264e, created by @ajinkyakulkarni on June 6, 2014 22:25
/*
[The 'BSD licence']
Copyright (c) 2009 Ales Teska
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Python 3.1 Grammar
*
* Ales Teska
* October 2009
*
*
*/
grammar python3;
// $<Inputs
// Start rules, one per way source text reaches the parser (they correspond to
// single_input, file_input and eval_input in the Python reference grammar).

// One statement typed at an interactive prompt.
// simple_stmt already consumes its own terminating NEWLINE, and the NEWLINE
// lexer rule folds consecutive line ends into a single token, so requiring a
// second NEWLINE after simple_stmt made an ordinary one-line statement
// unparsable. The extra NEWLINE is kept as optional so that any input the
// earlier form accepted is still accepted.
interactive_input
    : NEWLINE
    | simple_stmt NEWLINE?
    | compound_stmt NEWLINE
    ;

// A complete module. The explicit EOF forces the parser to consume the whole
// token stream, so trailing text that is not a statement is reported as a
// syntax error instead of silently ending the parse.
file_input
    : ( NEWLINE | stmt )* EOF
    ;

// A single expression list, optionally surrounded by blank lines, and nothing
// else (EOF for the same reason as in file_input).
eval_input
    : NEWLINE* testlist NEWLINE* EOF
    ;
// $>
//////////////////////////////////////////////
// $<Function and class definition

// Class statement: optional decorators, the class name, an optional
// parenthesised argument list and the class body.
classdef
    : decorators? CLASS classname inheritance? COLON suite
    ;

//TODO: Python 3 manual says 'inheritance ::= "(" [expression_list] ")"'
inheritance
    : LPAREN arglist? RPAREN
    ;

classname
    : IDENTIFIER
    ;

////
// Function statement; the optional "'->' test" part follows the parameter list.
funcdef
    : decorators? DEF funcname
      LPAREN parameters? RPAREN
      ( '->' test )?
      COLON suite
    ;

// Formal parameters. First alternative: any number of comma-terminated
// parameters followed by a '*' or '**' section. Second alternative: a plain
// comma-separated list with an optional trailing comma.
parameters
    : ( tfpdefassgn COMMA )*
      ( STAR tfpdef? ( COMMA tfpdefassgn )* ( COMMA DOUBLESTAR tfpdef )?
      | DOUBLESTAR tfpdef
      )
    | tfpdefassgn ( COMMA tfpdefassgn )* COMMA?
    ;

// A parameter with an optional "= test" default.
tfpdefassgn
    : tfpdef ( ASSIGN test )?
    ;

// A parameter name with an optional ": test" annotation.
tfpdef
    : IDENTIFIER ( COLON test )?
    ;

funcname
    : IDENTIFIER
    ;

////
// A single "@dotted.name" or "@dotted.name(args)" line.
decorator
    : AT dotted_name ( LPAREN arglist? RPAREN )? NEWLINE
    ;

decorators
    : decorator+
    ;
// $>
//////////////////////////////////////////////
// $<Simple statements
// A statement is either a one-line simple statement or a compound statement.
stmt
    : simple_stmt
    | compound_stmt
    ;

// Small statements separated by ';', an optional trailing ';', then NEWLINE.
simple_stmt
    : small_stmt ( SEMI small_stmt )* SEMI? NEWLINE
    ;

small_stmt
    : expr_stmt     | del_stmt    | pass_stmt
    | break_stmt    | continue_stmt
    | return_stmt   | raise_stmt  | yield_stmt
    | import_stmt
    | global_stmt   | nonlocal_stmt
    | assert_stmt
    ;

////
// Keyword-introduced small statements.
del_stmt
    : DEL exprlist
    ;

pass_stmt
    : PASS
    ;

break_stmt
    : BREAK
    ;

continue_stmt
    : CONTINUE
    ;

return_stmt
    : RETURN testlist?
    ;

// "raise", "raise test" or "raise test from test".
raise_stmt
    : RAISE ( test ( FROM test )? )?
    ;

global_stmt
    : GLOBAL IDENTIFIER ( COMMA IDENTIFIER )*
    ;

nonlocal_stmt
    : NONLOCAL IDENTIFIER ( COMMA IDENTIFIER )*
    ;

// "assert test" with an optional second test after a comma.
assert_stmt
    : ASSERT test ( COMMA test )?
    ;

////
yield_stmt
    : yield_expression
    ;

yield_expression
    : YIELD testlist?
    ;
////
// Import statements: "import a.b as c, ..." and "from ... import ...".
import_stmt
    : import_name | import_from ;

import_name
    : IMPORT dotted_as_names ;

// The module part is either a dotted name with any number of leading dots,
// or one or more dots on their own. The imported part is '*', a parenthesised
// name list, or a bare name list.
import_from
    : FROM ( DOT* dotted_name | DOT+ )
      IMPORT ( STAR
             | LPAREN import_as_names RPAREN
             | import_as_names
             )
    ;

import_as_name
    : IDENTIFIER ( AS IDENTIFIER )? ;

dotted_as_name
    : dotted_name ( AS IDENTIFIER )? ;

// Unlike dotted_as_names, this list tolerates a trailing comma.
import_as_names
    : import_as_name ( COMMA import_as_name )* COMMA? ;

dotted_as_names
    : dotted_as_name ( COMMA dotted_as_name )* ;

dotted_name
    : IDENTIFIER ( DOT IDENTIFIER )* ;
// $>
//////////////////////////////////////////////
// $<Compound statements
compound_stmt
    : if_stmt | while_stmt | for_stmt
    | try_stmt | with_stmt
    | funcdef | classdef
    ;

////
// Body of a compound statement: a simple statement on the header line, or a
// NEWLINE followed by one or more statements between INDENT and DEDENT.
suite
    : simple_stmt
    | NEWLINE INDENT stmt+ DEDENT
    ;

////
if_stmt
    : IF test COLON suite
      ( ELIF test COLON suite )*
      ( ELSE COLON suite )?
    ;

while_stmt
    : WHILE test COLON suite
      ( ELSE COLON suite )?
    ;

for_stmt
    : FOR exprlist IN testlist COLON suite
      ( ELSE COLON suite )?
    ;

// The except / else / finally part is described by try_closure.
try_stmt
    : TRY COLON suite try_closure
    ;
// Handler part of a try statement: either one or more except clauses,
// optionally followed by an else clause and/or a finally clause, or a lone
// finally clause.
// The target after "as" is a plain name in Python 3
// (except_clause: 'except' [test ['as' NAME]]). The earlier "AS test" also
// admitted arbitrary expressions such as "except E as a.b:".
try_closure
    : ( EXCEPT ( test ( AS IDENTIFIER )? )? COLON suite )+
      ( ELSE COLON suite )?
      ( FINALLY COLON suite )?
    | FINALLY COLON suite
    ;
// "with" statement with one or more comma-separated items.
with_stmt
    : WITH with_item ( COMMA with_item )* COLON suite
    ;

// A context expression with an optional "as expr" target.
with_item
    : test ( AS expr )?
    ;
// $>
//////////////////////////////////////////////
// $<Test
// General expression: an or_test, optionally extended to the conditional form
// "or_test IF or_test ELSE test", or a lambda.
test
    : or_test ( IF or_test ELSE test )?
    | lambdef
    ;

// Variant without the conditional form; referenced from comp_if.
test_nocond
    : or_test
    | lambdef_nocond
    ;
// $>
//////////////////////////////////////////////
// $<Lambdas
lambdef
    : LAMBDA varargslist? COLON test
    ;

// Same as lambdef, but the body is a test_nocond.
lambdef_nocond
    : LAMBDA varargslist? COLON test_nocond
    ;

// Lambda parameters; same shape as the def parameter list, but built from
// vfpdef, which carries no annotation.
varargslist
    : ( vfpdefassgn COMMA )*
      ( STAR vfpdef? ( COMMA vfpdefassgn )* ( COMMA DOUBLESTAR vfpdef )?
      | DOUBLESTAR vfpdef
      )
    | vfpdefassgn ( COMMA vfpdefassgn )* COMMA?
    ;

// A lambda parameter with an optional "= test" default.
vfpdefassgn
    : vfpdef ( ASSIGN test )?
    ;

vfpdef
    : IDENTIFIER
    ;
// $>
//////////////////////////////////////////////
// $<Boolean operations
// Three layers, loosest binding first: or, and, not.
or_test
    : and_test ( OR and_test )*
    ;

and_test
    : not_test ( AND not_test )*
    ;

not_test
    : NOT not_test
    | comparison
    ;
// $>
//////////////////////////////////////////////
// $<Comparisons
// A chain of operands joined by comparison operators, e.g. "a < b <= c".
comparison
    : star_expr ( comp_op star_expr )*
    ;

// "is not" and "not in" are each written as two keyword tokens.
comp_op
    : LESS | GREATER | EQUAL | GREATEREQUAL | LESSEQUAL | NOTEQUAL
    | IS
    | IS NOT
    | IN
    | NOT IN
    ;
// $>
//////////////////////////////////////////////
// $<Expressions
// Expression statement: a bare expression list, one augmented assignment, or
// a chain of zero or more "= value" assignments.
expr_stmt
    : testlist
      ( augassign ( yield_expression | testlist )
      | ( ASSIGN ( yield_expression | testlist ) )*
      )
    ;

// Augmented assignment operators.
augassign
    : PLUSEQUAL      | MINUSEQUAL
    | STAREQUAL      | SLASHEQUAL
    | PERCENTEQUAL
    | AMPERSANDEQUAL | VBAREQUAL  | CIRCUMFLEXEQUAL
    | LEFTSHIFTEQUAL | RIGHTSHIFTEQUAL
    | DOUBLESTAREQUAL
    | DOUBLESLASHEQUAL
    ;
// An expression with an optional leading '*'.
star_expr
    : STAR? expr
    ;

// Binary operator layers, loosest binding first: | ^ & shifts, additive,
// multiplicative.
expr
    : xor_expr ( VBAR xor_expr )*
    ;

xor_expr
    : and_expr ( CIRCUMFLEX and_expr )*
    ;

and_expr
    : shift_expr ( AMPERSAND shift_expr )*
    ;

shift_expr
    : arith_expr ( ( LEFTSHIFT | RIGHTSHIFT ) arith_expr )*
    ;

arith_expr
    : term ( ( PLUS | MINUS ) term )*
    ;

term
    : factor ( ( STAR | SLASH | PERCENT | DOUBLESLASH ) factor )*
    ;

// Unary prefix operators; they nest through the recursive reference.
factor
    : PLUS factor
    | MINUS factor
    | TILDE factor
    | power
    ;

// An atom, its trailers, and an optional "** factor" exponent.
power
    : atom trailer* ( DOUBLESTAR factor )?
    ;

// Primary expressions: bracketed forms, names, literals, the three-dot
// ellipsis (written here as three DOT tokens) and the constant keywords.
atom
    : LPAREN ( yield_expression | testlist_comp )? RPAREN
    | LBRACK testlist_comp? RBRACK
    | LCURLY dictorsetmaker? RCURLY
    | IDENTIFIER
    | number
    | string
    | DOT DOT DOT
    | NONE
    | TRUE
    | FALSE
    ;

// One or more adjacent literals of the same kind (all string or all bytes).
string
    : STRINGLITERAL+
    | BYTESLITERAL+
    ;

number
    : INTEGER
    | FLOATNUMBER
    | IMAGNUMBER
    ;

// Postfix forms: call, subscription/slicing, attribute access.
trailer
    : LPAREN arglist? RPAREN
    | LBRACK subscriptlist RBRACK
    | DOT IDENTIFIER
    ;

subscriptlist
    : subscript ( COMMA subscript )* COMMA?
    ;

// An index expression or a slice; each slice part may be omitted.
subscript
    : test ( COLON test? sliceop? )?
    | COLON test? sliceop?
    ;

sliceop
    : COLON test?
    ;

exprlist
    : star_expr ( COMMA star_expr )* COMMA?
    ;

testlist
    : test ( COMMA test )* COMMA?
    ;
// $>
//////////////////////////////////////////////
// $<Comprehensions
// Contents of (...) or [...]: one test followed by a comprehension clause, or
// a comma-separated list of tests with an optional trailing comma.
testlist_comp
    : test ( comp_for
           | ( COMMA test )* COMMA?
           )
    ;

// Contents of {...}: what follows the first test decides between a dict
// (a ':' comes next) and a set.
dictorsetmaker
    : test ( dictmakerclause | setmakerclause )
    ;

dictmakerclause
    : COLON test
      ( comp_for
      | ( COMMA test COLON test )* COMMA?
      )
    ;

setmakerclause
    : comp_for
    | ( COMMA test )* COMMA?
    ;

// Continuation of a comprehension: a further "for" clause or an "if" filter.
comp_iter
    : comp_for
    | comp_if
    ;

comp_for
    : FOR exprlist IN or_test comp_iter?
    ;

comp_if
    : IF test_nocond comp_iter?
    ;
// $>
//////////////////////////////////////////////
// $<Arguments
// Call arguments: any number of comma-terminated arguments, then one of
//   - an optional final argument,
//   - "*test", further arguments and an optional "**test",
//   - "**test" alone.
arglist
    : ( argument COMMA )*
      ( argument?
      | STAR test ( COMMA argument )* ( COMMA DOUBLESTAR test )?
      | DOUBLESTAR test
      )
    ;

// A positional argument (optionally a bare generator expression) or a
// "name = value" keyword argument.
argument
    : test comp_for?
    | IDENTIFIER ASSIGN test
    ;
// $>
//////////////////////////////////// LEXER /////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// $<String and Bytes literals
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#string-and-bytes-literals ]]
// STRINGLITERAL and BYTESLITERAL are the only rules of this section that the
// parser references. All other rules here are building blocks and are marked
// `fragment`: a lexer rule without that modifier is a token in its own right,
// so e.g. STRINGPREFIX would compete with IDENTIFIER for the input "r" and
// TRIAPOS / TRIQUOTE would compete with nearly every other token.
STRINGLITERAL
    : STRINGPREFIX? ( SHORTSTRING | LONGSTRING )
    ;

fragment STRINGPREFIX
    : ( 'r' | 'R' ) ;

// Single- or double-quoted literal confined to one line; a backslash always
// takes the following character with it (see ESCAPESEQ).
fragment SHORTSTRING
    : '"' ( ESCAPESEQ | ~( '\\'|'\n'|'"' ) )* '"'
    | '\'' ( ESCAPESEQ | ~( '\\'|'\n'|'\'' ) )* '\''
    ;

// Triple-quoted literal; the body may contain newlines and runs of one or two
// quote characters (see TRIAPOS / TRIQUOTE).
fragment LONGSTRING
    : '\'\'\'' ( TRIAPOS )* '\'\'\''
    | '"""' ( TRIQUOTE )* '"""'
    ;

BYTESLITERAL
    : BYTESPREFIX ( SHORTBYTES | LONGBYTES )
    ;

// 'b' or 'B', optionally followed by 'r' or 'R'.
fragment BYTESPREFIX
    : ( 'b' | 'B' ) ( 'r' | 'R' )? ;

fragment SHORTBYTES
    : '"' ( ESCAPESEQ | ~( '\\' | '\n' | '"' ) )* '"'
    | '\'' ( ESCAPESEQ | ~( '\\' | '\n' | '\'' ) )* '\''
    ;

fragment LONGBYTES
    : '\'\'\'' ( TRIAPOS )* '\'\'\''
    | '"""' ( TRIQUOTE )* '"""'
    ;

// A piece of a '''...''' body: zero, one or two apostrophes followed by at
// least one escape sequence or character that is neither backslash nor
// apostrophe.
fragment TRIAPOS
    : ( '\'' '\'' | '\''? ) ( ESCAPESEQ | ~( '\\' | '\'' ) )+ ;

// The same for """...""" bodies, with double quotes.
fragment TRIQUOTE
    : ( '"' '"' | '"'? ) ( ESCAPESEQ | ~( '\\' | '"' ) )+ ;

// A backslash and whatever single character follows it.
fragment ESCAPESEQ
    : '\\' . ;
// $>
////////////////////////////////////////////////////////////////////////////////
// $<Keywords
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#keywords ]]
// One literal token per reserved word. These rules are declared ahead of
// IDENTIFIER, which appears further down in the lexer section.
FALSE    : 'False'    ;
NONE     : 'None'     ;
TRUE     : 'True'     ;
AND      : 'and'      ;
AS       : 'as'       ;
ASSERT   : 'assert'   ;
FOR      : 'for'      ;
BREAK    : 'break'    ;
CLASS    : 'class'    ;
CONTINUE : 'continue' ;
DEF      : 'def'      ;
DEL      : 'del'      ;
ELIF     : 'elif'     ;
ELSE     : 'else'     ;
EXCEPT   : 'except'   ;
FINALLY  : 'finally'  ;
FROM     : 'from'     ;
GLOBAL   : 'global'   ;
IF       : 'if'       ;
IMPORT   : 'import'   ;
IN       : 'in'       ;
IS       : 'is'       ;
LAMBDA   : 'lambda'   ;
NONLOCAL : 'nonlocal' ;
NOT      : 'not'      ;
OR       : 'or'       ;
PASS     : 'pass'     ;
RAISE    : 'raise'    ;
RETURN   : 'return'   ;
TRY      : 'try'      ;
WHILE    : 'while'    ;
WITH     : 'with'     ;
YIELD    : 'yield'    ;
// $>
////////////////////////////////////////////////////////////////////////////////
// $<Integer literals
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#integer-literals ]]
// INTEGER is the only token of this section that the parser references. The
// helper rules are marked `fragment` so they cannot be emitted on their own;
// without the modifier HEXDIGIT, for instance, is a token that matches the
// single letters 'a'..'f' and competes with IDENTIFIER for them.
INTEGER
    : DECIMALINTEGER | OCTINTEGER | HEXINTEGER | BININTEGER
    ;

// A non-zero digit followed by any digits, or a run consisting only of zeros.
fragment DECIMALINTEGER
    : NONZERODIGIT DIGIT* | '0'+ ;

fragment NONZERODIGIT
    : '1' .. '9' ;

fragment DIGIT
    : '0' .. '9' ;

// Prefixed forms: 0o / 0x / 0b (either letter case) and at least one digit of
// the matching base.
fragment OCTINTEGER
    : '0' ( 'o' | 'O' ) OCTDIGIT+ ;

fragment HEXINTEGER
    : '0' ( 'x' | 'X' ) HEXDIGIT+ ;

fragment BININTEGER
    : '0' ( 'b' | 'B' ) BINDIGIT+ ;

fragment OCTDIGIT
    : '0' .. '7' ;

fragment HEXDIGIT
    : DIGIT | 'a' .. 'f' | 'A' .. 'F' ;

fragment BINDIGIT
    : '0' | '1' ;
// $>
////
////////////////////////////////////////////////////////////////////////////////
// $<Floating point literals
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#floating-point-literals ]]
// FLOATNUMBER is the token. The pieces below are marked `fragment` so they are
// never emitted on their own; without the modifier EXPONENT, for instance, is
// a token that matches text such as "e1" and competes with IDENTIFIER, and
// INTPART duplicates the decimal form of INTEGER.
FLOATNUMBER
    : POINTFLOAT | EXPONENTFLOAT
    ;

// "1.5", ".5" or "1." — a fraction with optional integer part, or an integer
// part followed by a bare dot.
fragment POINTFLOAT
    : ( INTPART? FRACTION )
    | ( INTPART '.' )
    ;

// An integer part or point float followed by an exponent.
fragment EXPONENTFLOAT
    : ( INTPART | POINTFLOAT ) EXPONENT ;

fragment INTPART
    : DIGIT+ ;

fragment FRACTION
    : '.' DIGIT+ ;

// 'e' or 'E', an optional sign, and at least one digit.
fragment EXPONENT
    : ( 'e' | 'E' ) ( '+' | '-' )? DIGIT+ ;
// $>
////////////////////////////////////////////////////////////////////////////////
// $<Imaginary literals
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#imaginary-literals ]]
// A float literal or a plain digit run, immediately followed by 'j' or 'J'.
IMAGNUMBER
    : ( FLOATNUMBER | INTPART )
      ( 'j' | 'J' )
    ;
// $>
////////////////////////////////////////////////////////////////////////////////
// $<Identifiers
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#identifiers ]]
// A start character followed by any number of continuation characters.
// ID_START and ID_CONTINUE are character classes only, so they are marked
// `fragment` and never emitted as tokens of their own.
IDENTIFIER
    : ID_START ID_CONTINUE*
    ;

//TODO: <all characters in general categories Lu, Ll, Lt, Lm, Lo, Nl, the underscore, and characters with the Other_ID_Start property> - see python3_pep3131.g
// ASCII-only for now: underscore and letters.
fragment ID_START
    : '_' | 'A' .. 'Z' | 'a' .. 'z' ;

//TODO: <all characters in id_start, plus characters in the categories Mn, Mc, Nd, Pc and others with the Other_ID_Continue property> - see python3_pep3131.g
// ASCII-only for now: underscore, letters and digits.
fragment ID_CONTINUE
    : '_' | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' ;
// $>
////////////////////////////////////////////////////////////////////////////////
// $<Operators
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#operators ]]
// Arithmetic
PLUS         : '+'  ;
MINUS        : '-'  ;
STAR         : '*'  ;
DOUBLESTAR   : '**' ;
SLASH        : '/'  ;
DOUBLESLASH  : '//' ;
PERCENT      : '%'  ;
// Shifts and bitwise
LEFTSHIFT    : '<<' ;
RIGHTSHIFT   : '>>' ;
AMPERSAND    : '&'  ;
VBAR         : '|'  ;
CIRCUMFLEX   : '^'  ;
TILDE        : '~'  ;
// Comparisons
LESS         : '<'  ;
GREATER      : '>'  ;
LESSEQUAL    : '<=' ;
GREATEREQUAL : '>=' ;
EQUAL        : '==' ;
NOTEQUAL     : '!=' ;
// $>
//////////////////////////////////////////////
// $<Delimiters
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#delimiters ]]
// Implicit line joining - [[ http://docs.python.org/3.1/reference/lexical_analysis.html#implicit-line-joining ]]
// Brackets
LPAREN           : '('   ;
RPAREN           : ')'   ;
LBRACK           : '['   ;
RBRACK           : ']'   ;
LCURLY           : '{'   ;
RCURLY           : '}'   ;
// Punctuation
COMMA            : ','   ;
COLON            : ':'   ;
DOT              : '.'   ;
SEMI             : ';'   ;
AT               : '@'   ;
ASSIGN           : '='   ;
// Augmented assignment operators
PLUSEQUAL        : '+='  ;
MINUSEQUAL       : '-='  ;
STAREQUAL        : '*='  ;
SLASHEQUAL       : '/='  ;
DOUBLESLASHEQUAL : '//=' ;
PERCENTEQUAL     : '%='  ;
AMPERSANDEQUAL   : '&='  ;
VBAREQUAL        : '|='  ;
CIRCUMFLEXEQUAL  : '^='  ;
LEFTSHIFTEQUAL   : '<<=' ;
RIGHTSHIFTEQUAL  : '>>=' ;
DOUBLESTAREQUAL  : '**=' ;
// $>
//////////////////////////////////////////////
// $<Line structure
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#line-structure ]]
/** Explicit line joining: a backslash, the line terminator right after it and
 * any blanks or tabs that open the next line.
 *
 * No parser rule accepts a CONTINUED_LINE token, so it is routed to the
 * hidden channel; on the default channel every backslash continuation would
 * be a syntax error. A line terminator that follows the consumed blanks is
 * deliberately left in the input: it is then tokenized by NEWLINE, so a
 * continuation followed by a whitespace-only line still emits a newline.
 */
CONTINUED_LINE
    : '\\' '\r'? '\n' ( ' ' | '\t' )*
      { $channel = HIDDEN; }
    ;
/** One or more consecutive line terminators, each optionally preceded by a
 * form feed and/or carriage return, folded into a single NEWLINE token.
 *
 * Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C).
 *
 * NOTE(review): earlier commentary on this rule said newlines are ignored
 * inside (..), {..} or [..] and when the first newline starts in column one.
 * Nothing in this rule does that; presumably it relied on lexer state and
 * actions that are not present in this file — confirm before depending on it.
 */
NEWLINE
    : ( '\u000C'? '\r'? '\n' )+
    ;
// $>
//////////////////////////////////////////////
// $<Whitespace
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#whitespace-between-tokens ]]
// Blanks and tabs between tokens. No parser rule references WS, so the token
// is routed to the hidden channel; on the default channel any space between
// two tokens would reach the parser and be reported as a syntax error.
// Hidden tokens stay in the token stream and are only skipped by the parser.
WS
    : ( ' ' | '\t' )+ { $channel = HIDDEN; }
    ;
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#indentation ]]
/** Grab everything before a real symbol. Then if newline, kill it
* as this is a blank line. If whitespace followed by comment, kill it
* as it's a comment on a line by itself.
*
* Ignore leading whitespace when nested in [..], (..), {..}.
*
* NOTE(review): the rule below only matches text; it contains no action or
* predicate, so none of the "kill it" / "ignore when nested" behaviour
* described above is implemented here. Presumably that logic lived in
* embedded actions that are missing from this file — confirm.
*
* NOTE(review): the first alternative matches exactly the same text as the
* WS rule, and nothing restricts this rule to the start of a line. Check
* which of the two rules the generated lexer actually selects for a run of
* blanks, and whether blanks followed by a line end swallow that line end.
*/
LEADING_WS:
(
// Alternative 1: a run of blanks and/or tabs.
( ' ' | '\t' )+
|
// Alternative 2: a run of blanks and/or tabs followed by any number of
// line terminators.
(
' '
| '\t'
)+
(
'\r'? '\n'
)*
)
;
// $>
//////////////////////////////////////////////
// $<Comments
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#comments ]]
// A '#' comment running to the end of the line.
// Alternative 1 also takes optional leading blanks/tabs and one or more
// trailing '\n' characters; alternative 2 stops before the line end.
// NOTE(review): nothing here tests the character position mentioned in the
// inline remark below, so the choice between the two alternatives depends
// only on the text that follows — confirm this is intended.
// NOTE(review): the rule has no action and no parser rule references COMMENT,
// so as written the token is delivered to the parser; presumably it was meant
// to be skipped or put on a hidden channel — confirm.
COMMENT
: ( ' ' | '\t' )* '#' ( ~'\n' )* '\n'+
| '#' ( ~'\n' )* // let NEWLINE handle \n unless char pos==0 for '#'
;
// $>
// The following two lexer rules are imaginary: the parser's suite rule
// references INDENT and DEDENT, so the token types must be declared, but no
// input text should ever produce them directly. The gated predicate is always
// false, so neither rule can be selected and a bare '\n' is always tokenized
// by NEWLINE. Without the predicate both rules match exactly the text NEWLINE
// matches and can never be reached.
// NOTE(review): presumably INDENT/DEDENT tokens are synthesized from leading
// whitespace by a token-stream filter outside this grammar — confirm.
DEDENT: {0==1}?=> ('\n');
INDENT: {0==1}?=> ('\n');
// Web-page residue from the GitHub gist (not part of the grammar): "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment"