-
-
Save ajinkyakulkarni/ada5ec1792d25fc1264e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
[The 'BSD licence'] | |
Copyright (c) 2009 Ales Teska | |
All rights reserved. | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions | |
are met: | |
1. Redistributions of source code must retain the above copyright | |
notice, this list of conditions and the following disclaimer. | |
2. Redistributions in binary form must reproduce the above copyright | |
notice, this list of conditions and the following disclaimer in the | |
documentation and/or other materials provided with the distribution. | |
3. The name of the author may not be used to endorse or promote products | |
derived from this software without specific prior written permission. | |
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
/* | |
* Python 3.1 Grammar | |
* | |
* Ales Teska | |
* October 2009 | |
* | |
* | |
*/ | |
grammar python3; | |
// $<Inputs | |
// One unit of interactive input: a blank line, a simple statement, or a
// compound statement followed by NEWLINE.
// simple_stmt already ends with its own NEWLINE, so it must not be followed
// by a second one here (Python: single_input: NEWLINE | simple_stmt |
// compound_stmt NEWLINE).
interactive_input
    : NEWLINE
    | simple_stmt
    | compound_stmt NEWLINE
    ;
// File input: any sequence of blank lines and statements.
file_input
    : ( NEWLINE | stmt )*
    ;

// Expression input: a testlist optionally surrounded by NEWLINE tokens.
eval_input
    : NEWLINE* testlist NEWLINE*
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Function and class definition | |
// Class definition: optional decorators, the CLASS keyword, the class name,
// an optional parenthesised argument list, then ':' and the body.
classdef
    : decorators? CLASS classname inheritance? COLON suite
    ;

//TODO: Python 3 manual says 'inheritance ::= "(" [expression_list] ")"'
inheritance
    : LPAREN arglist? RPAREN
    ;

classname
    : IDENTIFIER
    ;
//// | |
// Function definition: optional decorators, DEF, the name, a parenthesised
// (possibly empty) parameter list, an optional '->' annotation, ':' and body.
funcdef
    : decorators? DEF funcname LPAREN parameters? RPAREN ( '->' test )? COLON suite
    ;

// Parameter list. First alternative: zero or more leading parameters, then a
// '*' section (optionally named, optionally followed by more parameters and a
// final '**' parameter) or a lone '**' parameter. Second alternative: plain
// comma-separated parameters with an optional trailing comma.
parameters
    : ( tfpdefassgn COMMA )*
      ( STAR tfpdef? ( COMMA tfpdefassgn )* ( COMMA DOUBLESTAR tfpdef )?
      | DOUBLESTAR tfpdef
      )
    | tfpdefassgn ( COMMA tfpdefassgn )* COMMA?
    ;

// A parameter with an optional '= default'.
tfpdefassgn
    : tfpdef ( ASSIGN test )?
    ;

// A parameter name with an optional ': annotation'.
tfpdef
    : IDENTIFIER ( COLON test )?
    ;

funcname
    : IDENTIFIER
    ;
//// | |
// A single decorator line: '@', a dotted name, an optional parenthesised
// argument list, and the terminating NEWLINE.
decorator
    : AT dotted_name ( LPAREN arglist? RPAREN )? NEWLINE
    ;

// One or more decorator lines.
decorators
    : decorator+
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Simple statements | |
// Any statement.
stmt
    : simple_stmt
    | compound_stmt
    ;

// One or more small statements separated by ';', with an optional trailing
// ';', terminated by NEWLINE.
simple_stmt
    : small_stmt ( SEMI small_stmt )* SEMI? NEWLINE
    ;

// Statements that may share a single logical line.
small_stmt
    : expr_stmt
    | del_stmt
    | pass_stmt
    | break_stmt
    | continue_stmt
    | return_stmt
    | raise_stmt
    | yield_stmt
    | import_stmt
    | global_stmt
    | nonlocal_stmt
    | assert_stmt
    ;
//// | |
del_stmt
    : DEL exprlist
    ;

pass_stmt
    : PASS
    ;

break_stmt
    : BREAK
    ;

continue_stmt
    : CONTINUE
    ;

// 'return' with an optional value list.
return_stmt
    : RETURN testlist?
    ;

// Bare 'raise', 'raise X', or 'raise X from Y'.
raise_stmt
    : RAISE ( test ( FROM test )? )?
    ;

global_stmt
    : GLOBAL IDENTIFIER ( COMMA IDENTIFIER )*
    ;

nonlocal_stmt
    : NONLOCAL IDENTIFIER ( COMMA IDENTIFIER )*
    ;

// 'assert' condition with an optional second expression after a comma.
assert_stmt
    : ASSERT test ( COMMA test )?
    ;

////
// A yield expression used as a statement.
yield_stmt
    : yield_expression
    ;

yield_expression
    : YIELD testlist?
    ;
//// | |
import_stmt
    : import_name
    | import_from
    ;

// 'import a.b as c, d'
import_name
    : IMPORT dotted_as_names
    ;

// 'from <source> import <targets>'. The source is either a dotted name with
// any number of leading dots, or one or more dots alone. The targets are '*',
// a parenthesised name list, or a bare name list.
import_from
    : FROM
      ( DOT* dotted_name
      | DOT+
      )
      IMPORT
      ( STAR
      | LPAREN import_as_names RPAREN
      | import_as_names
      )
    ;

import_as_name
    : IDENTIFIER ( AS IDENTIFIER )?
    ;

dotted_as_name
    : dotted_name ( AS IDENTIFIER )?
    ;

// Comma-separated names; a trailing comma is allowed.
import_as_names
    : import_as_name ( COMMA import_as_name )* COMMA?
    ;

// Comma-separated dotted names; no trailing comma.
dotted_as_names
    : dotted_as_name ( COMMA dotted_as_name )*
    ;

dotted_name
    : IDENTIFIER ( DOT IDENTIFIER )*
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Compound statements | |
// Statements that own a nested block.
compound_stmt
    : if_stmt
    | while_stmt
    | for_stmt
    | try_stmt
    | with_stmt
    | funcdef
    | classdef
    ;

////
// A block body: either a simple statement on the same line, or a NEWLINE
// followed by one or more statements between INDENT and DEDENT.
suite
    : simple_stmt
    | NEWLINE INDENT stmt+ DEDENT
    ;
//// | |
// 'if' with any number of 'elif' branches and an optional 'else'.
if_stmt
    : IF test COLON suite
      ( ELIF test COLON suite )*
      ( ELSE COLON suite )?
    ;

// 'while' loop with an optional 'else' block.
while_stmt
    : WHILE test COLON suite
      ( ELSE COLON suite )?
    ;

// 'for' loop with an optional 'else' block.
for_stmt
    : FOR exprlist IN testlist COLON suite
      ( ELSE COLON suite )?
    ;
// 'try' block followed by its handlers.
try_stmt
    : TRY COLON suite try_closure
    ;

// Either one or more 'except' clauses with optional 'else' and 'finally'
// blocks, or a lone 'finally' block.
// The target after 'as' is a plain identifier, not an arbitrary expression
// (Python 3: except_clause: 'except' [test ['as' NAME]]).
try_closure
    : ( EXCEPT ( test ( AS IDENTIFIER )? )? COLON suite )+
      ( ELSE COLON suite )?
      ( FINALLY COLON suite )?
    | FINALLY COLON suite
    ;
// 'with' statement holding one or more comma-separated items.
with_stmt
    : WITH with_item ( COMMA with_item )* COLON suite
    ;

// A context expression with an optional 'as' target.
with_item
    : test ( AS expr )?
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Test | |
// An expression: an or_test with an optional 'if ... else ...' tail, or a
// lambda.
test
    : or_test ( IF or_test ELSE test )?
    | lambdef
    ;

// Variant of test without the conditional tail; its lambda body is itself a
// test_nocond.
test_nocond
    : or_test
    | lambdef_nocond
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Lambdas | |
lambdef
    : LAMBDA varargslist? COLON test
    ;

lambdef_nocond
    : LAMBDA varargslist? COLON test_nocond
    ;

// Lambda parameter list. Same shape as 'parameters', but built from vfpdef
// (a bare identifier with no annotation).
varargslist
    : ( vfpdefassgn COMMA )*
      ( STAR vfpdef? ( COMMA vfpdefassgn )* ( COMMA DOUBLESTAR vfpdef )?
      | DOUBLESTAR vfpdef
      )
    | vfpdefassgn ( COMMA vfpdefassgn )* COMMA?
    ;

// A lambda parameter with an optional '= default'.
vfpdefassgn
    : vfpdef ( ASSIGN test )?
    ;

vfpdef
    : IDENTIFIER
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Boolean operations | |
// Boolean operators layered from loosest to tightest: or, and, not.
or_test
    : and_test ( OR and_test )*
    ;

and_test
    : not_test ( AND not_test )*
    ;

not_test
    : NOT not_test
    | comparison
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Comparisons | |
// A chain of operands joined by comparison operators.
comparison
    : star_expr ( comp_op star_expr )*
    ;

// Comparison operators, including the two-keyword forms 'is not' and
// 'not in'.
comp_op
    : LESS
    | GREATER
    | EQUAL
    | GREATEREQUAL
    | LESSEQUAL
    | NOTEQUAL
    | IS
    | IS NOT
    | IN
    | NOT IN
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Expressions | |
// Expression statement: a testlist followed by either one augmented
// assignment or zero or more '=' assignments; each right-hand side is a
// yield expression or a testlist.
expr_stmt
    : testlist
      ( augassign ( yield_expression | testlist )
      | ( ASSIGN ( yield_expression | testlist ) )*
      )
    ;

// Augmented assignment operators.
augassign
    : PLUSEQUAL
    | MINUSEQUAL
    | STAREQUAL
    | SLASHEQUAL
    | PERCENTEQUAL
    | AMPERSANDEQUAL
    | VBAREQUAL
    | CIRCUMFLEXEQUAL
    | LEFTSHIFTEQUAL
    | RIGHTSHIFTEQUAL
    | DOUBLESTAREQUAL
    | DOUBLESLASHEQUAL
    ;
// An expression optionally prefixed with '*'.
star_expr
    : STAR? expr
    ;

// Binary operator layers, loosest first: | ^ & shifts, additive,
// multiplicative.
expr
    : xor_expr ( VBAR xor_expr )*
    ;

xor_expr
    : and_expr ( CIRCUMFLEX and_expr )*
    ;

and_expr
    : shift_expr ( AMPERSAND shift_expr )*
    ;

shift_expr
    : arith_expr ( ( LEFTSHIFT | RIGHTSHIFT ) arith_expr )*
    ;

arith_expr
    : term ( ( PLUS | MINUS ) term )*
    ;

term
    : factor ( ( STAR | SLASH | PERCENT | DOUBLESLASH ) factor )*
    ;

// Unary '+', '-' or '~' applied to a factor, or a power expression.
factor
    : ( PLUS | MINUS | TILDE ) factor
    | power
    ;

// An atom with any number of trailers and an optional '**' exponent.
power
    : atom trailer* ( DOUBLESTAR factor )?
    ;
// Primary expressions: parenthesised, bracketed and braced forms,
// identifiers, literals, the ellipsis (written as three DOT tokens), and the
// None / True / False keywords.
atom
    : LPAREN ( yield_expression | testlist_comp )? RPAREN
    | LBRACK testlist_comp? RBRACK
    | LCURLY dictorsetmaker? RCURLY
    | IDENTIFIER
    | number
    | string
    | DOT DOT DOT
    | NONE
    | TRUE
    | FALSE
    ;

// One or more adjacent string literals, or one or more adjacent bytes
// literals; the two kinds cannot be mixed.
string
    : STRINGLITERAL+
    | BYTESLITERAL+
    ;

number
    : INTEGER
    | FLOATNUMBER
    | IMAGNUMBER
    ;
// Postfix forms: call '(...)', subscript '[...]', attribute '.name'.
trailer
    : LPAREN arglist? RPAREN
    | LBRACK subscriptlist RBRACK
    | DOT IDENTIFIER
    ;

subscriptlist
    : subscript ( COMMA subscript )* COMMA?
    ;

// A plain index, or a slice with optional lower bound, upper bound and
// sliceop.
subscript
    : test ( COLON test? sliceop? )?
    | COLON test? sliceop?
    ;

// The third slice component: ':' with an optional expression.
sliceop
    : COLON test?
    ;

// Comma-separated star_expr items; a trailing comma is allowed.
exprlist
    : star_expr ( COMMA star_expr )* COMMA?
    ;

// Comma-separated test items; a trailing comma is allowed.
testlist
    : test ( COMMA test )* COMMA?
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Comprehensions | |
// Contents of '(...)' or '[...]': a comprehension, or a comma-separated list
// of tests with an optional trailing comma.
testlist_comp
    : test
      ( comp_for
      | ( COMMA test )* COMMA?
      )
    ;

// Contents of '{...}': the first test followed by a dict tail (starting with
// ':') or a set tail.
dictorsetmaker
    : test ( dictmakerclause | setmakerclause )
    ;

dictmakerclause
    : COLON test
      ( comp_for
      | ( COMMA test COLON test )* COMMA?
      )
    ;

setmakerclause
    : comp_for
    | ( COMMA test )* COMMA?
    ;

// Comprehension clauses: 'for ... in ...' and 'if ...', each optionally
// followed by a further clause.
comp_iter
    : comp_for
    | comp_if
    ;

comp_for
    : FOR exprlist IN or_test comp_iter?
    ;

comp_if
    : IF test_nocond comp_iter?
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Arguments | |
// Call arguments: zero or more 'argument ,' pairs, then an optional final
// argument, or a '*' section (optionally followed by more arguments and a
// '**' item), or a lone '**' item.
arglist
    : ( argument COMMA )*
      ( argument?
      | STAR test ( COMMA argument )* ( COMMA DOUBLESTAR test )?
      | DOUBLESTAR test
      )
    ;

// A positional argument (optionally a generator expression) or a
// 'name = value' keyword argument.
argument
    : test comp_for?
    | IDENTIFIER ASSIGN test
    ;
// $> | |
//////////////////////////////////// LEXER ///////////////////////////////////// | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<String and Bytes literals | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#string-and-bytes-literals ]] | |
// Only STRINGLITERAL and BYTESLITERAL are tokens seen by the parser. Every
// other rule in this section is a building block and is declared 'fragment'
// so the lexer never emits it on its own. Without that, STRINGPREFIX and
// BYTESPREFIX (declared before IDENTIFIER) compete with identifiers such as
// 'r' or 'b', and ESCAPESEQ competes with CONTINUED_LINE.
STRINGLITERAL
    : STRINGPREFIX? ( SHORTSTRING | LONGSTRING )
    ;

fragment STRINGPREFIX
    : ( 'r' | 'R' )
    ;

// Single-line string in double or single quotes; a backslash always takes
// the following character with it.
fragment SHORTSTRING
    : '"'  ( ESCAPESEQ | ~( '\\' | '\n' | '"'  ) )* '"'
    | '\'' ( ESCAPESEQ | ~( '\\' | '\n' | '\'' ) )* '\''
    ;

// Triple-quoted string.
fragment LONGSTRING
    : '\'\'\'' ( TRIAPOS )* '\'\'\''
    | '"""' ( TRIQUOTE )* '"""'
    ;

BYTESLITERAL
    : BYTESPREFIX ( SHORTBYTES | LONGBYTES )
    ;

fragment BYTESPREFIX
    : ( 'b' | 'B' ) ( 'r' | 'R' )?
    ;

fragment SHORTBYTES
    : '"'  ( ESCAPESEQ | ~( '\\' | '\n' | '"'  ) )* '"'
    | '\'' ( ESCAPESEQ | ~( '\\' | '\n' | '\'' ) )* '\''
    ;

fragment LONGBYTES
    : '\'\'\'' ( TRIAPOS )* '\'\'\''
    | '"""' ( TRIQUOTE )* '"""'
    ;

// Inside a '''...''' literal: at most two apostrophes, followed by at least
// one escape sequence or character that is neither a backslash nor an
// apostrophe.
fragment TRIAPOS
    : ( '\'' '\'' | '\''? ) ( ESCAPESEQ | ~( '\\' | '\'' ) )+
    ;

// Inside a """...""" literal: the same, with double quotes.
fragment TRIQUOTE
    : ( '"' '"' | '"'? ) ( ESCAPESEQ | ~( '\\' | '"' ) )+
    ;

// A backslash followed by any single character.
fragment ESCAPESEQ
    : '\\' .
    ;
// $> | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<Keywords | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#keywords ]] | |
// Keyword tokens, one per reserved word. They are declared before IDENTIFIER.
FALSE       : 'False' ;
NONE        : 'None' ;
TRUE        : 'True' ;
AND         : 'and' ;
AS          : 'as' ;
ASSERT      : 'assert' ;
FOR         : 'for' ;
BREAK       : 'break' ;
CLASS       : 'class' ;
CONTINUE    : 'continue' ;
DEF         : 'def' ;
DEL         : 'del' ;
ELIF        : 'elif' ;
ELSE        : 'else' ;
EXCEPT      : 'except' ;
FINALLY     : 'finally' ;
FROM        : 'from' ;
GLOBAL      : 'global' ;
IF          : 'if' ;
IMPORT      : 'import' ;
IN          : 'in' ;
IS          : 'is' ;
LAMBDA      : 'lambda' ;
NONLOCAL    : 'nonlocal' ;
NOT         : 'not' ;
OR          : 'or' ;
PASS        : 'pass' ;
RAISE       : 'raise' ;
RETURN      : 'return' ;
TRY         : 'try' ;
WHILE       : 'while' ;
WITH        : 'with' ;
YIELD       : 'yield' ;
// $> | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<Integer literals | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#integer-literals ]] | |
// INTEGER is the only token of this section used by the parser. The helper
// rules are 'fragment' so the lexer cannot emit DIGIT, OCTDIGIT, etc. as
// stand-alone tokens.
INTEGER
    : DECIMALINTEGER
    | OCTINTEGER
    | HEXINTEGER
    | BININTEGER
    ;

// A non-zero digit followed by any digits, or a run of zeros.
fragment DECIMALINTEGER
    : NONZERODIGIT DIGIT*
    | '0'+
    ;

fragment NONZERODIGIT
    : '1' .. '9'
    ;

fragment DIGIT
    : '0' .. '9'
    ;

fragment OCTINTEGER
    : '0' ( 'o' | 'O' ) OCTDIGIT+
    ;

fragment HEXINTEGER
    : '0' ( 'x' | 'X' ) HEXDIGIT+
    ;

fragment BININTEGER
    : '0' ( 'b' | 'B' ) BINDIGIT+
    ;

fragment OCTDIGIT
    : '0' .. '7'
    ;

fragment HEXDIGIT
    : DIGIT
    | 'a' .. 'f'
    | 'A' .. 'F'
    ;

fragment BINDIGIT
    : '0'
    | '1'
    ;
// $> | |
//// | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<Floating point literals | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#floating-point-literals ]] | |
// FLOATNUMBER is the token used by the parser. Its parts are 'fragment'
// rules so that, for example, a bare digit run is never emitted as an
// INTPART token the parser does not know about.
FLOATNUMBER
    : POINTFLOAT
    | EXPONENTFLOAT
    ;

// '.5', '1.5' or '1.'
fragment POINTFLOAT
    : ( INTPART? FRACTION )
    | ( INTPART '.' )
    ;

// A digit run or point float followed by an exponent.
fragment EXPONENTFLOAT
    : ( INTPART | POINTFLOAT ) EXPONENT
    ;

fragment INTPART
    : DIGIT+
    ;

fragment FRACTION
    : '.' DIGIT+
    ;

fragment EXPONENT
    : ( 'e' | 'E' ) ( '+' | '-' )? DIGIT+
    ;
// $> | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<Imaginary literals | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#imaginary-literals ]] | |
// A float literal or a digit run followed by 'j' or 'J'.
IMAGNUMBER
    : ( FLOATNUMBER | INTPART )
      ( 'j' | 'J' )
    ;
// $> | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<Identifiers | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#identifiers ]] | |
// An identifier: one start character followed by any number of continuation
// characters. ID_START and ID_CONTINUE are 'fragment' helpers; they exist
// only to build IDENTIFIER and must not become tokens themselves.
IDENTIFIER
    : ID_START ID_CONTINUE*
    ;

//TODO: <all characters in general categories Lu, Ll, Lt, Lm, Lo, Nl, the underscore, and characters with the Other_ID_Start property> - see python3_pep3131.g
fragment ID_START
    : '_'
    | 'A' .. 'Z'
    | 'a' .. 'z'
    ;

//TODO: <all characters in id_start, plus characters in the categories Mn, Mc, Nd, Pc and others with the Other_ID_Continue property> - see python3_pep3131.g
fragment ID_CONTINUE
    : '_'
    | 'A' .. 'Z'
    | 'a' .. 'z'
    | '0' .. '9'
    ;
// $> | |
//////////////////////////////////////////////////////////////////////////////// | |
// $<Operators | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#operators ]] | |
// Operator tokens.
PLUS            : '+' ;
MINUS           : '-' ;
STAR            : '*' ;
DOUBLESTAR      : '**' ;
SLASH           : '/' ;
DOUBLESLASH     : '//' ;
PERCENT         : '%' ;
LEFTSHIFT       : '<<' ;
RIGHTSHIFT      : '>>' ;
AMPERSAND       : '&' ;
VBAR            : '|' ;
CIRCUMFLEX      : '^' ;
TILDE           : '~' ;
LESS            : '<' ;
GREATER         : '>' ;
LESSEQUAL       : '<=' ;
GREATEREQUAL    : '>=' ;
EQUAL           : '==' ;
NOTEQUAL        : '!=' ;
// $> | |
////////////////////////////////////////////// | |
// $<Delimiters | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#delimiters ]] | |
// Implicit line joining - [[ http://docs.python.org/3.1/reference/lexical_analysis.html#implicit-line-joining ]] | |
// Bracketing and punctuation tokens.
LPAREN          : '(' ;
RPAREN          : ')' ;
LBRACK          : '[' ;
RBRACK          : ']' ;
LCURLY          : '{' ;
RCURLY          : '}' ;
COMMA           : ',' ;
COLON           : ':' ;
DOT             : '.' ;
SEMI            : ';' ;
AT              : '@' ;
ASSIGN          : '=' ;

// Augmented assignment operators
PLUSEQUAL        : '+=' ;
MINUSEQUAL       : '-=' ;
STAREQUAL        : '*=' ;
SLASHEQUAL       : '/=' ;
DOUBLESLASHEQUAL : '//=' ;
PERCENTEQUAL     : '%=' ;
AMPERSANDEQUAL   : '&=' ;
VBAREQUAL        : '|=' ;
CIRCUMFLEXEQUAL  : '^=' ;
LEFTSHIFTEQUAL   : '<<=' ;
RIGHTSHIFTEQUAL  : '>>=' ;
DOUBLESTAREQUAL  : '**=' ;
// $> | |
////////////////////////////////////////////// | |
// $<Line structure | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#line-structure ]] | |
/** Explicit line joining: a backslash immediately followed by a line break,
 *  then any spaces or tabs that start the next line, then an optional
 *  NEWLINE (the case where the next line holds only white space).
 *
 *  NOTE(review): the rule carries no lexer action, so nothing here actually
 *  discards the match or re-emits the NEWLINE; confirm whether the token is
 *  meant to reach the parser.
 */
CONTINUED_LINE
    : '\\' '\r'? '\n' ( ' ' | '\t' )* NEWLINE?
    ;

/** One or more consecutive line breaks matched as a single token, so a
 *  sequence of blank lines is treated as a single blank line.
 *
 *  Frank Wierzbicki added: a form feed (\u000C) is also accepted in front of
 *  each line break.
 *
 *  NOTE(review): ignoring newlines nested within (..), {..} or [..], and
 *  newlines that start in column one, is described as intended behavior but
 *  is not implemented by this rule as written.
 */
NEWLINE
    : ( '\u000C'? '\r'? '\n' )+
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Whitespace | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#whitespace-between-tokens ]] | |
// Spaces and tabs between tokens. No parser rule references WS, so it is
// routed to the hidden channel; on the default channel every blank between
// two tokens would reach the parser as an unexpected token.
// NOTE(review): the action uses the default (Java) target syntax; adjust it
// if the grammar is generated for another target.
WS  : ( ' ' | '\t' )+ { $channel = HIDDEN; } ;
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#indentation ]] | |
/** A run of spaces and tabs, optionally followed by any number of line
 *  breaks.
 *
 *  This was previously written as two alternatives: the white-space run
 *  alone, and the run followed by zero or more line breaks. The first is the
 *  zero-line-break case of the second, so it was redundant and ambiguous.
 *  The single sequence below matches exactly the same text.
 *
 *  NOTE(review): the stated intent is to grab everything before a real
 *  symbol, to drop blank lines and comment-only lines, and to ignore leading
 *  white space nested in [..], (..), {..}. None of that is implemented by
 *  this rule as written; it only matches the text.
 */
LEADING_WS
    : ( ' ' | '\t' )+ ( '\r'? '\n' )*
    ;
// $> | |
////////////////////////////////////////////// | |
// $<Comments | |
// [[ http://docs.python.org/3.1/reference/lexical_analysis.html#comments ]] | |
// A '#' comment running to the end of the line. The first alternative also
// takes leading spaces/tabs and the line break(s) that follow; the second
// leaves the line break to NEWLINE.
// No parser rule references COMMENT, so it is routed to the hidden channel
// instead of reaching the parser as an unexpected token.
// NOTE(review): the first alternative can also swallow the line break after
// a comment that trails a statement; confirm that is acceptable.
// NOTE(review): the action uses the default (Java) target syntax.
COMMENT
    : ( ( ' ' | '\t' )* '#' ( ~'\n' )* '\n'+
      | '#' ( ~'\n' )* // let NEWLINE handle \n unless char pos==0 for '#'
      )
      { $channel = HIDDEN; }
    ;
// $> | |
// The following two lexer rules are imaginary: their condition is never met.
// They are here only to suppress warnings about the INDENT and DEDENT tokens
// used by the 'suite' parser rule.
DEDENT : '\n' ;
INDENT : '\n' ;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment