-
-
Save codebrainz/2933703 to your computer and use it in GitHub Desktop.
/* D: one decimal digit. */
D [0-9] | |
/* L: one letter or underscore; the identifier rule uses it as the first character. */
L [a-zA-Z_] | |
/* H: one hexadecimal digit. */
H [a-fA-F0-9] | |
/* E: decimal exponent part: e or E, an optional sign, then one or more digits. */
E ([Ee][+-]?{D}+) | |
/* P: exponent part used by the 0x floating patterns: p or P, an optional sign, then digits. */
P ([Pp][+-]?{D}+) | |
/* FS: suffix accepted after the floating-constant patterns. */
FS (f|F|l|L) | |
/* IS: suffix accepted after the integer-constant patterns: u alone, an
 * optional u followed by l or ll, or l or ll followed by u. */
IS ((u|U)|(u|U)?(l|L|ll|LL)|(l|L|ll|LL)(u|U)) | |
%{
/* C prologue: copied into the generated scanner ahead of the rule actions. */
#include <stdio.h>
#include "y.tab.h"      /* token codes (AUTO, CONSTANT, ...) returned by the rules */

/*
 * Helpers defined in the user-code section at the end of this file.
 * The rule actions call all three before their definitions appear, and
 * C99 no longer allows implicit function declarations, so each one needs
 * a prototype here.
 */
void count(void);
void comment(void);
int check_type(void);
%}
%% | |
"/*" { comment(); } | |
"//"[^\n]* { /* consume //-comment */ } | |
"auto" { count(); return(AUTO); } | |
"_Bool" { count(); return(BOOL); } | |
"break" { count(); return(BREAK); } | |
"case" { count(); return(CASE); } | |
"char" { count(); return(CHAR); } | |
"_Complex" { count(); return(COMPLEX); } | |
"const" { count(); return(CONST); } | |
"continue" { count(); return(CONTINUE); } | |
"default" { count(); return(DEFAULT); } | |
"do" { count(); return(DO); } | |
"double" { count(); return(DOUBLE); } | |
"else" { count(); return(ELSE); } | |
"enum" { count(); return(ENUM); } | |
"extern" { count(); return(EXTERN); } | |
"float" { count(); return(FLOAT); } | |
"for" { count(); return(FOR); } | |
"goto" { count(); return(GOTO); } | |
"if" { count(); return(IF); } | |
"_Imaginary" { count(); return(IMAGINARY); } | |
"inline" { count(); return(INLINE); } | |
"int" { count(); return(INT); } | |
"long" { count(); return(LONG); } | |
"register" { count(); return(REGISTER); } | |
"restrict" { count(); return(RESTRICT); } | |
"return" { count(); return(RETURN); } | |
"short" { count(); return(SHORT); } | |
"signed" { count(); return(SIGNED); } | |
"sizeof" { count(); return(SIZEOF); } | |
"static" { count(); return(STATIC); } | |
"struct" { count(); return(STRUCT); } | |
"switch" { count(); return(SWITCH); } | |
"typedef" { count(); return(TYPEDEF); } | |
"union" { count(); return(UNION); } | |
"unsigned" { count(); return(UNSIGNED); } | |
"void" { count(); return(VOID); } | |
"volatile" { count(); return(VOLATILE); } | |
"while" { count(); return(WHILE); } | |
{L}({L}|{D})* { count(); return(check_type()); } | |
0[xX]{H}+{IS}? { count(); return(CONSTANT); } | |
0[0-7]*{IS}? { count(); return(CONSTANT); } | |
[1-9]{D}*{IS}? { count(); return(CONSTANT); } | |
L?'(\\.|[^\\'\n])+' { count(); return(CONSTANT); } | |
{D}+{E}{FS}? { count(); return(CONSTANT); } | |
{D}*"."{D}+{E}?{FS}? { count(); return(CONSTANT); } | |
{D}+"."{D}*{E}?{FS}? { count(); return(CONSTANT); } | |
0[xX]{H}+{P}{FS}? { count(); return(CONSTANT); } | |
0[xX]{H}*"."{H}+{P}?{FS}? { count(); return(CONSTANT); } | |
0[xX]{H}+"."{H}*{P}?{FS}? { count(); return(CONSTANT); } | |
L?\"(\\.|[^\\"\n])*\" { count(); return(STRING_LITERAL); } | |
"..." { count(); return(ELLIPSIS); } | |
">>=" { count(); return(RIGHT_ASSIGN); } | |
"<<=" { count(); return(LEFT_ASSIGN); } | |
"+=" { count(); return(ADD_ASSIGN); } | |
"-=" { count(); return(SUB_ASSIGN); } | |
"*=" { count(); return(MUL_ASSIGN); } | |
"/=" { count(); return(DIV_ASSIGN); } | |
"%=" { count(); return(MOD_ASSIGN); } | |
"&=" { count(); return(AND_ASSIGN); } | |
"^=" { count(); return(XOR_ASSIGN); } | |
"|=" { count(); return(OR_ASSIGN); } | |
">>" { count(); return(RIGHT_OP); } | |
"<<" { count(); return(LEFT_OP); } | |
"++" { count(); return(INC_OP); } | |
"--" { count(); return(DEC_OP); } | |
"->" { count(); return(PTR_OP); } | |
"&&" { count(); return(AND_OP); } | |
"||" { count(); return(OR_OP); } | |
"<=" { count(); return(LE_OP); } | |
">=" { count(); return(GE_OP); } | |
"==" { count(); return(EQ_OP); } | |
"!=" { count(); return(NE_OP); } | |
";" { count(); return(';'); } | |
("{"|"<%") { count(); return('{'); } | |
("}"|"%>") { count(); return('}'); } | |
"," { count(); return(','); } | |
":" { count(); return(':'); } | |
"=" { count(); return('='); } | |
"(" { count(); return('('); } | |
")" { count(); return(')'); } | |
("["|"<:") { count(); return('['); } | |
("]"|":>") { count(); return(']'); } | |
"." { count(); return('.'); } | |
"&" { count(); return('&'); } | |
"!" { count(); return('!'); } | |
"~" { count(); return('~'); } | |
"-" { count(); return('-'); } | |
"+" { count(); return('+'); } | |
"*" { count(); return('*'); } | |
"/" { count(); return('/'); } | |
"%" { count(); return('%'); } | |
"<" { count(); return('<'); } | |
">" { count(); return('>'); } | |
"^" { count(); return('^'); } | |
"|" { count(); return('|'); } | |
"?" { count(); return('?'); } | |
[ \t\v\n\f] { count(); } | |
. { /* Add code to complain about unmatched characters */ } | |
%% | |
/*
 * yywrap - end-of-input hook for the scanner.
 *
 * Always returns 1, which by lex convention means there is no further
 * input to switch to, so scanning stops at the end of the current input.
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * comment - discard the body of a block comment.
 *
 * Called from the rule that matches the opening comment delimiter.
 * Reads characters straight from the scanner input until the closing
 * star-slash pair has been consumed, then returns.  If the input ends
 * first, reports "unterminated comment" through yyerror().
 */
void comment(void)
{
    /* Defined in the parser source; declared here so the call below has
     * a prototype in scope. */
    extern void yyerror(char const *s);

    int c;          /* int rather than char so that EOF is representable */
    int prev = 0;   /* previously read character, 0 before the first one */

    /* Depending on the lex implementation, input() reports end of file
     * as either 0 or EOF, so the loop has to stop on both. */
    while ((c = input()) != 0 && c != EOF) {
        if (c == '/' && prev == '*')
            return;
        prev = c;
    }

    yyerror("unterminated comment");
}
int column = 0; | |
void count(void) | |
{ | |
int i; | |
for (i = 0; yytext[i] != '\0'; i++) | |
if (yytext[i] == '\n') | |
column = 0; | |
else if (yytext[i] == '\t') | |
column += 8 - (column % 8); | |
else | |
column++; | |
ECHO; | |
} | |
int check_type(void) | |
{ | |
/* | |
* pseudo code --- this is what it should check | |
* | |
* if (yytext == type_name) | |
* return TYPE_NAME; | |
* | |
* return IDENTIFIER; | |
*/ | |
/* | |
* it actually will only return IDENTIFIER | |
*/ | |
return IDENTIFIER; | |
} |
/* Lexemes with variable text, plus the sizeof keyword. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF | |
/* Multi-character operators: ->, ++, --, <<, >>, <=, >=, ==, !=. */
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP | |
/* Logical && and ||, followed by the compound assignment operators. */
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN | |
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN | |
/* TYPE_NAME is used by type_specifier, but the accompanying scanner never
 * produces it: its check_type() always returns IDENTIFIER. */
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME | |
/* Storage-class keywords, then inline and restrict. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER INLINE RESTRICT | |
/* Basic type keywords and the const / volatile qualifiers. */
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID | |
/* The scanner returns these for _Bool, _Complex and _Imaginary. */
%token BOOL COMPLEX IMAGINARY | |
/* Aggregate and enum keywords, and the ... token. */
%token STRUCT UNION ENUM ELLIPSIS | |
/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN | |
/* Parsing starts at translation_unit. */
%start translation_unit | |
%% | |
primary_expression | |
: IDENTIFIER | |
| CONSTANT | |
| STRING_LITERAL | |
| '(' expression ')' | |
; | |
/* postfix_expression: a primary expression followed by any number of
 * postfix operators, or a compound literal. */
postfix_expression | |
: primary_expression | |
/* array subscript */
| postfix_expression '[' expression ']' | |
/* function call, without and with arguments */
| postfix_expression '(' ')' | |
| postfix_expression '(' argument_expression_list ')' | |
/* member access through . and through PTR_OP (the -> operator) */
| postfix_expression '.' IDENTIFIER | |
| postfix_expression PTR_OP IDENTIFIER | |
/* postfix increment and decrement */
| postfix_expression INC_OP | |
| postfix_expression DEC_OP | |
/* compound literal: parenthesised type name followed by a braced
 * initializer list; the second form allows a trailing comma */
| '(' type_name ')' '{' initializer_list '}' | |
| '(' type_name ')' '{' initializer_list ',' '}' | |
; | |
argument_expression_list | |
: assignment_expression | |
| argument_expression_list ',' assignment_expression | |
; | |
unary_expression | |
: postfix_expression | |
| INC_OP unary_expression | |
| DEC_OP unary_expression | |
| unary_operator cast_expression | |
| SIZEOF unary_expression | |
| SIZEOF '(' type_name ')' | |
; | |
unary_operator | |
: '&' | |
| '*' | |
| '+' | |
| '-' | |
| '~' | |
| '!' | |
; | |
cast_expression | |
: unary_expression | |
| '(' type_name ')' cast_expression | |
; | |
multiplicative_expression | |
: cast_expression | |
| multiplicative_expression '*' cast_expression | |
| multiplicative_expression '/' cast_expression | |
| multiplicative_expression '%' cast_expression | |
; | |
additive_expression | |
: multiplicative_expression | |
| additive_expression '+' multiplicative_expression | |
| additive_expression '-' multiplicative_expression | |
; | |
shift_expression | |
: additive_expression | |
| shift_expression LEFT_OP additive_expression | |
| shift_expression RIGHT_OP additive_expression | |
; | |
relational_expression | |
: shift_expression | |
| relational_expression '<' shift_expression | |
| relational_expression '>' shift_expression | |
| relational_expression LE_OP shift_expression | |
| relational_expression GE_OP shift_expression | |
; | |
equality_expression | |
: relational_expression | |
| equality_expression EQ_OP relational_expression | |
| equality_expression NE_OP relational_expression | |
; | |
and_expression | |
: equality_expression | |
| and_expression '&' equality_expression | |
; | |
exclusive_or_expression | |
: and_expression | |
| exclusive_or_expression '^' and_expression | |
; | |
inclusive_or_expression | |
: exclusive_or_expression | |
| inclusive_or_expression '|' exclusive_or_expression | |
; | |
logical_and_expression | |
: inclusive_or_expression | |
| logical_and_expression AND_OP inclusive_or_expression | |
; | |
logical_or_expression | |
: logical_and_expression | |
| logical_or_expression OR_OP logical_and_expression | |
; | |
conditional_expression | |
: logical_or_expression | |
| logical_or_expression '?' expression ':' conditional_expression | |
; | |
assignment_expression | |
: conditional_expression | |
| unary_expression assignment_operator assignment_expression | |
; | |
assignment_operator | |
: '=' | |
| MUL_ASSIGN | |
| DIV_ASSIGN | |
| MOD_ASSIGN | |
| ADD_ASSIGN | |
| SUB_ASSIGN | |
| LEFT_ASSIGN | |
| RIGHT_ASSIGN | |
| AND_ASSIGN | |
| XOR_ASSIGN | |
| OR_ASSIGN | |
; | |
expression | |
: assignment_expression | |
| expression ',' assignment_expression | |
; | |
constant_expression | |
: conditional_expression | |
; | |
declaration | |
: declaration_specifiers ';' | |
| declaration_specifiers init_declarator_list ';' | |
; | |
declaration_specifiers | |
: storage_class_specifier | |
| storage_class_specifier declaration_specifiers | |
| type_specifier | |
| type_specifier declaration_specifiers | |
| type_qualifier | |
| type_qualifier declaration_specifiers | |
| function_specifier | |
| function_specifier declaration_specifiers | |
; | |
init_declarator_list | |
: init_declarator | |
| init_declarator_list ',' init_declarator | |
; | |
init_declarator | |
: declarator | |
| declarator '=' initializer | |
; | |
storage_class_specifier | |
: TYPEDEF | |
| EXTERN | |
| STATIC | |
| AUTO | |
| REGISTER | |
; | |
type_specifier | |
: VOID | |
| CHAR | |
| SHORT | |
| INT | |
| LONG | |
| FLOAT | |
| DOUBLE | |
| SIGNED | |
| UNSIGNED | |
| BOOL | |
| COMPLEX | |
| IMAGINARY | |
| struct_or_union_specifier | |
| enum_specifier | |
| TYPE_NAME | |
; | |
struct_or_union_specifier | |
: struct_or_union IDENTIFIER '{' struct_declaration_list '}' | |
| struct_or_union '{' struct_declaration_list '}' | |
| struct_or_union IDENTIFIER | |
; | |
struct_or_union | |
: STRUCT | |
| UNION | |
; | |
struct_declaration_list | |
: struct_declaration | |
| struct_declaration_list struct_declaration | |
; | |
struct_declaration | |
: specifier_qualifier_list struct_declarator_list ';' | |
; | |
specifier_qualifier_list | |
: type_specifier specifier_qualifier_list | |
| type_specifier | |
| type_qualifier specifier_qualifier_list | |
| type_qualifier | |
; | |
struct_declarator_list | |
: struct_declarator | |
| struct_declarator_list ',' struct_declarator | |
; | |
struct_declarator | |
: declarator | |
| ':' constant_expression | |
| declarator ':' constant_expression | |
; | |
enum_specifier | |
: ENUM '{' enumerator_list '}' | |
| ENUM IDENTIFIER '{' enumerator_list '}' | |
| ENUM '{' enumerator_list ',' '}' | |
| ENUM IDENTIFIER '{' enumerator_list ',' '}' | |
| ENUM IDENTIFIER | |
; | |
enumerator_list | |
: enumerator | |
| enumerator_list ',' enumerator | |
; | |
enumerator | |
: IDENTIFIER | |
| IDENTIFIER '=' constant_expression | |
; | |
type_qualifier | |
: CONST | |
| RESTRICT | |
| VOLATILE | |
; | |
function_specifier | |
: INLINE | |
; | |
declarator | |
: pointer direct_declarator | |
| direct_declarator | |
; | |
/* direct_declarator: a declared name, a parenthesised declarator, or
 * either of those followed by array or function suffixes. */
direct_declarator | |
: IDENTIFIER | |
| '(' declarator ')' | |
/* array suffixes: optional qualifiers and an optional size expression */
| direct_declarator '[' type_qualifier_list assignment_expression ']' | |
| direct_declarator '[' type_qualifier_list ']' | |
| direct_declarator '[' assignment_expression ']' | |
/* array suffixes where STATIC appears inside the brackets */
| direct_declarator '[' STATIC type_qualifier_list assignment_expression ']' | |
| direct_declarator '[' type_qualifier_list STATIC assignment_expression ']' | |
/* a lone * inside the brackets: the C99 spelling for a variable length
 * array in a function prototype */
| direct_declarator '[' type_qualifier_list '*' ']' | |
| direct_declarator '[' '*' ']' | |
| direct_declarator '[' ']' | |
/* function suffixes: a typed parameter list, a bare list of identifiers
 * (old-style definition), or empty parentheses */
| direct_declarator '(' parameter_type_list ')' | |
| direct_declarator '(' identifier_list ')' | |
| direct_declarator '(' ')' | |
; | |
pointer | |
: '*' | |
| '*' type_qualifier_list | |
| '*' pointer | |
| '*' type_qualifier_list pointer | |
; | |
type_qualifier_list | |
: type_qualifier | |
| type_qualifier_list type_qualifier | |
; | |
parameter_type_list | |
: parameter_list | |
| parameter_list ',' ELLIPSIS | |
; | |
parameter_list | |
: parameter_declaration | |
| parameter_list ',' parameter_declaration | |
; | |
parameter_declaration | |
: declaration_specifiers declarator | |
| declaration_specifiers abstract_declarator | |
| declaration_specifiers | |
; | |
identifier_list | |
: IDENTIFIER | |
| identifier_list ',' IDENTIFIER | |
; | |
type_name | |
: specifier_qualifier_list | |
| specifier_qualifier_list abstract_declarator | |
; | |
abstract_declarator | |
: pointer | |
| direct_abstract_declarator | |
| pointer direct_abstract_declarator | |
; | |
direct_abstract_declarator | |
: '(' abstract_declarator ')' | |
| '[' ']' | |
| '[' assignment_expression ']' | |
| direct_abstract_declarator '[' ']' | |
| direct_abstract_declarator '[' assignment_expression ']' | |
| '[' '*' ']' | |
| direct_abstract_declarator '[' '*' ']' | |
| '(' ')' | |
| '(' parameter_type_list ')' | |
| direct_abstract_declarator '(' ')' | |
| direct_abstract_declarator '(' parameter_type_list ')' | |
; | |
initializer | |
: assignment_expression | |
| '{' initializer_list '}' | |
| '{' initializer_list ',' '}' | |
; | |
initializer_list | |
: initializer | |
| designation initializer | |
| initializer_list ',' initializer | |
| initializer_list ',' designation initializer | |
; | |
designation | |
: designator_list '=' | |
; | |
designator_list | |
: designator | |
| designator_list designator | |
; | |
designator | |
: '[' constant_expression ']' | |
| '.' IDENTIFIER | |
; | |
statement | |
: labeled_statement | |
| compound_statement | |
| expression_statement | |
| selection_statement | |
| iteration_statement | |
| jump_statement | |
; | |
labeled_statement | |
: IDENTIFIER ':' statement | |
| CASE constant_expression ':' statement | |
| DEFAULT ':' statement | |
; | |
compound_statement | |
: '{' '}' | |
| '{' block_item_list '}' | |
; | |
block_item_list | |
: block_item | |
| block_item_list block_item | |
; | |
block_item | |
: declaration | |
| statement | |
; | |
expression_statement | |
: ';' | |
| expression ';' | |
; | |
/* selection_statement: if, if-else and switch. */
selection_statement | |
/* NOTE(review): the two IF alternatives share a prefix, so an ELSE after
 * a nested if could attach to either one (the dangling else).  No
 * precedence declaration in this grammar resolves it; presumably it
 * relies on the parser generator preferring to shift, which binds ELSE
 * to the nearest IF.  Expect a shift/reduce conflict report; confirm. */
: IF '(' expression ')' statement | |
| IF '(' expression ')' statement ELSE statement | |
| SWITCH '(' expression ')' statement | |
; | |
iteration_statement | |
: WHILE '(' expression ')' statement | |
| DO statement WHILE '(' expression ')' ';' | |
| FOR '(' expression_statement expression_statement ')' statement | |
| FOR '(' expression_statement expression_statement expression ')' statement | |
| FOR '(' declaration expression_statement ')' statement | |
| FOR '(' declaration expression_statement expression ')' statement | |
; | |
jump_statement | |
: GOTO IDENTIFIER ';' | |
| CONTINUE ';' | |
| BREAK ';' | |
| RETURN ';' | |
| RETURN expression ';' | |
; | |
translation_unit | |
: external_declaration | |
| translation_unit external_declaration | |
; | |
external_declaration | |
: function_definition | |
| declaration | |
; | |
function_definition | |
: declaration_specifiers declarator declaration_list compound_statement | |
| declaration_specifiers declarator compound_statement | |
; | |
declaration_list | |
: declaration | |
| declaration_list declaration | |
; | |
%% | |
#include <stdio.h> | |
extern char yytext[]; | |
extern int column; | |
void yyerror(char const *s) | |
{ | |
fflush(stdout); | |
printf("\n%*s\n%*s\n", column, "^", column, s); | |
} |
The grammar does not seem to include a simple assignment of a constant, e.g. `int result = 1;`. Is that right?
Yes, but where does it allow for a constant value?
The grammar does not seem to include a simple assignment of a constant, e.g. `int result = 1;`. Is that right?
Yes, but where does it allow for a constant value?
Starting here:
https://gist.github.com/codebrainz/2933703#file-c99-y-L355
From there it weaves its way through the rules: `init_declarator` accepts `declarator '=' initializer`, and `initializer` accepts an `assignment_expression`. That leads to `conditional_expression`, then to `logical_or_expression`, and so on down through all the precedence levels until it eventually reaches `primary_expression`, where it matches `CONSTANT`. If you have a printer, you could try printing the grammar and drawing a line through the rules to see how it gets there after many layers of indirection.
What does the '[' '*' ']' syntax mean?
@AngheloAlf
It is the C99 syntax for a VLA in a function prototype.
It cannot handle adjacent string literals (which C concatenates into a single string), like this: "multiple""quote""string""literal"
Your example case is too easy. I think about this case:
char const *m = "Thi" "s should wo"
"rk" " t"
"oo";
I fixed it by using ([LUu]?\"(\\.|[^\\\"\n])*\"[ \f\v\t\r\n]*)*
Then you have to parse the string with a state machine, but you already needed to do that to implement escapes such as \x61, \n, \r, \t, etc. This just adds one more bit of state: whether you are inside a double-quoted segment or not. Enforce that the input is only whitespace when you are not inside a double-quoted segment, and only emit to the resulting string buffer when you are inside one. Flex should only give you valid input with my regex fix.
https://gist.github.com/codebrainz/2933703#file-c99-y-L179