Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
C99 Lex/Flex & YACC/Bison Grammars
/* Named sub-patterns referenced as {NAME} by the token rules. */
/* D: one decimal digit. */
D [0-9]
/* L: a letter or underscore, i.e. a character that may begin an identifier. */
L [a-zA-Z_]
/* H: one hexadecimal digit. */
H [a-fA-F0-9]
/* E: decimal exponent part (e or E, optional sign, digits). */
E ([Ee][+-]?{D}+)
/* P: binary exponent part (p or P, optional sign, digits) used by the hexadecimal floating rules. */
P ([Pp][+-]?{D}+)
/* FS: floating-constant suffix. */
FS (f|F|l|L)
/* IS: integer-constant suffix: u alone, l/ll with an optional leading u, or l/ll followed by u. */
IS ((u|U)|(u|U)?(l|L|ll|LL)|(l|L|ll|LL)(u|U))
%{
#include <stdio.h>
#include "y.tab.h"

/*
 * Prototypes for the helpers defined in the user-code section at the end of
 * this file.  The rule actions call all three before their definitions are
 * seen, so each needs a declaration here; otherwise the compiler falls back
 * to (or rejects) an implicit declaration, which for comment() also conflicts
 * with its later void definition.
 */
void count(void);
void comment(void);
int check_type(void);
%}
%%
"/*" { /* block comment: comment() reads and discards input through the closing delimiter */ comment(); }
"//"[^\n]* { /* consume //-comment */ }
 /* Keywords. They are listed before the identifier rule so that, when both match the same text, the keyword rule is chosen. */
"auto" { count(); return(AUTO); }
"_Bool" { count(); return(BOOL); }
"break" { count(); return(BREAK); }
"case" { count(); return(CASE); }
"char" { count(); return(CHAR); }
"_Complex" { count(); return(COMPLEX); }
"const" { count(); return(CONST); }
"continue" { count(); return(CONTINUE); }
"default" { count(); return(DEFAULT); }
"do" { count(); return(DO); }
"double" { count(); return(DOUBLE); }
"else" { count(); return(ELSE); }
"enum" { count(); return(ENUM); }
"extern" { count(); return(EXTERN); }
"float" { count(); return(FLOAT); }
"for" { count(); return(FOR); }
"goto" { count(); return(GOTO); }
"if" { count(); return(IF); }
"_Imaginary" { count(); return(IMAGINARY); }
"inline" { count(); return(INLINE); }
"int" { count(); return(INT); }
"long" { count(); return(LONG); }
"register" { count(); return(REGISTER); }
"restrict" { count(); return(RESTRICT); }
"return" { count(); return(RETURN); }
"short" { count(); return(SHORT); }
"signed" { count(); return(SIGNED); }
"sizeof" { count(); return(SIZEOF); }
"static" { count(); return(STATIC); }
"struct" { count(); return(STRUCT); }
"switch" { count(); return(SWITCH); }
"typedef" { count(); return(TYPEDEF); }
"union" { count(); return(UNION); }
"unsigned" { count(); return(UNSIGNED); }
"void" { count(); return(VOID); }
"volatile" { count(); return(VOLATILE); }
"while" { count(); return(WHILE); }
 /* Identifiers: check_type() picks the token code to return. */
{L}({L}|{D})* { count(); return(check_type()); }
 /* Integer constants: hexadecimal, octal (including a lone 0), then decimal, each with an optional suffix. */
0[xX]{H}+{IS}? { count(); return(CONSTANT); }
0[0-7]*{IS}? { count(); return(CONSTANT); }
[1-9]{D}*{IS}? { count(); return(CONSTANT); }
 /* Character constant with an optional L prefix; the body is one or more escapes or plain characters. */
L?'(\\.|[^\\'\n])+' { count(); return(CONSTANT); }
 /* Decimal floating constants: digits with exponent, or a decimal point with digits on at least one side. */
{D}+{E}{FS}? { count(); return(CONSTANT); }
{D}*"."{D}+{E}?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*{E}?{FS}? { count(); return(CONSTANT); }
 /* Hexadecimal floating constants, using the binary exponent P. */
0[xX]{H}+{P}{FS}? { count(); return(CONSTANT); }
0[xX]{H}*"."{H}+{P}?{FS}? { count(); return(CONSTANT); }
0[xX]{H}+"."{H}*{P}?{FS}? { count(); return(CONSTANT); }
 /* String literal with an optional L prefix; the pattern does not admit a newline inside the quotes. */
L?\"(\\.|[^\\"\n])*\" { count(); return(STRING_LITERAL); }
 /* Multi-character operators and punctuators. */
"..." { count(); return(ELLIPSIS); }
">>=" { count(); return(RIGHT_ASSIGN); }
"<<=" { count(); return(LEFT_ASSIGN); }
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"/=" { count(); return(DIV_ASSIGN); }
"%=" { count(); return(MOD_ASSIGN); }
"&=" { count(); return(AND_ASSIGN); }
"^=" { count(); return(XOR_ASSIGN); }
"|=" { count(); return(OR_ASSIGN); }
">>" { count(); return(RIGHT_OP); }
"<<" { count(); return(LEFT_OP); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"->" { count(); return(PTR_OP); }
"&&" { count(); return(AND_OP); }
"||" { count(); return(OR_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
 /* Single-character punctuators are returned as their own character code; the two-character digraph spellings map to the same codes as the brace and bracket characters. */
";" { count(); return(';'); }
("{"|"<%") { count(); return('{'); }
("}"|"%>") { count(); return('}'); }
"," { count(); return(','); }
":" { count(); return(':'); }
"=" { count(); return('='); }
"(" { count(); return('('); }
")" { count(); return(')'); }
("["|"<:") { count(); return('['); }
("]"|":>") { count(); return(']'); }
"." { count(); return('.'); }
"&" { count(); return('&'); }
"!" { count(); return('!'); }
"~" { count(); return('~'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"/" { count(); return('/'); }
"%" { count(); return('%'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
"^" { count(); return('^'); }
"|" { count(); return('|'); }
"?" { count(); return('?'); }
 /* Whitespace is passed to count() but produces no token. */
[ \t\v\n\f] { count(); }
 /* Any other character is dropped: no token is returned and count() is not called for it. */
. { /* Add code to complain about unmatched characters */ }
%%
/*
 * End-of-input hook called by the scanner.  Always answers 1, which tells the
 * scanner that no further input is available.
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * Skip the rest of a block comment.  Called after the opening delimiter has
 * been matched; reads characters with input() until a '*' immediately
 * followed by '/' has been consumed.
 *
 * If the input ends first, a diagnostic is written to stderr and the function
 * returns normally.
 */
void comment(void)
{
    int c;        /* int rather than char so that an EOF result is not truncated */
    int prev = 0; /* previously read character; 0 means "none yet" */

    /*
     * Scanner generators differ in how input() signals end of input (0 in
     * traditional lex, EOF in some flex releases), so stop on either value.
     */
    while ((c = input()) != 0 && c != EOF)
    {
        if (c == '/' && prev == '*')
            return;
        prev = c;
    }
    fprintf(stderr, "unterminated comment\n");
}
/* Column reached on the current line by the text passed through count(). */
int column = 0;

/*
 * Advance `column` over the current lexeme (yytext) and then echo the lexeme.
 *
 * A newline resets the column to 0, a tab moves it to the next multiple of 8,
 * and every other character advances it by one.
 */
void count(void)
{
    const char *p = yytext;

    while (*p != '\0')
    {
        switch (*p)
        {
        case '\n':
            column = 0;
            break;
        case '\t':
            column += 8 - (column % 8);
            break;
        default:
            column++;
            break;
        }
        p++;
    }
    ECHO;
}
/*
 * Choose the token code for an identifier-shaped lexeme.
 *
 * Intended behaviour (pseudo code, not implemented):
 *
 *     if (yytext == type_name)
 *         return TYPE_NAME;
 *     return IDENTIFIER;
 *
 * As written, no type lookup takes place and the result is always IDENTIFIER.
 */
int check_type(void)
{
    int token = IDENTIFIER;

    return token;
}
%{
/*
 * Declarations for the two functions the generated parser calls but does not
 * define before use: the scanner entry point and the error reporter (the
 * latter is defined in the epilogue of this file).  Without them a C99 or
 * later compiler sees implicit function declarations in the generated parser.
 */
int yylex(void);
void yyerror(char const *s);
%}

/* Token kinds supplied by the scanner; single-character punctuators are
   passed as their character codes and need no declaration here. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME
%token TYPEDEF EXTERN STATIC AUTO REGISTER INLINE RESTRICT
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token BOOL COMPLEX IMAGINARY
%token STRUCT UNION ENUM ELLIPSIS
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

/* Parsing starts at a whole translation unit. */
%start translation_unit
%%
/* Operands: a name, a constant, a string, or a parenthesised expression. */
primary_expression
: IDENTIFIER
| CONSTANT
| string_literal
| '(' expression ')'
;
/* One or more adjacent string literal tokens.  C treats "a" "b" as a single
   string, and the scanner delivers each quoted piece as a separate
   STRING_LITERAL, so the sequence is accepted here. */
string_literal
: STRING_LITERAL
| string_literal STRING_LITERAL
;
/* Postfix forms: subscript, call (with or without arguments), member access
   through . and ->, postfix ++ and --, and compound literals with an optional
   trailing comma. */
postfix_expression
: primary_expression
| postfix_expression '[' expression ']'
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
| postfix_expression '.' IDENTIFIER
| postfix_expression PTR_OP IDENTIFIER
| postfix_expression INC_OP
| postfix_expression DEC_OP
| '(' type_name ')' '{' initializer_list '}'
| '(' type_name ')' '{' initializer_list ',' '}'
;
/* Comma-separated call arguments; each is an assignment_expression, so a
   bare comma operator cannot appear unparenthesised. */
argument_expression_list
: assignment_expression
| argument_expression_list ',' assignment_expression
;
/* Prefix forms: ++/--, the unary operators, and both spellings of sizeof. */
unary_expression
: postfix_expression
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator cast_expression
| SIZEOF unary_expression
| SIZEOF '(' type_name ')'
;
unary_operator
: '&'
| '*'
| '+'
| '-'
| '~'
| '!'
;
/* A cast applies to another cast_expression, so casts may be chained. */
cast_expression
: unary_expression
| '(' type_name ')' cast_expression
;
/* Binary operators.  Each level below is left-recursive and is defined in
   terms of the previous level, so the levels run from tightest to loosest
   binding and operators within a level group left to right. */
multiplicative_expression
: cast_expression
| multiplicative_expression '*' cast_expression
| multiplicative_expression '/' cast_expression
| multiplicative_expression '%' cast_expression
;
additive_expression
: multiplicative_expression
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;
shift_expression
: additive_expression
| shift_expression LEFT_OP additive_expression
| shift_expression RIGHT_OP additive_expression
;
relational_expression
: shift_expression
| relational_expression '<' shift_expression
| relational_expression '>' shift_expression
| relational_expression LE_OP shift_expression
| relational_expression GE_OP shift_expression
;
equality_expression
: relational_expression
| equality_expression EQ_OP relational_expression
| equality_expression NE_OP relational_expression
;
and_expression
: equality_expression
| and_expression '&' equality_expression
;
exclusive_or_expression
: and_expression
| exclusive_or_expression '^' and_expression
;
inclusive_or_expression
: exclusive_or_expression
| inclusive_or_expression '|' exclusive_or_expression
;
logical_and_expression
: inclusive_or_expression
| logical_and_expression AND_OP inclusive_or_expression
;
logical_or_expression
: logical_and_expression
| logical_or_expression OR_OP logical_and_expression
;
/* The ?: operator; the last operand is itself a conditional_expression, so
   nested conditionals group to the right. */
conditional_expression
: logical_or_expression
| logical_or_expression '?' expression ':' conditional_expression
;
/* Assignment is right-recursive: a = b = c groups as a = (b = c). */
assignment_expression
: conditional_expression
| unary_expression assignment_operator assignment_expression
;
assignment_operator
: '='
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
| LEFT_ASSIGN
| RIGHT_ASSIGN
| AND_ASSIGN
| XOR_ASSIGN
| OR_ASSIGN
;
/* Full expression, including the comma operator. */
expression
: assignment_expression
| expression ',' assignment_expression
;
/* Syntactically just a conditional_expression, which keeps assignment and
   comma operators out of the top level; constancy itself is not checked by
   the grammar. */
constant_expression
: conditional_expression
;
/* A declaration: specifiers followed by an optional list of declarators. */
declaration
: declaration_specifiers ';'
| declaration_specifiers init_declarator_list ';'
;
/* Any non-empty sequence of storage-class specifiers, type specifiers, type
   qualifiers and function specifiers, in any order.  The grammar does not
   check that the combination is meaningful. */
declaration_specifiers
: storage_class_specifier
| storage_class_specifier declaration_specifiers
| type_specifier
| type_specifier declaration_specifiers
| type_qualifier
| type_qualifier declaration_specifiers
| function_specifier
| function_specifier declaration_specifiers
;
init_declarator_list
: init_declarator
| init_declarator_list ',' init_declarator
;
/* A declarator with an optional initializer. */
init_declarator
: declarator
| declarator '=' initializer
;
storage_class_specifier
: TYPEDEF
| EXTERN
| STATIC
| AUTO
| REGISTER
;
/* Built-in type keywords, struct/union/enum specifiers, or a TYPE_NAME token
   (which the scanner has to supply for typedef names). */
type_specifier
: VOID
| CHAR
| SHORT
| INT
| LONG
| FLOAT
| DOUBLE
| SIGNED
| UNSIGNED
| BOOL
| COMPLEX
| IMAGINARY
| struct_or_union_specifier
| enum_specifier
| TYPE_NAME
;
/* Tagged definition, untagged definition, or a reference by tag only. */
struct_or_union_specifier
: struct_or_union IDENTIFIER '{' struct_declaration_list '}'
| struct_or_union '{' struct_declaration_list '}'
| struct_or_union IDENTIFIER
;
struct_or_union
: STRUCT
| UNION
;
struct_declaration_list
: struct_declaration
| struct_declaration_list struct_declaration
;
/* One member declaration; at least one declarator is required. */
struct_declaration
: specifier_qualifier_list struct_declarator_list ';'
;
/* Like declaration_specifiers but limited to type specifiers and qualifiers. */
specifier_qualifier_list
: type_specifier specifier_qualifier_list
| type_specifier
| type_qualifier specifier_qualifier_list
| type_qualifier
;
struct_declarator_list
: struct_declarator
| struct_declarator_list ',' struct_declarator
;
/* A member declarator; the forms with ':' are bit-fields, the first of them
   unnamed. */
struct_declarator
: declarator
| ':' constant_expression
| declarator ':' constant_expression
;
/* Enum definition with or without a tag (a trailing comma after the last
   enumerator is accepted), or a reference by tag only. */
enum_specifier
: ENUM '{' enumerator_list '}'
| ENUM IDENTIFIER '{' enumerator_list '}'
| ENUM '{' enumerator_list ',' '}'
| ENUM IDENTIFIER '{' enumerator_list ',' '}'
| ENUM IDENTIFIER
;
enumerator_list
: enumerator
| enumerator_list ',' enumerator
;
/* An enumerator name with an optional explicit value. */
enumerator
: IDENTIFIER
| IDENTIFIER '=' constant_expression
;
type_qualifier
: CONST
| RESTRICT
| VOLATILE
;
function_specifier
: INLINE
;
/* A direct declarator, optionally preceded by pointer stars. */
declarator
: pointer direct_declarator
| direct_declarator
;
/*
 * The declared name followed by any number of array or function suffixes.
 *
 * Array suffixes cover: an optional qualifier list with an optional size,
 * the `static` forms (static before or after the qualifier list, and static
 * with no qualifiers at all), and `[*]`, which denotes a variable length
 * array of unspecified size.
 *
 * Function suffixes cover: a prototype parameter list, an identifier-only
 * parameter list, and empty parentheses.
 */
direct_declarator
: IDENTIFIER
| '(' declarator ')'
| direct_declarator '[' type_qualifier_list assignment_expression ']'
| direct_declarator '[' type_qualifier_list ']'
| direct_declarator '[' assignment_expression ']'
| direct_declarator '[' STATIC type_qualifier_list assignment_expression ']'
| direct_declarator '[' STATIC assignment_expression ']'
| direct_declarator '[' type_qualifier_list STATIC assignment_expression ']'
| direct_declarator '[' type_qualifier_list '*' ']'
| direct_declarator '[' '*' ']'
| direct_declarator '[' ']'
| direct_declarator '(' parameter_type_list ')'
| direct_declarator '(' identifier_list ')'
| direct_declarator '(' ')'
;
/* One or more '*', each optionally followed by type qualifiers. */
pointer
: '*'
| '*' type_qualifier_list
| '*' pointer
| '*' type_qualifier_list pointer
;
type_qualifier_list
: type_qualifier
| type_qualifier_list type_qualifier
;
/* A parameter list, optionally ending in ", ..." . */
parameter_type_list
: parameter_list
| parameter_list ',' ELLIPSIS
;
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;
/* A parameter may be named (declarator), unnamed with a shape
   (abstract_declarator), or given by its specifiers alone. */
parameter_declaration
: declaration_specifiers declarator
| declaration_specifiers abstract_declarator
| declaration_specifiers
;
/* Bare comma-separated names, used by the identifier-list form of a function
   declarator. */
identifier_list
: IDENTIFIER
| identifier_list ',' IDENTIFIER
;
/* A type written without a declared name, as used in casts, sizeof and
   compound literals. */
type_name
: specifier_qualifier_list
| specifier_qualifier_list abstract_declarator
;
/* A declarator with the identifier left out. */
abstract_declarator
: pointer
| direct_abstract_declarator
| pointer direct_abstract_declarator
;
/* Array and function suffixes without a name; every suffix form appears
   twice, once standing alone and once following an earlier
   direct_abstract_declarator. */
direct_abstract_declarator
: '(' abstract_declarator ')'
| '[' ']'
| '[' assignment_expression ']'
| direct_abstract_declarator '[' ']'
| direct_abstract_declarator '[' assignment_expression ']'
| '[' '*' ']'
| direct_abstract_declarator '[' '*' ']'
| '(' ')'
| '(' parameter_type_list ')'
| direct_abstract_declarator '(' ')'
| direct_abstract_declarator '(' parameter_type_list ')'
;
/* A single expression or a brace-enclosed list, where a trailing comma is
   accepted. */
initializer
: assignment_expression
| '{' initializer_list '}'
| '{' initializer_list ',' '}'
;
/* Comma-separated initializers, each optionally preceded by a designation. */
initializer_list
: initializer
| designation initializer
| initializer_list ',' initializer
| initializer_list ',' designation initializer
;
/* One or more designators followed by '='. */
designation
: designator_list '='
;
designator_list
: designator
| designator_list designator
;
/* An array index in brackets or a member name after '.'. */
designator
: '[' constant_expression ']'
| '.' IDENTIFIER
;
/* Every statement is exactly one of the six kinds below. */
statement
: labeled_statement
| compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
/* A goto label, a case label, or the default label, each attached to a
   statement. */
labeled_statement
: IDENTIFIER ':' statement
| CASE constant_expression ':' statement
| DEFAULT ':' statement
;
/* A brace-enclosed block, possibly empty. */
compound_statement
: '{' '}'
| '{' block_item_list '}'
;
block_item_list
: block_item
| block_item_list block_item
;
/* Declarations and statements may be interleaved freely inside a block. */
block_item
: declaration
| statement
;
/* An optional expression followed by ';' (the bare ';' is the empty
   statement). */
expression_statement
: ';'
| expression ';'
;
/* The two IF alternatives make "if (a) if (b) s; else t;" ambiguous (the
   dangling else).  yacc reports this as a shift/reduce conflict and, by
   default, resolves it by shifting, which pairs ELSE with the nearest IF. */
selection_statement
: IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement
| SWITCH '(' expression ')' statement
;
/* Loops.  The FOR forms reuse expression_statement for the clauses that end
   in ';', and the first clause may instead be a declaration; the final
   expression is optional. */
iteration_statement
: WHILE '(' expression ')' statement
| DO statement WHILE '(' expression ')' ';'
| FOR '(' expression_statement expression_statement ')' statement
| FOR '(' expression_statement expression_statement expression ')' statement
| FOR '(' declaration expression_statement ')' statement
| FOR '(' declaration expression_statement expression ')' statement
;
jump_statement
: GOTO IDENTIFIER ';'
| CONTINUE ';'
| BREAK ';'
| RETURN ';'
| RETURN expression ';'
;
/* Start symbol: one or more external declarations. */
translation_unit
: external_declaration
| translation_unit external_declaration
;
external_declaration
: function_definition
| declaration
;
/* A function definition.  The first form places a list of declarations
   between the declarator and the body; the second goes straight to the
   body. */
function_definition
: declaration_specifiers declarator declaration_list compound_statement
| declaration_specifiers declarator compound_statement
;
declaration_list
: declaration
| declaration_list declaration
;
%%
#include <stdio.h>
extern char yytext[];
extern int column;
/*
 * Report a parse error on stdout.
 *
 * Flushes stdout, starts a new line, prints a '^' right-aligned in a field
 * `column` characters wide, and prints the message `s` on the following line,
 * right-aligned in the same width.
 */
void yyerror(char const *s)
{
    fflush(stdout);
    printf("\n%*s\n", column, "^");
    printf("%*s\n", column, s);
}
@codebrainz
Copy link
Author

codebrainz commented Jun 14, 2012

@thonnyhu
Copy link

thonnyhu commented Sep 30, 2014

How to use this?

@ivladak
Copy link

ivladak commented Mar 26, 2015

This grammar is incomplete. For example it cannot handle string literals in multiple quotes, like this: "multiple""quote""string""literal""

This version seems much better:
http://www.quut.com/c/ANSI-C-grammar-y-2011.html
http://www.quut.com/c/ANSI-C-grammar-l-2011.html

@jesshack10
Copy link

jesshack10 commented Aug 3, 2016

Hi, I am developing a project, and every version of the C syntax I have tried has given me errors when I tried to compile it. Do you have any recommendations for getting it to compile? I am using Flex and Bison.

@jooseongjun
Copy link

jooseongjun commented Jan 31, 2017

thx!

@ForeverZyh
Copy link

ForeverZyh commented Mar 26, 2017

thx! and also @ivladak

@Renu1996
Copy link

Renu1996 commented Jul 19, 2017

Hi, how to use this code in ubuntu? It didn't work with the usual lex filename.l, yacc -d filename.y, gcc yy.lex.c yy.tab.h -ll
When I do ./a.out and type some C syntax, it does nothing
So how exactly to use this grammar. Thanks in advance!

@bujoralexandru
Copy link

bujoralexandru commented Dec 5, 2017

@Renu1996
You should look inside the lex/flex specification file. For example purposes, lets say you set eyes on this line:
"&" { count(); return('&'); }
You see the count() function is called for every match that lex/flex executes, so if you want lex/flex to "talk" back to you, you should go inside count() definition and right before the closing bracket write printf("Found something: %s\n", yytext);.

@jesshack10
Compile it using gcc command, not g++ or any C++ specific command. Can you be more specific about the errors that you receive?

@cronof
Copy link

cronof commented Feb 15, 2018

Wow! This is an easy-to-understand example, thank you.

@stevefan1999-personal
Copy link

stevefan1999-personal commented Jun 7, 2018

Hello, I wonder what is the license of this code?

@VictorEijkhout
Copy link

VictorEijkhout commented Sep 24, 2018

Doesn't this have multiple reduce/reduce conflicts? For instance, there are several rules that expand to IDENTIFIER, with nothing before or after it. I'm trying to adapt this to the "ply" python-lex-yacc module and it complains.

@AngheloAlf
Copy link

AngheloAlf commented Jan 10, 2020

What does the '[' '*' ']' syntax mean?

@pmor13
Copy link

pmor13 commented Jan 19, 2022

@thonnyhu @Renu1996

How to use this?
So how exactly to use this grammar.

flex c99.l && yacc -d c99.y && gcc lex.yy.c y.tab.c -o c99

Also you need a driver. Example:

extern int yyparse();
extern FILE* yyin;

/*
 * Minimal driver: parse the file named by the first command-line argument.
 *
 * Returns 1 when no file name is given or the file cannot be opened;
 * otherwise returns whatever yyparse() returns.
 */
int main(int argc, char *argv[])
{
    int status;

    /* argv[1] is only valid when an argument was actually supplied. */
    if(argc < 2)
    {
        fprintf(stderr, "usage: c99 <source-file>\n");
        return 1;
    }
    yyin = fopen(argv[1], "r");
    if(!yyin)
    {
        printf("couldn't open file for reading\n");
        return 1;
    }
    //yydebug = 1;
    status = yyparse();
    fclose(yyin); /* release the input file once parsing has finished */
    return status;
}

You can put it at the end of c99.y.

@maphoon-parsing
Copy link

maphoon-parsing commented Apr 9, 2022

Hello, I want to use your grammar for testing my own C++ parser generator. Is it OK if I take it and adopt it?
Hans de Nivelle

@codebrainz
Copy link
Author

codebrainz commented Apr 9, 2022

@maphoon-parsing any additions I made to the grammars are OK to use as you wish. They're (almost) entirely based on the sources in the first comment, which these days have updated syntax listed here:

http://www.quut.com/c/ANSI-C-grammar-l-2011.html
http://www.quut.com/c/ANSI-C-grammar-y.html

Best to check with license (if any) of the original source.

@maphoon-parsing
Copy link

maphoon-parsing commented Apr 9, 2022

Thanks, I will have a lecture about my parser generator at C++Now, and for sure people are going to ask me 'can you do a real language, like C'?
I understand that the tokenizer needs to have access to the type information, in order to deal with expressions of form a * b; Is that right? Are there more problems? The preprocessor is a big problem of course.

@codebrainz
Copy link
Author

codebrainz commented Apr 9, 2022

I will have a lecture about my parser generator at C++Now

Cool, if they record a video of your lecture, please link it here.

I understand that the tokenizer needs to have access to the type information, in order to deal with expressions of form a * b; Is that right?

Yeah, C/C++ is rubbish to parse. You can mix type resolution into these stages or produce ambiguous nodes and try to resolve later in further passes.

Are there more problems?

One that comes to mind is the C++ most-vexing parse: https://en.wikipedia.org/wiki/Most_vexing_parse

Most newer languages tweak the syntax to avoid such ambiguities, which along with being able to support multiple passes makes everything much cleaner.

@maphoon-parsing
Copy link

maphoon-parsing commented Apr 9, 2022

Thanks, they do record all sessions. They have some waiting time where the sessions are accessible to sponsors only, but after this time they become public.

I will ask Jutta too for permission. B.t.w. the parser generator is here, but I will upload a better version soon.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment