Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
Created June 22, 2018 22:10
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save hoehrmann/cd4ecd6c28842c03d4a3b243fdf3a4ee to your computer and use it in GitHub Desktop.
Save hoehrmann/cd4ecd6c28842c03d4a3b243fdf3a4ee to your computer and use it in GitHub Desktop.
C11 Lexer grammar
token = _
/ keyword
/ identifier
/ constant
/ string-literal
/ punctuator
preprocessing-token = _
/ header-name
/ identifier
/ pp-number
/ character-constant
/ string-literal
/ punctuator
/ <each non-white-space character that cannot be one of the above>
keyword = _
/ 'auto'
/ 'if'
/ 'unsigned'
/ 'break'
/ 'inline'
/ 'void'
/ 'case'
/ 'int'
/ 'volatile'
/ 'char'
/ 'long'
/ 'while'
/ 'const'
/ 'register'
/ '_Alignas'
/ 'continue'
/ 'restrict'
/ '_Alignof'
/ 'default'
/ 'return'
/ '_Atomic'
/ 'do'
/ 'short'
/ '_Bool'
/ 'double'
/ 'signed'
/ '_Complex'
/ 'else'
/ 'sizeof'
/ '_Generic'
/ 'enum'
/ 'static'
/ '_Imaginary'
/ 'extern'
/ 'struct'
/ '_Noreturn'
/ 'float'
/ 'switch'
/ '_Static_assert'
/ 'for'
/ 'typedef'
/ '_Thread_local'
/ 'goto'
/ 'union'
identifier = _
/ identifier-nondigit
/ identifier identifier-nondigit
/ identifier digit
identifier-nondigit = _
/ nondigit
/ universal-character-name
/ <other implementation-defined characters>
nondigit = _
/ '_'
/ 'a'
/ 'b'
/ 'c'
/ 'd'
/ 'e'
/ 'f'
/ 'g'
/ 'h'
/ 'i'
/ 'j'
/ 'k'
/ 'l'
/ 'm'
/ 'n'
/ 'o'
/ 'p'
/ 'q'
/ 'r'
/ 's'
/ 't'
/ 'u'
/ 'v'
/ 'w'
/ 'x'
/ 'y'
/ 'z'
/ 'A'
/ 'B'
/ 'C'
/ 'D'
/ 'E'
/ 'F'
/ 'G'
/ 'H'
/ 'I'
/ 'J'
/ 'K'
/ 'L'
/ 'M'
/ 'N'
/ 'O'
/ 'P'
/ 'Q'
/ 'R'
/ 'S'
/ 'T'
/ 'U'
/ 'V'
/ 'W'
/ 'X'
/ 'Y'
/ 'Z'
digit = _
/ '0'
/ '1'
/ '2'
/ '3'
/ '4'
/ '5'
/ '6'
/ '7'
/ '8'
/ '9'
universal-character-name = _
/ '\u' hex-quad
/ '\U' hex-quad hex-quad
hex-quad = _
/ hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
constant = _
/ integer-constant
/ floating-constant
/ enumeration-constant
/ character-constant
integer-constant = _
/ decimal-constant integer-suffix?
/ octal-constant integer-suffix?
/ hexadecimal-constant integer-suffix?
decimal-constant = _
/ nonzero-digit
/ decimal-constant digit
octal-constant = _
/ '0'
/ octal-constant octal-digit
hexadecimal-constant = _
/ hexadecimal-prefix hexadecimal-digit
/ hexadecimal-constant hexadecimal-digit
hexadecimal-prefix = _
/ '0x'
/ '0X'
nonzero-digit = _
/ '1'
/ '2'
/ '3'
/ '4'
/ '5'
/ '6'
/ '7'
/ '8'
/ '9'
octal-digit = _
/ '0'
/ '1'
/ '2'
/ '3'
/ '4'
/ '5'
/ '6'
/ '7'
hexadecimal-digit = _
/ '0'
/ '1'
/ '2'
/ '3'
/ '4'
/ '5'
/ '6'
/ '7'
/ '8'
/ '9'
/ 'a'
/ 'b'
/ 'c'
/ 'd'
/ 'e'
/ 'f'
/ 'A'
/ 'B'
/ 'C'
/ 'D'
/ 'E'
/ 'F'
integer-suffix = _
/ unsigned-suffix long-suffix?
/ unsigned-suffix long-long-suffix
/ long-suffix unsigned-suffix?
/ long-long-suffix unsigned-suffix?
unsigned-suffix = _
/ 'u'
/ 'U'
long-suffix = _
/ 'l'
/ 'L'
long-long-suffix = _
/ 'll'
/ 'LL'
floating-constant = _
/ decimal-floating-constant
/ hexadecimal-floating-constant
decimal-floating-constant = _
/ fractional-constant exponent-part? floating-suffix?
/ digit-sequence exponent-part floating-suffix?
hexadecimal-floating-constant = _
/ hexadecimal-prefix hexadecimal-fractional-constant binary-exponent-part floating-suffix?
/ hexadecimal-prefix hexadecimal-digit-sequence binary-exponent-part floating-suffix?
fractional-constant = _
/ digit-sequence? '.' digit-sequence
/ digit-sequence '.'
exponent-part = _
/ 'e' sign? digit-sequence
/ 'E' sign? digit-sequence
sign = _
/ '+'
/ '-'
digit-sequence = _
/ digit
/ digit-sequence digit
hexadecimal-fractional-constant = _
/ hexadecimal-digit-sequence? '.' hexadecimal-digit-sequence
/ hexadecimal-digit-sequence '.'
binary-exponent-part = _
/ 'p' sign? digit-sequence
/ 'P' sign? digit-sequence
hexadecimal-digit-sequence = _
/ hexadecimal-digit
/ hexadecimal-digit-sequence hexadecimal-digit
floating-suffix = _
/ 'f'
/ 'l'
/ 'F'
/ 'L'
enumeration-constant =
identifier
character-constant = _
/ q<'> c-char-sequence q<'>
/ q<L'> c-char-sequence q<'>
/ q<u'> c-char-sequence q<'>
/ q<U'> c-char-sequence q<'>
c-char-sequence = _
/ c-char
/ c-char-sequence c-char
c-char = _
/ <any member of the source character set except
the single-quote ', backslash \, or new-line
character>
/ escape-sequence
escape-sequence = _
/ simple-escape-sequence
/ octal-escape-sequence
/ hexadecimal-escape-sequence
/ universal-character-name
simple-escape-sequence = _
/ q<\'>
/ q<\">
/ q<\?>
/ q<\\>
/ q<\a>
/ q<\b>
/ q<\f>
/ q<\n>
/ q<\r>
/ q<\t>
/ q<\v>
octal-escape-sequence = _
/ '\' octal-digit
/ '\' octal-digit octal-digit
/ '\' octal-digit octal-digit octal-digit
hexadecimal-escape-sequence = _
/ '\x' hexadecimal-digit
/ hexadecimal-escape-sequence hexadecimal-digit
string-literal =
encoding-prefix? '"' s-char-sequence? '"'
encoding-prefix = _
/ 'u8'
/ 'u'
/ 'U'
/ 'L'
s-char-sequence = _
/ s-char
/ s-char-sequence s-char
s-char = _
/ <any member of the source character set except
the double-quote ", backslash \, or new-line character>
/ escape-sequence
punctuator = _
/ '['
/ ']'
/ '('
/ ')'
/ '{'
/ '}'
/ '.'
/ '->'
/ '++'
/ '--'
/ '&'
/ '*'
/ '+'
/ '-'
/ '~'
/ '!'
/ '/'
/ '%'
/ '<<'
/ '>>'
/ '<'
/ '>'
/ '<='
/ '>='
/ '=='
/ '!='
/ '^'
/ '|'
/ '&&'
/ '||'
/ '?'
/ ':'
/ ';'
/ '...'
/ '='
/ '*='
/ '/='
/ '%='
/ '+='
/ '-='
/ '<<='
/ '>>='
/ '&='
/ '^='
/ '|='
/ ','
/ '#'
/ '##'
/ '<:'
/ ':>'
/ '<%'
/ '%>'
/ '%:'
/ '%:%:'
header-name = _
/ '<' h-char-sequence '>'
/ '"' q-char-sequence '"'
h-char-sequence = _
/ h-char
/ h-char-sequence h-char
h-char =
<any member of the source character set except
the new-line character and \>>
q-char-sequence = _
/ q-char
/ q-char-sequence q-char
q-char =
<any member of the source character set except
the new-line character and ">
pp-number = _
/ digit
/ '.' digit
/ pp-number digit
/ pp-number identifier-nondigit
/ pp-number 'e' sign
/ pp-number 'E' sign
/ pp-number 'p' sign
/ pp-number 'P' sign
/ pp-number '.'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment