Last active
December 17, 2015 00:38
-
-
Save jddurand/5522268 to your computer and use it in GitHub Desktop.
c2ast - with typedef/enum resolv - BEING TESTED
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!env perl | |
# Execute this script with no parameter to get the online help | |
# **************************************************************************** | |
# | |
# This script will do a C language -> AST using Marpa parser | |
# | |
# C.f. https://groups.google.com/forum/?fromgroups=#!forum/marpa-parser | |
# | |
# A symbol table is built using a scanless recognizer | |
# C.f. http://www.cs.dartmouth.edu/~mckeeman/references/JCLT/ResolvingTypedefsInAMultipassCCompiler.pdf | |
# | |
# **************************************************************************** | |
# | |
# Copyright 2013 Jean-Damien Durand | |
# you can redistribute this file and/or modify it under the terms of the | |
# GNU Lesser General Public License as published by the Free Software | |
# Foundation, either version 3 of the License, or (at your option) any later | |
# version. | |
# | |
# This file is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
# Lesser General Public License for more details. | |
# | |
# You should have received a copy of the GNU Lesser | |
# General Public License along with this file. If not, see | |
# http://www.gnu.org/licenses/. | |
# | |
# **************************************************************************** | |
use strict; | |
use diagnostics; | |
use Marpa::R2 2.054_000; | |
use File::Slurp qw/read_file/; | |
use Data::Dumper qw/Dumper/; | |
use POSIX qw/EXIT_SUCCESS EXIT_FAILURE/; | |
use FindBin qw/$Bin/; | |
use Log::Log4perl qw/:easy/; | |
use Log::Any::Adapter; | |
use Log::Any qw/$log/; | |
use Carp qw/longmess/; | |
use Clone qw/clone/; | |
use constant {NO_TRACE_MODE=>0, EVENT_TRACE_MODE=>1, LEXEME_TRACE_MODE=>2}; | |
use constant {LATEST_G1_ORDINAL => -1}; | |
use constant {DOT_PREDICTION => 0, DOT_COMPLETION => -1}; | |
initLog();

##############################################################################
# main
#
# Reads the C source named on the command line, feeds it to a Marpa scanless
# recognizer built from the grammar stored in the DATA section, drives the
# read/resume loop, then dumps every parse value that was found.
##############################################################################

# Input text and grammar text
# ---------------------------
my $file           = shift || usage(EXIT_FAILURE);
my $input          = read_file($file);
my $grammar_source = do { local $/; <DATA> };

# State shared with the subroutines below to disambiguate the C language
# ----------------------------------------------------------------------
my $delayedExitScope        = 0;        # Scope management
my $typedefPerScope         = [ {} ];   # Typedef definitions per scope level
my $enumAnyScope            = {};       # Enum definitions, applies to any scope
my %G1LocationToTypedef     = ();       # G1 Earley Set IDs that have TYPEDEF
my %G1LocationToTypedefName = ();       # G1 Earley Set IDs that have TYPEDEF_NAME

my $grammar = Marpa::R2::Scanless::G->new({bless_package => 'C::AST', source => \$grammar_source});
my $recce   = Marpa::R2::Scanless::R->new({grammar => $grammar});

# Results of calls that are repeated very often with the same answer
# ------------------------------------------------------------------
my $cacheProgress = undef;              # Latest $recce->progress(LATEST_G1_ORDINAL)
my $cacheEvents   = undef;              # Latest flattened $recce->event(...) results

# Parse: after every pause, refresh the caches, handle the events and the
# paused lexeme, then resume until the whole input has been consumed
# -----------------------------------------------------------------------
my $traceMode = NO_TRACE_MODE;          # Only used to pretty-print contextual trace
my $pos       = M_read();
while (1) {
    cacheProgressAndEvents();
    doEvent();
    doLexeme();
    $traceMode = NO_TRACE_MODE;
    $pos = M_resume();
    last if ($pos >= length($input));
}

# Dump every parse tree; exactly one tree is the expected outcome
# ---------------------------------------------------------------
my $nvalue   = 0;
my $valueRef = M_value() || die show_last_expression();
while (defined($valueRef)) {
    ++$nvalue;
    print Dumper($valueRef);
    $valueRef = M_value();
}
noticef('Number of parse trees: %d', $nvalue);
if ($nvalue == 1) {
    exit(EXIT_SUCCESS);
}
warnf('Number of parse tree should be 1');
exit(EXIT_FAILURE);
#########
# initLog
#
# Initialises Log::Log4perl and plugs it into Log::Any.
# A readable, non-empty 'log4perl.conf' found in the script directory ($Bin)
# is used when present; otherwise the built-in configuration below applies
# (root logger at INFO, Screen appender with its stderr option set to 0).
#########
sub initLog {
    # This file never loads File::Spec itself, so load it here rather than
    # relying on some other module having pulled it in already.
    require File::Spec;

    # Non-interpolating here-document: the %-directives belong to Log4perl.
    my $defaultLog4perlConf = <<'DEFAULT_LOG4PERL_CONF';
log4perl.rootLogger = INFO, Screen
log4perl.appender.Screen = Log::Log4perl::Appender::Screen
log4perl.appender.Screen.stderr = 0
log4perl.appender.Screen.layout = PatternLayout
log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %6P %m{chomp}%n
DEFAULT_LOG4PERL_CONF

    my $filenameLog4perlConf = File::Spec->catfile($Bin, 'log4perl.conf');
    if (-r $filenameLog4perlConf && -s $filenameLog4perlConf) {
        Log::Log4perl::init($filenameLog4perlConf);
    } else {
        Log::Log4perl::init(\$defaultLog4perlConf);
    }
    Log::Any::Adapter->set('Log4perl');
}
############################################
# Trace wrappers to have something pedagogic
############################################
# _trace($method, $formatString, @arguments)
# Calls the $log method named $method with the format prefixed by '%s :: ';
# that leading slot is filled with the string returned by traceHeader().
# traceHeader() looks for this sub's name in the call stack, so the name
# must stay '_trace'.
sub _trace {
    my ($method, $formatString, @arguments) = @_;
    my $prefixedFormat = '%s :: ' . $formatString;
    $log->$method($prefixedFormat, traceHeader(), @arguments);
}
# One front-end per log method: each hands its arguments, unchanged, to
# _trace() together with the name of the $log method to invoke.
sub tracef {
    _trace('tracef', @_);
}

sub debugf {
    _trace('debugf', @_);
}

sub infof {
    _trace('infof', @_);
}

sub fatalf {
    _trace('fatalf', @_);
}

sub warnf {
    _trace('warnf', @_);
}

sub noticef {
    _trace('noticef', @_);
}

# Logs the message through fatalf(), traces 'EXIT_FAILURE', then terminates
# the process with EXIT_FAILURE. Never returns.
sub exitf {
    fatalf(@_);
    tracef('EXIT_FAILURE');
    exit(EXIT_FAILURE);
}
###############
# cacheProgress
#
# Stores the result of M_progress(LATEST_G1_ORDINAL) in $cacheProgress.
###############
sub cacheProgress {
    my $latestProgress = M_progress(LATEST_G1_ORDINAL);
    $cacheProgress = $latestProgress;
}
###############
# cacheEvents
#
# Calls M_event(0), M_event(1), ... until it returns undef, flattens every
# returned array reference into one list and stores a reference to that list
# in $cacheEvents.
###############
sub cacheEvents {
    my @events  = ();
    my $ordinal = 0;
    # A lexical holds each event so the caller's global $_ is left untouched.
    while (defined(my $event = M_event($ordinal++))) {
        push(@events, @{$event});
    }
    $cacheEvents = \@events;
}
########################
# cacheProgressAndEvents
#
# Refreshes both caches, progress first and events second.
########################
sub cacheProgressAndEvents {
    cacheProgress();
    return cacheEvents();
}
############################
# show_last_expression
#
# Returns a message quoting the text of the last completed 'translationUnit',
# or a fixed message when M_last_completed_range() reports no start.
############################
sub show_last_expression {
    my ($start, $end) = M_last_completed_range('translationUnit');
    if (defined($start)) {
        return 'Last expression successfully parsed was: ' . M_range_to_string($start, $end);
    }
    return 'No expression was successfully parsed';
}
#########
# arrayEq
#
# arrayEq($ap, $bp): compares two array references element by element with
# the string operator 'ne'. Returns 1 when both arrays have the same last
# index and every pair of elements is string-equal, 0 otherwise.
#########
sub arrayEq {
    my ($ap, $bp) = @_;

    # Different sizes can never be equal
    return(0) if ($#{$ap} != $#{$bp});

    for my $idx (0 .. $#{$ap}) {
        return(0) if ($ap->[$idx] ne $bp->[$idx]);
    }
    return(1);
}
####################
# sprintfDotPosition
#
# sprintfDotPosition($earleySetId, $i, $dotPosition, $lhs, @rhs)
# Formats a rule as '<lhs> ::= <rhs1> <rhs2> ...'.
# - When $dotPosition is defined, a '.' is inserted at that offset in the rhs
#   if it is >= 0, and appended at the end (completion) if it is negative.
# - ' (ordinal N, indice M)' is appended, each part only when $earleySetId,
#   respectively $i, is defined.
####################
sub sprintfDotPosition {
    my ($earleySetId, $i, $dotPosition, $lhs, @rhs) = @_;

    if (defined($dotPosition)) {
        # A negative dot position means completion: the dot goes last
        my $insertAt = ($dotPosition >= 0) ? $dotPosition : scalar(@rhs);
        splice(@rhs, $insertAt, 0, '.');
    }

    my @shown = map { $_ eq '.' ? $_ : "<$_>" } @rhs;
    my $text  = sprintf('<%s> ::= %s', $lhs, join(' ', @shown));

    my @where = ();
    push(@where, sprintf('ordinal %d', $earleySetId)) if (defined($earleySetId));
    push(@where, sprintf('indice %d', $i))            if (defined($i));
    $text .= sprintf(' (%s)', join(', ', @where))     if (@where);

    return $text;
}
################
# findInProgress
#
# findInProgress($earleySetId, $wantedDotPosition, $wantedLhs, $wantedRhsp, $fatalMode)
#
# Scans the progress report of G1 Earley set $earleySetId. The cached report
# ($cacheProgress) is used when $earleySetId is LATEST_G1_ORDINAL or equals
# the latest G1 location; otherwise M_progress($earleySetId) is called.
#
# Each of $wantedDotPosition, $wantedLhs and $wantedRhsp (array reference) is
# a filter that is applied only when defined. Every item that passes the
# filters is logged, with fatalf() when $fatalMode is true and tracef()
# otherwise.
#
# Returns 1 as soon as an item passes when at least one filter is defined,
# 0 otherwise. With no filter at all, every item is logged and 0 is returned.
################
sub findInProgress {
    my ($earleySetId, $wantedDotPosition, $wantedLhs, $wantedRhsp, $fatalMode) = @_;
    $fatalMode ||= 0;

    my $hasFilter = defined($wantedDotPosition) || defined($wantedLhs) || defined($wantedRhsp);

    # Ask for the latest G1 location only when the ordinal is not already
    # known to designate the latest Earley set.
    my $useCache  = ($earleySetId == LATEST_G1_ORDINAL) || ($earleySetId == M_latest_g1_location());
    my $progressp = $useCache ? $cacheProgress : M_progress($earleySetId);

    my $rc = 0;
    my $i  = -1;
    foreach my $item (@{$progressp}) {
        # Counted before any filter, so the logged 'indice' is the real
        # position of the item in the progress report.
        ++$i;

        my ($rule_id, $dotPosition) = @{$item};
        next if (defined($wantedDotPosition) && ($dotPosition != $wantedDotPosition));

        my ($lhs, @rhs) = $grammar->rule($rule_id);
        next if (defined($wantedLhs) && ($lhs ne $wantedLhs));
        next if (defined($wantedRhsp) && ! arrayEq(\@rhs, $wantedRhsp));

        my $description = sprintfDotPosition($earleySetId, $i, $dotPosition, $lhs, @rhs);
        if ($fatalMode) {
            fatalf('%s', $description);
        } else {
            tracef('%s', $description);
        }

        if ($hasFilter) {
            $rc = 1;
            last;
        }
    }
    return($rc);
}
############################################################################
# traceHeader - Intentionally no wrapper call here, otherwise deep recursion
#
# Builds the prefix put in front of every log message:
#   '[G1 <latest g1 location>] [<context>] <call chain>'
# - <context> is the list of cached events in EVENT_TRACE_MODE, the
#   line:column, name and text of the paused lexeme in LEXEME_TRACE_MODE
#   (when a lexeme is paused), and blank padding otherwise.
# - <call chain> lists, outermost first, the subs found in the call stack
#   beyond the frame that called _trace().
############################################################################
sub traceHeader {
    my @callChain = ();
    my $pastTrace = 0;   # 0 until '_trace' is seen, then counts frames after it
    my $level     = 1;
    while (my @frame = caller($level++)) {
        (my $subName = $frame[3]) =~ s/^main:://;
        # Only frames strictly after the one following _trace() are kept
        push(@callChain, $subName) if ($pastTrace > 1);
        ++$pastTrace               if ($pastTrace);
        $pastTrace = 1             if ($subName eq '_trace');
    }

    my $info = sprintf('[%5s %3s %15s %-17s]', '', '', '', '');
    if ($traceMode == EVENT_TRACE_MODE) {
        $info = sprintf('[%43s]', join(',', @{$cacheEvents}));
    } elsif ($traceMode == LEXEME_TRACE_MODE && defined(my $lexeme = $recce->pause_lexeme())) {
        my ($start, $length) = $recce->pause_span();
        my ($line, $column)  = $recce->line_column($start);
        my $value            = substr($input, $start, $length);
        #
        # Lexemes that are paused before being read get a '?' suffix
        #
        my %pausedBefore = map { $_ => 1 } qw/TYPEDEF_NAME ENUMERATION_CONSTANT IDENTIFIER/;
        $lexeme .= '?' if ($pausedBefore{$lexeme});
        $info = sprintf('[%5d:%3d:%15s %-17s]', $line, $column, $lexeme, "'$value'");
    }

    return join(' ',
                sprintf('[G1 %5d]', $recce->latest_g1_location()),
                $info,
                join('>', reverse(@callChain)));
}
#########################################################################
# Exhaustive list of Marpa calls - wrapped just to have a logging of them
#
# Every wrapper traces the call it is about to make, then returns whatever
# the corresponding $recce method returns, in the caller's context.
#########################################################################
sub M_value {
    tracef('$recce->value()');
    return $recce->value();
}

sub M_read {
    tracef('$recce->read(\$input)');
    return $recce->read(\$input);
}

sub M_resume {
    tracef('$recce->resume()');
    return $recce->resume();
}

sub M_last_completed {
    tracef('$recce->last_completed("%s")', @_ );
    return $recce->last_completed(@_);
}

sub M_last_completed_range {
    tracef('$recce->last_completed_range("%s")', @_ );
    return $recce->last_completed_range(@_);
}

sub M_range_to_string {
    tracef('$recce->range_to_string(%d, %d )', @_);
    return $recce->range_to_string(@_);
}

sub M_progress {
    tracef('$recce->progress(%d)', @_);
    return $recce->progress(@_);
}

sub M_event {
    tracef('$recce->event(%d)', @_);
    return $recce->event(@_);
}

sub M_pause_lexeme {
    tracef('$recce->pause_lexeme()');
    return $recce->pause_lexeme();
}

sub M_pause_span {
    tracef('$recce->pause_span()');
    return $recce->pause_span();
}

sub M_line_column {
    tracef('$recce->line_column(%d)', @_);
    return $recce->line_column(@_);
}

sub M_substring {
    tracef('$recce->substring(%d, %d)', @_);
    return $recce->substring(@_);
}

sub M_lexeme_read {
    tracef('$recce->lexeme_read("%s", %d, %d, "%s")', @_);
    return $recce->lexeme_read(@_);
}

sub M_latest_g1_location {
    tracef('$recce->latest_g1_location()');
    return $recce->latest_g1_location();
}
##############################
# inspectTypeSpecifier
#
# Runs inspectG1() looking for a recorded TYPEDEF_NAME (%G1LocationToTypedefName)
# inside a G1 range delimited by one of the 'declarator' rules listed below.
# Parameters and return value are those of inspectG1(); the two location
# parameters are optional scalar references that receive the matching range.
##############################
sub inspectTypeSpecifier {
    my ($this_g1_location, $start_g1_locationp, $end_g1_locationp, $dot_location_and_rule_end_conditionp) = @_;

    # Each entry: [ dot position at range start, dot position at range end, lhs, rhs ].
    # A lexical replaces the former package global: nothing else reads this table.
    my @directDeclaratorEnclosingRules = (
        [ 1,              2, 'declarator', [ 'pointer', 'directDeclarator' ] ],
        [ DOT_PREDICTION, 1, 'declarator', [ 'directDeclarator' ] ],
    );

    return(inspectG1('TYPEDEF_NAME', $this_g1_location, $start_g1_locationp, $end_g1_locationp, \@directDeclaratorEnclosingRules, \%G1LocationToTypedefName, $dot_location_and_rule_end_conditionp));
}
##############################
# inspectDeclarationSpecifiers
#
# Runs inspectG1() looking for a recorded TYPEDEF (%G1LocationToTypedef)
# inside the G1 range covered by the 'declarationSpecifiers' of one of the
# rules listed below. Parameters and return value are those of inspectG1();
# the two location parameters are optional scalar references that receive
# the matching range.
##############################
sub inspectDeclarationSpecifiers {
    my ($this_g1_location, $start_g1_locationp, $end_g1_locationp, $dot_location_and_rule_end_conditionp) = @_;

    # Each entry: [ dot position at range start, dot position at range end, lhs, rhs ].
    # The rhs is compared symbol by symbol with the grammar rule, so names must
    # be exact: 'initDeclaratorList' had a trailing space that could never match,
    # and the second functionDefinition alternative is now spelled as it is in
    # canReenterScope() (declarationSpecifiers declarator compoundStatement).
    # NOTE(review): confirm against the functionDefinition rule of the grammar.
    my @declarationSpecifiersEnclosingRules = (
        [ DOT_PREDICTION, 1, 'declaration',          [ 'declarationSpecifiers', 'SEMICOLON' ] ],
        [ DOT_PREDICTION, 1, 'declaration',          [ 'declarationSpecifiers', 'initDeclaratorList', 'SEMICOLON' ] ],
        [ DOT_PREDICTION, 1, 'parameterDeclaration', [ 'declarationSpecifiers', 'declarator' ] ],
        [ DOT_PREDICTION, 1, 'parameterDeclaration', [ 'declarationSpecifiers', 'abstractDeclarator' ] ],
        [ DOT_PREDICTION, 1, 'parameterDeclaration', [ 'declarationSpecifiers' ] ],
        [ DOT_PREDICTION, 1, 'functionDefinition',   [ 'declarationSpecifiers', 'declarator', 'declarationList', 'compoundStatement' ] ],
        [ DOT_PREDICTION, 1, 'functionDefinition',   [ 'declarationSpecifiers', 'declarator', 'compoundStatement' ] ],
    );

    return(inspectG1('TYPEDEF', $this_g1_location, $start_g1_locationp, $end_g1_locationp, \@declarationSpecifiersEnclosingRules, \%G1LocationToTypedef, $dot_location_and_rule_end_conditionp));
}
###########
# inspectG1
#
# inspectG1($what, $this_g1_location, $start_g1_locationp, $end_g1_locationp,
#           $candidateRulesp, $matchesInG1p, $dot_location_and_rule_end_conditionp)
#
# Walks the G1 locations backwards, starting at $this_g1_location (the latest
# G1 location when that argument is false). At each location it:
#  1. looks for the end of a range: the first candidate rule found in progress
#     with its "end" dot position;
#  2. once an end is known, looks for the start of the range: the same rule
#     found in progress with its "start" dot position;
#  3. when both are known, checks whether any location of the range is flagged
#     in %{$matchesInG1p}; on a hit the range is written through the optional
#     scalar references and the walk stops, otherwise the range is dropped;
#  4. stops when one of the end-condition rules is found in progress, or when
#     location 0 has been processed.
#
# $what is only used in trace messages.
#
# rc undef: no candidate rule found
# rc 0    : candidate rule found, no match on $what
# rc 1    : candidate rule found, match on $what
###########
sub inspectG1 {
    my ($what, $this_g1_location, $start_g1_locationp, $end_g1_locationp,
        $candidateRulesp, $matchesInG1p, $dot_location_and_rule_end_conditionp) = @_;

    $this_g1_location ||= M_latest_g1_location();

    my $haveCandidates = defined($candidateRulesp);
    my $rangeStart     = undef;
    my $rangeEnd       = undef;
    my $candidateIndex = 0;
    my $status         = undef;

  LOCATION:
    while (1) {
        #
        # Search for the end of a range
        #
        if ($haveCandidates && ! defined($rangeEnd)) {
            for my $idx (0 .. $#{$candidateRulesp}) {
                my (undef, $dotAtEnd, $lhs, $rhsp) = @{$candidateRulesp->[$idx]};
                next if (! findInProgress($this_g1_location, $dotAtEnd, $lhs, $rhsp, 0));
                $rangeEnd       = $this_g1_location;
                $candidateIndex = $idx;
                last;
            }
        }
        #
        # Search for the start of the range, using the rule that gave its end
        #
        if ($haveCandidates && defined($rangeEnd) && ! defined($rangeStart)) {
            my ($dotAtStart, undef, $lhs, $rhsp) = @{$candidateRulesp->[$candidateIndex]};
            if (findInProgress($this_g1_location, $dotAtStart, $lhs, $rhsp, 0)) {
                $rangeStart = $this_g1_location;
            }
        }
        #
        # Complete range: is there a flagged location inside it ?
        #
        if (defined($rangeStart) && defined($rangeEnd)) {
            $status = 0;
            tracef('G1 range [%d, %d]', $rangeStart, $rangeEnd);
            if ($rangeStart > $rangeEnd) {
                exitf('$start_g1_location %d > $end_g1_location %d !?', $rangeStart, $rangeEnd);
            }
            my $flagged = grep {exists($matchesInG1p->{$_}) && $matchesInG1p->{$_}} ($rangeStart..$rangeEnd);
            if ($flagged) {
                tracef('G1 range [%d, %d] have %s', $rangeStart, $rangeEnd, $what);
                ${$start_g1_locationp} = $rangeStart if (defined($start_g1_locationp));
                ${$end_g1_locationp}   = $rangeEnd   if (defined($end_g1_locationp));
                $status = 1;
                last LOCATION;
            }
            tracef('G1 range [%d, %d] do not have %s', $rangeStart, $rangeEnd, $what);
            ($rangeStart, $rangeEnd) = (undef, undef);
        }
        #
        # End condition
        #
        my $mustStop = 0;
        if (defined($dot_location_and_rule_end_conditionp)) {
            for my $condition (@{$dot_location_and_rule_end_conditionp}) {
                my ($dotPrediction, $lhs, $rhsp) = @{$condition};
                if (findInProgress($this_g1_location, $dotPrediction, $lhs, $rhsp, 0)) {
                    $mustStop = 1;
                    last;
                }
            }
        }
        last LOCATION if ($mustStop);
        #
        # Next loop: one G1 location backwards
        #
        last LOCATION if (--$this_g1_location < 0);
    }

    return($status);
}
#########
# doEvent
#
# Switches the trace mode to EVENT_TRACE_MODE, then handles every event name
# held in $cacheEvents:
# - parameterDeclaration: enforces the TYPEDEF and TYPEDEF_NAME constraints;
# - directDeclarator    : records the declared identifier either as a typedef
#                         name or as a name obscuring a typedef;
# - enumerationConstant : records the enumeration constant;
# - primaryExpression   : nothing to do.
#########
sub doEvent {
    $traceMode = EVENT_TRACE_MODE;

    foreach my $event (@{$cacheEvents}) {
        if ($event eq 'parameterDeclaration') {
            #
            # In parameterDeclaration typedef is syntactically allowed but never
            # valid, and a typedef-name cannot be entered
            # [ANSI C 3.5.4.3, 3.7.1]
            #
            doGrammarConstraints('TYPEDEF', [ [DOT_PREDICTION, $event, undef ] ]);
            doGrammarConstraints('TYPEDEF_NAME', [ [DOT_PREDICTION, $event, undef ] ]);
        }
        elsif ($event eq 'directDeclarator') {
            #
            # directDeclarator ::= IDENTIFIER .
            # can introduce names into ordinary name space when it eventually
            # participates in the grammar rule:
            # declaration: declarationSpecifiers initDeclaratorList . SEMICOLON
            #
            if (findInProgress(LATEST_G1_ORDINAL, DOT_COMPLETION, 'directDeclarator', ['IDENTIFIER']) &&
                findInProgress(LATEST_G1_ORDINAL, 2, 'declaration', ['declarationSpecifiers', 'initDeclaratorList', 'SEMICOLON'])) {
                #
                # Text of the declarator: fetched once and used by both branches
                #
                my $directDeclarator = M_substring(M_last_completed('directDeclarator'));
                #
                # In structDeclarator ordinary name space names cannot be defined
                #
                if (findInProgress(LATEST_G1_ORDINAL, 1, 'structDeclarator', [ 'declarator', 'COLON', 'constantExpression' ]) ||
                    findInProgress(LATEST_G1_ORDINAL, 1, 'structDeclarator', [ 'declarator' ])) {
                    debugf('declaration of IDENTIFIER "%s" in structDeclarator context: parse symbol inactive', $directDeclarator);
                } else {
                    debugf('declaration of IDENTIFIER "%s" that can introduce name in name-space', $directDeclarator);
                    if (inspectDeclarationSpecifiers(undef, undef, undef)) {
                        parseEnterTypedef($directDeclarator);
                    } else {
                        parseObscureTypedef($directDeclarator);
                    }
                }
            }
        }
        elsif ($event eq 'enumerationConstant') {
            #
            # Enum is not scope dependent - from now on it obscures any use of
            # its identifier in any scope
            #
            my $enumerationConstant = M_substring(M_last_completed('enumerationConstant'));
            parseEnterEnum($enumerationConstant);
        }
        elsif ($event eq 'primaryExpression') {
            #
            # Anything special to do ?
            #
        }
    }
}
########################################
# doGrammarConstraints
#
# doGrammarConstraints($what, $dot_location_and_rule_end_conditionp)
# $what is 'TYPEDEF' or 'TYPEDEF_NAME' (anything else is an internal error).
# Runs the corresponding inspector from the latest G1 location and, when it
# reports a match, exits through exitf() quoting the offending input text.
########################################
sub doGrammarConstraints {
    my ($what, $dot_location_and_rule_end_conditionp) = @_;

    my $this_g1_location = M_latest_g1_location();
    my ($start_g1_location, $end_g1_location);

    my %inspectorFor = (
        TYPEDEF      => \&inspectDeclarationSpecifiers,
        TYPEDEF_NAME => \&inspectTypeSpecifier,
    );
    my $inspector = $inspectorFor{$what};
    if (! defined($inspector)) {
        exitf('Internal error, bad parameter "%s"', $what);
    }

    my $rc = $inspector->($this_g1_location, \$start_g1_location, \$end_g1_location, $dot_location_and_rule_end_conditionp);
    if ($rc) {
        #
        # Match: choose the G1 range whose text is quoted in the error
        #
        my ($from, $to) =
            ($start_g1_location < $end_g1_location)  ? ($start_g1_location, $end_g1_location)
          : ($start_g1_location < $this_g1_location) ? ($start_g1_location, $start_g1_location + 1)
          :                                            ($this_g1_location - 1, $this_g1_location);
        exitf('%s is not allowed in "%s"', $what, M_range_to_string($from, $to));
    }
}
#####################
# canEnterTypedefName
#
# Returns 0 when the latest progress report contains
# 'parameterDeclaration ::= declarationSpecifiers . declarator', 1 otherwise.
#####################
sub canEnterTypedefName {
    my $inParameterDeclaration = findInProgress(LATEST_G1_ORDINAL, 1, 'parameterDeclaration', ['declarationSpecifiers', 'declarator']);
    return(1) if (! $inParameterDeclaration);

    debugf('A parameterDeclaration cannot enter a TYPEDEF_NAME');
    return(0);
}
#############################
# canEnterEnumerationConstant
#
# Always returns 1: no restriction is implemented here.
#############################
sub canEnterEnumerationConstant {
    return(1);
}
##########
# doLexeme
#
# Switches the trace mode to LEXEME_TRACE_MODE and handles the lexeme on
# which the recognizer is paused, if any:
# - IDENTIFIER / TYPEDEF_NAME / ENUMERATION_CONSTANT: decides which of the
#   three the text really is, reads it explicitly, then handles the events
#   this may have generated;
# - LPAREN_SCOPE / LCURLY_SCOPE / RPAREN_SCOPE / RCURLY_SCOPE: scope management;
# - TYPEDEF: remembers the G1 location where it was seen.
##########
sub doLexeme {
    $traceMode = LEXEME_TRACE_MODE;

    my $paused = M_pause_lexeme();
    return if (! defined($paused));

    my %isAmbiguous = map { $_ => 1 } qw/IDENTIFIER TYPEDEF_NAME ENUMERATION_CONSTANT/;

    if ($isAmbiguous{$paused}) {
        #
        # Ambiguity management: 'before' paused lexemes
        #
        my ($lexeme_start, $lexeme_length) = M_pause_span();
        my $text = substr($input, $lexeme_start, $lexeme_length);

        my $resolved = 'IDENTIFIER';
        if (findInProgress(LATEST_G1_ORDINAL, DOT_PREDICTION, 'typeSpecifier', [ 'TYPEDEF_NAME' ]) && parseIsTypedef($text) && canEnterTypedefName()) {
            $resolved = 'TYPEDEF_NAME';
        } elsif (findInProgress(LATEST_G1_ORDINAL, DOT_PREDICTION, 'constant', [ 'ENUMERATION_CONSTANT' ]) && parseIsEnum($text) && canEnterEnumerationConstant()) {
            $resolved = 'ENUMERATION_CONSTANT';
        }
        #
        # Push the disambiguated lexeme
        #
        debugf('%s "%s"', $resolved, $text);
        if (! defined(M_lexeme_read($resolved, $lexeme_start, $lexeme_length, $text))) {
            my ($line, $column) = M_line_column($lexeme_start);
            exitf('Internal error at line %d, column %d: "%s" cannot be associated to lexeme %s', $line, $column, $text, $resolved);
        }
        if ($resolved eq 'TYPEDEF_NAME') {
            my $g1 = M_latest_g1_location();
            debugf('%s detected at G1 location %d', $resolved, $g1);
            $G1LocationToTypedefName{$g1} = 1;
        }
        #
        # A lexeme_read() can generate an event
        #
        cacheProgressAndEvents();
        doEvent();
    }
    #
    # Scope management: associated with file-scope, function body, compound
    # statement, or prototype
    # - function body matches compound statement
    # - file-scope is implicit here, we treat one file at a time
    #
    elsif ($paused eq 'LPAREN_SCOPE') {
        parseEnterScope();
    }
    elsif ($paused eq 'LCURLY_SCOPE') {
        if (canReenterScope()) {
            #
            # We know now that we are at the beginning of the body of a
            # functionDefinition. It is better to do the check here rather
            # than waiting for the functionDefinition rule to complete via
            # an event.
            #
            # In functionDefinition typedef is syntactically allowed but
            # never valid [ANSI C 3.7.1]
            #
            doGrammarConstraints('TYPEDEF', [ [DOT_PREDICTION, 'functionDefinition', undef ] ]);
            parseReenterScope();
        } else {
            parseEnterScope();
        }
    }
    elsif ($paused eq 'RPAREN_SCOPE' || $paused eq 'RCURLY_SCOPE') {
        parseExitScope();
    }
    #
    # Track of TYPEDEF lexeme per G1 location
    #
    elsif ($paused eq 'TYPEDEF') {
        my $g1 = M_latest_g1_location();
        debugf('%s detected at G1 location %d', $paused, $g1);
        $G1LocationToTypedef{$g1} = 1;
    }
}
#################
# canReenterScope
#
# True when the Earley set just before the latest one (ordinal
# LATEST_G1_ORDINAL - 1) has a functionDefinition rule whose dot sits right
# before compoundStatement, with or without a declarationList.
#################
sub canReenterScope {
    my $previousOrdinal = LATEST_G1_ORDINAL - 1;

    if (findInProgress($previousOrdinal, 3, 'functionDefinition', [ 'declarationSpecifiers', 'declarator', 'declarationList', 'compoundStatement' ])) {
        return 1;
    }
    return findInProgress($previousOrdinal, 2, 'functionDefinition', [ 'declarationSpecifiers', 'declarator', 'compoundStatement' ]);
}
###################################################################################################
# Implementation of
## http://www.cs.dartmouth.edu/~mckeeman/references/JCLT/ResolvingTypedefsInAMultipassCCompiler.pdf
###################################################################################################
# parseEnterScope: applies any pending scope exit, then pushes a clone of the
# current innermost typedef table as the new innermost scope.
sub parseEnterScope {
    doDelayedExitScope();

    my $current   = $#{$typedefPerScope};
    my $inherited = clone($typedefPerScope->[$current]);
    push(@{$typedefPerScope}, $inherited);

    debugf('Duplicated scope %d to %d', $current, $current + 1);
}
# parseExitScope: does not remove the scope now; it only raises the delay
# flag that doDelayedExitScope() acts upon later.
sub parseExitScope {
    $delayedExitScope = 1;
    debugf('Setting delay flag on scope %d', $#{$typedefPerScope});
}
# parseReenterScope: cancels a pending scope exit by clearing the delay flag,
# so the current innermost scope stays in place.
sub parseReenterScope {
    $delayedExitScope = 0;
    debugf('Resetting delay flag on scope %d', $#{$typedefPerScope});
}
# parseEnterTypedef($token): applies any pending scope exit, then flags
# $token as a typedef name (value 1) in the innermost scope.
sub parseEnterTypedef {
    my $token = shift;

    doDelayedExitScope();
    my $innermost = $#{$typedefPerScope};
    $typedefPerScope->[$innermost]{$token} = 1;

    debugf('"%s" at scope %d', $token, $innermost);
}
# parseEnterEnum($token): applies any pending scope exit, then records
# $token in the scope-independent enum table.
sub parseEnterEnum {
    my $token = shift;

    doDelayedExitScope();
    $enumAnyScope->{$token} = 1;

    debugf('"%s"', $token);
}
# parseObscureTypedef($token): applies any pending scope exit, then marks
# $token as NOT being a typedef name (value 0) in the innermost scope, which
# hides a typedef of the same name copied from an enclosing scope.
sub parseObscureTypedef {
    my ($token) = @_;

    doDelayedExitScope();
    my $scope = $#{$typedefPerScope};
    $typedefPerScope->[$scope]->{$token} = 0;

    # The closing double quote after the token was missing in this message
    debugf('"%s" obscured at scope %d', $token, $scope);
}
# parseIsTypedef($token): applies any pending scope exit, then returns 1 when
# $token exists with a true value in the innermost scope table, 0 otherwise.
sub parseIsTypedef {
    my $token = shift;

    doDelayedExitScope();
    my $innermost = $#{$typedefPerScope};

    my $rc = 0;
    if (exists($typedefPerScope->[$innermost]->{$token}) && $typedefPerScope->[$innermost]->{$token}) {
        $rc = 1;
    }

    debugf('"%s" at scope %d ? %s', $token, $innermost, $rc ? 'yes' : 'no');
    return($rc);
}
# parseIsEnum($token): applies any pending scope exit, then returns 1 when
# $token exists with a true value in the enum table, 0 otherwise.
sub parseIsEnum {
    my $token = shift;

    doDelayedExitScope();

    my $rc = 0;
    if (exists($enumAnyScope->{$token}) && $enumAnyScope->{$token}) {
        $rc = 1;
    }

    debugf('"%s" ? %s', $token, $rc ? 'yes' : 'no');
    return($rc);
}
# doDelayedExitScope: when the delay flag is raised, removes the innermost
# scope and clears the flag; does nothing otherwise.
sub doDelayedExitScope {
    if ($delayedExitScope) {
        my $removed = $#{$typedefPerScope};
        $delayedExitScope = 0;
        pop(@{$typedefPerScope});
        debugf('Removed scope %d and resetted delay flag', $removed);
    }
}
############################
# usage
#
# usage($rc): prints the online help on STDERR and exits with status $rc.
# $rc defaults to EXIT_FAILURE only when it is not given (or undef), so an
# explicit usage(EXIT_SUCCESS), whose value is 0, is honoured. Never returns.
############################
sub usage {
    my $rc = shift;
    $rc = EXIT_FAILURE if (! defined($rc));

    print STDERR "Usage: $^X $0 file.c\n";
    print STDERR "\n";
    print STDERR "Warning, file.c must contain only 2011 ISO C\n";
    print STDERR "With GNU cpp, here is how to get it from a general .c file:\n";
    print STDERR "cpp -P -ansi old.c > new.c\n";

    exit($rc);
}
__DATA__ | |
################################################################################################################ | |
# grammar | |
# | |
# 2011 ISO C, as of http://www.quut.com/c/ANSI-C-grammar-l.html | |
# http://www.quut.com/c/ANSI-C-grammar-y-2011.html | |
# | |
################################################################################################################ | |
# | |
# Defaults | |
# | |
:default ::= action => [values] bless => ::lhs | |
# | |
# G1 (grammar), c.f. http://www.quut.com/c/ANSI-C-grammar-y-2011.html | |
# | |
:start ::= translationUnit | |
event primaryExpression = completed <primaryExpression> | |
primaryExpression | |
::= IDENTIFIER | |
| constant | |
| string | |
| LPAREN expression RPAREN | |
| genericSelection | |
constant | |
::= I_CONSTANT # includes character_constant | |
| F_CONSTANT | |
| ENUMERATION_CONSTANT # after it has been defined as such | |
event enumerationConstant = completed <enumerationConstant> | |
enumerationConstant # before it has been defined as such | |
::= IDENTIFIER | |
string | |
::= STRING_LITERAL | |
| FUNC_NAME | |
genericSelection | |
::= GENERIC LPAREN assignmentExpression COMMA genericAssocList RPAREN | |
genericAssocList | |
::= genericAssociation | |
| genericAssocList COMMA genericAssociation | |
genericAssociation | |
::= typeName COLON assignmentExpression | |
| DEFAULT COLON assignmentExpression | |
postfixExpression | |
::= primaryExpression | |
| postfixExpression LBRACKET expression RBRACKET | |
| postfixExpression LPAREN RPAREN | |
| postfixExpression LPAREN argumentExpressionList RPAREN | |
| postfixExpression DOT IDENTIFIER | |
| postfixExpression PTR_OP IDENTIFIER | |
| postfixExpression INC_OP | |
| postfixExpression DEC_OP | |
| LPAREN typeName RPAREN LCURLY initializerList RCURLY | |
| LPAREN typeName RPAREN LCURLY initializerList COMMA RCURLY | |
argumentExpressionList | |
::= assignmentExpression | |
| argumentExpressionList COMMA assignmentExpression | |
unaryExpression | |
::= postfixExpression | |
| INC_OP unaryExpression | |
| DEC_OP unaryExpression | |
| unaryOperator castExpression | |
| SIZEOF unaryExpression | |
| SIZEOF LPAREN typeName RPAREN | |
| ALIGNOF LPAREN typeName RPAREN | |
unaryOperator | |
::= AMPERSAND | |
| STAR | |
| PLUS | |
| HYPHEN | |
| TILDE | |
| EXCLAMATION | |
castExpression | |
::= unaryExpression | |
| LPAREN typeName RPAREN castExpression | |
multiplicativeExpression | |
::= castExpression | |
| multiplicativeExpression STAR castExpression | |
| multiplicativeExpression SLASH castExpression | |
| multiplicativeExpression PERCENT castExpression | |
additiveExpression | |
::= multiplicativeExpression | |
| additiveExpression PLUS multiplicativeExpression | |
| additiveExpression HYPHEN multiplicativeExpression | |
shiftExpression | |
::= additiveExpression | |
| shiftExpression LEFT_OP additiveExpression | |
| shiftExpression RIGHT_OP additiveExpression | |
relationalExpression | |
::= shiftExpression | |
| relationalExpression LESS_THAN shiftExpression | |
| relationalExpression GREATER_THAN shiftExpression | |
| relationalExpression LE_OP shiftExpression | |
| relationalExpression GE_OP shiftExpression | |
equalityExpression | |
::= relationalExpression | |
| equalityExpression EQ_OP relationalExpression | |
| equalityExpression NE_OP relationalExpression | |
andExpression | |
::= equalityExpression | |
| andExpression AMPERSAND equalityExpression | |
exclusiveOrExpression | |
::= andExpression | |
| exclusiveOrExpression CARET andExpression | |
inclusiveOrExpression | |
::= exclusiveOrExpression | |
| inclusiveOrExpression VERTICAL_BAR exclusiveOrExpression | |
logicalAndExpression | |
::= inclusiveOrExpression | |
| logicalAndExpression AND_OP inclusiveOrExpression | |
logicalOrExpression | |
::= logicalAndExpression | |
| logicalOrExpression OR_OP logicalAndExpression | |
conditionalExpression | |
::= logicalOrExpression | |
| logicalOrExpression QUESTION_MARK expression COLON conditionalExpression | |
assignmentExpression | |
::= conditionalExpression | |
| unaryExpression assignmentOperator assignmentExpression | |
assignmentOperator | |
::= EQUAL | |
| MUL_ASSIGN | |
| DIV_ASSIGN | |
| MOD_ASSIGN | |
| ADD_ASSIGN | |
| SUB_ASSIGN | |
| LEFT_ASSIGN | |
| RIGHT_ASSIGN | |
| AND_ASSIGN | |
| XOR_ASSIGN | |
| OR_ASSIGN | |
expression | |
::= assignmentExpression | |
| expression COMMA assignmentExpression | |
constantExpression | |
::= conditionalExpression # with constraints | |
declaration | |
::= declarationSpecifiers SEMICOLON | |
| declarationSpecifiers initDeclaratorList SEMICOLON | |
| staticAssertDeclaration | |
declarationSpecifiers | |
::= storageClassSpecifier declarationSpecifiers | |
| storageClassSpecifier | |
| typeSpecifier declarationSpecifiers | |
| typeSpecifier | |
| typeQualifier declarationSpecifiers | |
| typeQualifier | |
| functionSpecifier declarationSpecifiers | |
| functionSpecifier | |
| alignmentSpecifier declarationSpecifiers | |
| alignmentSpecifier | |
initDeclaratorList | |
::= initDeclarator | |
| initDeclaratorList COMMA initDeclarator | |
initDeclarator | |
::= declarator EQUAL initializer | |
| declarator | |
storageClassSpecifier | |
::= TYPEDEF # identifiers must be flagged as TYPEDEF_NAME | |
| EXTERN | |
| STATIC | |
| THREAD_LOCAL | |
| AUTO | |
| REGISTER | |
typeSpecifier | |
::= VOID | |
| CHAR | |
| SHORT | |
| INT | |
| LONG | |
| FLOAT | |
| DOUBLE | |
| SIGNED | |
| UNSIGNED | |
| BOOL | |
| COMPLEX | |
| IMAGINARY # non-mandated extension | |
| atomicTypeSpecifier | |
| structOrUnionSpecifier | |
| enumSpecifier | |
| TYPEDEF_NAME # after it has been defined as such | |
# struct/union specifier: an anonymous definition, a tagged definition, or a
# bare reference to a tag. The braces here are the plain LCURLY/RCURLY
# lexemes, not the *_SCOPE variants used by compoundStatement.
structOrUnionSpecifier | |
::= structOrUnion LCURLY structDeclarationList RCURLY | |
| structOrUnion IDENTIFIER LCURLY structDeclarationList RCURLY | |
| structOrUnion IDENTIFIER | |
# The keyword introducing the specifier.
structOrUnion | |
::= STRUCT | |
| UNION | |
# One or more member declarations (left-recursive; an empty member list is
# not accepted by this rule).
structDeclarationList | |
::= structDeclaration | |
| structDeclarationList structDeclaration | |
# A member declaration: specifiers only (anonymous struct/union member),
# specifiers with declarators, or a static assertion.
structDeclaration | |
::= specifierQualifierList SEMICOLON # for anonymous struct/union | |
| specifierQualifierList structDeclaratorList SEMICOLON | |
| staticAssertDeclaration | |
# One or more type specifiers and type qualifiers in any order. Unlike
# declarationSpecifiers, no storage-class, function or alignment specifier is
# allowed here.
specifierQualifierList | |
::= typeSpecifier specifierQualifierList | |
| typeSpecifier | |
| typeQualifier specifierQualifierList | |
| typeQualifier | |
# COMMA-separated, left-recursive list of structDeclarator.
structDeclaratorList | |
::= structDeclarator | |
| structDeclaratorList COMMA structDeclarator | |
# A member declarator: an unnamed bit-field (": width"), a named bit-field
# ("declarator : width"), or an ordinary declarator.
structDeclarator | |
::= COLON constantExpression | |
| declarator COLON constantExpression | |
| declarator | |
# enum specifier: anonymous or tagged definition, each with or without a
# trailing COMMA after the last enumerator, or a bare reference to a tag.
enumSpecifier | |
::= ENUM LCURLY enumeratorList RCURLY | |
| ENUM LCURLY enumeratorList COMMA RCURLY | |
| ENUM IDENTIFIER LCURLY enumeratorList RCURLY | |
| ENUM IDENTIFIER LCURLY enumeratorList COMMA RCURLY | |
| ENUM IDENTIFIER | |
# COMMA-separated, left-recursive list of enumerator.
enumeratorList | |
::= enumerator | |
| enumeratorList COMMA enumerator | |
# An enumeration constant with an optional explicit value.
# enumerationConstant is defined outside this section of the grammar.
enumerator # identifiers must be flagged as ENUMERATION_CONSTANT | |
::= enumerationConstant EQUAL constantExpression | |
| enumerationConstant | |
# _Atomic used as a type specifier: ATOMIC followed by a parenthesized
# typeName. Note that ATOMIC is also accepted as a bare typeQualifier below.
atomicTypeSpecifier | |
::= ATOMIC LPAREN typeName RPAREN | |
# Type qualifiers, including the bare form of ATOMIC.
typeQualifier | |
::= CONST | |
| RESTRICT | |
| VOLATILE | |
| ATOMIC | |
# Function specifiers.
functionSpecifier | |
::= INLINE | |
| NORETURN | |
# _Alignas with either a type name or a constant expression as its argument.
alignmentSpecifier | |
::= ALIGNAS LPAREN typeName RPAREN | |
| ALIGNAS LPAREN constantExpression RPAREN | |
# A declarator: an optional pointer prefix followed by a directDeclarator.
declarator | |
::= pointer directDeclarator | |
| directDeclarator | |
# Marpa completion event, named "directDeclarator", raised each time a
# <directDeclarator> has been completely recognized.
# NOTE(review): the handler for this event lives outside this section;
# presumably it is used to record declared identifiers -- confirm.
event directDeclarator = completed <directDeclarator> | |
# The core of a declarator: an identifier or a parenthesized declarator,
# optionally followed by any number of array or function suffixes
# (left-recursive). Only the alternative taking a parameterTypeList uses the
# pausing LPAREN_SCOPE/RPAREN_SCOPE lexemes; the empty-parentheses and
# identifierList alternatives use the plain LPAREN/RPAREN.
directDeclarator | |
::= IDENTIFIER | |
| LPAREN declarator RPAREN | |
| directDeclarator LBRACKET RBRACKET | |
| directDeclarator LBRACKET STAR RBRACKET | |
| directDeclarator LBRACKET STATIC typeQualifierList assignmentExpression RBRACKET | |
| directDeclarator LBRACKET STATIC assignmentExpression RBRACKET | |
| directDeclarator LBRACKET typeQualifierList STAR RBRACKET | |
| directDeclarator LBRACKET typeQualifierList STATIC assignmentExpression RBRACKET | |
| directDeclarator LBRACKET typeQualifierList assignmentExpression RBRACKET | |
| directDeclarator LBRACKET typeQualifierList RBRACKET | |
| directDeclarator LBRACKET assignmentExpression RBRACKET | |
| directDeclarator LPAREN_SCOPE parameterTypeList RPAREN_SCOPE | |
| directDeclarator LPAREN RPAREN | |
| directDeclarator LPAREN identifierList RPAREN | |
# One or more STAR, each optionally followed by type qualifiers
# (right-recursive).
pointer | |
::= STAR typeQualifierList pointer | |
| STAR typeQualifierList | |
| STAR pointer | |
| STAR | |
# One or more type qualifiers (left-recursive).
typeQualifierList | |
::= typeQualifier | |
| typeQualifierList typeQualifier | |
# A parameter list, optionally terminated by ", ..." for variadic functions.
parameterTypeList | |
::= parameterList COMMA ELLIPSIS | |
| parameterList | |
# COMMA-separated, left-recursive list of parameterDeclaration.
parameterList | |
::= parameterDeclaration | |
| parameterList COMMA parameterDeclaration | |
# Marpa completion event, named "parameterDeclaration", raised each time a
# <parameterDeclaration> has been completely recognized.
# NOTE(review): the handler for this event lives outside this section --
# confirm what it does with the completed parameter.
event parameterDeclaration = completed <parameterDeclaration> | |
# A single parameter: specifiers with a named declarator, with an abstract
# (unnamed) declarator, or specifiers alone.
parameterDeclaration | |
::= declarationSpecifiers declarator | |
| declarationSpecifiers abstractDeclarator | |
| declarationSpecifiers | |
# COMMA-separated, left-recursive list of plain identifiers (used by the
# identifierList alternative of directDeclarator).
identifierList | |
::= IDENTIFIER | |
| identifierList COMMA IDENTIFIER | |
# A type name: specifiers/qualifiers with an optional abstract declarator.
typeName | |
::= specifierQualifierList abstractDeclarator | |
| specifierQualifierList | |
# A declarator without an identifier: a pointer, a directAbstractDeclarator,
# or both.
abstractDeclarator | |
::= pointer directAbstractDeclarator | |
| pointer | |
| directAbstractDeclarator | |
# Array and function suffixes without a declared name. Every array form and
# function form appears twice: once standing alone and once applied to a
# preceding directAbstractDeclarator (left recursion). As in directDeclarator,
# only the alternatives taking a parameterTypeList use the
# LPAREN_SCOPE/RPAREN_SCOPE lexemes.
directAbstractDeclarator | |
::= LPAREN abstractDeclarator RPAREN | |
| LBRACKET RBRACKET | |
| LBRACKET STAR RBRACKET | |
| LBRACKET STATIC typeQualifierList assignmentExpression RBRACKET | |
| LBRACKET STATIC assignmentExpression RBRACKET | |
| LBRACKET typeQualifierList STATIC assignmentExpression RBRACKET | |
| LBRACKET typeQualifierList assignmentExpression RBRACKET | |
| LBRACKET typeQualifierList RBRACKET | |
| LBRACKET assignmentExpression RBRACKET | |
| directAbstractDeclarator LBRACKET RBRACKET | |
| directAbstractDeclarator LBRACKET STAR RBRACKET | |
| directAbstractDeclarator LBRACKET STATIC typeQualifierList assignmentExpression RBRACKET | |
| directAbstractDeclarator LBRACKET STATIC assignmentExpression RBRACKET | |
| directAbstractDeclarator LBRACKET typeQualifierList assignmentExpression RBRACKET | |
| directAbstractDeclarator LBRACKET typeQualifierList STATIC assignmentExpression RBRACKET | |
| directAbstractDeclarator LBRACKET typeQualifierList RBRACKET | |
| directAbstractDeclarator LBRACKET assignmentExpression RBRACKET | |
| LPAREN RPAREN | |
| LPAREN_SCOPE parameterTypeList RPAREN_SCOPE | |
| directAbstractDeclarator LPAREN RPAREN | |
| directAbstractDeclarator LPAREN_SCOPE parameterTypeList RPAREN_SCOPE | |
# An initializer: a brace-enclosed initializer list (with or without a
# trailing COMMA) or a single assignmentExpression. Uses the plain
# LCURLY/RCURLY lexemes.
initializer | |
::= LCURLY initializerList RCURLY | |
| LCURLY initializerList COMMA RCURLY | |
| assignmentExpression | |
# COMMA-separated, left-recursive list of initializers, each optionally
# preceded by a designation.
initializerList | |
::= designation initializer | |
| initializer | |
| initializerList COMMA designation initializer | |
| initializerList COMMA initializer | |
# A designator list followed by EQUAL.
designation | |
::= designatorList EQUAL | |
# One or more designators (left-recursive).
designatorList | |
::= designator | |
| designatorList designator | |
# A single designator: an array index "[constantExpression]" or a member
# selector ".IDENTIFIER".
designator | |
::= LBRACKET constantExpression RBRACKET | |
| DOT IDENTIFIER | |
# _Static_assert(constantExpression, STRING_LITERAL); -- the message string
# is mandatory in this rule.
staticAssertDeclaration | |
::= STATIC_ASSERT LPAREN constantExpression COMMA STRING_LITERAL RPAREN SEMICOLON | |
# Any statement: dispatches to one of the six statement kinds.
statement | |
::= labeledStatement | |
| compoundStatement | |
| expressionStatement | |
| selectionStatement | |
| iterationStatement | |
| jumpStatement | |
# A statement prefixed by a goto label, a "case" label or "default".
labeledStatement | |
::= IDENTIFIER COLON statement | |
| CASE constantExpression COLON statement | |
| DEFAULT COLON statement | |
# A block. It is delimited by LCURLY_SCOPE/RCURLY_SCOPE, which match the same
# text as LCURLY/RCURLY but are declared with "pause => after" in the G0
# section.
# NOTE(review): presumably the pause lets the script track block scopes for
# the symbol table -- the handler is not visible in this section; confirm.
compoundStatement | |
::= LCURLY_SCOPE RCURLY_SCOPE | |
| LCURLY_SCOPE blockItemList RCURLY_SCOPE | |
# One or more block items (left-recursive).
blockItemList | |
::= blockItem | |
| blockItemList blockItem | |
# A block item is either a declaration or a statement, so the two may be
# freely interleaved inside a block.
blockItem | |
::= declaration | |
| statement | |
# An optional expression terminated by SEMICOLON (a lone SEMICOLON is the
# empty statement).
expressionStatement | |
::= SEMICOLON | |
| expression SEMICOLON | |
# if / if-else / switch. Both the with-ELSE and the without-ELSE forms are
# listed and nothing in this rule ties an ELSE to a particular IF, so nested
# ifs are syntactically ambiguous at this level.
# NOTE(review): how that ambiguity is resolved is not visible in this
# section -- confirm.
selectionStatement | |
::= IF LPAREN expression RPAREN statement ELSE statement | |
| IF LPAREN expression RPAREN statement | |
| SWITCH LPAREN expression RPAREN statement | |
# while / do-while / for. The "for" header reuses expressionStatement for the
# clauses that end in SEMICOLON (so each may be empty); its first clause may
# instead be a declaration, and the final expression is optional.
iterationStatement | |
::= WHILE LPAREN expression RPAREN statement | |
| DO statement WHILE LPAREN expression RPAREN SEMICOLON | |
| FOR LPAREN expressionStatement expressionStatement RPAREN statement | |
| FOR LPAREN expressionStatement expressionStatement expression RPAREN statement | |
| FOR LPAREN declaration expressionStatement RPAREN statement | |
| FOR LPAREN declaration expressionStatement expression RPAREN statement | |
# goto / continue / break / return (with or without a value).
jumpStatement | |
::= GOTO IDENTIFIER SEMICOLON | |
| CONTINUE SEMICOLON | |
| BREAK SEMICOLON | |
| RETURN SEMICOLON | |
| RETURN expression SEMICOLON | |
# A translation unit: one or more external declarations (left-recursive; an
# empty input is not accepted by this rule).
translationUnit | |
::= externalDeclaration | |
| translationUnit externalDeclaration | |
# A top-level item: a function definition or a declaration.
externalDeclaration | |
::= functionDefinition | |
| declaration | |
# A function definition. The first alternative places a declarationList
# between the declarator and the body (old-style definitions whose parameter
# declarations follow the parameter name list); the second is the usual form.
functionDefinition | |
::= declarationSpecifiers declarator declarationList compoundStatement | |
| declarationSpecifiers declarator compoundStatement | |
# One or more declarations (left-recursive).
declarationList | |
::= declaration | |
| declarationList declaration | |
# | |
# G0 (tokens), c.f. http://www.quut.com/c/ANSI-C-grammar-l.html | |
# | |
# Intermediary tokens | |
# These are lexical building blocks only: none of them is declared with
# :lexeme in this section; they are referenced from the lexeme rules further
# down. Naming convention used throughout:
#   X_any   = zero or more X
#   X_many  = one or more X
#   X_maybe = optional X (written as two rules, one of them empty)
# O = octal digit, D = decimal digit, NZ = non-zero decimal digit,
# L = identifier start character, A = identifier continuation character,
# H = hexadecimal digit, HP = hexadecimal prefix ("0x" / "0X").
O ~ [0-7] | |
O_any ~ O* | |
D ~ [0-9] | |
D_any ~ D* | |
D_many ~ D+ | |
NZ ~ [1-9] | |
L ~ [a-zA-Z_] | |
A ~ [a-zA-Z_0-9] | |
A_any ~ A* | |
H ~ [a-fA-F0-9] | |
H_any ~ H* | |
H_many ~ H+ | |
HP ~ '0' [xX] | |
SIGN_maybe ~ [+-] | |
SIGN_maybe ~ | |
# E = decimal exponent part, P = binary exponent part (used by the
# hexadecimal alternatives of F_CONSTANT), FS = floating suffix.
E ~ [Ee] SIGN_maybe D_many | |
E_maybe ~ E | |
E_maybe ~ | |
P ~ [Pp] SIGN_maybe D_many | |
FS ~ [fFlL] | |
FS_maybe ~ FS | |
FS_maybe ~ | |
# Integer suffix pieces: LL = long / long long marker, U = unsigned marker,
# IS = either order of the two, with the second one optional.
LL ~ 'll' | 'LL' | [lL] | |
LL_maybe ~ LL | |
LL_maybe ~ | |
U ~ [uU] | |
U_maybe ~ U | |
U_maybe ~ | |
IS ~ U LL_maybe | LL U_maybe | |
IS_maybe ~ IS | |
IS_maybe ~ | |
# CP = character-constant prefix, SP = string-literal prefix.
CP ~ [uUL] | |
CP_maybe ~ CP | |
CP_maybe ~ | |
SP ~ 'u8' | [uUL] | |
SP_maybe ~ SP | |
SP_maybe ~ | |
# Escape sequences. ES_AFTERBS is what may follow the backslash: a simple
# escape character, one to three octal digits, or 'x' plus hex digits.
# ES is the backslash (BS, defined near the end of the grammar) plus that.
ES_AFTERBS ~ [\'\"\?\\abfnrtv] | |
| O | |
| O O | |
| O O O | |
| 'x' H_many | |
ES ~ BS ES_AFTERBS | |
# Whitespace characters.
# NOTE(review): there is no explicit \r in this class; whether carriage
# returns are covered depends on how \v is interpreted inside a Perl
# character class -- confirm with CRLF input.
WS ~ [ \t\v\n\f] | |
WS_any ~ WS* | |
# Lexemes | |
# Keyword lexemes. Each keyword gets its own :lexeme declaration with a
# distinct priority, from -1 (AUTO) down to -45 (FUNC_NAME). All of these are
# numerically higher than the -100 given to the identifier-like lexemes
# declared after this block.
# NOTE(review): the intent is presumably that a keyword wins over
# IDENTIFIER/TYPEDEF_NAME/ENUMERATION_CONSTANT when both match the same text;
# confirm against the lexeme-priority rules of the Marpa::R2 version in use.
# TYPEDEF is the only keyword declared with "pause => after".
:lexeme ~ <AUTO> priority => -1 | |
AUTO ~ 'auto' | |
:lexeme ~ <BREAK> priority => -2 | |
BREAK ~ 'break' | |
:lexeme ~ <CASE> priority => -3 | |
CASE ~ 'case' | |
:lexeme ~ <CHAR> priority => -4 | |
CHAR ~ 'char' | |
:lexeme ~ <CONST> priority => -5 | |
CONST ~ 'const' | |
:lexeme ~ <CONTINUE> priority => -6 | |
CONTINUE ~ 'continue' | |
:lexeme ~ <DEFAULT> priority => -7 | |
DEFAULT ~ 'default' | |
:lexeme ~ <DO> priority => -8 | |
DO ~ 'do' | |
:lexeme ~ <DOUBLE> priority => -9 | |
DOUBLE ~ 'double' | |
:lexeme ~ <ELSE> priority => -10 | |
ELSE ~ 'else' | |
:lexeme ~ <ENUM> priority => -11 | |
ENUM ~ 'enum' | |
:lexeme ~ <EXTERN> priority => -12 | |
EXTERN ~ 'extern' | |
:lexeme ~ <FLOAT> priority => -13 | |
FLOAT ~ 'float' | |
:lexeme ~ <FOR> priority => -14 | |
FOR ~ 'for' | |
:lexeme ~ <GOTO> priority => -15 | |
GOTO ~ 'goto' | |
:lexeme ~ <IF> priority => -16 | |
IF ~ 'if' | |
:lexeme ~ <INLINE> priority => -17 | |
INLINE ~ 'inline' | |
:lexeme ~ <INT> priority => -18 | |
INT ~ 'int' | |
:lexeme ~ <LONG> priority => -19 | |
LONG ~ 'long' | |
:lexeme ~ <REGISTER> priority => -20 | |
REGISTER ~ 'register' | |
:lexeme ~ <RESTRICT> priority => -21 | |
RESTRICT ~ 'restrict' | |
:lexeme ~ <RETURN> priority => -22 | |
RETURN ~ 'return' | |
:lexeme ~ <SHORT> priority => -23 | |
SHORT ~ 'short' | |
:lexeme ~ <SIGNED> priority => -24 | |
SIGNED ~ 'signed' | |
:lexeme ~ <SIZEOF> priority => -25 | |
SIZEOF ~ 'sizeof' | |
:lexeme ~ <STATIC> priority => -26 | |
STATIC ~ 'static' | |
:lexeme ~ <STRUCT> priority => -27 | |
STRUCT ~ 'struct' | |
:lexeme ~ <SWITCH> priority => -28 | |
SWITCH ~ 'switch' | |
:lexeme ~ <TYPEDEF> priority => -29 pause => after | |
TYPEDEF ~ 'typedef' | |
:lexeme ~ <UNION> priority => -30 | |
UNION ~ 'union' | |
:lexeme ~ <UNSIGNED> priority => -31 | |
UNSIGNED ~ 'unsigned' | |
:lexeme ~ <VOID> priority => -32 | |
VOID ~ 'void' | |
:lexeme ~ <VOLATILE> priority => -33 | |
VOLATILE ~ 'volatile' | |
:lexeme ~ <WHILE> priority => -34 | |
WHILE ~ 'while' | |
:lexeme ~ <ALIGNAS> priority => -35 | |
ALIGNAS ~ '_Alignas' | |
:lexeme ~ <ALIGNOF> priority => -36 | |
ALIGNOF ~ '_Alignof' | |
:lexeme ~ <ATOMIC> priority => -37 | |
ATOMIC ~ '_Atomic' | |
:lexeme ~ <BOOL> priority => -38 | |
BOOL ~ '_Bool' | |
:lexeme ~ <COMPLEX> priority => -39 | |
COMPLEX ~ '_Complex' | |
:lexeme ~ <GENERIC> priority => -40 | |
GENERIC ~ '_Generic' | |
:lexeme ~ <IMAGINARY> priority => -41 | |
IMAGINARY ~ '_Imaginary' | |
:lexeme ~ <NORETURN> priority => -42 | |
NORETURN ~ '_Noreturn' | |
:lexeme ~ <STATIC_ASSERT> priority => -43 | |
STATIC_ASSERT ~ '_Static_assert' | |
:lexeme ~ <THREAD_LOCAL> priority => -44 | |
THREAD_LOCAL ~ '_Thread_local' | |
:lexeme ~ <FUNC_NAME> priority => -45 | |
FUNC_NAME ~ '__func__' | |
# | |
## DETERMINED AT RUN TIME | |
# | |
# TYPEDEF_NAME, ENUMERATION_CONSTANT and IDENTIFIER share the same lexical
# pattern (L A_any) and the same priority (-100), so the lexer alone cannot
# choose between them. All three are declared with "pause => before".
# NOTE(review): presumably the script uses that pause to pick the right
# lexeme from its symbol table -- the handler is not in this section; confirm.
:lexeme ~ <TYPEDEF_NAME> priority => -100 pause => before | |
:lexeme ~ <ENUMERATION_CONSTANT> priority => -100 pause => before | |
:lexeme ~ <IDENTIFIER> priority => -100 pause => before | |
TYPEDEF_NAME ~ L A_any | |
ENUMERATION_CONSTANT ~ L A_any | |
IDENTIFIER ~ L A_any | |
# Integer constant: hexadecimal, decimal (non-zero first digit), octal
# (leading '0', which also covers a lone "0"), each with an optional integer
# suffix; or a character constant with an optional prefix.
:lexeme ~ <I_CONSTANT> priority => -101 | |
I_CONSTANT ~ HP H_many IS_maybe | |
| NZ D_any IS_maybe | |
| '0' O_any IS_maybe | |
| CP_maybe QUOTE I_CONSTANT_INSIDE_many QUOTE | |
# Floating constant: three decimal forms (exponent only, fraction with
# optional integer part, integer part with trailing '.') and the three
# matching hexadecimal forms, where the binary exponent P is mandatory.
:lexeme ~ <F_CONSTANT> priority => -102 | |
F_CONSTANT ~ D_many E FS_maybe | |
| D_any '.' D_many E_maybe FS_maybe | |
| D_many '.' E_maybe FS_maybe | |
| HP H_many P FS_maybe | |
| HP H_any '.' H_many P FS_maybe | |
| HP H_many '.' P FS_maybe | |
# String literal: one or more STRING_LITERAL_UNIT. Each unit ends with
# optional whitespace (see STRING_LITERAL_UNIT near the end of the grammar),
# so adjacent quoted strings separated only by whitespace form one lexeme.
:lexeme ~ <STRING_LITERAL> priority => -103 | |
STRING_LITERAL ~ STRING_LITERAL_UNIT+ | |
# Multi-character operator lexemes, with priorities from -104 (ELLIPSIS)
# down to -125 (NE_OP): the ellipsis, the compound assignment operators, the
# shift, increment/decrement and member-access operators, and the logical
# and comparison operators.
:lexeme ~ <ELLIPSIS> priority => -104 | |
ELLIPSIS ~ '...' | |
:lexeme ~ <RIGHT_ASSIGN> priority => -105 | |
RIGHT_ASSIGN ~ '>>=' | |
:lexeme ~ <LEFT_ASSIGN> priority => -106 | |
LEFT_ASSIGN ~ '<<=' | |
:lexeme ~ <ADD_ASSIGN> priority => -107 | |
ADD_ASSIGN ~ '+=' | |
:lexeme ~ <SUB_ASSIGN> priority => -108 | |
SUB_ASSIGN ~ '-=' | |
:lexeme ~ <MUL_ASSIGN> priority => -109 | |
MUL_ASSIGN ~ '*=' | |
:lexeme ~ <DIV_ASSIGN> priority => -110 | |
DIV_ASSIGN ~ '/=' | |
:lexeme ~ <MOD_ASSIGN> priority => -111 | |
MOD_ASSIGN ~ '%=' | |
:lexeme ~ <AND_ASSIGN> priority => -112 | |
AND_ASSIGN ~ '&=' | |
:lexeme ~ <XOR_ASSIGN> priority => -113 | |
XOR_ASSIGN ~ '^=' | |
:lexeme ~ <OR_ASSIGN> priority => -114 | |
OR_ASSIGN ~ '|=' | |
:lexeme ~ <RIGHT_OP> priority => -115 | |
RIGHT_OP ~ '>>' | |
:lexeme ~ <LEFT_OP> priority => -116 | |
LEFT_OP ~ '<<' | |
:lexeme ~ <INC_OP> priority => -117 | |
INC_OP ~ '++' | |
:lexeme ~ <DEC_OP> priority => -118 | |
DEC_OP ~ '--' | |
:lexeme ~ <PTR_OP> priority => -119 | |
PTR_OP ~ '->' | |
:lexeme ~ <AND_OP> priority => -120 | |
AND_OP ~ '&&' | |
:lexeme ~ <OR_OP> priority => -121 | |
OR_OP ~ '||' | |
:lexeme ~ <LE_OP> priority => -122 | |
LE_OP ~ '<=' | |
:lexeme ~ <GE_OP> priority => -123 | |
GE_OP ~ '>=' | |
:lexeme ~ <EQ_OP> priority => -124 | |
EQ_OP ~ '==' | |
:lexeme ~ <NE_OP> priority => -125 | |
NE_OP ~ '!=' | |
# Punctuation lexemes, with priorities from -126 down to -149.
# The curly braces and square brackets also accept their digraph spellings
# ('<%', '%>', '<:', ':>').
# Four lexemes exist in two flavours that match exactly the same text at the
# same priority: LCURLY/LCURLY_SCOPE, RCURLY/RCURLY_SCOPE,
# LPAREN/LPAREN_SCOPE and RPAREN/RPAREN_SCOPE. The *_SCOPE flavour is
# declared with "pause => after"; which flavour applies at a given position
# is decided by which one the G1 rules reference there.
# NOTE(review): presumably the pauses drive scope entry/exit in the symbol
# table -- the handler is not in this section; confirm.
:lexeme ~ <SEMICOLON> priority => -126 | |
SEMICOLON ~ ';' | |
:lexeme ~ <LCURLY> priority => -127 | |
LCURLY ~ '{' | '<%' | |
:lexeme ~ <LCURLY_SCOPE> priority => -127 pause => after | |
LCURLY_SCOPE ~ '{' | '<%' | |
:lexeme ~ <RCURLY> priority => -128 | |
RCURLY ~ '}' | '%>' | |
:lexeme ~ <RCURLY_SCOPE> priority => -128 pause => after | |
RCURLY_SCOPE ~ '}' | '%>' | |
:lexeme ~ <COMMA> priority => -129 | |
COMMA ~ ',' | |
:lexeme ~ <COLON> priority => -130 | |
COLON ~ ':' | |
:lexeme ~ <EQUAL> priority => -131 | |
EQUAL ~ '=' | |
:lexeme ~ <LPAREN> priority => -132 | |
LPAREN ~ '(' | |
:lexeme ~ <LPAREN_SCOPE> priority => -132 pause => after | |
LPAREN_SCOPE ~ '(' | |
:lexeme ~ <RPAREN> priority => -133 | |
RPAREN ~ ')' | |
:lexeme ~ <RPAREN_SCOPE> priority => -133 pause => after | |
RPAREN_SCOPE ~ ')' | |
:lexeme ~ <LBRACKET> priority => -134 | |
LBRACKET ~ '[' | '<:' | |
:lexeme ~ <RBRACKET> priority => -135 | |
RBRACKET ~ ']' | ':>' | |
:lexeme ~ <DOT> priority => -136 | |
DOT ~ '.' | |
:lexeme ~ <AMPERSAND> priority => -137 | |
AMPERSAND ~ '&' | |
:lexeme ~ <EXCLAMATION> priority => -138 | |
EXCLAMATION ~ '!' | |
:lexeme ~ <TILDE> priority => -139 | |
TILDE ~ '~' | |
:lexeme ~ <HYPHEN> priority => -140 | |
HYPHEN ~ '-' | |
:lexeme ~ <PLUS> priority => -141 | |
PLUS ~ '+' | |
:lexeme ~ <STAR> priority => -142 | |
STAR ~ '*' | |
:lexeme ~ <SLASH> priority => -143 | |
SLASH ~ '/' | |
:lexeme ~ <PERCENT> priority => -144 | |
PERCENT ~ '%' | |
:lexeme ~ <LESS_THAN> priority => -145 | |
LESS_THAN ~ '<' | |
:lexeme ~ <GREATER_THAN> priority => -146 | |
GREATER_THAN ~ '>' | |
:lexeme ~ <CARET> priority => -147 | |
CARET ~ '^' | |
:lexeme ~ <VERTICAL_BAR> priority => -148 | |
VERTICAL_BAR ~ '|' | |
:lexeme ~ <QUESTION_MARK> priority => -149 | |
QUESTION_MARK ~ '?' | |
# Discard rules: text matched by these is thrown away by the lexer and never
# reaches the G1 grammar. Five things are discarded: single whitespace
# characters, ANYTHING_ELSE (defined near the end of the grammar), C comments,
# C++ comments, and lines starting with '#'.
:discard ~ WS # whitespace separates tokens | |
:discard ~ ANYTHING_ELSE # discard bad characters | |
# | |
# Discard of a C comment, c.f. https://gist.github.com/jeffreykegler/5015057 | |
# | |
# Structure of the comment interior, in order:
#   1. any run of non-'*' characters;
#   2. zero or more "star prefixed segments": one or more '*', then one
#      character that is neither '/' nor '*', then any run of non-'*'
#      characters;
#   3. any run of '*' immediately before the closing '*/'.
# This keeps the interior from containing the closing "*/" sequence.
<C style comment> ~ '/*' <comment interior> '*/' | |
<comment interior> ~ | |
<optional non stars> | |
<optional star prefixed segments> | |
<optional pre final stars> | |
<optional non stars> ~ [^*]* | |
<optional star prefixed segments> ~ <star prefixed segment>* | |
<star prefixed segment> ~ <stars> [^/*] <optional star free text> | |
<stars> ~ [*]+ | |
<optional star free text> ~ [^*]* | |
<optional pre final stars> ~ [*]* | |
:discard ~ <C style comment> | |
# | |
# Discard of a C++ comment | |
# | |
# From '//' up to, but not including, the next newline.
<Cplusplus style comment> ~ '//' <Cplusplus comment interior> | |
<Cplusplus comment interior> ~ [^\n]* | |
:discard ~ <Cplusplus style comment> | |
# | |
# Discard of a Preprocessor comment | |
# | |
# From '#' up to, but not including, the next newline: preprocessor lines
# are dropped rather than interpreted. The pattern stops at the first
# newline, so a directive continued with backslash-newline is only discarded
# up to the end of its first physical line.
<Preprocessor style comment> ~ '#' <Preprocessor comment interior> | |
<Preprocessor comment interior> ~ [^\n]* | |
:discard ~ <Preprocessor style comment> | |
# | |
# Internal tokens added | |
# | |
# Helper tokens referenced by the lexeme and discard rules above.
# QUOTE is the single-quote character delimiting character constants.
QUOTE ~ ['] | |
# Content of a character constant: any character except single quote,
# backslash and newline, or an escape sequence; at least one is required.
I_CONSTANT_INSIDE ~ [^'\\\n] | |
I_CONSTANT_INSIDE ~ ES | |
I_CONSTANT_INSIDE_many ~ I_CONSTANT_INSIDE+ | |
# Content of a string literal: any character except double quote, backslash
# and newline, or an escape sequence; may be empty.
STRING_LITERAL_INSIDE ~ [^"\\\n] | |
STRING_LITERAL_INSIDE ~ ES | |
STRING_LITERAL_INSIDE_any ~ STRING_LITERAL_INSIDE* | |
# One quoted string with optional prefix, plus any whitespace that follows it.
STRING_LITERAL_UNIT ~ SP_maybe '"' STRING_LITERAL_INSIDE_any '"' WS_any | |
# The backslash that introduces an escape sequence (see ES).
BS ~ '\' | |
# NOTE(review): [.] is a bracketed character class, and inside a Perl
# character class '.' is a literal dot, not "any character". As written this
# looks like it matches only '.', which would not discard arbitrary bad
# characters as the :discard comment says and would overlap with the DOT
# lexeme -- confirm the intent before changing it.
ANYTHING_ELSE ~ [.] | |
# | |
# Lexeme events are appended automatically here | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment