Skip to content

Instantly share code, notes, and snippets.

@jddurand
Last active December 17, 2015 00:38
Show Gist options
  • Save jddurand/5522268 to your computer and use it in GitHub Desktop.
Save jddurand/5522268 to your computer and use it in GitHub Desktop.
c2ast - with typedef/enum resolv - BEING TESTED
#!env perl
# Execute this script with no parameter to get the online help
# ****************************************************************************
#
# This script will do a C language -> AST using Marpa parser
#
# C.f. https://groups.google.com/forum/?fromgroups=#!forum/marpa-parser
#
# A symbol table is built using a scanless recognizer
# C.f. http://www.cs.dartmouth.edu/~mckeeman/references/JCLT/ResolvingTypedefsInAMultipassCCompiler.pdf
#
# ****************************************************************************
#
# Copyright 2013 Jean-Damien Durand
# you can redistribute this file and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with this file. If not, see
# http://www.gnu.org/licenses/.
#
# ****************************************************************************
use strict;
use diagnostics;
use Marpa::R2 2.054_000;
use File::Slurp qw/read_file/;
use Data::Dumper qw/Dumper/;
use POSIX qw/EXIT_SUCCESS EXIT_FAILURE/;
use FindBin qw/$Bin/;
use Log::Log4perl qw/:easy/;
use Log::Any::Adapter;
use Log::Any qw/$log/;
use Carp qw/longmess/;
use Clone qw/clone/;
use constant {NO_TRACE_MODE=>0, EVENT_TRACE_MODE=>1, LEXEME_TRACE_MODE=>2};
use constant {LATEST_G1_ORDINAL => -1};
use constant {DOT_PREDICTION => 0, DOT_COMPLETION => -1};
initLog();
##############################################################################
# main
##############################################################################
# Command line and inputs
# -----------------------
# The C source is the first argument, the grammar is read from the DATA section
my $file = shift || usage(EXIT_FAILURE);
my $input = read_file($file);
my $grammar_source = do { local $/; <DATA> };
# State shared by the parsing routines
# ------------------------------------
my $delayedExitScope = 0; # Set when a scope exit is pending
my $typedefPerScope = [ {} ]; # One typedef table per scope level
my $enumAnyScope = {}; # Enumeration constants, whatever the scope
my %G1LocationToTypedef = (); # G1 locations at which a TYPEDEF lexeme was seen
my %G1LocationToTypedefName = (); # G1 locations at which a TYPEDEF_NAME lexeme was read
my $grammar = Marpa::R2::Scanless::G->new({bless_package => 'C::AST', source => \$grammar_source});
my $recce = Marpa::R2::Scanless::R->new({grammar => $grammar});
# Results of recognizer calls that are asked for over and over again
# ------------------------------------------------------------------
my $cacheProgress = undef; # Latest $recce->progress(LATEST_G1_ORDINAL)
my $cacheEvents = undef; # Latest flattened list of $recce->event(...)
# Parse input
# -----------
# After every pause of the recognizer: refresh the caches, handle the events,
# handle the paused lexeme, then resume until the end of the input is reached.
my $traceMode = NO_TRACE_MODE; # Only drives the contextual part of the trace header
my $pos = M_read();
while (1) {
    cacheProgressAndEvents();
    doEvent();
    doLexeme();
    $traceMode = NO_TRACE_MODE;
    $pos = M_resume();
    last if ($pos >= length($input));
}
# Show AST tree
# -------------
# Every parse tree value is dumped; exactly one is expected.
my $nvalue = 0;
my $valueRef = M_value() || die show_last_expression();
while (defined($valueRef)) {
    ++$nvalue;
    print Dumper($valueRef);
    $valueRef = M_value();
}
noticef('Number of parse trees: %d', $nvalue);
if ($nvalue == 1) {
    exit(EXIT_SUCCESS);
}
warnf('Number of parse tree should be 1');
exit(EXIT_FAILURE);
#########
# initLog
#########
# Initialise logging.
# A non-empty, readable log4perl.conf located in the directory of this script
# takes precedence; otherwise a built-in configuration (root logger at INFO
# level on a Screen appender) is used. Log::Any is then routed to Log4perl.
sub initLog {
    # Loaded explicitly: catfile() below must not depend on another module
    # having pulled File::Spec in as a side effect.
    require File::Spec;
    our $defaultLog4perlConf = <<DEFAULT_LOG4PERL_CONF;
log4perl.rootLogger = INFO, Screen
log4perl.appender.Screen = Log::Log4perl::Appender::Screen
log4perl.appender.Screen.stderr = 0
log4perl.appender.Screen.layout = PatternLayout
log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %6P %m{chomp}%n
DEFAULT_LOG4PERL_CONF
    my $filenameLog4perlConf = File::Spec->catfile($Bin, 'log4perl.conf');
    if (-r $filenameLog4perlConf && -s $filenameLog4perlConf) {
        Log::Log4perl::init($filenameLog4perlConf);
    } else {
        # init() accepts a reference to a string holding the configuration
        Log::Log4perl::init(\$defaultLog4perlConf);
    }
    Log::Any::Adapter->set('Log4perl');
}
############################################
# Trace wrappers to have something pedagogic
############################################
# Common back-end of the xxxf() wrappers: the caller's format string is
# prefixed with a '%s :: ' slot that receives the trace header, then the
# whole is handed over to the $log method whose name is given first.
sub _trace {
    my ($method, $formatString, @arguments) = @_;
    my $fullFormat = sprintf('%%s :: %s', $formatString);
    $log->$method($fullFormat, traceHeader(), @arguments);
}
# One wrapper per log level; each one only forwards to _trace() with the
# name of the corresponding $log method.
sub tracef {
    _trace('tracef', @_);
}
sub debugf {
    _trace('debugf', @_);
}
sub infof {
    _trace('infof', @_);
}
sub fatalf {
    _trace('fatalf', @_);
}
sub warnf {
    _trace('warnf', @_);
}
sub noticef {
    _trace('noticef', @_);
}
# Log the message as fatal, trace the exit, then leave with EXIT_FAILURE
sub exitf {
    fatalf(@_);
    tracef('EXIT_FAILURE');
    exit(EXIT_FAILURE);
}
###############
# cacheProgress
###############
# Refresh the cached progress report of the latest G1 location
sub cacheProgress {
    my $latestProgress = M_progress(LATEST_G1_ORDINAL);
    $cacheProgress = $latestProgress;
}
###############
# cacheEvents
###############
# Refresh the cached list of events.
# M_event() is called with increasing indices until it returns undef; every
# returned array reference is flattened into a single list whose reference
# is stored in $cacheEvents.
sub cacheEvents {
    my @events = ();
    my $eventIndex = 0;
    # A lexical is used on purpose: assigning to the global $_ here would
    # clobber whatever the caller is currently iterating on.
    while (defined(my $event = M_event($eventIndex++))) {
        push(@events, @{$event});
    }
    $cacheEvents = \@events;
}
########################
# cacheProgressAndEvents
########################
# Refresh both caches, progress first then events
sub cacheProgressAndEvents {
    cacheProgress();
    return cacheEvents();
}
############################
# show_last_expression
############################
# Build a human readable message telling which text was last recognized as
# a complete translationUnit, or that nothing was.
sub show_last_expression {
    my ($start, $end) = M_last_completed_range('translationUnit');
    if (defined($start)) {
        my $lastExpression = M_range_to_string($start, $end);
        return "Last expression successfully parsed was: $lastExpression";
    }
    return 'No expression was successfully parsed';
}
#########
# arrayEq
#########
# Compare two array references element by element, as strings.
# Returns 1 when both arrays have the same length and the same content,
# 0 otherwise.
sub arrayEq {
    my ($ap, $bp) = @_;
    my $length = scalar(@{$ap});
    return(0) if ($length != scalar(@{$bp}));
    my $index = 0;
    while ($index < $length) {
        return(0) if ($ap->[$index] ne $bp->[$index]);
        $index++;
    }
    return(1);
}
####################
# sprintfDotPosition
####################
# Pretty-print a dotted rule.
# $earleySetId and $i are optional and only appended to the text when defined.
# $dotPosition, when defined, inserts a '.' in the right-hand side: at that
# index when it is >= 0, at the very end otherwise (completion).
# Every symbol is printed between angle brackets.
sub sprintfDotPosition {
    my ($earleySetId, $i, $dotPosition, $lhs, @rhs) = @_;
    if (defined($dotPosition)) {
        my $insertAt = ($dotPosition >= 0) ? $dotPosition : scalar(@rhs);
        splice(@rhs, $insertAt, 0, '.');
    }
    my @decorated = ();
    foreach my $symbol (@rhs) {
        push(@decorated, ($symbol eq '.') ? $symbol : "<$symbol>");
    }
    my $rhs = join(' ', @decorated);
    # Select the format and its trailing values from what is known
    my $format;
    my @values = ($lhs, $rhs);
    if (! defined($earleySetId)) {
        if (! defined($i)) {
            $format = '<%s> ::= %s';
        } else {
            $format = '<%s> ::= %s (indice %d)';
            push(@values, $i);
        }
    } else {
        if (! defined($i)) {
            $format = '<%s> ::= %s (ordinal %d)';
            push(@values, $earleySetId);
        } else {
            $format = '<%s> ::= %s (ordinal %d, indice %d)';
            push(@values, $earleySetId, $i);
        }
    }
    return sprintf($format, @values);
}
################
# findInProgress
################
# Search a progress report for a dotted rule.
#   $earleySetId       : G1 ordinal to look at; the cached report is used when
#                        it designates the latest G1 location
#   $wantedDotPosition : dot position filter, or undef
#   $wantedLhs         : LHS symbol name filter, or undef
#   $wantedRhsp        : reference to an array of RHS symbol names, or undef
#   $fatalMode         : when true, items are logged with fatalf() instead of
#                        tracef()
# Returns 1 at the first item passing every given filter, 0 otherwise. When
# no filter is given all the items are logged and 0 is returned.
sub findInProgress {
    my ($earleySetId, $wantedDotPosition, $wantedLhs, $wantedRhsp, $fatalMode) = @_;
    my $hasFilter = (defined($wantedDotPosition) || defined($wantedLhs) || defined($wantedRhsp)) ? 1 : 0;
    my $nbLogged = 0;
    my $latest_g1_location = M_latest_g1_location();
    foreach my $item (($earleySetId == LATEST_G1_ORDINAL || $earleySetId == $latest_g1_location) ? @{$cacheProgress} : @{M_progress($earleySetId)}) {
        my ($rule_id, $dotPosition, $origin) = @{$item};
        if (defined($wantedDotPosition) && ($dotPosition != $wantedDotPosition)) {
            next;
        }
        my ($lhs, @rhs) = $grammar->rule($rule_id);
        if (defined($wantedLhs) && ($lhs ne $wantedLhs)) {
            next;
        }
        if (defined($wantedRhsp) && ! arrayEq(\@rhs, $wantedRhsp)) {
            next;
        }
        my $description = sprintfDotPosition($earleySetId, $nbLogged, $dotPosition, $lhs, @rhs);
        if ($fatalMode) {
            fatalf('%s', $description);
        } else {
            tracef('%s', $description);
        }
        # With at least one filter the first hit is enough
        return(1) if ($hasFilter);
        ++$nbLogged;
    }
    return(0);
}
############################################################################
# traceHeader - Intentionally no wrapper call here, otherwise deep recursion
############################################################################
# Build the prefix put in front of every logged message: latest G1 location,
# a contextual field depending on $traceMode, and the chain of callers.
# The recognizer is called directly here, never through the M_xxx wrappers.
sub traceHeader {
    # Walk the call stack. A frame is kept only when it sits at least two
    # levels above a _trace() frame, which leaves out _trace() itself and
    # the xxxf() wrapper that called it.
    my @callers = ();
    my $seen = 0;
    for (my $level = 1; ; $level++) {
        my @frame = caller($level);
        last if (! @frame);
        my $name = $frame[3];
        $name =~ s/^main:://;
        push(@callers, $name) if ($seen > 1);
        $seen = ($name eq '_trace') ? 1 : ($seen ? $seen + 1 : 0);
    }
    my $info;
    if ($traceMode == EVENT_TRACE_MODE) {
        $info = sprintf('[%43s]', join(',', @{$cacheEvents}));
    } else {
        # Blank field, kept as is unless a paused lexeme is to be shown
        $info = sprintf('[%5s %3s %15s %-17s]', '', '', '', '');
        if ($traceMode == LEXEME_TRACE_MODE) {
            my $lexeme = $recce->pause_lexeme();
            if (defined($lexeme)) {
                my ($start, $length) = $recce->pause_span();
                my $value = substr($input, $start, $length);
                my ($line, $column) = $recce->line_column($start);
                # These three lexemes get a question mark appended
                if ($lexeme eq 'TYPEDEF_NAME' || $lexeme eq 'ENUMERATION_CONSTANT' || $lexeme eq 'IDENTIFIER') {
                    $lexeme .= '?';
                }
                $info = sprintf('[%5d:%3d:%15s %-17s]', $line, $column, $lexeme, "'$value'");
            }
        }
    }
    # Outermost caller first
    my $g1 = sprintf('[G1 %5d]', $recce->latest_g1_location());
    return join(' ', $g1, $info, join('>', reverse(@callers)));
}
#########################################################################
# Exhaustive list of Marpa calls - wrapped just to have a logging of them
#########################################################################
# Every wrapper traces the call it is about to make, then forwards its
# arguments untouched to the recognizer and returns what the recognizer
# returns.
sub M_value {
    tracef('$recce->value()');
    return $recce->value();
}
sub M_read {
    tracef('$recce->read(\$input)');
    return $recce->read(\$input);
}
sub M_resume {
    tracef('$recce->resume()');
    return $recce->resume();
}
sub M_last_completed {
    my @args = @_;
    tracef('$recce->last_completed("%s")', @args);
    return $recce->last_completed(@args);
}
sub M_last_completed_range {
    my @args = @_;
    tracef('$recce->last_completed_range("%s")', @args);
    return $recce->last_completed_range(@args);
}
sub M_range_to_string {
    my @args = @_;
    tracef('$recce->range_to_string(%d, %d )', @args);
    return $recce->range_to_string(@args);
}
sub M_progress {
    my @args = @_;
    tracef('$recce->progress(%d)', @args);
    return $recce->progress(@args);
}
sub M_event {
    my @args = @_;
    tracef('$recce->event(%d)', @args);
    return $recce->event(@args);
}
sub M_pause_lexeme {
    tracef('$recce->pause_lexeme()');
    return $recce->pause_lexeme();
}
sub M_pause_span {
    tracef('$recce->pause_span()');
    return $recce->pause_span();
}
sub M_line_column {
    my @args = @_;
    tracef('$recce->line_column(%d)', @args);
    return $recce->line_column(@args);
}
sub M_substring {
    my @args = @_;
    tracef('$recce->substring(%d, %d)', @args);
    return $recce->substring(@args);
}
sub M_lexeme_read {
    my @args = @_;
    tracef('$recce->lexeme_read("%s", %d, %d, "%s")', @args);
    return $recce->lexeme_read(@args);
}
sub M_latest_g1_location {
    tracef('$recce->latest_g1_location()');
    return $recce->latest_g1_location();
}
##############################
# inspectTypeSpecifier
##############################
# Run inspectG1() for TYPEDEF_NAME: the candidate rules are the two
# declarator rules, the matches are looked up in %G1LocationToTypedefName.
# Arguments and return value are those of inspectG1().
sub inspectTypeSpecifier {
    my ($this_g1_location, $start_g1_locationp, $end_g1_locationp, $dot_location_and_rule_end_conditionp) = @_;
    # Each entry: [ start dot position, end dot position, LHS, RHS ]
    our @directDeclaratorEnclosingRules = (
        [ 1, 2, 'declarator', [ 'pointer', 'directDeclarator' ] ],
        [ DOT_PREDICTION, 1, 'declarator', [ 'directDeclarator' ] ],
    );
    my $rc = inspectG1('TYPEDEF_NAME',
                       $this_g1_location,
                       $start_g1_locationp,
                       $end_g1_locationp,
                       \@directDeclaratorEnclosingRules,
                       \%G1LocationToTypedefName,
                       $dot_location_and_rule_end_conditionp);
    return($rc);
}
##############################
# inspectDeclarationSpecifiers
##############################
# Run inspectG1() for TYPEDEF: the candidate rules are the rules that start
# with declarationSpecifiers, the matches are looked up in
# %G1LocationToTypedef.
# Arguments and return value are those of inspectG1().
sub inspectDeclarationSpecifiers {
    my ($this_g1_location, $start_g1_locationp, $end_g1_locationp, $dot_location_and_rule_end_conditionp) = @_;
    # Each entry: [ start dot position, end dot position, LHS, RHS ].
    # The RHS is compared symbol by symbol as strings, so the names must be
    # exactly those of the grammar: no stray whitespace, no missing symbol.
    our @declarationSpecifiersEnclosingRules = (
        [ DOT_PREDICTION, 1, 'declaration', [ 'declarationSpecifiers', 'SEMICOLON' ] ],
        [ DOT_PREDICTION, 1, 'declaration', [ 'declarationSpecifiers', 'initDeclaratorList', 'SEMICOLON' ] ],
        [ DOT_PREDICTION, 1, 'parameterDeclaration', [ 'declarationSpecifiers', 'declarator' ] ],
        [ DOT_PREDICTION, 1, 'parameterDeclaration', [ 'declarationSpecifiers', 'abstractDeclarator' ] ],
        [ DOT_PREDICTION, 1, 'parameterDeclaration', [ 'declarationSpecifiers' ] ],
        [ DOT_PREDICTION, 1, 'functionDefinition', [ 'declarationSpecifiers', 'declarator', 'declarationList', 'compoundStatement' ] ],
        # Same right-hand side as the one canReenterScope() looks for
        [ DOT_PREDICTION, 1, 'functionDefinition', [ 'declarationSpecifiers', 'declarator', 'compoundStatement' ] ]
    );
    return(inspectG1('TYPEDEF', $this_g1_location, $start_g1_locationp, $end_g1_locationp, \@declarationSpecifiersEnclosingRules, \%G1LocationToTypedef, $dot_location_and_rule_end_conditionp));
}
###########
# inspectG1
# rc undef: no candidate rule found
# rc 0 : candidate rule found, no match on $what
# rc 1 : candidate rule found, match on $what
###########
sub inspectG1 {
# Walk the G1 locations backward, starting at $this_g1_location, looking for
# a range [start, end] delimited by one of the candidate rules, and check if
# at least one location of that range is flagged in $matchesInG1p.
#
# $what                 : name used in the traces only
# $this_g1_location     : G1 location to start from
# $start_g1_locationp   : optional reference to a scalar, set to the range start on a match
# $end_g1_locationp     : optional reference to a scalar, set to the range end on a match
# $candidateRulesp      : reference to an array of
#                         [ start dot position, end dot position, LHS, reference to RHS ]
# $matchesInG1p         : reference to a hash keyed by G1 location
# $dot_location_and_rule_end_conditionp : optional reference to an array of
#                         [ dot position, LHS, reference to RHS ] that stop the walk
my ($what, $this_g1_location, $start_g1_locationp, $end_g1_locationp, $candidateRulesp, $matchesInG1p, $dot_location_and_rule_end_conditionp) = @_;
# Any false value, i.e. undef but also an explicit 0, is replaced by the
# latest G1 location
$this_g1_location ||= M_latest_g1_location();
my ($start_g1_location, $end_g1_location) = (undef, undef);
my $indexInCandidates = 0;
my $end_condition = 0;
my $rc = undef;
while (1) {
#
# Search
#
# Range end: the first candidate rule found in the progress report of the
# current location at its "end" dot position. Its index is remembered.
if (! defined($end_g1_location) && defined($candidateRulesp)) {
my $i = 0;
foreach (@{$candidateRulesp}) {
my ($dotPredictionStart, $dotPredictionEnd, $lhs, $rhsp) = @{$_};
if (findInProgress($this_g1_location, $dotPredictionEnd, $lhs, $rhsp, 0)) {
$end_g1_location = $this_g1_location;
$indexInCandidates = $i;
last;
}
++$i;
}
}
# Range start: the very same candidate rule at its "start" dot position.
# This is tried from the location where the end was found, downward.
if (defined($end_g1_location) && ! defined($start_g1_location) && defined($candidateRulesp)) {
my ($dotPredictionStart, $dotPredictionEnd, $lhs, $rhsp) = @{$candidateRulesp->[$indexInCandidates]};
if (findInProgress($this_g1_location, $dotPredictionStart, $lhs, $rhsp, 0)) {
$start_g1_location = $this_g1_location;
}
}
# A complete range is known: from now on rc is at least 0
if (defined($start_g1_location) && defined($end_g1_location)) {
$rc = 0;
tracef('G1 range [%d, %d]', $start_g1_location, $end_g1_location);
if ($start_g1_location > $end_g1_location) {
exitf('$start_g1_location %d > $end_g1_location %d !?', $start_g1_location, $end_g1_location);
}
if (grep {exists($matchesInG1p->{$_}) && $matchesInG1p->{$_}} ($start_g1_location..$end_g1_location)) {
tracef('G1 range [%d, %d] have %s', $start_g1_location, $end_g1_location, $what);
if (defined($start_g1_locationp)) {
${$start_g1_locationp} = $start_g1_location;
}
if (defined($end_g1_locationp)) {
${$end_g1_locationp} = $end_g1_location;
}
$rc = 1;
last;
} else {
tracef('G1 range [%d, %d] do not have %s', $start_g1_location, $end_g1_location, $what);
}
# No match in this range: forget it, the search goes on with older locations.
# NOTE(review): without an end condition the walk continues down to
# location 0, so rc can still become 1 because of an older, unrelated
# range - confirm that this is intended for callers giving no end condition.
$start_g1_location = undef;
$end_g1_location = undef;
}
#
# End condition: stop as soon as one of the given dotted rules is found in
# the progress report of the current location
#
if (defined($dot_location_and_rule_end_conditionp)) {
foreach (@{$dot_location_and_rule_end_conditionp}) {
my ($dotPrediction, $lhs, $rhsp) = @{$_};
if (findInProgress($this_g1_location, $dotPrediction, $lhs, $rhsp, 0)) {
$end_condition = 1;
last;
}
}
}
if ($end_condition) {
last;
}
#
# Next loop: previous G1 location, location 0 being the last one inspected
#
if (--$this_g1_location < 0) {
last;
}
}
# undef: no range found, 0: range(s) found but none flagged, 1: flagged range found
return($rc);
}
#########
# doEvent
#########
# Handle every cached event (see cacheEvents()), in order.
#   parameterDeclaration : grammar constraints on TYPEDEF and TYPEDEF_NAME
#   directDeclarator     : a declared identifier is entered as a typedef or
#                          obscures a typedef of the same name
#   enumerationConstant  : the identifier is entered as an enumeration constant
#   primaryExpression    : nothing yet
# Side effect: $traceMode is switched to EVENT_TRACE_MODE.
sub doEvent {
    $traceMode = EVENT_TRACE_MODE;
    foreach my $event (@{$cacheEvents}) {
        if ($event eq 'parameterDeclaration') {
            #
            # In a parameterDeclaration the typedef storage class is syntactically
            # allowed but never valid [ANSI C 3.5.4.3, 3.7.1].
            # The same constraint check is run for TYPEDEF_NAME.
            #
            doGrammarConstraints('TYPEDEF', [ [DOT_PREDICTION, $event, undef ] ]);
            doGrammarConstraints('TYPEDEF_NAME', [ [DOT_PREDICTION, $event, undef ] ]);
        }
        elsif ($event eq 'directDeclarator') {
            #
            # directDeclarator ::= IDENTIFIER .
            # can introduce names into the ordinary name space when it eventually
            # participates in the grammar rule:
            # declaration: declarationSpecifiers initDeclaratorList . SEMICOLON
            #
            if (findInProgress(LATEST_G1_ORDINAL , DOT_COMPLETION, 'directDeclarator', ['IDENTIFIER']) &&
                findInProgress(LATEST_G1_ORDINAL , 2, 'declaration', ['declarationSpecifiers', 'initDeclaratorList', 'SEMICOLON'])) {
                #
                # The text of the declarator is fetched once and serves both branches
                #
                my $directDeclarator = M_substring(M_last_completed('directDeclarator'));
                #
                # In a structDeclarator, ordinary name space names cannot be defined
                #
                if (findInProgress(LATEST_G1_ORDINAL , 1, 'structDeclarator', [ 'declarator', 'COLON', 'constantExpression' ]) ||
                    findInProgress(LATEST_G1_ORDINAL , 1, 'structDeclarator', [ 'declarator' ])) {
                    debugf('declaration of IDENTIFIER "%s" in structDeclarator context: parse symbol inactive', $directDeclarator);
                } else {
                    debugf('declaration of IDENTIFIER "%s" that can introduce name in name-space', $directDeclarator);
                    #
                    # A TYPEDEF in the declaration specifiers makes it a typedef name,
                    # otherwise the identifier obscures any typedef of the same name
                    #
                    if (inspectDeclarationSpecifiers(undef, undef, undef)) {
                        parseEnterTypedef($directDeclarator);
                    } else {
                        parseObscureTypedef($directDeclarator);
                    }
                }
            }
        } elsif ($event eq 'enumerationConstant') {
            #
            # An enumeration constant is not scope dependent - from now on it
            # obscures any use of its identifier in any scope
            #
            my $enumerationConstant = M_substring(M_last_completed('enumerationConstant'));
            parseEnterEnum($enumerationConstant);
        } elsif ($event eq 'primaryExpression') {
            #
            ## Anything special to do ?
            #
        }
    }
}
########################################
# doGrammarConstraints
########################################
# Exit with a fatal message when $what ('TYPEDEF' or 'TYPEDEF_NAME') is
# found where it is not allowed. The search goes backward from the latest
# G1 location and is bounded by the given end condition rules.
# Any other value of $what is an internal error and exits as well.
sub doGrammarConstraints {
    my ($what, $dot_location_and_rule_end_conditionp) = @_;
    my $this_g1_location = M_latest_g1_location();
    my ($start_g1_location, $end_g1_location);
    my $rc = 0;
    if ($what eq 'TYPEDEF_NAME') {
        $rc = inspectTypeSpecifier($this_g1_location, \$start_g1_location, \$end_g1_location, $dot_location_and_rule_end_conditionp);
    } elsif ($what eq 'TYPEDEF') {
        $rc = inspectDeclarationSpecifiers($this_g1_location, \$start_g1_location, \$end_g1_location, $dot_location_and_rule_end_conditionp);
    } else {
        exitf('Internal error, bad parameter "%s"', $what);
    }
    if ($rc) {
        #
        # Match: choose the G1 range quoted in the message
        #
        my ($from, $to);
        if ($start_g1_location < $end_g1_location) {
            ($from, $to) = ($start_g1_location, $end_g1_location);
        } elsif ($start_g1_location < $this_g1_location) {
            ($from, $to) = ($start_g1_location, $start_g1_location + 1);
        } else {
            ($from, $to) = ($this_g1_location - 1, $this_g1_location);
        }
        exitf('%s is not allowed in "%s"', $what, M_range_to_string($from, $to));
    }
}
#####################
# canEnterTypedefName
#####################
# Returns 0 when the latest progress report shows
# parameterDeclaration ::= declarationSpecifiers . declarator
# and 1 otherwise.
sub canEnterTypedefName {
    my $inParameterDeclaration = findInProgress(LATEST_G1_ORDINAL, 1, 'parameterDeclaration', ['declarationSpecifiers', 'declarator']);
    return(1) if (! $inParameterDeclaration);
    debugf('A parameterDeclaration cannot enter a TYPEDEF_NAME');
    return(0);
}
#############################
# canEnterEnumerationConstant
#############################
# There is no restriction for the moment: always returns 1
sub canEnterEnumerationConstant {
    return(1);
}
##########
# doLexeme
##########
# Handle the lexeme on which the recognizer paused, if any.
#   IDENTIFIER, TYPEDEF_NAME, ENUMERATION_CONSTANT : the ambiguity is resolved
#     here and the chosen lexeme is pushed with M_lexeme_read()
#   LPAREN_SCOPE, LCURLY_SCOPE, RPAREN_SCOPE, RCURLY_SCOPE : scope management
#   TYPEDEF : its G1 location is remembered
# Side effect: $traceMode is switched to LEXEME_TRACE_MODE.
sub doLexeme {
    $traceMode = LEXEME_TRACE_MODE;
    my $lexeme = M_pause_lexeme();
    return if (! defined($lexeme));
    if ($lexeme eq 'IDENTIFIER' || $lexeme eq 'TYPEDEF_NAME' || $lexeme eq 'ENUMERATION_CONSTANT') {
        #
        # Ambiguity management: the text is a typedef name when one is expected
        # and known as such, else an enumeration constant on the same
        # conditions, else a plain identifier
        #
        my ($lexeme_start, $lexeme_length) = M_pause_span();
        my $lexeme_value = substr($input, $lexeme_start, $lexeme_length);
        my $resolved;
        if (findInProgress(LATEST_G1_ORDINAL, DOT_PREDICTION, 'typeSpecifier', [ 'TYPEDEF_NAME' ]) && parseIsTypedef($lexeme_value) && canEnterTypedefName()) {
            $resolved = 'TYPEDEF_NAME';
        } elsif (findInProgress(LATEST_G1_ORDINAL, DOT_PREDICTION, 'constant', [ 'ENUMERATION_CONSTANT' ]) && parseIsEnum($lexeme_value) && canEnterEnumerationConstant()) {
            $resolved = 'ENUMERATION_CONSTANT';
        } else {
            $resolved = 'IDENTIFIER';
        }
        $lexeme = $resolved;
        #
        # Push the resolved lexeme
        #
        debugf('%s "%s"', $lexeme, $lexeme_value);
        my $read = M_lexeme_read($lexeme, $lexeme_start, $lexeme_length, $lexeme_value);
        if (! defined($read)) {
            my ($line, $column) = M_line_column($lexeme_start);
            exitf('Internal error at line %d, column %d: "%s" cannot be associated to lexeme %s', $line, $column, $lexeme_value, $lexeme);
        }
        if ($lexeme eq 'TYPEDEF_NAME') {
            my $latest_g1_location = M_latest_g1_location();
            debugf('%s detected at G1 location %d', $lexeme, $latest_g1_location);
            $G1LocationToTypedefName{$latest_g1_location} = 1;
        }
        #
        # A lexeme_read() can generate an event
        #
        cacheProgressAndEvents();
        doEvent();
        return;
    }
    #
    # Scope management: associated with file-scope, function body, compound
    # statement, or prototype
    # - function body matches compound statement
    # - file-scope is implicit here, one file is treated at a time
    #
    if ($lexeme eq 'LPAREN_SCOPE') {
        parseEnterScope();
        return;
    }
    if ($lexeme eq 'LCURLY_SCOPE') {
        if (! canReenterScope()) {
            parseEnterScope();
            return;
        }
        #
        # This is the beginning of the body of a functionDefinition. The check
        # is done here rather than when the functionDefinition rule completes.
        #
        # In functionDefinition typedef is syntactically allowed but never valid
        # [ANSI C 3.7.1]
        #
        doGrammarConstraints('TYPEDEF', [ [DOT_PREDICTION, 'functionDefinition', undef ] ]);
        parseReenterScope();
        return;
    }
    if ($lexeme eq 'RPAREN_SCOPE' || $lexeme eq 'RCURLY_SCOPE') {
        parseExitScope();
        return;
    }
    #
    # Track of TYPEDEF lexeme per G1 location
    #
    if ($lexeme eq 'TYPEDEF') {
        my $latest_g1_location = M_latest_g1_location();
        debugf('%s detected at G1 location %d', $lexeme, $latest_g1_location);
        $G1LocationToTypedef{$latest_g1_location} = 1;
    }
    return;
}
#################
# canReenterScope
#################
# True when the progress report of the G1 location preceding the latest one
# shows a functionDefinition with the dot right before compoundStatement,
# with or without a declarationList.
sub canReenterScope {
    my $previousOrdinal = LATEST_G1_ORDINAL - 1;
    if (findInProgress($previousOrdinal, 3, 'functionDefinition', [ 'declarationSpecifiers', 'declarator', 'declarationList', 'compoundStatement' ])) {
        return 1;
    }
    return findInProgress($previousOrdinal, 2, 'functionDefinition', [ 'declarationSpecifiers', 'declarator', 'compoundStatement' ]);
}
###################################################################################################
# Implementation of
## http://www.cs.dartmouth.edu/~mckeeman/references/JCLT/ResolvingTypedefsInAMultipassCCompiler.pdf
###################################################################################################
# Open a new scope: any pending scope exit is done first, then the typedef
# table of the current scope is cloned and pushed as the new current one.
sub parseEnterScope {
    doDelayedExitScope();
    my $scope = $#{$typedefPerScope};
    my $inherited = clone($typedefPerScope->[$scope]);
    push(@{$typedefPerScope}, $inherited);
    debugf('Duplicated scope %d to %d', $scope, $scope + 1);
}
# Request the exit of the current scope.
# The exit itself is delayed: only a flag is set, so that parseReenterScope()
# can still cancel it. A single flag can remember only one request, hence an
# exit that is still pending is done first; otherwise two consecutive exits
# would remove one scope only.
sub parseExitScope {
    doDelayedExitScope();
    my $scope = $#{$typedefPerScope};
    $delayedExitScope = 1;
    debugf('Setting delay flag on scope %d', $scope);
}
# Cancel a pending scope exit: the current scope remains the current one
sub parseReenterScope {
    $delayedExitScope = 0;
    my $scope = $#{$typedefPerScope};
    debugf('Resetting delay flag on scope %d', $scope);
}
# Flag $token as a typedef name in the current scope, after any pending
# scope exit is done.
sub parseEnterTypedef {
    my ($token) = @_;
    doDelayedExitScope();
    my $scope = $#{$typedefPerScope};
    my $table = $typedefPerScope->[$scope];
    $table->{$token} = 1;
    debugf('"%s" at scope %d', $token, $scope);
}
# Flag $token as an enumeration constant, valid in any scope, after any
# pending scope exit is done.
sub parseEnterEnum {
    my $token = $_[0];
    doDelayedExitScope();
    $enumAnyScope->{$token} = 1;
    debugf('"%s"', $token);
}
# Flag $token as not being a typedef name in the current scope, after any
# pending scope exit is done. This hides a typedef of the same name that
# the scope table may have been cloned with.
sub parseObscureTypedef {
    my ($token) = @_;
    doDelayedExitScope();
    my $scope = $#{$typedefPerScope};
    $typedefPerScope->[$scope]->{$token} = 0;
    # The token is quoted on both sides, as in the other messages
    debugf('"%s" obscured at scope %d', $token, $scope);
}
# Returns 1 when $token is flagged as a typedef name in the current scope,
# 0 otherwise. Any pending scope exit is done first.
sub parseIsTypedef {
    my ($token) = @_;
    doDelayedExitScope();
    my $scope = $#{$typedefPerScope};
    my $table = $typedefPerScope->[$scope];
    my $rc = 0;
    if (exists($table->{$token}) && $table->{$token}) {
        $rc = 1;
    }
    debugf('"%s" at scope %d ? %s', $token, $scope, $rc ? 'yes' : 'no');
    return($rc);
}
# Returns 1 when $token is flagged as an enumeration constant, 0 otherwise.
# Any pending scope exit is done first.
sub parseIsEnum {
    my ($token) = @_;
    doDelayedExitScope();
    my $rc = 0;
    if (exists($enumAnyScope->{$token}) && $enumAnyScope->{$token}) {
        $rc = 1;
    }
    debugf('"%s" ? %s', $token, $rc ? 'yes' : 'no');
    return($rc);
}
# Do the scope exit that was requested with parseExitScope(), if any: the
# current scope table is removed and the flag is reset.
sub doDelayedExitScope {
    return if (! $delayedExitScope);
    my $scope = $#{$typedefPerScope};
    pop(@{$typedefPerScope});
    $delayedExitScope = 0;
    debugf('Removed scope %d and resetted delay flag', $scope);
}
############################
# usage
############################
# Print the online help on STDERR and exit.
#   $rc : exit code, EXIT_FAILURE when not given. An explicit 0, that is
#         EXIT_SUCCESS, is honoured.
sub usage {
    my $rc = shift;
    # Only a missing exit code gets the default: testing for truth would
    # turn EXIT_SUCCESS into a failure
    $rc = EXIT_FAILURE if (! defined($rc));
    print STDERR "Usage: $^X $0 file.c\n";
    print STDERR "\n";
    print STDERR "Warning, file.c must contain only 2011 ISO C\n";
    print STDERR "With GNU cpp, here is how to get it from a general .c file:\n";
    print STDERR "cpp -P -ansi old.c > new.c\n";
    exit($rc);
}
__DATA__
################################################################################################################
# grammar
#
# 2011 ISO C, as of http://www.quut.com/c/ANSI-C-grammar-l.html
# http://www.quut.com/c/ANSI-C-grammar-y-2011.html
#
################################################################################################################
#
# Defaults
#
:default ::= action => [values] bless => ::lhs
#
# G1 (grammar), c.f. http://www.quut.com/c/ANSI-C-grammar-y-2011.html
#
:start ::= translationUnit
event primaryExpression = completed <primaryExpression>
primaryExpression
::= IDENTIFIER
| constant
| string
| LPAREN expression RPAREN
| genericSelection
constant
::= I_CONSTANT # includes character_constant
| F_CONSTANT
| ENUMERATION_CONSTANT # after it has been defined as such
event enumerationConstant = completed <enumerationConstant>
enumerationConstant # before it has been defined as such
::= IDENTIFIER
string
::= STRING_LITERAL
| FUNC_NAME
genericSelection
::= GENERIC LPAREN assignmentExpression COMMA genericAssocList RPAREN
genericAssocList
::= genericAssociation
| genericAssocList COMMA genericAssociation
genericAssociation
::= typeName COLON assignmentExpression
| DEFAULT COLON assignmentExpression
postfixExpression
::= primaryExpression
| postfixExpression LBRACKET expression RBRACKET
| postfixExpression LPAREN RPAREN
| postfixExpression LPAREN argumentExpressionList RPAREN
| postfixExpression DOT IDENTIFIER
| postfixExpression PTR_OP IDENTIFIER
| postfixExpression INC_OP
| postfixExpression DEC_OP
| LPAREN typeName RPAREN LCURLY initializerList RCURLY
| LPAREN typeName RPAREN LCURLY initializerList COMMA RCURLY
argumentExpressionList
::= assignmentExpression
| argumentExpressionList COMMA assignmentExpression
unaryExpression
::= postfixExpression
| INC_OP unaryExpression
| DEC_OP unaryExpression
| unaryOperator castExpression
| SIZEOF unaryExpression
| SIZEOF LPAREN typeName RPAREN
| ALIGNOF LPAREN typeName RPAREN
unaryOperator
::= AMPERSAND
| STAR
| PLUS
| HYPHEN
| TILDE
| EXCLAMATION
castExpression
::= unaryExpression
| LPAREN typeName RPAREN castExpression
multiplicativeExpression
::= castExpression
| multiplicativeExpression STAR castExpression
| multiplicativeExpression SLASH castExpression
| multiplicativeExpression PERCENT castExpression
additiveExpression
::= multiplicativeExpression
| additiveExpression PLUS multiplicativeExpression
| additiveExpression HYPHEN multiplicativeExpression
shiftExpression
::= additiveExpression
| shiftExpression LEFT_OP additiveExpression
| shiftExpression RIGHT_OP additiveExpression
relationalExpression
::= shiftExpression
| relationalExpression LESS_THAN shiftExpression
| relationalExpression GREATER_THAN shiftExpression
| relationalExpression LE_OP shiftExpression
| relationalExpression GE_OP shiftExpression
equalityExpression
::= relationalExpression
| equalityExpression EQ_OP relationalExpression
| equalityExpression NE_OP relationalExpression
andExpression
::= equalityExpression
| andExpression AMPERSAND equalityExpression
exclusiveOrExpression
::= andExpression
| exclusiveOrExpression CARET andExpression
inclusiveOrExpression
::= exclusiveOrExpression
| inclusiveOrExpression VERTICAL_BAR exclusiveOrExpression
logicalAndExpression
::= inclusiveOrExpression
| logicalAndExpression AND_OP inclusiveOrExpression
logicalOrExpression
::= logicalAndExpression
| logicalOrExpression OR_OP logicalAndExpression
conditionalExpression
::= logicalOrExpression
| logicalOrExpression QUESTION_MARK expression COLON conditionalExpression
assignmentExpression
::= conditionalExpression
| unaryExpression assignmentOperator assignmentExpression
assignmentOperator
::= EQUAL
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
| LEFT_ASSIGN
| RIGHT_ASSIGN
| AND_ASSIGN
| XOR_ASSIGN
| OR_ASSIGN
expression
::= assignmentExpression
| expression COMMA assignmentExpression
constantExpression
::= conditionalExpression # with constraints
declaration
::= declarationSpecifiers SEMICOLON
| declarationSpecifiers initDeclaratorList SEMICOLON
| staticAssertDeclaration
declarationSpecifiers
::= storageClassSpecifier declarationSpecifiers
| storageClassSpecifier
| typeSpecifier declarationSpecifiers
| typeSpecifier
| typeQualifier declarationSpecifiers
| typeQualifier
| functionSpecifier declarationSpecifiers
| functionSpecifier
| alignmentSpecifier declarationSpecifiers
| alignmentSpecifier
initDeclaratorList
::= initDeclarator
| initDeclaratorList COMMA initDeclarator
initDeclarator
::= declarator EQUAL initializer
| declarator
storageClassSpecifier
::= TYPEDEF # identifiers must be flagged as TYPEDEF_NAME
| EXTERN
| STATIC
| THREAD_LOCAL
| AUTO
| REGISTER
typeSpecifier
::= VOID
| CHAR
| SHORT
| INT
| LONG
| FLOAT
| DOUBLE
| SIGNED
| UNSIGNED
| BOOL
| COMPLEX
| IMAGINARY # non-mandated extension
| atomicTypeSpecifier
| structOrUnionSpecifier
| enumSpecifier
| TYPEDEF_NAME # after it has been defined as such
structOrUnionSpecifier
::= structOrUnion LCURLY structDeclarationList RCURLY
| structOrUnion IDENTIFIER LCURLY structDeclarationList RCURLY
| structOrUnion IDENTIFIER
structOrUnion
::= STRUCT
| UNION
structDeclarationList
::= structDeclaration
| structDeclarationList structDeclaration
structDeclaration
::= specifierQualifierList SEMICOLON # for anonymous struct/union
| specifierQualifierList structDeclaratorList SEMICOLON
| staticAssertDeclaration
specifierQualifierList
::= typeSpecifier specifierQualifierList
| typeSpecifier
| typeQualifier specifierQualifierList
| typeQualifier
structDeclaratorList
::= structDeclarator
| structDeclaratorList COMMA structDeclarator
structDeclarator
::= COLON constantExpression
| declarator COLON constantExpression
| declarator
enumSpecifier
::= ENUM LCURLY enumeratorList RCURLY
| ENUM LCURLY enumeratorList COMMA RCURLY
| ENUM IDENTIFIER LCURLY enumeratorList RCURLY
| ENUM IDENTIFIER LCURLY enumeratorList COMMA RCURLY
| ENUM IDENTIFIER
enumeratorList
::= enumerator
| enumeratorList COMMA enumerator
enumerator # identifiers must be flagged as ENUMERATION_CONSTANT
::= enumerationConstant EQUAL constantExpression
| enumerationConstant
atomicTypeSpecifier
::= ATOMIC LPAREN typeName RPAREN
typeQualifier
::= CONST
| RESTRICT
| VOLATILE
| ATOMIC
functionSpecifier
::= INLINE
| NORETURN
alignmentSpecifier
::= ALIGNAS LPAREN typeName RPAREN
| ALIGNAS LPAREN constantExpression RPAREN
declarator
::= pointer directDeclarator
| directDeclarator
event directDeclarator = completed <directDeclarator>
directDeclarator
::= IDENTIFIER
| LPAREN declarator RPAREN
| directDeclarator LBRACKET RBRACKET
| directDeclarator LBRACKET STAR RBRACKET
| directDeclarator LBRACKET STATIC typeQualifierList assignmentExpression RBRACKET
| directDeclarator LBRACKET STATIC assignmentExpression RBRACKET
| directDeclarator LBRACKET typeQualifierList STAR RBRACKET
| directDeclarator LBRACKET typeQualifierList STATIC assignmentExpression RBRACKET
| directDeclarator LBRACKET typeQualifierList assignmentExpression RBRACKET
| directDeclarator LBRACKET typeQualifierList RBRACKET
| directDeclarator LBRACKET assignmentExpression RBRACKET
| directDeclarator LPAREN_SCOPE parameterTypeList RPAREN_SCOPE
| directDeclarator LPAREN RPAREN
| directDeclarator LPAREN identifierList RPAREN
pointer
::= STAR typeQualifierList pointer
| STAR typeQualifierList
| STAR pointer
| STAR
typeQualifierList
::= typeQualifier
| typeQualifierList typeQualifier
parameterTypeList
::= parameterList COMMA ELLIPSIS
| parameterList
parameterList
::= parameterDeclaration
| parameterList COMMA parameterDeclaration
event parameterDeclaration = completed <parameterDeclaration>
parameterDeclaration
::= declarationSpecifiers declarator
| declarationSpecifiers abstractDeclarator
| declarationSpecifiers
identifierList
::= IDENTIFIER
| identifierList COMMA IDENTIFIER
typeName
::= specifierQualifierList abstractDeclarator
| specifierQualifierList
abstractDeclarator
::= pointer directAbstractDeclarator
| pointer
| directAbstractDeclarator
#
# directAbstractDeclarator: a parenthesized abstract declarator, or array and
# function suffixes, either standing alone or appended to another direct
# abstract declarator.
# As in directDeclarator, only the parameterTypeList forms use the
# LPAREN_SCOPE/RPAREN_SCOPE lexemes.
#
directAbstractDeclarator ::=
      LPAREN abstractDeclarator RPAREN
    | LBRACKET RBRACKET
    | LBRACKET STAR RBRACKET
    | LBRACKET STATIC typeQualifierList assignmentExpression RBRACKET
    | LBRACKET STATIC assignmentExpression RBRACKET
    | LBRACKET typeQualifierList STATIC assignmentExpression RBRACKET
    | LBRACKET typeQualifierList assignmentExpression RBRACKET
    | LBRACKET typeQualifierList RBRACKET
    | LBRACKET assignmentExpression RBRACKET
    | directAbstractDeclarator LBRACKET RBRACKET
    | directAbstractDeclarator LBRACKET STAR RBRACKET
    | directAbstractDeclarator LBRACKET STATIC typeQualifierList assignmentExpression RBRACKET
    | directAbstractDeclarator LBRACKET STATIC assignmentExpression RBRACKET
    | directAbstractDeclarator LBRACKET typeQualifierList assignmentExpression RBRACKET
    | directAbstractDeclarator LBRACKET typeQualifierList STATIC assignmentExpression RBRACKET
    | directAbstractDeclarator LBRACKET typeQualifierList RBRACKET
    | directAbstractDeclarator LBRACKET assignmentExpression RBRACKET
    | LPAREN RPAREN
    | LPAREN_SCOPE parameterTypeList RPAREN_SCOPE
    | directAbstractDeclarator LPAREN RPAREN
    | directAbstractDeclarator LPAREN_SCOPE parameterTypeList RPAREN_SCOPE
#
# initializer: a braced initializer list (a trailing comma is allowed) or a
# single assignment expression.
# The braces here are the plain LCURLY/RCURLY lexemes, not the _SCOPE ones.
#
initializer ::=
      LCURLY initializerList RCURLY
    | LCURLY initializerList COMMA RCURLY
    | assignmentExpression
#
# initializerList: comma-separated, left-recursive list of initializers,
# each one optionally preceded by a designation
#
initializerList ::=
      designation initializer
    | initializer
    | initializerList COMMA designation initializer
    | initializerList COMMA initializer
#
# designation: one or more designators followed by EQUAL
#
designation ::=
      designatorList EQUAL
designatorList ::=
      designator
    | designatorList designator
#
# designator: an array index in brackets, or DOT followed by a member name
#
designator ::=
      LBRACKET constantExpression RBRACKET
    | DOT IDENTIFIER
#
# staticAssertDeclaration: STATIC_ASSERT ( constant-expression , string ) ;
#
staticAssertDeclaration ::=
      STATIC_ASSERT LPAREN constantExpression COMMA STRING_LITERAL RPAREN SEMICOLON
#
# statement: dispatch to one of the six statement kinds
#
statement ::=
      labeledStatement
    | compoundStatement
    | expressionStatement
    | selectionStatement
    | iterationStatement
    | jumpStatement
#
# labeledStatement: a statement prefixed by "name:", "case expr:" or
# "default:"
#
labeledStatement ::=
      IDENTIFIER COLON statement
    | CASE constantExpression COLON statement
    | DEFAULT COLON statement
#
# compoundStatement: a possibly empty block.
# Its braces are the LCURLY_SCOPE/RCURLY_SCOPE lexemes, not the plain
# LCURLY/RCURLY used by initializer.
#
compoundStatement ::=
      LCURLY_SCOPE RCURLY_SCOPE
    | LCURLY_SCOPE blockItemList RCURLY_SCOPE
#
# blockItemList: one or more block items, each a declaration or a statement
#
blockItemList ::=
      blockItem
    | blockItemList blockItem
blockItem ::=
      declaration
    | statement
#
# expressionStatement: an optional expression followed by SEMICOLON
#
expressionStatement ::=
      SEMICOLON
    | expression SEMICOLON
#
# selectionStatement: if with or without else, and switch
#
selectionStatement ::=
      IF LPAREN expression RPAREN statement ELSE statement
    | IF LPAREN expression RPAREN statement
    | SWITCH LPAREN expression RPAREN statement
#
# iterationStatement: while, do-while and the four for forms.
# In a for header the first clause is either an expressionStatement or a
# declaration, the second clause is an expressionStatement, and the third
# clause (a bare expression) is optional.
#
iterationStatement ::=
      WHILE LPAREN expression RPAREN statement
    | DO statement WHILE LPAREN expression RPAREN SEMICOLON
    | FOR LPAREN expressionStatement expressionStatement RPAREN statement
    | FOR LPAREN expressionStatement expressionStatement expression RPAREN statement
    | FOR LPAREN declaration expressionStatement RPAREN statement
    | FOR LPAREN declaration expressionStatement expression RPAREN statement
#
# jumpStatement: goto, continue, break, and return with or without a value
#
jumpStatement ::=
      GOTO IDENTIFIER SEMICOLON
    | CONTINUE SEMICOLON
    | BREAK SEMICOLON
    | RETURN SEMICOLON
    | RETURN expression SEMICOLON
#
# translationUnit: left-recursive list of one or more external declarations
#
translationUnit ::=
      externalDeclaration
    | translationUnit externalDeclaration
#
# externalDeclaration: a function definition or a declaration
#
externalDeclaration ::=
      functionDefinition
    | declaration
#
# functionDefinition: specifiers, declarator and body, with an optional
# list of declarations between the declarator and the body
#
functionDefinition ::=
      declarationSpecifiers declarator declarationList compoundStatement
    | declarationSpecifiers declarator compoundStatement
#
# declarationList: left-recursive list of one or more declarations
#
declarationList ::=
      declaration
    | declarationList declaration
#
# G0 (tokens), c.f. http://www.quut.com/c/ANSI-C-grammar-l.html
#
# Intermediary tokens: character classes and runs of them, used to build
# the lexemes defined further down
O       ~ [0-7]             # one octal digit
O_any   ~ O*
D       ~ [0-9]             # one decimal digit
D_any   ~ D*
D_many  ~ D+
NZ      ~ [1-9]             # one non-zero decimal digit
L       ~ [a-zA-Z_]         # first character of an identifier-like lexeme
A       ~ [a-zA-Z_0-9]      # following characters of an identifier-like lexeme
A_any   ~ A*
H       ~ [a-fA-F0-9]       # one hexadecimal digit
H_any   ~ H*
H_many  ~ H+
HP      ~ '0' [xX]          # hexadecimal prefix
# Optional parts of numeric constants and of character/string literals.
# Every X_maybe symbol is "X or nothing": the second rule of each pair has an
# empty right-hand side.
#
# Sign, exponent (E, used by decimal floats) and exponent (P, used by
# hexadecimal floats)
SIGN_maybe  ~ [+-]
SIGN_maybe  ~
E           ~ [Ee] SIGN_maybe D_many
E_maybe     ~ E
E_maybe     ~
P           ~ [Pp] SIGN_maybe D_many
# Floating-point suffix
FS          ~ [fFlL]
FS_maybe    ~ FS
FS_maybe    ~
# Integer suffix: unsigned and/or long, in either order
LL          ~ 'll' | 'LL' | [lL]
LL_maybe    ~ LL
LL_maybe    ~
U           ~ [uU]
U_maybe     ~ U
U_maybe     ~
IS          ~ U LL_maybe | LL U_maybe
IS_maybe    ~ IS
IS_maybe    ~
# Prefix of a character constant
CP          ~ [uUL]
CP_maybe    ~ CP
CP_maybe    ~
# Prefix of a string literal
SP          ~ 'u8' | [uUL]
SP_maybe    ~ SP
SP_maybe    ~
# Escape sequence: a backslash (BS) followed by a simple escape character,
# by one to three octal digits, or by 'x' and hexadecimal digits
ES_AFTERBS  ~ [\'\"\?\\abfnrtv]
            | O
            | O O
            | O O O
            | 'x' H_many
ES          ~ BS ES_AFTERBS
# Whitespace
WS          ~ [ \t\v\n\f]
WS_any      ~ WS*
# Lexemes
#
# Keywords. Each keyword is declared as a lexeme with its own priority,
# going from -1 down to -45, and matches exactly one literal string.
# All these priorities are higher than the -100 given to the identifier-like
# lexemes declared after this section, which also match these spellings.
# The declarations are kept one per line, declaration first and pattern
# second.
#
:lexeme ~ <AUTO> priority => -1
AUTO ~ 'auto'
:lexeme ~ <BREAK> priority => -2
BREAK ~ 'break'
:lexeme ~ <CASE> priority => -3
CASE ~ 'case'
:lexeme ~ <CHAR> priority => -4
CHAR ~ 'char'
:lexeme ~ <CONST> priority => -5
CONST ~ 'const'
:lexeme ~ <CONTINUE> priority => -6
CONTINUE ~ 'continue'
:lexeme ~ <DEFAULT> priority => -7
DEFAULT ~ 'default'
:lexeme ~ <DO> priority => -8
DO ~ 'do'
:lexeme ~ <DOUBLE> priority => -9
DOUBLE ~ 'double'
:lexeme ~ <ELSE> priority => -10
ELSE ~ 'else'
:lexeme ~ <ENUM> priority => -11
ENUM ~ 'enum'
:lexeme ~ <EXTERN> priority => -12
EXTERN ~ 'extern'
:lexeme ~ <FLOAT> priority => -13
FLOAT ~ 'float'
:lexeme ~ <FOR> priority => -14
FOR ~ 'for'
:lexeme ~ <GOTO> priority => -15
GOTO ~ 'goto'
:lexeme ~ <IF> priority => -16
IF ~ 'if'
:lexeme ~ <INLINE> priority => -17
INLINE ~ 'inline'
:lexeme ~ <INT> priority => -18
INT ~ 'int'
:lexeme ~ <LONG> priority => -19
LONG ~ 'long'
:lexeme ~ <REGISTER> priority => -20
REGISTER ~ 'register'
:lexeme ~ <RESTRICT> priority => -21
RESTRICT ~ 'restrict'
:lexeme ~ <RETURN> priority => -22
RETURN ~ 'return'
:lexeme ~ <SHORT> priority => -23
SHORT ~ 'short'
:lexeme ~ <SIGNED> priority => -24
SIGNED ~ 'signed'
:lexeme ~ <SIZEOF> priority => -25
SIZEOF ~ 'sizeof'
:lexeme ~ <STATIC> priority => -26
STATIC ~ 'static'
:lexeme ~ <STRUCT> priority => -27
STRUCT ~ 'struct'
:lexeme ~ <SWITCH> priority => -28
SWITCH ~ 'switch'
# TYPEDEF is the only keyword on which the recognizer pauses (after reading it).
# NOTE(review): presumably the driver script uses that pause to know that a
# typedef declaration is starting -- confirm in the recognizer loop.
:lexeme ~ <TYPEDEF> priority => -29 pause => after
TYPEDEF ~ 'typedef'
:lexeme ~ <UNION> priority => -30
UNION ~ 'union'
:lexeme ~ <UNSIGNED> priority => -31
UNSIGNED ~ 'unsigned'
:lexeme ~ <VOID> priority => -32
VOID ~ 'void'
:lexeme ~ <VOLATILE> priority => -33
VOLATILE ~ 'volatile'
:lexeme ~ <WHILE> priority => -34
WHILE ~ 'while'
# Keywords spelled with a leading underscore and a capital letter
:lexeme ~ <ALIGNAS> priority => -35
ALIGNAS ~ '_Alignas'
:lexeme ~ <ALIGNOF> priority => -36
ALIGNOF ~ '_Alignof'
:lexeme ~ <ATOMIC> priority => -37
ATOMIC ~ '_Atomic'
:lexeme ~ <BOOL> priority => -38
BOOL ~ '_Bool'
:lexeme ~ <COMPLEX> priority => -39
COMPLEX ~ '_Complex'
:lexeme ~ <GENERIC> priority => -40
GENERIC ~ '_Generic'
:lexeme ~ <IMAGINARY> priority => -41
IMAGINARY ~ '_Imaginary'
:lexeme ~ <NORETURN> priority => -42
NORETURN ~ '_Noreturn'
:lexeme ~ <STATIC_ASSERT> priority => -43
STATIC_ASSERT ~ '_Static_assert'
:lexeme ~ <THREAD_LOCAL> priority => -44
THREAD_LOCAL ~ '_Thread_local'
# __func__ is given its own lexeme instead of being read as an identifier
:lexeme ~ <FUNC_NAME> priority => -45
FUNC_NAME ~ '__func__'
#
## DETERMINED AT RUN TIME
#
# TYPEDEF_NAME, ENUMERATION_CONSTANT and IDENTIFIER share one pattern
# (a letter or underscore followed by letters, digits or underscores) and
# one priority, so the lexer alone cannot tell them apart. The recognizer
# pauses before each of them.
# NOTE(review): presumably the driver script then looks the text up in its
# symbol table and chooses which of the three to feed to the parser --
# confirm in the recognizer loop.
#
:lexeme ~ <TYPEDEF_NAME> priority => -100 pause => before
:lexeme ~ <ENUMERATION_CONSTANT> priority => -100 pause => before
:lexeme ~ <IDENTIFIER> priority => -100 pause => before
TYPEDEF_NAME ~ L A_any
ENUMERATION_CONSTANT ~ L A_any
IDENTIFIER ~ L A_any
# Integer constant. The four alternatives are, in order: hexadecimal,
# decimal, octal (a lone '0' included), and character constant. The three
# numeric forms accept an optional integer suffix, the character constant an
# optional prefix.
:lexeme ~ <I_CONSTANT> priority => -101
I_CONSTANT ~ HP H_many IS_maybe
| NZ D_any IS_maybe
| '0' O_any IS_maybe
| CP_maybe QUOTE I_CONSTANT_INSIDE_many QUOTE
# Floating constant. The first three alternatives are decimal (the exponent
# is mandatory only when there is no '.'); the last three are hexadecimal
# and always require the P exponent. All accept an optional suffix.
:lexeme ~ <F_CONSTANT> priority => -102
F_CONSTANT ~ D_many E FS_maybe
| D_any '.' D_many E_maybe FS_maybe
| D_many '.' E_maybe FS_maybe
| HP H_many P FS_maybe
| HP H_any '.' H_many P FS_maybe
| HP H_many '.' P FS_maybe
# String literal: one or more STRING_LITERAL_UNIT in a row form a single
# lexeme. Each unit is defined with its trailing whitespace included, which
# is what lets whitespace-separated literals be read as one lexeme.
:lexeme ~ <STRING_LITERAL> priority => -103
STRING_LITERAL ~ STRING_LITERAL_UNIT+
# Operators and punctuators made of several characters.
# Priorities keep decreasing by one per lexeme (-104 down to -125); the
# three-character ones come first, then the two-character ones.
:lexeme ~ <ELLIPSIS> priority => -104
ELLIPSIS ~ '...'
# Compound assignment operators
:lexeme ~ <RIGHT_ASSIGN> priority => -105
RIGHT_ASSIGN ~ '>>='
:lexeme ~ <LEFT_ASSIGN> priority => -106
LEFT_ASSIGN ~ '<<='
:lexeme ~ <ADD_ASSIGN> priority => -107
ADD_ASSIGN ~ '+='
:lexeme ~ <SUB_ASSIGN> priority => -108
SUB_ASSIGN ~ '-='
:lexeme ~ <MUL_ASSIGN> priority => -109
MUL_ASSIGN ~ '*='
:lexeme ~ <DIV_ASSIGN> priority => -110
DIV_ASSIGN ~ '/='
:lexeme ~ <MOD_ASSIGN> priority => -111
MOD_ASSIGN ~ '%='
:lexeme ~ <AND_ASSIGN> priority => -112
AND_ASSIGN ~ '&='
:lexeme ~ <XOR_ASSIGN> priority => -113
XOR_ASSIGN ~ '^='
:lexeme ~ <OR_ASSIGN> priority => -114
OR_ASSIGN ~ '|='
# Shift, increment/decrement and member-through-pointer operators
:lexeme ~ <RIGHT_OP> priority => -115
RIGHT_OP ~ '>>'
:lexeme ~ <LEFT_OP> priority => -116
LEFT_OP ~ '<<'
:lexeme ~ <INC_OP> priority => -117
INC_OP ~ '++'
:lexeme ~ <DEC_OP> priority => -118
DEC_OP ~ '--'
:lexeme ~ <PTR_OP> priority => -119
PTR_OP ~ '->'
# Logical and comparison operators
:lexeme ~ <AND_OP> priority => -120
AND_OP ~ '&&'
:lexeme ~ <OR_OP> priority => -121
OR_OP ~ '||'
:lexeme ~ <LE_OP> priority => -122
LE_OP ~ '<='
:lexeme ~ <GE_OP> priority => -123
GE_OP ~ '>='
:lexeme ~ <EQ_OP> priority => -124
EQ_OP ~ '=='
:lexeme ~ <NE_OP> priority => -125
NE_OP ~ '!='
# Single-character punctuators, plus the two-character alternative
# spellings accepted for braces and brackets.
:lexeme ~ <SEMICOLON> priority => -126
SEMICOLON ~ ';'
# Braces and parentheses each exist twice: a plain lexeme and a *_SCOPE twin
# with the same pattern and the same priority. Only the *_SCOPE twin makes
# the recognizer pause (after reading it). The grammar uses the curly
# *_SCOPE pair in compoundStatement and the paren *_SCOPE pair around
# parameterTypeList; the plain lexemes are used elsewhere.
# NOTE(review): presumably the driver script opens/closes a symbol-table
# scope on these pauses -- confirm in the recognizer loop.
:lexeme ~ <LCURLY> priority => -127
LCURLY ~ '{' | '<%'
:lexeme ~ <LCURLY_SCOPE> priority => -127 pause => after
LCURLY_SCOPE ~ '{' | '<%'
:lexeme ~ <RCURLY> priority => -128
RCURLY ~ '}' | '%>'
:lexeme ~ <RCURLY_SCOPE> priority => -128 pause => after
RCURLY_SCOPE ~ '}' | '%>'
:lexeme ~ <COMMA> priority => -129
COMMA ~ ','
:lexeme ~ <COLON> priority => -130
COLON ~ ':'
:lexeme ~ <EQUAL> priority => -131
EQUAL ~ '='
:lexeme ~ <LPAREN> priority => -132
LPAREN ~ '('
:lexeme ~ <LPAREN_SCOPE> priority => -132 pause => after
LPAREN_SCOPE ~ '('
:lexeme ~ <RPAREN> priority => -133
RPAREN ~ ')'
:lexeme ~ <RPAREN_SCOPE> priority => -133 pause => after
RPAREN_SCOPE ~ ')'
:lexeme ~ <LBRACKET> priority => -134
LBRACKET ~ '[' | '<:'
:lexeme ~ <RBRACKET> priority => -135
RBRACKET ~ ']' | ':>'
:lexeme ~ <DOT> priority => -136
DOT ~ '.'
# Single-character operators
:lexeme ~ <AMPERSAND> priority => -137
AMPERSAND ~ '&'
:lexeme ~ <EXCLAMATION> priority => -138
EXCLAMATION ~ '!'
:lexeme ~ <TILDE> priority => -139
TILDE ~ '~'
:lexeme ~ <HYPHEN> priority => -140
HYPHEN ~ '-'
:lexeme ~ <PLUS> priority => -141
PLUS ~ '+'
:lexeme ~ <STAR> priority => -142
STAR ~ '*'
:lexeme ~ <SLASH> priority => -143
SLASH ~ '/'
:lexeme ~ <PERCENT> priority => -144
PERCENT ~ '%'
:lexeme ~ <LESS_THAN> priority => -145
LESS_THAN ~ '<'
:lexeme ~ <GREATER_THAN> priority => -146
GREATER_THAN ~ '>'
:lexeme ~ <CARET> priority => -147
CARET ~ '^'
:lexeme ~ <VERTICAL_BAR> priority => -148
VERTICAL_BAR ~ '|'
:lexeme ~ <QUESTION_MARK> priority => -149
QUESTION_MARK ~ '?'
# Text matched by the two rules below is dropped by the lexer instead of
# being passed to the parser.
:discard ~ WS # whitespace separates tokens
# NOTE(review): ANYTHING_ELSE is defined further down as the character class
# [.]; inside a bracketed class the dot is a literal period, not "any
# character", so this rule only discards a '.' -- it does not discard
# arbitrary bad characters. Confirm the intent before widening the class:
# discarding every unmatched character would also hide syntax errors.
:discard ~ ANYTHING_ELSE # discard bad characters
#
# C comments are discarded, c.f. https://gist.github.com/jeffreykegler/5015057
#
# Between the opening and closing markers a comment is made of: text free of
# stars, then any number of segments (a run of stars, one character that is
# neither a slash nor a star, then star-free text), then optional stars
# sitting right before the closing marker.
#
<C style comment>                   ~ '/*' <comment interior> '*/'
<comment interior>                  ~ <optional non stars>
                                      <optional star prefixed segments>
                                      <optional pre final stars>
<optional non stars>                ~ [^*]*
<optional star prefixed segments>   ~ <star prefixed segment>*
<star prefixed segment>             ~ <stars> [^/*] <optional star free text>
<stars>                             ~ [*]+
<optional star free text>           ~ [^*]*
<optional pre final stars>          ~ [*]*
:discard                            ~ <C style comment>
#
# C++ comments are discarded: two slashes and everything up to, but not
# including, the end of the line
#
<Cplusplus style comment>       ~ '//' <Cplusplus comment interior>
<Cplusplus comment interior>    ~ [^\n]*
:discard                        ~ <Cplusplus style comment>
#
# Preprocessor lines are discarded like comments: a hash sign and everything
# up to, but not including, the end of the line
#
<Preprocessor style comment>    ~ '#' <Preprocessor comment interior>
<Preprocessor comment interior> ~ [^\n]*
:discard                        ~ <Preprocessor style comment>
#
# Internal tokens added
#
# Helper symbols used by the constant, string and discard rules.
#
# Single quote delimiting a character constant
QUOTE ~ [']
# Inside a character constant: any character except quote, backslash and
# newline, or an escape sequence; at least one is required
I_CONSTANT_INSIDE ~ [^'\\\n]
I_CONSTANT_INSIDE ~ ES
I_CONSTANT_INSIDE_many ~ I_CONSTANT_INSIDE+
# Inside a string literal: any character except double quote, backslash and
# newline, or an escape sequence; the string may be empty
STRING_LITERAL_INSIDE ~ [^"\\\n]
STRING_LITERAL_INSIDE ~ ES
STRING_LITERAL_INSIDE_any ~ STRING_LITERAL_INSIDE*
# One quoted string with its optional prefix; whitespace following the
# closing quote is part of the unit
STRING_LITERAL_UNIT ~ SP_maybe '"' STRING_LITERAL_INSIDE_any '"' WS_any
# A single backslash
BS ~ '\'
# NOTE(review): inside a bracketed character class the dot is a literal
# period, so ANYTHING_ELSE matches only '.', not any character. The discard
# rule that uses it is commented "discard bad characters" -- confirm which
# behavior is intended.
ANYTHING_ELSE ~ [.]
#
# Lexeme events are appended automatically here
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment