public
Last active

Precedence parsing, example 1

  • Download Gist
OP1.pm
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
package Marpa::Demo::OP1;
 
use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
 
use Marpa::XS;
 
sub rules { shift; return $_[0]; }
 
sub priority_rule {
my ( undef, $lhs, undef, $priorities ) = @_;
my $priority_count = scalar @{$priorities};
my @rules = ();
for my $priority_ix ( 0 .. $priority_count - 1 ) {
my $priority = $priority_count - ( $priority_ix + 1 );
for my $alternative ( @{ $priorities->[$priority_ix] } ) {
push @rules, [ $priority, @{$alternative} ];
}
} ## end for my $priority_ix ( 0 .. $priority_count - 1 )
my @xs_rules = (
{ lhs => $lhs, rhs => [ $lhs . '_0' ] },
( map {
;
{ lhs => ( $lhs . '_' . ( $_ - 1 ) ),
rhs => [ $lhs . '_' . ($_) ]
}
} 1 .. $priority_count - 1
)
);
RULE: for my $rule (@rules) {
my ( $priority, $assoc, $rhs, $action ) = @{$rule};
my @action_kv = ();
push @action_kv, action => $action if defined $action;
my @new_rhs = @{$rhs};
my @arity = grep { $new_rhs[$_] eq $lhs } 0 .. $#new_rhs;
my $length = scalar @{$rhs};
my $current_exp = $lhs . '_' . $priority;
my $next_priority = $priority + 1;
$next_priority = 0 if $next_priority >= $priority_count;
my $next_exp = $lhs . '_' . $next_priority;
 
if ( not scalar @arity ) {
push @xs_rules,
{
lhs => $current_exp,
rhs => \@new_rhs,
@action_kv
};
next RULE;
} ## end if ( not scalar @arity )
 
if ( scalar @arity == 1 ) {
die 'Unnecessary unit rule in priority rule' if $length == 1;
$new_rhs[ $arity[0] ] = $current_exp;
}
DO_ASSOCIATION: {
if ( $assoc eq 'L' ) {
$new_rhs[ $arity[0] ] = $current_exp;
for my $rhs_ix ( @arity[ 1 .. $#arity ] ) {
$new_rhs[$rhs_ix] = $next_exp;
}
last DO_ASSOCIATION;
} ## end if ( $assoc eq 'L' )
if ( $assoc eq 'R' ) {
$new_rhs[ $arity[-1] ] = $current_exp;
for my $rhs_ix ( @arity[ 0 .. $#arity - 1 ] ) {
$new_rhs[$rhs_ix] = $next_exp;
}
last DO_ASSOCIATION;
} ## end if ( $assoc eq 'R' )
if ( $assoc eq 'G' ) {
for my $rhs_ix ( @arity[ 0 .. $#arity ] ) {
$new_rhs[$rhs_ix] = $lhs . '_0';
}
last DO_ASSOCIATION;
} ## end if ( $assoc eq 'G' )
die qq{Unknown association type: "$assoc"};
} ## end DO_ASSOCIATION:
push @xs_rules, { lhs => $current_exp, rhs => \@new_rhs, @action_kv };
} ## end RULE: for my $rule (@rules)
return [@xs_rules];
} ## end sub priority_rule
 
sub empty_rule { shift; return { @{ $_[0] }, rhs => [], @{ $_[2] || [] } }; }
 
sub quantified_rule {
shift;
return {
@{ $_[0] },
rhs => [ $_[2] ],
min => ( $_[3] eq q{+} ? 1 : 0 ),
@{ $_[4] || [] }
};
} ## end sub quantified_rule
 
sub do_priority1 { shift; return [ $_[0] ]; }
sub do_priority3 { shift; return [ $_[0], @{ $_[2] } ]; }
sub do_full_alternative { shift; return [ ( $_[0] // 'L' ), $_[1], $_[2] ]; }
sub do_bare_alternative { shift; return [ ( $_[0] // 'L' ), $_[1], undef ] }
sub do_alternatives_1 { shift; return [ $_[0] ]; }
sub do_alternatives_3 { shift; return [ $_[0], @{ $_[2] } ] }
sub do_lhs { shift; return $_[0]; }
sub do_array { shift; return [@_]; }
sub do_arg1 { return $_[2]; }
sub do_right_adverb { return 'R' }
sub do_left_adverb { return 'L' }
sub do_group_adverb { return 'G' }
 
sub do_what_I_mean {
 
# The first argument is the per-parse variable.
# Until we know what to do with it, just throw it away
shift;
 
# Throw away any undef's
my @children = grep {defined} @_;
 
# Return what's left
return scalar @children > 1 ? \@children : shift @children;
} ## end sub do_what_I_mean
 
sub parse_rules {
my ($string) = @_;
 
my $grammar = Marpa::XS::Grammar->new(
{ start => 'rules',
actions => __PACKAGE__,
default_action => 'do_what_I_mean',
rules => [
{ lhs => 'rules',
rhs => [qw/rule/],
action => 'rules',
min => 1
},
{ lhs => 'rule',
rhs => [qw/lhs op_declare priorities/],
action => 'priority_rule'
},
{ lhs => 'rule',
rhs => [qw/lhs op_declare action/],
action => 'empty_rule'
},
{ lhs => 'rule',
rhs => [qw/lhs op_declare name quantifier action/],
action => 'quantified_rule'
},
 
{ lhs => 'priorities',
rhs => [qw(alternatives)],
action => 'do_priority1'
},
{ lhs => 'priorities',
rhs => [qw(alternatives op_tighter priorities)],
action => 'do_priority3'
},
 
{ lhs => 'alternatives',
rhs => [qw(alternative)],
action => 'do_alternatives_1',
},
{ lhs => 'alternatives',
rhs => [qw(alternative op_eq_pri alternatives)],
action => 'do_alternatives_3',
},
 
{ lhs => 'alternative',
rhs => [qw(adverb rhs action)],
action => 'do_full_alternative'
},
{ lhs => 'alternative',
rhs => [qw(adverb rhs)],
action => 'do_bare_alternative'
},
 
{ lhs => 'adverb',
rhs => [qw/op_group/],
action => 'do_group_adverb'
},
{ lhs => 'adverb',
rhs => [qw/op_right/],
action => 'do_right_adverb'
},
{ lhs => 'adverb',
rhs => [qw/op_left/],
action => 'do_left_adverb'
},
{ lhs => 'adverb', rhs => [] },
 
{ lhs => 'action', rhs => [] },
{ lhs => 'action',
rhs => [qw/op_arrow action_name/],
action => 'do_arg1'
},
{ lhs => 'action',
rhs => [qw/op_arrow name/],
action => 'do_arg1'
},
 
{ lhs => 'lhs', rhs => [qw/name/], action => 'do_lhs' },
 
{ lhs => 'rhs', rhs => [qw/names/] },
{ lhs => 'quantifier', rhs => [qw/op_plus/] },
{ lhs => 'quantifier', rhs => [qw/op_star/] },
 
{ lhs => 'names',
rhs => [qw/name/],
min => 1,
action => 'do_array'
},
],
lhs_terminals => 0,
}
);
$grammar->precompute;
 
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } );
 
# Order matters !!!
my @terminals = (
[ 'op_right', qr/:right\b/xms ],
[ 'op_left', qr/:left\b/xms ],
[ 'op_group', qr/:group\b/xms ],
[ 'op_declare', qr/::=/xms ],
[ 'op_arrow', qr/=>/xms ],
[ 'op_tighter', qr/[|][|]/xms ],
[ 'op_eq_pri', qr/[|]/xms ],
[ 'reserved_name', qr/(::(whatever|undef))/xms ],
[ 'op_plus', qr/[+]/xms ],
[ 'op_star', qr/[*]/xms ],
[ 'name', qr/\w+/xms ],
[ 'name', qr/['][^']+[']/xms ],
);
 
my $length = length $string;
pos $string = 0;
TOKEN: while ( pos $string < $length ) {
 
# skip whitespace
next TOKEN if $string =~ m/\G\s+/gcxms;
 
# read other tokens
TOKEN_TYPE: for my $t (@terminals) {
next TOKEN_TYPE if not $string =~ m/\G($t->[1])/gcxms;
if ( not defined $rec->read( $t->[0], $1 ) ) {
die die q{Problem before position }, pos $string, ': ',
( substr $string, pos $string, 40 ),
qq{\nToken rejected, "}, $t->[0], qq{", "$1"},
;
} ## end if ( not defined $rec->read( $t->[0], $1 ) )
next TOKEN;
} ## end TOKEN_TYPE: for my $t (@terminals)
 
die q{No token at "}, ( substr $string, pos $string, 40 ),
q{", position }, pos $string;
} ## end TOKEN: while ( pos $string < $length )
 
$rec->end_input;
 
my $parse_ref = $rec->value;
 
if ( !defined $parse_ref ) {
say $rec->show_progress() or die "say failed: $ERRNO";
die 'Parse failed';
}
my $parse = ${$parse_ref};
 
return $parse;
} ## end sub parse_rules
 
1;
dsl_ex1.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
#!perl
 
use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
 
use Marpa::XS;
 
use Data::Dumper;
require './OP1.pm'; ## no critic (Modules::RequireBarewordIncludes)
 
my $rules = Marpa::Demo::OP1::parse_rules(
<<'END_OF_GRAMMAR'
e ::=
NUM
| VAR
| :group '(' e ')'
|| '-' e
|| :right e '^' e
|| e '*' e
| e '/' e
|| e '+' e
| e '-' e
|| VAR '=' e
END_OF_GRAMMAR
);
 
sub add_brackets {
my ( undef, @children ) = @_;
return $children[0] if 1 == scalar @children;
my $original = join q{}, grep {defined} @children;
return '[' . $original . ']';
} ## end sub add_brackets
 
my $grammar = Marpa::XS::Grammar->new(
{ start => 'e',
actions => __PACKAGE__,
default_action => 'add_brackets',
rules => $rules,
lhs_terminals => 0,
}
);
$grammar->precompute;
 
# Order matters !!
my @terminals = (
[ 'NUM', qr/\d+/xms ],
[ 'VAR', qr/\w+/xms ],
[ q{'='}, qr/[=]/xms ],
[ q{'*'}, qr/[*]/xms ],
[ q{'/'}, qr/[\/]/xms ],
[ q{'+'}, qr/[+]/xms ],
[ q{'-'}, qr/[-]/xms ],
[ q{'^'}, qr/[\^]/xms ],
[ q{'('}, qr/[(]/xms ],
[ q{')'}, qr/[)]/xms ],
);
 
sub calculate {
my ($string) = @_;
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } );
 
my $length = length $string;
pos $string = 0;
TOKEN: while ( pos $string < $length ) {
 
# skip whitespace
next TOKEN if $string =~ m/\G\s+/gcxms;
 
# read other tokens
TOKEN_TYPE: for my $t (@terminals) {
next TOKEN_TYPE if not $string =~ m/\G($t->[1])/gcxms;
if ( not defined $rec->read( $t->[0], $1 ) ) {
say $rec->show_progress() or die "say failed: $ERRNO";
my $problem_position = ( pos $string ) - length $1;
my $before_start = $problem_position - 40;
$before_start = 0 if $before_start < 0;
my $before_length = $problem_position - $before_start;
die "Problem near position $problem_position\n",
q{Problem is here: "},
( substr $string, $before_start, $before_length + 40 ),
qq{"\n},
( q{ } x ( $before_length + 18 ) ), qq{^\n},
q{Token rejected, "}, $t->[0], qq{", "$1"},
;
} ## end if ( not defined $rec->read( $t->[0], $1 ) )
next TOKEN;
} ## end TOKEN_TYPE: for my $t (@terminals)
 
die q{No token at "}, ( substr $string, pos $string, 40 ),
q{", position }, pos $string;
} ## end TOKEN: while ( pos $string < $length )
 
$rec->end_input;
 
my $value_ref = $rec->value;
 
if ( !defined $value_ref ) {
say $rec->show_progress() or die "say failed: $ERRNO";
die 'Parse failed';
}
return ${$value_ref};
 
} ## end sub calculate
 
sub report_calculation {
my ($string) = @_;
return qq{Input: "$string"\n} . ' Parse: ' . calculate($string) . "\n";
}
 
my $output = join q{},
report_calculation('4 * 3 + 42 / 1'),
report_calculation('4 * 3 / (a = b = 5) + 42 - 1'),
report_calculation('4 * 3 / 5 - - - 3 + 42 - 1'),
report_calculation('- a - b'),
report_calculation('1 * 2 + 3 * 4 ^ 2 ^ 2 ^ 2 * 42 + 1');
 
print $output or die "print failed: $ERRNO";
$output eq <<'EXPECTED_OUTPUT' or die 'FAIL: Output mismatch';
Input: "4 * 3 + 42 / 1"
Parse: [[4*3]+[42/1]]
Input: "4 * 3 / (a = b = 5) + 42 - 1"
Parse: [[[[4*3]/[([a=[b=5]])]]+42]-1]
Input: "4 * 3 / 5 - - - 3 + 42 - 1"
Parse: [[[[[4*3]/5]-[-[-3]]]+42]-1]
Input: "- a - b"
Parse: [[-a]-b]
Input: "1 * 2 + 3 * 4 ^ 2 ^ 2 ^ 2 * 42 + 1"
Parse: [[[1*2]+[[3*[4^[2^[2^2]]]]*42]]+1]
EXPECTED_OUTPUT

(My perl install is a mess at the moment so I can't test right now.) I believe lines 19 and 20 of dsl_ex1.pl (namely those responsible for parsing -e and e^e) should be swapped, otherwise -1^2 would incorrectly be parsed as [[-1]^2] instead of[-[1^2]]`.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.