Skip to content

Instantly share code, notes, and snippets.

@jeffreykegler
Created August 22, 2012 16:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffreykegler/3427294 to your computer and use it in GitHub Desktop.
Save jeffreykegler/3427294 to your computer and use it in GitHub Desktop.
Precedence parsing, example 1
#!perl
use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
use Marpa::XS;
use Data::Dumper;
require './OP1.pm'; ## no critic (Modules::RequireBarewordIncludes)
my $rules = Marpa::Demo::OP1::parse_rules(
<<'END_OF_GRAMMAR'
e ::=
NUM
| VAR
| :group '(' e ')'
|| '-' e
|| :right e '^' e
|| e '*' e
| e '/' e
|| e '+' e
| e '-' e
|| VAR '=' e
END_OF_GRAMMAR
);
sub add_brackets {
my ( undef, @children ) = @_;
return $children[0] if 1 == scalar @children;
my $original = join q{}, grep {defined} @children;
return '[' . $original . ']';
} ## end sub add_brackets
my $grammar = Marpa::XS::Grammar->new(
{ start => 'e',
actions => __PACKAGE__,
default_action => 'add_brackets',
rules => $rules,
lhs_terminals => 0,
}
);
$grammar->precompute;
# Order matters !!
my @terminals = (
[ 'NUM', qr/\d+/xms ],
[ 'VAR', qr/\w+/xms ],
[ q{'='}, qr/[=]/xms ],
[ q{'*'}, qr/[*]/xms ],
[ q{'/'}, qr/[\/]/xms ],
[ q{'+'}, qr/[+]/xms ],
[ q{'-'}, qr/[-]/xms ],
[ q{'^'}, qr/[\^]/xms ],
[ q{'('}, qr/[(]/xms ],
[ q{')'}, qr/[)]/xms ],
);
sub calculate {
my ($string) = @_;
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } );
my $length = length $string;
pos $string = 0;
TOKEN: while ( pos $string < $length ) {
# skip whitespace
next TOKEN if $string =~ m/\G\s+/gcxms;
# read other tokens
TOKEN_TYPE: for my $t (@terminals) {
next TOKEN_TYPE if not $string =~ m/\G($t->[1])/gcxms;
if ( not defined $rec->read( $t->[0], $1 ) ) {
say $rec->show_progress() or die "say failed: $ERRNO";
my $problem_position = ( pos $string ) - length $1;
my $before_start = $problem_position - 40;
$before_start = 0 if $before_start < 0;
my $before_length = $problem_position - $before_start;
die "Problem near position $problem_position\n",
q{Problem is here: "},
( substr $string, $before_start, $before_length + 40 ),
qq{"\n},
( q{ } x ( $before_length + 18 ) ), qq{^\n},
q{Token rejected, "}, $t->[0], qq{", "$1"},
;
} ## end if ( not defined $rec->read( $t->[0], $1 ) )
next TOKEN;
} ## end TOKEN_TYPE: for my $t (@terminals)
die q{No token at "}, ( substr $string, pos $string, 40 ),
q{", position }, pos $string;
} ## end TOKEN: while ( pos $string < $length )
$rec->end_input;
my $value_ref = $rec->value;
if ( !defined $value_ref ) {
say $rec->show_progress() or die "say failed: $ERRNO";
die 'Parse failed';
}
return ${$value_ref};
} ## end sub calculate
sub report_calculation {
my ($string) = @_;
return qq{Input: "$string"\n} . ' Parse: ' . calculate($string) . "\n";
}
my $output = join q{},
report_calculation('4 * 3 + 42 / 1'),
report_calculation('4 * 3 / (a = b = 5) + 42 - 1'),
report_calculation('4 * 3 / 5 - - - 3 + 42 - 1'),
report_calculation('- a - b'),
report_calculation('1 * 2 + 3 * 4 ^ 2 ^ 2 ^ 2 * 42 + 1');
print $output or die "print failed: $ERRNO";
$output eq <<'EXPECTED_OUTPUT' or die 'FAIL: Output mismatch';
Input: "4 * 3 + 42 / 1"
Parse: [[4*3]+[42/1]]
Input: "4 * 3 / (a = b = 5) + 42 - 1"
Parse: [[[[4*3]/[([a=[b=5]])]]+42]-1]
Input: "4 * 3 / 5 - - - 3 + 42 - 1"
Parse: [[[[[4*3]/5]-[-[-3]]]+42]-1]
Input: "- a - b"
Parse: [[-a]-b]
Input: "1 * 2 + 3 * 4 ^ 2 ^ 2 ^ 2 * 42 + 1"
Parse: [[[1*2]+[[3*[4^[2^[2^2]]]]*42]]+1]
EXPECTED_OUTPUT
package Marpa::Demo::OP1;
use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
use Marpa::XS;
sub rules { shift; return $_[0]; }
sub priority_rule {
my ( undef, $lhs, undef, $priorities ) = @_;
my $priority_count = scalar @{$priorities};
my @rules = ();
for my $priority_ix ( 0 .. $priority_count - 1 ) {
my $priority = $priority_count - ( $priority_ix + 1 );
for my $alternative ( @{ $priorities->[$priority_ix] } ) {
push @rules, [ $priority, @{$alternative} ];
}
} ## end for my $priority_ix ( 0 .. $priority_count - 1 )
my @xs_rules = (
{ lhs => $lhs, rhs => [ $lhs . '_0' ] },
( map {
;
{ lhs => ( $lhs . '_' . ( $_ - 1 ) ),
rhs => [ $lhs . '_' . ($_) ]
}
} 1 .. $priority_count - 1
)
);
RULE: for my $rule (@rules) {
my ( $priority, $assoc, $rhs, $action ) = @{$rule};
my @action_kv = ();
push @action_kv, action => $action if defined $action;
my @new_rhs = @{$rhs};
my @arity = grep { $new_rhs[$_] eq $lhs } 0 .. $#new_rhs;
my $length = scalar @{$rhs};
my $current_exp = $lhs . '_' . $priority;
my $next_priority = $priority + 1;
$next_priority = 0 if $next_priority >= $priority_count;
my $next_exp = $lhs . '_' . $next_priority;
if ( not scalar @arity ) {
push @xs_rules,
{
lhs => $current_exp,
rhs => \@new_rhs,
@action_kv
};
next RULE;
} ## end if ( not scalar @arity )
if ( scalar @arity == 1 ) {
die 'Unnecessary unit rule in priority rule' if $length == 1;
$new_rhs[ $arity[0] ] = $current_exp;
}
DO_ASSOCIATION: {
if ( $assoc eq 'L' ) {
$new_rhs[ $arity[0] ] = $current_exp;
for my $rhs_ix ( @arity[ 1 .. $#arity ] ) {
$new_rhs[$rhs_ix] = $next_exp;
}
last DO_ASSOCIATION;
} ## end if ( $assoc eq 'L' )
if ( $assoc eq 'R' ) {
$new_rhs[ $arity[-1] ] = $current_exp;
for my $rhs_ix ( @arity[ 0 .. $#arity - 1 ] ) {
$new_rhs[$rhs_ix] = $next_exp;
}
last DO_ASSOCIATION;
} ## end if ( $assoc eq 'R' )
if ( $assoc eq 'G' ) {
for my $rhs_ix ( @arity[ 0 .. $#arity ] ) {
$new_rhs[$rhs_ix] = $lhs . '_0';
}
last DO_ASSOCIATION;
} ## end if ( $assoc eq 'G' )
die qq{Unknown association type: "$assoc"};
} ## end DO_ASSOCIATION:
push @xs_rules, { lhs => $current_exp, rhs => \@new_rhs, @action_kv };
} ## end RULE: for my $rule (@rules)
return [@xs_rules];
} ## end sub priority_rule
sub empty_rule { shift; return { @{ $_[0] }, rhs => [], @{ $_[2] || [] } }; }
sub quantified_rule {
shift;
return {
@{ $_[0] },
rhs => [ $_[2] ],
min => ( $_[3] eq q{+} ? 1 : 0 ),
@{ $_[4] || [] }
};
} ## end sub quantified_rule
sub do_priority1 { shift; return [ $_[0] ]; }
sub do_priority3 { shift; return [ $_[0], @{ $_[2] } ]; }
sub do_full_alternative { shift; return [ ( $_[0] // 'L' ), $_[1], $_[2] ]; }
sub do_bare_alternative { shift; return [ ( $_[0] // 'L' ), $_[1], undef ] }
sub do_alternatives_1 { shift; return [ $_[0] ]; }
sub do_alternatives_3 { shift; return [ $_[0], @{ $_[2] } ] }
sub do_lhs { shift; return $_[0]; }
sub do_array { shift; return [@_]; }
sub do_arg1 { return $_[2]; }
sub do_right_adverb { return 'R' }
sub do_left_adverb { return 'L' }
sub do_group_adverb { return 'G' }
sub do_what_I_mean {
# The first argument is the per-parse variable.
# Until we know what to do with it, just throw it away
shift;
# Throw away any undef's
my @children = grep {defined} @_;
# Return what's left
return scalar @children > 1 ? \@children : shift @children;
} ## end sub do_what_I_mean
sub parse_rules {
my ($string) = @_;
my $grammar = Marpa::XS::Grammar->new(
{ start => 'rules',
actions => __PACKAGE__,
default_action => 'do_what_I_mean',
rules => [
{ lhs => 'rules',
rhs => [qw/rule/],
action => 'rules',
min => 1
},
{ lhs => 'rule',
rhs => [qw/lhs op_declare priorities/],
action => 'priority_rule'
},
{ lhs => 'rule',
rhs => [qw/lhs op_declare action/],
action => 'empty_rule'
},
{ lhs => 'rule',
rhs => [qw/lhs op_declare name quantifier action/],
action => 'quantified_rule'
},
{ lhs => 'priorities',
rhs => [qw(alternatives)],
action => 'do_priority1'
},
{ lhs => 'priorities',
rhs => [qw(alternatives op_tighter priorities)],
action => 'do_priority3'
},
{ lhs => 'alternatives',
rhs => [qw(alternative)],
action => 'do_alternatives_1',
},
{ lhs => 'alternatives',
rhs => [qw(alternative op_eq_pri alternatives)],
action => 'do_alternatives_3',
},
{ lhs => 'alternative',
rhs => [qw(adverb rhs action)],
action => 'do_full_alternative'
},
{ lhs => 'alternative',
rhs => [qw(adverb rhs)],
action => 'do_bare_alternative'
},
{ lhs => 'adverb',
rhs => [qw/op_group/],
action => 'do_group_adverb'
},
{ lhs => 'adverb',
rhs => [qw/op_right/],
action => 'do_right_adverb'
},
{ lhs => 'adverb',
rhs => [qw/op_left/],
action => 'do_left_adverb'
},
{ lhs => 'adverb', rhs => [] },
{ lhs => 'action', rhs => [] },
{ lhs => 'action',
rhs => [qw/op_arrow action_name/],
action => 'do_arg1'
},
{ lhs => 'action',
rhs => [qw/op_arrow name/],
action => 'do_arg1'
},
{ lhs => 'lhs', rhs => [qw/name/], action => 'do_lhs' },
{ lhs => 'rhs', rhs => [qw/names/] },
{ lhs => 'quantifier', rhs => [qw/op_plus/] },
{ lhs => 'quantifier', rhs => [qw/op_star/] },
{ lhs => 'names',
rhs => [qw/name/],
min => 1,
action => 'do_array'
},
],
lhs_terminals => 0,
}
);
$grammar->precompute;
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } );
# Order matters !!!
my @terminals = (
[ 'op_right', qr/:right\b/xms ],
[ 'op_left', qr/:left\b/xms ],
[ 'op_group', qr/:group\b/xms ],
[ 'op_declare', qr/::=/xms ],
[ 'op_arrow', qr/=>/xms ],
[ 'op_tighter', qr/[|][|]/xms ],
[ 'op_eq_pri', qr/[|]/xms ],
[ 'reserved_name', qr/(::(whatever|undef))/xms ],
[ 'op_plus', qr/[+]/xms ],
[ 'op_star', qr/[*]/xms ],
[ 'name', qr/\w+/xms ],
[ 'name', qr/['][^']+[']/xms ],
);
my $length = length $string;
pos $string = 0;
TOKEN: while ( pos $string < $length ) {
# skip whitespace
next TOKEN if $string =~ m/\G\s+/gcxms;
# read other tokens
TOKEN_TYPE: for my $t (@terminals) {
next TOKEN_TYPE if not $string =~ m/\G($t->[1])/gcxms;
if ( not defined $rec->read( $t->[0], $1 ) ) {
die die q{Problem before position }, pos $string, ': ',
( substr $string, pos $string, 40 ),
qq{\nToken rejected, "}, $t->[0], qq{", "$1"},
;
} ## end if ( not defined $rec->read( $t->[0], $1 ) )
next TOKEN;
} ## end TOKEN_TYPE: for my $t (@terminals)
die q{No token at "}, ( substr $string, pos $string, 40 ),
q{", position }, pos $string;
} ## end TOKEN: while ( pos $string < $length )
$rec->end_input;
my $parse_ref = $rec->value;
if ( !defined $parse_ref ) {
say $rec->show_progress() or die "say failed: $ERRNO";
die 'Parse failed';
}
my $parse = ${$parse_ref};
return $parse;
} ## end sub parse_rules
1;
@blefloch
Copy link

(My perl install is a mess at the moment so I can't test right now.) I believe lines 19 and 20 of dsl_ex1.pl (namely those responsible for parsing -e and e^e) should be swapped, otherwise -1^2 would incorrectly be parsed as [[-1]^2] instead of[-[1^2]]`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment