Skip to content

@jeffreykegler /round2.pl
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Marpa v. Perl regex, round 2
#!perl
use 5.010;
use strict;
use warnings;
use autodie;
use Benchmark qw(timethis);
use List::Util qw(min);
use Regexp::Common qw /balanced/;
use Marpa::R2 2.016000;
my $tchrist_regex = '(\\((?:[^()]++|(?-1))*+\\))';
my $marpa_answer_shown;
my $thin_answer_shown;
my $regex_answer_shown;
sub do_marpa_r2 {
my ($s) = @_;
my $grammar_args = {
start => 'S',
rules => [
[ S => [qw(prefix first_balanced endmark )] ],
{ lhs => 'S',
rhs => [qw(prefix first_balanced )]
},
{ lhs => 'prefix', rhs => [qw(prefix_char)], min => 0 },
{ lhs => 'prefix_char', rhs => [qw(xlparen)] },
{ lhs => 'prefix_char', rhs => [qw(rparen)] },
{ lhs => 'lparen', rhs => [qw(xlparen)] },
{ lhs => 'lparen', rhs => [qw(ilparen)] },
{ lhs => 'first_balanced',
rhs => [qw(xlparen balanced_sequence rparen)],
},
{ lhs => 'balanced',
rhs => [qw(lparen balanced_sequence rparen)],
},
{ lhs => 'balanced_sequence',
rhs => [qw(balanced)],
min => 0,
},
],
};
my $grammar = Marpa::R2::Grammar->new($grammar_args);
$grammar->precompute();
my ($first_balanced_rule) =
grep { ( $grammar->rule($_) )[0] eq 'first_balanced' }
$grammar->rule_ids();
my $recce = Marpa::R2::Recognizer->new( { grammar => $grammar } );
$recce->expected_symbol_event_set( 'endmark', 1 );
my $location = 0;
my $string_length = length $s;
my $end_of_match;
# find the match which ends first -- the one which starts
# first must start at or before it does
CHAR: while ( $location < $string_length ) {
my $value = substr $s, $location, 1;
my $event_count;
if ( $value eq '(' ) {
# say "Adding xlparen at $location";
$event_count = $recce->read('xlparen');
}
else {
# say "Adding rparen at $location";
$event_count = $recce->read('rparen');
}
if ( $event_count
and grep { $_->[0] eq 'SYMBOL_EXPECTED' } @{ $recce->events() } )
{
$end_of_match = $location + 1;
last CHAR;
} ## end if ( $event_count and grep { $_->[0] eq 'SYMBOL_EXPECTED'...})
$location++;
} ## end CHAR: while ( $location < $string_length )
if ( not defined $end_of_match ) {
say 'No balanced parens';
return 0;
}
CHAR: while ( ++$location < $string_length ) {
my $value = substr $s, $location, 1;
my $token = $value eq '(' ? 'ilparen' : 'rparen';
my $event_count = $recce->read($token);
last CHAR if not defined $event_count;
if ( $event_count
and grep { $_->[0] eq 'SYMBOL_EXPECTED' } @{ $recce->events() } )
{
$end_of_match = $location + 1;
}
} ## end CHAR: while ( ++$location < $string_length )
my $report = $recce->progress($end_of_match);
# say Dumper($report);
my $start_of_match = List::Util::min map { $_->[2] }
grep { $_->[1] < 0 && $_->[0] == $first_balanced_rule } @{$report};
my $value = substr $s, $start_of_match, $end_of_match - $start_of_match;
return 0 if $marpa_answer_shown;
$marpa_answer_shown = $value;
say qq{Marpa::R2: "$value" at $start_of_match-$end_of_match};
return 0;
} ## end sub do_marpa_r2
sub do_regex {
my ($s) = @_;
my $answer =
$s =~ $tchrist_regex
? $1
: 'no balanced parentheses';
return 0 if $regex_answer_shown;
$regex_answer_shown = $answer;
say qq{regex: "$answer"};
return 0;
} ## end sub do_regex
sub do_thin {
my ($s) = @_;
my $thin_grammar = Marpa::R2::Thin::G->new( { if => 1 } );
my $s_xlparen = $thin_grammar->symbol_new();
my $s_ilparen = $thin_grammar->symbol_new();
my $s_rparen = $thin_grammar->symbol_new();
my $s_lparen = $thin_grammar->symbol_new();
my $s_endmark = $thin_grammar->symbol_new();
my $s_start = $thin_grammar->symbol_new();
my $s_prefix = $thin_grammar->symbol_new();
my $s_first_balanced = $thin_grammar->symbol_new();
my $s_prefix_char = $thin_grammar->symbol_new();
my $s_balanced_sequence = $thin_grammar->symbol_new();
my $s_balanced = $thin_grammar->symbol_new();
$thin_grammar->start_symbol_set($s_start);
$thin_grammar->rule_new( $s_start,
[ $s_prefix, $s_first_balanced, $s_endmark ] );
$thin_grammar->rule_new( $s_start, [ $s_prefix, $s_first_balanced ] );
$thin_grammar->rule_new( $s_prefix_char, [$s_xlparen] );
$thin_grammar->rule_new( $s_prefix_char, [$s_rparen] );
$thin_grammar->rule_new( $s_lparen, [$s_xlparen] );
$thin_grammar->rule_new( $s_lparen, [$s_ilparen] );
my $first_balanced_rule =
$thin_grammar->rule_new( $s_first_balanced,
[ $s_xlparen, $s_balanced_sequence, $s_rparen ] );
$thin_grammar->rule_new( $s_balanced,
[ $s_lparen, $s_balanced_sequence, $s_rparen ] );
$thin_grammar->sequence_new( $s_prefix, $s_prefix_char, { min => 0 } );
$thin_grammar->sequence_new( $s_balanced_sequence, $s_balanced,
{ min => 0 } );
$thin_grammar->precompute();
my $thin_recce = Marpa::R2::Thin::R->new($thin_grammar);
$thin_recce->start_input();
$thin_recce->expected_symbol_event_set( $s_endmark, 1 );
my $location = 0;
my $string_length = length $s;
my $end_of_match;
# find the match which ends first -- the one which starts
# first must start at or before it does
CHAR: while ( $location < $string_length ) {
my $value = substr $s, $location, 1;
my $event_count;
if ( $value eq '(' ) {
# say "Adding xlparen at $location";
$thin_recce->alternative( $s_xlparen, 0, 1 );
$event_count = $thin_recce->earleme_complete();
} ## end if ( $value eq '(' ) )
else {
# say "Adding rparen at $location";
$thin_recce->alternative( $s_rparen, 0, 1 );
$event_count = $thin_recce->earleme_complete();
}
if ($event_count
and grep { $_ eq 'MARPA_EVENT_SYMBOL_EXPECTED' }
map { ; ( $thin_grammar->event($_) )[0] }
( 0 .. $event_count - 1 )
)
{
$end_of_match = $location + 1;
last CHAR;
} ## end if ( $event_count and grep { $_ eq ...})
$location++;
} ## end CHAR: while ( $location < $string_length )
if ( not defined $end_of_match ) {
say 'No balanced parens';
return 0;
}
CHAR: while ( ++$location < $string_length ) {
my $value = substr $s, $location, 1;
my $token = $value eq '(' ? $s_ilparen : $s_rparen;
# say "Adding $token at $location";
last CHAR if not defined $thin_recce->alternative( $token, 0, 1 );
my $event_count = $thin_recce->earleme_complete();
if ($event_count
and grep { $_ eq 'MARPA_EVENT_SYMBOL_EXPECTED' }
map { ; ( $thin_grammar->event($_) )[0] }
( 0 .. $event_count - 1 )
)
{
$end_of_match = $location + 1;
} ## end if ( $event_count and grep { $_ eq ...})
} ## end CHAR: while ( ++$location < $string_length )
my $start_of_match = $end_of_match;
$thin_recce->progress_report_start($end_of_match);
ITEM: while (1) {
my ( $rule_id, $dot_position, $item_origin ) =
$thin_recce->progress_item();
last ITEM if not defined $rule_id;
next ITEM if $dot_position >= 0;
next ITEM if $rule_id != $first_balanced_rule;
$start_of_match = $item_origin if $item_origin < $start_of_match;
} ## end ITEM: while (1)
my $value = substr $s, $start_of_match, $end_of_match - $start_of_match;
return 0 if $thin_answer_shown;
$thin_answer_shown = $value;
say qq{Marpa::R2::Thin: "$value" at $start_of_match-$end_of_match};
return 0;
} ## end sub do_thin
for my $length (qw(10 100 500 1000 2000 3000)) {
my $target = '(()())((';
my $test_string = ( '(' x ( $length - length $target ) ) . $target;
timethis( -4, sub { do_thin($test_string); }, "Marpa::R2::Thin $length" );
timethis( -4, sub { do_marpa_r2($test_string); }, "Marpa::R2 $length" );
timethis( -4, sub { do_regex($test_string); }, "regex $length" );
my $answer = '(()())';
$marpa_answer_shown eq $answer
or say {*STDERR} 'R2 ANSWER DOES NOT MATCH!';
$thin_answer_shown eq $answer
or say {*STDERR} 'Thin ANSWER DOES NOT MATCH!';
$regex_answer_shown eq $answer
or say {*STDERR} 'Regex ANSWER DOES NOT MATCH!';
$marpa_answer_shown = $thin_answer_shown = $regex_answer_shown = 0;
} ## end for my $length (qw(10 100 500 1000 2000 3000))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.