public
Created

Marpa::{XS,PP} vs. Regexp::Common (balanced) benchmarking script

  • Download Gist
balanced.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
use 5.010;
use strict;
use warnings;
 
use Benchmark qw(timeit countit timestr);
use Regexp::Common qw /balanced/;
use Getopt::Long;
# Command-line options and their defaults.
my $example;          # --example: name of the generated input ('pos2' or 'final')
my $length = 1000;    # --length:  size parameter for the generated input
my $string;           # --string:  explicit paren string to test instead
my $pp = 0;           # --pp:      use Marpa::PP rather than Marpa::XS
my $getopt_result = GetOptions(
    "length=i"  => \$length,
    "example=s" => \$example,
    "string=s"  => \$string,
    "pp"        => \$pp,
);

# GetOptions returns false after warning about a bad or unknown option.
# Stop here rather than benchmark with settings the user did not ask for.
if ( not $getopt_result ) {
    die "usage: $0 [--length=N] [--example=pos2|final] [--string=PARENS] [--pp]\n";
}
 
# Load the requested Marpa implementation, enforce its minimum version,
# and alias its constructors as main::Marpa::grammar_new and
# main::Marpa::recce_new so the rest of the script can call
# main::Marpa->grammar_new(...) / main::Marpa->recce_new(...) without
# caring which backend is in use.
if ( not $pp ) {

    # Default backend: Marpa::XS.
    require Marpa::XS;
    'Marpa::XS'->VERSION(0.020000);
    {
        no strict 'refs';
        *{'main::Marpa::recce_new'}   = \&Marpa::XS::Recognizer::new;
        *{'main::Marpa::grammar_new'} = \&Marpa::XS::Grammar::new;
    }
    say "Marpa::XS ", $Marpa::XS::VERSION;
}
else {

    # --pp was given: use Marpa::PP.
    require Marpa::PP;
    'Marpa::PP'->VERSION(0.010000);
    {
        no strict 'refs';
        *{'main::Marpa::recce_new'}   = \&Marpa::PP::Recognizer::new;
        *{'main::Marpa::grammar_new'} = \&Marpa::PP::Grammar::new;
    }
    say "Marpa::PP ", $Marpa::PP::VERSION;
}
 
 
# --string mode: validate the user-supplied input, run each implementation
# once on it (no timing), and exit without reaching the benchmark below.
if ( defined $string ) {

    # Only a non-empty run of '(' and ')' characters is accepted.
    if ( $string !~ /\A [()]+ \z/xms ) {
        die "Bad string: $string";
    }
    say "Testing $string";
    do_marpa_xs($string);
    do_regex($string);
    exit 0;
}
 
# Force --length to a number and reject non-positive values.
$length += 0;
if ( $length <= 0 ) {
    die "Bad length $length";
}

# Build the benchmark input $s for the selected example (default 'final'):
#   pos2  - '(' . '(()())((' followed by filler '(' characters
#           (note: the result is one character longer than $length)
#   final - filler '(' characters followed by '(()())(('
$example //= "final";
my $s;
CREATE_S: {
    my $s_balanced = '(()())((';

    # Number of filler '(' characters.  A --length shorter than the fixed
    # part would make the repeat count negative; the x operator treats that
    # as zero, so clamp it explicitly and say what is actually being used.
    my $filler_count = $length - length $s_balanced;
    if ( $filler_count < 0 ) {
        warn "Length $length is shorter than the fixed part of the input (",
            length($s_balanced), " characters); using no filler\n";
        $filler_count = 0;
    }
    my $filler = '(' x $filler_count;

    if ( $example eq 'pos2' ) {
        $s = '(' . $s_balanced . $filler;
        last CREATE_S;
    }
    if ( $example eq 'final' ) {
        $s = $filler . $s_balanced;
        last CREATE_S;
    }
    die qq{Example "$example" not known};
} ## end CREATE_S:
 
# Semantic action named by several grammar rules ('main::concat').
# Ignores its first argument and returns the remaining arguments joined
# together with no separator.
sub concat {
    my @pieces = @_[ 1 .. $#_ ];
    return join q{}, @pieces;
}
# Semantic action named by the two-symbol S rule ('main::arg1').
# Returns its third argument, skipping the first two; undef when fewer
# than three arguments are passed.
sub arg1 {
    return $_[2];
}
 
# First answer reported by each implementation.  do_marpa_xs() and
# do_regex() set these the first time they print a result and stay quiet on
# later calls while they are still true; the two values are compared at the
# end of the script.
my $marpa_answer_shown;
my $regex_answer_shown;
 
# Build and precompute the grammar used to locate balanced parentheses.
#
#   S                 ::= prefix first_balanced endmark
#   S                 ::= prefix first_balanced            (action main::arg1)
#   prefix            ::= prefix_char*
#   first_balanced    ::= balanced
#   balanced          ::= lparen rparen
#   balanced          ::= lparen balanced_sequence rparen
#   balanced_sequence ::= balanced+
#
# Every rule from first_balanced down uses main::concat as its action.
# prefix_char, lparen, rparen and endmark are declared as terminals.
# Returns the precomputed grammar object.
sub paren_grammar_generate {
    my $concat_action = 'main::concat';

    my @rules = (
        [ S => [qw(prefix first_balanced endmark)] ],
        {   lhs    => 'S',
            rhs    => [qw(prefix first_balanced)],
            action => 'main::arg1',
        },
        {   lhs => 'prefix',
            rhs => ['prefix_char'],
            min => 0,
        },
        {   lhs    => 'first_balanced',
            rhs    => ['balanced'],
            action => $concat_action,
        },
        {   lhs    => 'balanced',
            rhs    => [qw(lparen rparen)],
            action => $concat_action,
        },
        {   lhs    => 'balanced',
            rhs    => [qw(lparen balanced_sequence rparen)],
            action => $concat_action,
        },
        {   lhs    => 'balanced_sequence',
            rhs    => ['balanced'],
            min    => 1,
            action => $concat_action,
        },
    );

    my $grammar = main::Marpa->grammar_new(
        {   start => 'S',
            strip => 0,
            rules => \@rules,
        }
    );

    my @terminals = qw(prefix_char lparen rparen endmark);
    $grammar->set( { terminals => \@terminals } );

    $grammar->precompute();
    return $grammar;
}
 
# True when 'endmark' is among the terminals the recognizer reports as
# expected at its current position.  Uses grep/eq instead of the deprecated
# smartmatch operator.
sub _endmark_expected {
    my ($recce) = @_;
    return scalar grep { $_ eq 'endmark' } @{ $recce->terminals_expected() };
}

# Return the start position captured from the first "F<n> @<start>-<end>
# first_balanced ->" line of the recognizer's progress report.  Dies with
# the whole report when no such line is present.
sub _first_balanced_start {
    my ($recce) = @_;
    my $progress = $recce->show_progress();
    if ( $progress
        =~ m/ ^ F\d+ \s [@] (\d+) [-] \d+ \s first_balanced \s [-][>] /xms )
    {
        my $start = $1;
        return $start;
    }
    die "No match for $progress";
}

# Search $s (a string of '(' and ')') for a balanced-parentheses substring
# using the selected Marpa recognizer.
#
# Prints the match and its position the first time a match is found, and
# records it in $marpa_answer_shown; later calls do the same work silently so
# the sub can be timed.  Prints "No balanced parens" when the first pass finds
# nothing.  Always returns 0.
sub do_marpa_xs {
    my ($s) = @_;
    my $grammar       = paren_grammar_generate();
    my $recce         = main::Marpa->recce_new( { grammar => $grammar } );
    my $location      = 0;
    my $string_length = length $s;
    my $start_of_match;
    my $end_of_match;

    # find the match which ends first -- the one which starts
    # first must start at or before it does
    CHAR: while ( $location < $string_length ) {
        my $value = substr $s, $location, 1;
        my $token = $value eq '(' ? 'lparen' : 'rparen';

        # Offer each character both as a paren and as a prefix character.
        $recce->alternative( $token,        $value );
        $recce->alternative( 'prefix_char', $value );
        $recce->earleme_complete();
        if ( _endmark_expected($recce) ) {
            $start_of_match = _first_balanced_start($recce);
            $end_of_match   = $location + 1;
            last CHAR;
        }
        $location++;
    } ## end CHAR: while ( $location < $string_length )
    if ( not defined $start_of_match ) {
        say "No balanced parens";
        return 0;
    }

    # Second pass: keep reading parens only (no prefix_char alternative),
    # looking for a match that starts earlier than the one already found.
    # NOTE(review): alternative() presumably returns undef once the token is
    # rejected -- confirm against the Marpa recognizer documentation.
    EXTEND: while ( ++$location < $string_length ) {
        my $value = substr $s, $location, 1;
        my $token = $value eq '(' ? 'lparen' : 'rparen';
        last EXTEND if not defined $recce->alternative( $token, $value );
        $recce->earleme_complete();
        if ( _endmark_expected($recce) ) {
            my $candidate_start = _first_balanced_start($recce);

            # A match starting at or after the current one is no improvement.
            next EXTEND if $candidate_start >= $start_of_match;
            $start_of_match = $candidate_start;
            $end_of_match   = $location + 1;
            last EXTEND;
        }
    } ## end EXTEND: while ( ++$location < $string_length )

    my $value = substr $s, $start_of_match, $end_of_match - $start_of_match;

    # Report only once; repeated calls come from the benchmark loop.
    return 0 if $marpa_answer_shown;
    $marpa_answer_shown = $value;
    say qq{marpa: "$value" at $start_of_match-$end_of_match};
    return 0;

} ## end sub do_marpa_xs
 
# Search $s for balanced parentheses with the Regexp::Common "balanced"
# pattern.  The first call prints the answer (the text captured in $1, or a
# fixed "not found" message) and records it in $regex_answer_shown; later
# calls run the match silently so the sub can be timed.  Always returns 0.
sub do_regex {
    my ($s) = @_;

    my $answer = 'no balanced parentheses';
    if ( $s =~ /$RE{balanced}{-parens=>'()'}{-keep}/ ) {
        $answer = $1;
    }

    if ( not $regex_answer_shown ) {
        $regex_answer_shown = $answer;
        say "regex answer: $answer";
    }
    return 0;
}
 
# Time each implementation on the generated input (countit is asked for
# 2 CPU seconds per implementation) and print the timing summaries.
my $marpa_timing = countit( 2, sub { do_marpa_xs($s) } );
say timestr($marpa_timing);
my $regex_timing = countit( 2, sub { do_regex($s) } );
say timestr($regex_timing);

# Finally, report whether both implementations found the same text.
my $verdict = 'ANSWERS DO NOT MATCH!';
if ( $marpa_answer_shown eq $regex_answer_shown ) {
    $verdict = 'Answers match';
}
say $verdict;

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.