Skip to content

Instantly share code, notes, and snippets.

@jkeroes
Created May 14, 2012 18:23
Show Gist options
  • Save jkeroes/2695503 to your computer and use it in GitHub Desktop.
Save jkeroes/2695503 to your computer and use it in GitHub Desktop.
Catastrophic backtracking and regexp optimization in Perl
#!/usr/bin/perl
use 5.10.0;
use strict;
use warnings;
use Benchmark qw/:all :hireswallclock/;
use Regexp::Optimizer;
my $max_len = 4250;
my $bad = '^(aa|aab?)+$';
my $good = '^(aab??)+$';
my $bad_re = qr{$bad};
my $good_re = qr{$good};
my $fixed_re = Regexp::Optimizer->new->optimize($bad_re);
timethese(
10,
{
"Bad: $bad_re" => sub { say test_re($bad_re, $max_len) },
"Good: $good_re" => sub { say test_re($good_re, $max_len) },
"Fixed: $fixed_re" => sub { say test_re($fixed_re, $max_len) },
},
);
sub test_re {
my ($re, $max_len) = @_;
die "test_re(regex, max_len) called with bad regexp arg" unless ref $re eq 'Regexp';
die "test_re(regex, max_len) called with bad max_len arg" unless $max_len =~ /^\d+$/;
my $count = 0;
for my $len (0..$max_len) {
my $str = 'a' x $len;
++$count if $str =~ $re;
}
return $count;
}
__END__
Benchmark: timing 10 iterations of Bad: (?-xism:^(aa|aab?)+$), Fixed: (?-xism:^(?:aab??)+$), Good: (?-xism:^(aab?)+$)...
2125
2125
2125
2125
2125
2125
2125
2125
2125
2125
Bad: (?-xism:^(aa|aab?)+$): 35.9051 wallclock secs (35.73 usr + 0.03 sys = 35.76 CPU) @ 0.28/s (n=10)
2125
2125
2125
2125
2125
2125
2125
2125
2125
2125
Fixed: (?-xism:^(?:aab??)+$): 4.91235 wallclock secs ( 4.90 usr + 0.00 sys = 4.90 CPU) @ 2.04/s (n=10)
2125
2125
2125
2125
2125
2125
2125
2125
2125
2125
Good: (?-xism:^(aab?)+$): 6.13033 wallclock secs ( 6.12 usr + 0.01 sys = 6.13 CPU) @ 1.63/s (n=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment