-
-
Save smls/85b78904e96eb589ea1e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl6 | |
########## Benchmark Plumbing ########## | |
# Feel free to ignore this part, it's just for debug purposes and doesn't affect | |
# the regex engine problem itself. | |
my $time; | |
signal(SIGINT).tap({ | |
say "\n[killed after {sprintf '%.2f', now - $time} seconds]" if $time; | |
exit 1; | |
}); | |
sub bench ($title, &block) { | |
state $n; | |
say "{"\n" if $n}== {++$n}) $title =="; | |
$time = now; | |
block; | |
say "[took {sprintf '%.2f', now - $time} seconds]"; | |
} | |
########## Actual Code ########## | |
# Based on http://rosettacode.org/wiki/Amb#Perl_6 | |
sub amb($var,*@a) { | |
my $regex = "[" ~ @a.map({"||\{ $var = '$_' }"}) ~ "]"; | |
note " regex: $regex"; | |
return $regex; | |
} | |
sub joins ($word1, $word2) { | |
substr($word1,*-1,1) eq substr($word2,0,1) | |
} | |
# 1) Identical to the 3rd example, except with the words 'frog' and 'elephant' | |
# swapped so that the first option of each alternation always ends up matching | |
# and the regex engine never has to backtrack. This works fine. | |
bench "Pattern '[y||n] [y||n] [y]' (generated)", { | |
'' ~~ m/ | |
:my ($a,$b,$c); | |
<{ amb '$a', <the that> }> | |
<{ amb '$b', <elephant frog> }> | |
<?{ joins $a, $b }> | |
<{ amb '$c', <treaded> }> | |
<?{ joins $b, $c }> | |
{ say "$a $b $c" } | |
/ | |
} | |
# 2) Identical to the 3rd example, except with <{ }> generated alternations | |
# replaced with the equivalent literal ones. This also works fine. | |
bench "Pattern '[y||n] [n||y] [y]' (literal)", { | |
'' ~~ m/ | |
:my ($a,$b,$c); | |
[||{ $a = 'the' } ||{ $a = 'that' }] | |
[||{ $b = 'frog' } ||{ $b = 'elephant' }] | |
<?{ joins $a, $b }> | |
[||{ $c = 'treaded' }] | |
<?{ joins $b, $c }> | |
{ say "$a $b $c" } | |
/; | |
} | |
# 3) The problematic example. Backtracking and the <{ }> feature don't seem to | |
# like each other. If you comment out the { print "." } line you'll see it gets | |
# executed on infinite loop. | |
bench "Pattern '[y||n] [n||y] [y]' (generated)", { | |
'' ~~ m/ | |
:my ($a,$b,$c); | |
<{ amb '$a', <the that> }> | |
<{ amb '$b', <frog elephant> }> | |
# { print "." } | |
<?{ joins $a, $b }> | |
<{ amb '$c', <treaded> }> | |
<?{ joins $b, $c }> | |
{ say "$a $b $c" } | |
/; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
== 1) Pattern '[y||n] [y||n] [y]' (generated) == | |
regex: [||{ $a = 'the' } ||{ $a = 'that' }] | |
regex: [||{ $b = 'elephant' } ||{ $b = 'frog' }] | |
regex: [||{ $c = 'treaded' }] | |
the elephant treaded | |
[took 0.10 seconds] | |
== 2) Pattern '[y||n] [n||y] [y]' (literal) == | |
the elephant treaded | |
[took 0.00 seconds] | |
== 3) Pattern '[y||n] [n||y] [y]' (generated) == | |
regex: [||{ $a = 'the' } ||{ $a = 'that' }] | |
regex: [||{ $b = 'frog' } ||{ $b = 'elephant' }] | |
^C | |
[killed after 28.19 seconds] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment