Created
December 3, 2009 05:49
-
-
Save masak/247924 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cat tree-spider | |
enum Action <DESCEND MATCH FAIL BACKTRACK>; | |
class Exp { | |
method start($, $, %h is rw) { DESCEND } | |
method succeeded(%h is rw) { MATCH } | |
method failed($, %h) { FAIL } | |
method Str() { self.WHAT.perl } | |
} | |
class RegexContainer is Exp { | |
has Exp $.child; | |
method new($child) { self.bless(self.CREATE, :$child) } | |
} | |
class Concat is Exp { | |
has Exp @.children; | |
method new(*@children) { self.bless(self.CREATE, :@children) } | |
method start($, $, %h is rw) { | |
%h<child> = 0; | |
DESCEND | |
} | |
method succeeded(%h is rw) { | |
if ++%h<child> == @.children { | |
MATCH | |
} | |
else { | |
DESCEND | |
} | |
} | |
} | |
enum Bt <GREEDY FRUGAL>; | |
class Quant is Exp { | |
has Num $.min; | |
has Num $.max; | |
has Bt $.bt; | |
has Exp $.child; | |
method new(Num $min, Num $max, Bt $bt, Exp $child) { | |
self.bless(self.CREATE, :$min, :$max, :$bt, :$child) | |
} | |
method start($, $, %h is rw) { | |
%h<reps> = 0; | |
if $.min > 0 || $.max > 0 && $.bt == GREEDY { | |
DESCEND | |
} | |
else { | |
MATCH | |
} | |
} | |
method succeeded(%h is rw) { | |
++%h<reps>; | |
if $.bt == GREEDY && %h<reps> < $.max { | |
(%h<mempos> //= []).push(%h<to>); | |
DESCEND | |
} | |
else { | |
MATCH | |
} | |
} | |
method failed($, %h) { | |
if %h<reps> >= $.min { | |
MATCH | |
} | |
else { | |
FAIL | |
} | |
} | |
method backtracked($to is rw, %h) { | |
if $.bt == FRUGAL && %h<reps> < $.max { | |
DESCEND | |
} | |
elsif $.bt == GREEDY && +%h<mempos> { | |
$to = pop %h<mempos>; | |
MATCH | |
} | |
else { | |
FAIL | |
} | |
} | |
method Str() { self.WHAT.perl ~ "({$.bt.name} {$.min}..{$.max})" } | |
} | |
class Group is Exp { | |
has $.child; | |
method new(Exp $child) { self.bless(self.CREATE, :$child) } | |
} | |
class Literal is Exp { | |
has Str $.text; | |
method new(Str $text) { self.bless(self.CREATE, :$text) } | |
method start($target, $to is rw, %h) { | |
if $to <= $target.chars - $.text.chars | |
&& $.text eq $target.substr($to, $.text.chars) { | |
$to += $.text.chars; | |
MATCH | |
} | |
else { | |
FAIL | |
} | |
} | |
method Str() { self.WHAT.perl ~ "('{$.text}')" } | |
} | |
# Let's instantiate the regexp /[a b*? c]+ ac/ like this: | |
my $regexp = | |
RegexContainer.new( | |
Concat.new( | |
Quant.new( | |
1, Inf, | |
GREEDY, | |
Group.new( | |
Concat.new( | |
Literal.new('a'), | |
Quant.new( | |
0, Inf, | |
FRUGAL, | |
Literal.new('b') | |
), | |
Literal.new('c') | |
) | |
) | |
), | |
Literal.new('ac') | |
) | |
) | |
; | |
class TreeSpider { | |
has Exp $!top; | |
method new(Exp $top) { | |
self.bless(self.CREATE, :$top) | |
} | |
method crawl($target) { | |
my $current = $!top; | |
my $to = 0; | |
my Action $last = DESCEND; | |
my @nodestack; | |
my @padstack; | |
my %savepoints; | |
my $iterations = 0; | |
loop { | |
die 'Time out' if ++$iterations >= 120; | |
say 'At ', $current, | |
$last == FAIL ?? ', failing' !! | |
$last == BACKTRACK ?? ', backtracking' !! | |
(q[, wanting to match '], $target.substr($to), q[']); | |
my %h = $last == DESCEND ?? (:$to) !! pop @padstack; | |
if $last == FAIL { | |
if %savepoints.exists($current) { | |
say '=== \o/ Activating savepoint \o/ ==='; | |
my @info = %savepoints{$current}.list; | |
%savepoints.delete($current); | |
@nodestack = @info[0].list; | |
@padstack = @info[1].list; | |
$current = @nodestack[*-1]; | |
$last = BACKTRACK; | |
next; | |
} | |
} | |
if $last == BACKTRACK { | |
$to = %h<to>; | |
} | |
my $action = $last == DESCEND | |
?? $current.start($target, $to, %h) | |
!! $last == MATCH | |
?? $current.succeeded(%h) | |
!! $last == FAIL | |
?? $current.failed($to, %h) | |
!! $current.backtracked($to, %h); | |
if $action == DESCEND && %savepoints.exists($current) { | |
say 'Unregistering savepoint at ', $current; | |
%savepoints.delete($current); | |
} | |
say $current, ' says ', $action.name; | |
%h<to> = $to; | |
push @padstack, {%h}; | |
if $last == DESCEND { | |
push @nodestack, $current; | |
} | |
if $current ~~ Quant && $action == MATCH { | |
my $index = @nodestack.end - 1; | |
$index-- until @nodestack[$index] ~~ Quant | RegexContainer; | |
my $surrounding = @nodestack[$index]; | |
say 'Registering savepoint with ', $surrounding; | |
%savepoints{$surrounding} | |
= [[@nodestack.list], [@padstack.list]]; | |
} | |
if $action == DESCEND { | |
$current = $current ~~ Concat | |
?? $current.children[ %h<child> ] | |
!! $current.child; | |
} | |
else { | |
pop @nodestack; | |
last unless @nodestack; | |
$current = @nodestack[*-1]; | |
pop @padstack; | |
} | |
$last = $action; | |
} | |
} | |
} | |
TreeSpider.new($regexp).crawl('abbcabcac'); | |
$ perl6 tree-spider | |
At RegexContainer, wanting to match 'abbcabcac' | |
RegexContainer says DESCEND | |
At Concat, wanting to match 'abbcabcac' | |
Concat says DESCEND | |
At Quant(GREEDY 1..Inf), wanting to match 'abbcabcac' | |
Quant(GREEDY 1..Inf) says DESCEND | |
At Group, wanting to match 'abbcabcac' | |
Group says DESCEND | |
At Concat, wanting to match 'abbcabcac' | |
Concat says DESCEND | |
At Literal('a'), wanting to match 'abbcabcac' | |
Literal('a') says MATCH | |
At Concat, wanting to match 'bbcabcac' | |
Concat says DESCEND | |
At Quant(FRUGAL 0..Inf), wanting to match 'bbcabcac' | |
Quant(FRUGAL 0..Inf) says MATCH | |
Registering savepoint with Quant(GREEDY 1..Inf) | |
At Concat, wanting to match 'bbcabcac' | |
Concat says DESCEND | |
At Literal('c'), wanting to match 'bbcabcac' | |
Literal('c') says FAIL | |
At Concat, failing | |
Concat says FAIL | |
At Group, failing | |
Group says FAIL | |
At Quant(GREEDY 1..Inf), failing | |
=== \o/ Activating savepoint \o/ === | |
At Quant(FRUGAL 0..Inf), backtracking | |
Quant(FRUGAL 0..Inf) says DESCEND | |
At Literal('b'), wanting to match 'bbcabcac' | |
Literal('b') says MATCH | |
At Quant(FRUGAL 0..Inf), wanting to match 'bcabcac' | |
Quant(FRUGAL 0..Inf) says MATCH | |
Registering savepoint with Quant(GREEDY 1..Inf) | |
At Concat, wanting to match 'bcabcac' | |
Concat says DESCEND | |
At Literal('c'), wanting to match 'bcabcac' | |
Literal('c') says FAIL | |
At Concat, failing | |
Concat says FAIL | |
At Group, failing | |
Group says FAIL | |
At Quant(GREEDY 1..Inf), failing | |
=== \o/ Activating savepoint \o/ === | |
At Quant(FRUGAL 0..Inf), backtracking | |
Quant(FRUGAL 0..Inf) says DESCEND | |
At Literal('b'), wanting to match 'bcabcac' | |
Literal('b') says MATCH | |
At Quant(FRUGAL 0..Inf), wanting to match 'cabcac' | |
Quant(FRUGAL 0..Inf) says MATCH | |
Registering savepoint with Quant(GREEDY 1..Inf) | |
At Concat, wanting to match 'cabcac' | |
Concat says DESCEND | |
At Literal('c'), wanting to match 'cabcac' | |
Literal('c') says MATCH | |
At Concat, wanting to match 'abcac' | |
Concat says MATCH | |
At Group, wanting to match 'abcac' | |
Group says MATCH | |
At Quant(GREEDY 1..Inf), wanting to match 'abcac' | |
Unregistering savepoint at Quant(GREEDY 1..Inf) | |
Quant(GREEDY 1..Inf) says DESCEND | |
At Group, wanting to match 'abcac' | |
Group says DESCEND | |
At Concat, wanting to match 'abcac' | |
Concat says DESCEND | |
At Literal('a'), wanting to match 'abcac' | |
Literal('a') says MATCH | |
At Concat, wanting to match 'bcac' | |
Concat says DESCEND | |
At Quant(FRUGAL 0..Inf), wanting to match 'bcac' | |
Quant(FRUGAL 0..Inf) says MATCH | |
Registering savepoint with Quant(GREEDY 1..Inf) | |
At Concat, wanting to match 'bcac' | |
Concat says DESCEND | |
At Literal('c'), wanting to match 'bcac' | |
Literal('c') says FAIL | |
At Concat, failing | |
Concat says FAIL | |
At Group, failing | |
Group says FAIL | |
At Quant(GREEDY 1..Inf), failing | |
=== \o/ Activating savepoint \o/ === | |
At Quant(FRUGAL 0..Inf), backtracking | |
Quant(FRUGAL 0..Inf) says DESCEND | |
At Literal('b'), wanting to match 'bcac' | |
Literal('b') says MATCH | |
At Quant(FRUGAL 0..Inf), wanting to match 'cac' | |
Quant(FRUGAL 0..Inf) says MATCH | |
Registering savepoint with Quant(GREEDY 1..Inf) | |
At Concat, wanting to match 'cac' | |
Concat says DESCEND | |
At Literal('c'), wanting to match 'cac' | |
Literal('c') says MATCH | |
At Concat, wanting to match 'ac' | |
Concat says MATCH | |
At Group, wanting to match 'ac' | |
Group says MATCH | |
At Quant(GREEDY 1..Inf), wanting to match 'ac' | |
Unregistering savepoint at Quant(GREEDY 1..Inf) | |
Quant(GREEDY 1..Inf) says DESCEND | |
At Group, wanting to match 'ac' | |
Group says DESCEND | |
At Concat, wanting to match 'ac' | |
Concat says DESCEND | |
At Literal('a'), wanting to match 'ac' | |
Literal('a') says MATCH | |
At Concat, wanting to match 'c' | |
Concat says DESCEND | |
At Quant(FRUGAL 0..Inf), wanting to match 'c' | |
Quant(FRUGAL 0..Inf) says MATCH | |
Registering savepoint with Quant(GREEDY 1..Inf) | |
At Concat, wanting to match 'c' | |
Concat says DESCEND | |
At Literal('c'), wanting to match 'c' | |
Literal('c') says MATCH | |
At Concat, wanting to match '' | |
Concat says MATCH | |
At Group, wanting to match '' | |
Group says MATCH | |
At Quant(GREEDY 1..Inf), wanting to match '' | |
Unregistering savepoint at Quant(GREEDY 1..Inf) | |
Quant(GREEDY 1..Inf) says DESCEND | |
At Group, wanting to match '' | |
Group says DESCEND | |
At Concat, wanting to match '' | |
Concat says DESCEND | |
At Literal('a'), wanting to match '' | |
Literal('a') says FAIL | |
At Concat, failing | |
Concat says FAIL | |
At Group, failing | |
Group says FAIL | |
At Quant(GREEDY 1..Inf), failing | |
Quant(GREEDY 1..Inf) says MATCH | |
Registering savepoint with RegexContainer | |
At Concat, wanting to match '' | |
Concat says DESCEND | |
At Literal('ac'), wanting to match '' | |
Literal('ac') says FAIL | |
At Concat, failing | |
Concat says FAIL | |
At RegexContainer, failing | |
=== \o/ Activating savepoint \o/ === | |
At Quant(GREEDY 1..Inf), backtracking | |
Quant(GREEDY 1..Inf) says MATCH | |
Registering savepoint with RegexContainer | |
At Concat, wanting to match 'ac' | |
Concat says DESCEND | |
At Literal('ac'), wanting to match 'ac' | |
Literal('ac') says MATCH | |
At Concat, wanting to match '' | |
Concat says MATCH | |
At RegexContainer, wanting to match '' | |
RegexContainer says MATCH |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment