Skip to content

Instantly share code, notes, and snippets.

@BenGoldberg1
Created January 23, 2016 01:52
Show Gist options
  • Save BenGoldberg1/fff5027b2b1f7049d6c7 to your computer and use it in GitHub Desktop.
Save BenGoldberg1/fff5027b2b1f7049d6c7 to your computer and use it in GitHub Desktop.
Originally was "step 2" of http://perl.plover.com/Regex/Regex.pm
use strict;
use warnings;
do 'PloverOO.pl';
package NFA;
################################################################
#
# Compile parsed regexp into representation of NFA
#
################################################################
my $S = '00000';
my $STARTSYMBOL = 0;
my $ENDSYMBOL = 1;
sub ALTERN::to_NFA {
my $self = shift;
my $result = NFA->_new;
my @submachines = map $_->to_NFA, @$self;
return $submachines[0] if @submachines == 1;
my ($startsym, $endsym) = @{$result->{Symbols}};
&putin($result, @submachines);
my @startsyms = map { $_->{Symbols}[$STARTSYMBOL] } @submachines;
my @endsyms = map { $_->{Symbols}[$ENDSYMBOL] } @submachines;
$result->{$startsym} = { '' => \@startsyms };
foreach my $es (@endsyms) {
$result->{$es} = { '' => $endsym };
}
$result;
}
sub CONCAT::to_NFA {
my $self = shift;
my $result = NFA->_new;
my @submachines = map $_->to_NFA, @$self;
return $submachines[0] if @submachines == 1;
my ($startsym, $endsym) = @{$result->{Symbols}};
&putin($result, @submachines);
my $i;
for ($i = 0; $i < @submachines - 1; $i++) {
my $tail = $submachines[$i] {Symbols}[$ENDSYMBOL];
my $head = $submachines[$i+1]{Symbols}[$STARTSYMBOL];
$result->{$tail} = { '' => $head };
}
$result->{$startsym} = { '' => $submachines[0] {Symbols}[$STARTSYMBOL] };
$result->{$submachines[-1]{Symbols}[$ENDSYMBOL]} = { '' => $endsym };
$result;
}
sub STAR::to_NFA {
my $self = shift;
my $result = NFA->_new;
my @submachines = map $_->to_NFA, @$self;
my $sm = $submachines[0];
&putin($result, $sm);
my ($startsym, $endsym) = @{$result->{Symbols}};
my ($s, $e) = @{$sm->{Symbols}};
$result->{$e} = { '' => [$s, $endsym] };
$result->{$startsym} = { '' => [$s, $endsym] };
$result;
}
sub PLUS::to_NFA {
my $self = shift;
my $result = NFA->_new;
my ($startsym, $endsym) = @{$result->{Symbols}};
my @submachines = map $_->to_NFA, @$self;
my $sm = $submachines[0];
&putin($result, $sm);
my ($s, $e) = @{$sm->{Symbols}};
$result->{$e} = { '' => [$s, $endsym] };
$result->{$startsym} = { '' => $s };
$result;
}
sub LITERAL::to_NFA {
return NFA->literal(@{$_[0]});
}
sub CLASS::d::to_NFA {
return NFA->digit();
}
sub _new {
my $pack = shift;
my $startsym = "S" . &gensym();
my $endsym = "E" . &gensym();
my $result = { Symbols => [ $startsym, $endsym ] };
bless $result, $pack;
}
sub new {
my ($pack, $tree) = @_;
$tree->to_NFA;
}
sub start_state {
$_[0]{Symbols}[$STARTSYMBOL];
}
sub is_end_state {
my $self = shift;
my $state = shift;
$state eq $self->{Symbols}[$ENDSYMBOL];
}
sub transitions_for_symbol {
my ($self, $state, $symbol) = @_;
my $s = $self->{$state} or return;
my $n = $s->{$symbol} or return;
ref($n) ? @$n : $n;
}
sub digit {
my ($pack, $what) = @_;
my ($ss, $es) = ("S".gensym(), "E".gensym());
my %t;
$t{$_} = $es for 0 .. 9;
bless { Symbols => [ $ss, $es ], $ss => \%t }, $pack;
}
sub literal {
my $pack = shift;
my $what = shift;
my $startsym = "S" . &gensym();
my $endsym = "E" . &gensym();
bless
{ Symbols => [ $startsym, $endsym ],
$startsym => { $what => $endsym } },
=> $pack;
}
# Given a list of machines, M1 ... Mn, put M2... Mn into M1.
sub putin {
my $master = shift;
foreach my $m (@_) {
foreach my $state (keys %$m) {
next if $state eq 'Symbols';
if (exists $master->{$state}) {
print STDERR "Warning: State name conflict for `$state'.\n";
}
$master->{$state} = $m->{$state};
}
}
$master;
}
sub gensym {
$S++;
}
if( !caller ) {
NFA->self_test;
exit 0;
}
1;
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment