Skip to content

Instantly share code, notes, and snippets.

@bdw
Created August 13, 2015 12:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdw/f727aa99aece2ff7554d to your computer and use it in GitHub Desktop.
Save bdw/f727aa99aece2ff7554d to your computer and use it in GitHub Desktop.
ruleset-generation.pl
#!/usr/bin/perl
use Data::Dumper;
use strict;
use warnings;
use sexpr;
sub sortn {
sort { $a <=> $b } @_;
}
sub uniq {
my %h;
$h{$_}++ for @_;
return keys %h;
}
# Collect rules from the grammar
my (@rules, @names, @paths, @curpath);
sub add_rule {
my ($fragment, $terminal, $cost, @trace) = @_;
my $list = [];
# replace all sublist with pseudorules
for (my $i = 0; $i < @$fragment; $i++) {
my $item = $fragment->[$i];
if (ref($item) eq 'ARRAY') {
# create pseudorule
my $label = sprintf('L%dP%d', scalar @rules, scalar @trace);
# divide costs
$cost /= 2;
add_rule($item, $label, $cost, @trace, $i);
push @$list, $label;
} else {
push @curpath, @trace, $i, -1 if $i > 0;
push @$list, $item;
}
}
push @curpath, @trace, -1 if @$fragment == 1 && @trace > 0;
# NB - only top-level fragments are associated with tiles.
my $rulenr = scalar @rules;
push @rules, [$list, $terminal, $cost];
return $rulenr;
}
my $input = \*DATA;
my $parser = sexpr->parser($input);
while (my $tree = $parser->read) {
my ($keyword, $name, $fragment, $terminal, $cost) = @$tree;
if ($keyword eq 'tile:') {
@curpath = ();
my $rulenr = add_rule($fragment, $terminal, $cost);
$names[$rulenr] = $name;
$paths[$rulenr] = [@curpath, -1];
}
}
close $input;
# initialize nonterminal sets
my (%nonterminal_sets, %trie);
$nonterminal_sets{$_->[1]} = [$_->[1]] for @rules;
my ($added, $deleted, $i);
# override hash-key-join character
local $; = ",";
do {
$i++;
# lookup table from nonterminals to nonterminalsetnames
my %lookup;
while (my ($k, $v) = each %nonterminal_sets) {
$lookup{$_}{$k} = 1 for @$v;
}
$lookup{$_} = [keys %{$lookup{$_}}] for keys %lookup;
# reinitialize trie
%trie = ();
# build it based on the terminal-to-terminalset map
for (my $rule_nr = 0; $rule_nr < @rules; $rule_nr++) {
my ($head, $n1, $n2) = @{$rules[$rule_nr][0]};
if (defined $n2) {
for my $nt_k1 (@{$lookup{$n1}}) {
for my $nt_k2 (@{$lookup{$n2}}) {
$trie{$head, $nt_k1, $nt_k2}{$rule_nr} = $rules[$rule_nr][1];
}
}
} elsif (defined $n1) {
for my $nt_k1 (@{$lookup{$n1}}) {
$trie{$head, $nt_k1, -1}{$rule_nr} = $rules[$rule_nr][1];
}
} else {
$trie{$head,-1, -1}{$rule_nr} = $rules[$rule_nr][1];
}
}
# generate new nonterminal-sets
my %new_nts;
for my $generated (values %trie) {
my @nt_set_gen = sort(uniq(values %$generated));
my $nt_k = join(':', @nt_set_gen);
$new_nts{$nt_k} = [@nt_set_gen];
}
# Calculate changes
$deleted = 0;
for my $k (keys %nonterminal_sets) {
$deleted++ unless exists $new_nts{$k};
}
$added = scalar(keys %new_nts) - scalar(keys %nonterminal_sets) + $deleted;
print "Added $added and deleted $deleted\n";
%nonterminal_sets = %new_nts;
} while ($added || $deleted);
print "Required $i iterations\n";
# Rulesets can now be read off from the trie
my (@rulesets, %inversed);
for my $v (values %trie) {
my @rules = sortn(keys %$v);
my $name = join $;, @rules;
next if exists $inversed{$name};
my $ruleset_nr = scalar @rulesets;
push @rulesets, [@rules];
$inversed{$name} = $ruleset_nr;
}
# print them for me to see
for my $rs (@rulesets) {
my $key = join $;, @$rs;
print "$key: ";
my @expr = map { sexpr::encode($_) } map { $rules[$_][0] } @$rs;
print join("; ", @expr);
print "\n";
}
__DATA__
(tile: t1 (t) n 1)
(tile: s1 (s) n 1)
(tile: a1 (a n) n 1)
(tile: at1 (a (t)) n 1)
(tile: as1 (a (s)) n 1)
(tile: b1 (b n n) n 1)
(tile: bs1 (b n (s)) n 1)
(tile: bt1 (b n (t)) n 1)
(tile: bst1 (b (s) (t)) n 1)
package sexpr;
use strict;
# declare keyword syntax regex
my $keyword = qr/^[&\$^,]?[\w\.\[\]_\*]+[!:]?/;
sub parser {
my ($class, $input) = @_;
return bless {
input => $input,
buffer => '',
macros => {},
}, $class;
}
sub read {
my $self = shift;
my $file = $self->{input};
my $expr = $self->{buffer};
my ($open, $close) = (0, 0);
while (!eof($file)) {
my $line = <$file>;
next if $line =~ m/^#|^\s*$/;
$expr .= $line;
$open = $expr =~ tr/(//;
$close = $expr =~ tr/)//;
last if ($open > 0) && ($open == $close);
}
die "End of input with unclosed template" if $open > $close;
my ($tree, $rest) = $self->parse($expr);
$self->{buffer} = $rest;
return $tree;
}
sub parse {
my ($self, $expr) = @_;
my $tree = [];
# consume initial opening parenthesis
return (undef, $expr) unless $expr =~ m/^\s*\(/;
$expr = substr($expr, $+[0]);
while ($expr) {
# remove initial space
$expr =~ s/^\s*//;
if (substr($expr, 0, 1) eq '(') {
# descend on opening parenthesis
my ($child, $rest) = $self->parse($expr);
$expr = $rest;
push @$tree, $child;
} elsif (substr($expr, 0, 1) eq ')') {
# ascend on closing parenthesis
$expr = substr $expr, 1;
last;
} elsif ($expr =~ m/$keyword/) {
# consume keyword
push @$tree, substr($expr, $-[0], $+[0] - $-[0]);
$expr = substr $expr, $+[0];
} else {
die "Could not parse $expr";
}
}
if (@$tree && substr($tree->[0], 0, 1) eq '^') {
if (defined $self->{macros}->{$tree->[0]}) {
$tree = apply_macro($self->{macros}->{$tree->[0]}, $tree);
} else {
die "Attempted to invoke undefined macro $tree->[0]";
}
}
return ($tree, $expr);
}
sub decl_macro {
my ($self, $name, $macro) = @_;
die "Macro name '$name' must start with ^ symbol" unless substr($name,0,1) eq '^';
die "Redeclaration of macro $name" if defined $self->{'macros'}->{$name};
$self->{macros}->{$name} = $macro;
}
sub apply_macro {
my ($macro, $tree) = @_;
my $params = $macro->[0];
my $args = [@$tree[1..$#$tree]];
die "Incorrect number of args, got ".@$args." expected ".@$params unless @$args == @$params;
my %bind;
@bind{@$params} = @$args;
return fill_macro($macro->[1], \%bind);
}
sub fill_macro {
my ($macro, $bind) = @_;
my $result = [];
for (my $i = 0; $i < @$macro; $i++) {
if (ref($macro->[$i]) eq 'ARRAY') {
push @$result, fill_macro($macro->[$i], $bind);
} elsif (substr($macro->[$i], 0, 1) eq ',') {
if (defined $bind->{$macro->[$i]}) {
push @$result, $bind->{$macro->[$i]};
} else {
die "Unmatched macro substitution: $macro->[$i]";
}
} else {
push @$result, $macro->[$i];
}
}
return $result;
}
sub encode {
my $list = shift;
my $out = '(';
for my $item (@$list) {
if (ref($item) eq 'ARRAY') {
$out .= encode($item);
} else {
$out .= "$item";
}
$out .= " ";
}
$out .= ')';
return $out;
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment