Skip to content

Instantly share code, notes, and snippets.

@pstuifzand
Created March 15, 2012 00:39
Show Gist options
  • Save pstuifzand/2040745 to your computer and use it in GitHub Desktop.
Save pstuifzand/2040745 to your computer and use it in GitHub Desktop.
(Partial) Parser for Scribe built with Marpa::XS
#!/usr/bin/perl -w
use 5.10.1;
use strict;
use File::Slurp 'read_file';
use Marpa::XS;
my %tokens = (
AT => qr/^\@/,
LP => qr/^\(/,
RP => qr/^\)/,
LB => qr/^\[/,
RB => qr/^\]/,
LS => qr/^\{/,
RS => qr/^\}/,
Char => qr/^./s,
Begin => qr/^Begin/,
End => qr/^End/,
Word => qr/^[A-Za-z][A-Za-z0-9]*/,
);
my $grammar = Marpa::XS::Grammar->new({
actions => 'Action',
start => 'Parser',
rules => [
{ lhs => 'Parser', rhs => [qw/Tree/], action => 'Parser' },
{ lhs => 'Tree', rhs => [ qw/Expression/ ], min => 1, action => 'Parser' },
{ lhs => 'Expression', rhs => [ qw/BeginExpr Tree EndExpr/ ], action => 'BeginEnd' },
{ lhs => 'BeginExpr', rhs => [ qw/AT Begin LP Word RP/ ], action => 'BeginExpr' },
{ lhs => 'BeginExpr', rhs => [ qw/AT Begin LB Word RB/ ], action => 'BeginExpr' },
{ lhs => 'BeginExpr', rhs => [ qw/AT Begin LS Word RS/ ], action => 'BeginExpr' },
{ lhs => 'EndExpr', rhs => [ qw/AT End LP Word RP/ ], action => 'BeginExpr' },
{ lhs => 'EndExpr', rhs => [ qw/AT End LB Word RB/ ], action => 'BeginExpr' },
{ lhs => 'EndExpr', rhs => [ qw/AT End LS Word RS/ ], action => 'BeginExpr' },
{ lhs => 'Expression', rhs => [ qw/AT Word LP Text RP/ ], action => 'Expression' },
{ lhs => 'Expression', rhs => [ qw/AT Word LB Text RB/ ], action => 'Expression' },
{ lhs => 'Expression', rhs => [ qw/AT Word LS Text RS/ ], action => 'Expression' },
{ lhs => 'Expression', rhs => [ qw/Text/ ], action => 'ExpressionText' },
{ lhs => 'Text', rhs => [ qw/Char/ ], min => 1, action => 'Text' },
],
symbols => {
Text => { null_value => '' },
},
terminals => [keys %tokens],
});
sub Action::Parser {
my (undef, @exprs) = @_;
return \@exprs;
}
sub Action::Expression {
my (undef, $at, $word, $lp, $text, $rp) = @_;
return { word => $word, text => $text }
}
sub Action::BeginEnd {
my (undef, $begin, $tree, $end) = @_;
if ($begin->{word} ne $end->{word}) {
return { error => 1 };
}
return { word => $begin->{word}, _content => $tree };
}
sub Action::BeginExpr {
my (undef, $at, $beginend, $lp, $word, $rp) = @_;
return { word => $word, begin => $beginend eq 'Begin' };
}
sub Action::ExpressionText{
my (undef, $text) = @_;
return { text => $text };
}
sub Action::Text {
my (undef, @chars) = @_;
return join '', @chars;
}
$grammar->precompute;
my $doc = read_file($ARGV[0]);
my $r = Marpa::XS::Recognizer->new({
grammar => $grammar,
});
while (length($doc)) {
my $expected = $r->terminals_expected;
if (!@$expected) {
die "End of parse";
}
for my $token_name (@$expected) {
my $re = $tokens{$token_name};
my (@res) = ($doc=~m/^($re)/);
if (@res) {
defined $r->alternative($token_name, $res[0], 1)
or next;
$doc = substr $doc, length($res[0]);
my $ok = eval {
$r->earleme_complete;
1;
};
if (!$ok) {
die "Error";
}
}
}
}
$r->end_input;
my $tree = ${$r->value};
sub generate {
my (@val) = @_;
for my $val (@val) {
if (ref($val) eq 'ARRAY') {
generate(@$val);
}
elsif (ref($val) eq 'HASH') {
if ($val->{word}) {
my $tag = lc($val->{word});
print '<'.$tag.'>';
if ($val->{_content}) {
generate(@{$val->{_content}});
}
elsif ($val->{text}) {
print $val->{text};
}
print '</'.$tag.'>';
}
else {
print $val->{text};
}
}
}
}
generate($tree);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment