Skip to content

Instantly share code, notes, and snippets.

@rns
Last active August 29, 2015 14:06
Show Gist options
  • Save rns/8241288fe9351f29a28a to your computer and use it in GitHub Desktop.
Save rns/8241288fe9351f29a28a to your computer and use it in GitHub Desktop.
input : <[/] Trading 10mm ABC 2.5 19 05/06 mkt can use 50mm>
extracted: <ABC 2.5 19>
input : <XYZ 6.5 15 10-2B 106-107 B3 AAA- 1.646MM 2x2>
extracted: <XYZ 6.5 15>
use 5.010;
use strict;
use warnings;
use YAML;
use Marpa::R2 2.090; # for parse()
# Compile the SLIF grammar used to pull a "ticket float int" triple out of a
# line of free text.
#
# Reading the DSL below:
#   * every rule and lexeme produces a [ name, value... ] array, which is the
#     shape ast_process() walks;
#   * an item is a ticket (three capitals), a float and an int, separated by
#     whitespace and surrounded by arbitrary characters;
#   * symbols written in parentheses are hidden, so whitespace and the
#     surrounding characters do not appear in the resulting tree.
my $g = do {
    my $dsl = <<'END_OF_SOURCE';
:default ::= action => [ name, value]
lexeme default = action => [ name, value] latm => 1
info ::= item+
item ::= (chars_if_any) ticket (whitespace) float (whitespace) int (whitespace) (chars_if_any)
ticket ::= capital capital capital
float ::= digits '.' digits
int ::= digits
capital ::= [A-Z]
digits ::= [0-9]+
chars_if_any ::= [\d\D]*
whitespace ::= [\s]+
END_OF_SOURCE

    # The grammar constructor takes the DSL text by reference.
    Marpa::R2::Scanless::G->new( { source => \$dsl } );
};
# Sample inputs for the extraction loop.
# Each entry is a pair: [ raw input line => text expected to be extracted ].
my @tests = (
    [ '[/] Trading 10mm ABC 2.5 19 05/06 mkt can use 50mm' => 'ABC 2.5 19' ],
    [ 'XYZ 6.5 15 10-2B 106-107 B3 AAA- 1.646MM 2x2'       => 'XYZ 6.5 15' ],
);
# Parse every sample input, print what was extracted from it, and warn when
# the extraction differs from the expected text stored alongside the input.
for my $test (@tests) {
    my ( $input, $expected ) = @{$test};

    # A fresh recognizer is created for each input.
    my $r = Marpa::R2::Scanless::R->new( {
        grammar               => $g,
        too_many_earley_items => 500,
        # trace_terminals => 1
    } );

    # Detect failure by whether read() threw, not by the truth of its return
    # value: the trailing 1 makes the eval true on every normal completion.
    my $read_ok = eval { $r->read( \$input ); 1 };
    if ( !$read_ok ) {
        # Copy $@ straight away; any later eval would overwrite it.
        my $error = $@ || 'unknown error';
        warn "Parse failure, progress report is:\n"
            . $r->show_progress
            . "Error was: $error\n";
    }

    # value() hands back a reference to the parse result, or undef when
    # there is no parse.
    my $ast = $r->value;
    unless ( defined $ast ) {
        die "No parse";
    }

    my $extracted = ast_process( ${$ast} );
    say "input : <$input>\n", "extracted: <", $extracted, ">";

    # Actually check the result against the expectation carried in @tests.
    if ( $extracted ne $expected ) {
        warn "Unexpected extraction for <$input>: "
            . "expected <$expected>, got <$extracted>\n";
    }
}
# Flatten a parse-tree node into the text it represents.
#
# Parameters:
#   $ast - either a plain scalar (a leaf, returned unchanged) or an array
#          reference of the form [ node_name, child, child, ... ].
#
# Returns a string that depends on the node name:
#   info                - children flattened and joined with newlines
#   item                - children flattened and joined with single spaces
#   ticket, float, int  - children flattened and concatenated
#   capital             - the first child, as is
#   digits              - the children concatenated, as is
#   anything else       - the empty string
sub ast_process {
    my $ast = shift;

    # Leaves are plain scalars and carry their own text.
    return $ast if !ref $ast;

    my ( $id, @children ) = @{$ast};

    # An array with no node name has nothing to contribute.
    return '' if !defined $id;

    if ( $id eq 'info' ) {
        return join "\n", map { ast_process($_) } @children;
    }
    if ( $id eq 'item' ) {
        return join " ", map { ast_process($_) } @children;
    }

    # The alternation must be grouped: an ungrouped /^ticket|float|int$/
    # means "starts with ticket, OR contains float, OR ends with int" and
    # would also accept names such as 'print' or 'floating'.
    if ( $id =~ /\A(?:ticket|float|int)\z/ ) {
        return join "", map { ast_process($_) } @children;
    }
    if ( $id eq 'capital' ) {
        return $children[0];
    }
    if ( $id eq 'digits' ) {
        return join '', @children;
    }

    # Unrecognised node names yield no text; returning explicitly avoids
    # relying on the implicit value of a fallen-through if/elsif chain.
    return '';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment