Skip to content

Instantly share code, notes, and snippets.

@nordicdyno
Created December 16, 2012 19:37
Show Gist options
  • Save nordicdyno/4311952 to your computer and use it in GitHub Desktop.
Save nordicdyno/4311952 to your computer and use it in GitHub Desktop.
$|++;
use English qw( -no_match_vars );
use strict;
use warnings;
use feature 'say';
use utf8;
use open qw(:std :utf8);
use Data::Dumper;
use Data::Dump;
use Marpa::R2;
# Language description:
# 1. $name - variable insertion (can be external or user's)
# 2. $name( ... ) <- method call
# 3. $set(var_name, value) <- special case for setting user's variables
# 4. all other symbols moved to output as is (spaces matters)
#
# EXAMPLES :
# 1) $set(var_name, $func($other_var, str ing1, string2 $var1 $var2)) $var_name
# 2) blah blah $func(1) $set(var, world) Hello $var!
my $tmpl_str;
{
$/ = undef;
$tmpl_str = readline DATA;
};
my %symbol_for_char = (
'(' => 'OPEN_PAREN', ')' => 'CLOSE_PAREN', ',' => 'COMMA',
);
my @tokens;
sub t {
my %opt = @_; #say Dumper(\%opt); sleep 1;
push @tokens, \%opt;
return '';
}
# parser context
my $rules_bnf = <<'RULES_BNF_END';
Script ::= Expressions
Expressions ::= Expression+
Expression ::=
Set_stmt
| Call_stmt
| TOKEN
| Stream
Set_stmt ::= SET OPEN_PAREN Var_name COMMA Var_value CLOSE_PAREN
Var_value ::= Expressions
Call_stmt ::=
TOKEN OPEN_PAREN Params CLOSE_PAREN
Params ::= Expressions COMMA Params
| Expressions
Stream ::= CHAR
| SPACE
Var_name ::= CHAR+
RULES_BNF_END
my %context = (
var_name => undef,
is_stream => undef, stream_stack => [],
);
sub do_what_I_mean {
shift;
my @children = grep {defined} @_;
my $ret = scalar @children > 1 ? \@children : shift @children;
return $ret;
}
my $grammar = Marpa::R2::Grammar->new({
start => 'Script',
actions => 'main',
default_action => 'do_what_I_mean',
rules => [ $rules_bnf, ],
});
$grammar->precompute;
my $string = $tmpl_str;
my $length = length $string;
pos $string = 0;
my $last_pos = 0;
TOKEN: while ( $last_pos < $length ) {
if ($string =~ m{ \G (\s+) }cmsxg) {
$last_pos = pos($string);
t name => 'SPACE', value => ' ', pos => $last_pos - 1;
}
elsif ($string =~ m{ \G \\(.) }cmsxg) {
$last_pos = pos($string);
t name => 'CHAR', value => $1, pos => $last_pos - 1;
}
elsif ($string =~ m{ \G (\$set)\( }cmsxg) {
$last_pos = pos($string);
t name => 'SET', pos => $last_pos - length($1) - 1;
t name => 'OPEN_PAREN', pos => $last_pos - 1;
}
elsif ($string =~ m{ \G \$([a-zA-Z0-9_-]+) }cmsxg) {
$last_pos = pos($string);
t name => 'TOKEN', value => $1, pos => $last_pos - length($1);
}
elsif ($string =~ m{ \G \( }cmsxg) {
$last_pos = pos($string);
t name => 'OPEN_PAREN', pos => $last_pos - 1;
}
elsif ($string =~ m{ \G \) }cmsxg) {
$last_pos = pos($string);
t name => 'CLOSE_PAREN', pos => $last_pos - 1;
}
elsif ($string =~ m{ \G , }cmsxg) {
$last_pos = pos($string);
t name => 'COMMA', pos => $last_pos - 1;
}
elsif ($string =~ m{ \G (.) }cmsxg) {
$last_pos = pos($string);
t name => 'CHAR', value => $1, pos => $last_pos - 1;
}
else {
die "not rules for '$string' on pos: " . pos($string);
}
}
my $rec = Marpa::R2::Recognizer->new( { grammar => $grammar } );
for my $t (@tokens) {
#say $rec->show_progress();
if ( not defined $rec->read( $t->{name}, $t->{value}) ) {
#say $rec->show_progress() or die "say failed: $ERRNO";
say $string, "\n", '-' x 25;
say '"' . (substr $string, 0, $t->{pos}) . '" [x] "'
. (substr $string, $t->{pos}) . '"'
;
say "Expected: " . $rec->terminals_expected;
die "Problem near position: " . $t->{pos} . " " . Dumper($t);
}
#say "OK: " . Dumper($t);
}
my $value_ref = $rec->value;
if ( !defined $value_ref ) {
say $rec->show_progress() or die "say failed: $ERRNO";
die 'Parse failed';
}
say "string: '$string'";
say "RESULT:\n" . Dumper($value_ref);
__DATA__
$set(var_name, str )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment