Skip to content

Instantly share code, notes, and snippets.

@jddurand
Created September 23, 2014 21:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jddurand/8d3238c22731a85eb890 to your computer and use it in GitHub Desktop.
Save jddurand/8d3238c22731a85eb890 to your computer and use it in GitHub Desktop.
#!env perl
use strict;
use diagnostics;
use Marpa::R2;
use Data::Dumper;
use POSIX qw/EXIT_SUCCESS/;
use Data::Section -setup;
use open qw(:std :utf8); # Undeclared streams in UTF-8.
#
# Build grammar for "..." or '...'
#
# The 'parameterizedGrammar' section is a template with two placeholders: the
# first stands for a symbol-name suffix, the second for the quote character.
# It is instantiated once per quote style and appended to the fixed header.
#
our $parameterizedGrammar = ${ __PACKAGE__->section_data('parameterizedGrammar') };
our ($squoteGrammar, $dquoteGrammar);
foreach my $variant (
    [ \$squoteGrammar, 'SQUOTE', q{'} ],
    [ \$dquoteGrammar, 'DQUOTE', q{"} ],
) {
    my ($grammarRef, $suffix, $quote) = @{$variant};
    # Start from a private copy of the template, then fill in both placeholders.
    ${$grammarRef} = $parameterizedGrammar;
    ${$grammarRef} =~ s/\$a/$suffix/g;
    ${$grammarRef} =~ s/\$b/$quote/g;
}
# Full SLIF source: fixed header first, then the two instantiated templates.
our $DATA = join q{},
    ${ __PACKAGE__->section_data('header') },
    $squoteGrammar,
    $dquoteGrammar;
our $G = Marpa::R2::Scanless::G->new({
    source        => \$DATA,
    bless_package => 'STRING',
});
#
# Run tests
#
# The 'tests' section holds Perl source for a list of [expected state, input]
# pairs, hence the string eval. It is trusted data shipped in this very file;
# never route external input through this eval.
#
my @tests = eval ${ __PACKAGE__->section_data('tests') };
# Without this check a syntax error in the section would produce an empty
# list and the script would report success without running a single test.
die "Cannot evaluate the 'tests' section: $@" if $@;

my $failures = 0;
foreach my $test (@tests) {
    my ($state, $input) = @{$test};
    my $r = Marpa::R2::Scanless::R->new({ grammar => $G });

    # Both read() and value() run under eval: if either throws, eval yields
    # undef and the input counts as "no parse" instead of aborting the
    # remaining tests.
    my $valuep = eval { $r->read(\$input); $r->value };
    my $value  = defined($valuep) ? ${$valuep} : undef;

    # Test definedness, not truth, so a defined-but-false value ('0' or '')
    # is not misreported as undef.
    my $shown = defined($value) ? $value : 'undef';

    # 'OK' expects a parse value; 'Fail' expects none. Any other state is KO.
    my $passed = ($state eq 'OK'   &&   defined($value))
              || ($state eq 'Fail' && ! defined($value));
    if ($passed) {
        print "TEST OK: Got '$state' for input: $input. Grammar value is $shown.\n";
    } else {
        $failures++;
        print "TEST KO: Expected '$state' for input: $input. Grammar value is $shown.\n";
    }
}
# Report failure through the exit status too, so callers (shell, CI) can
# detect a KO without parsing the output. POSIX is already loaded by the
# file's own 'use POSIX', so the fully-qualified constant is available.
exit($failures ? POSIX::EXIT_FAILURE() : EXIT_SUCCESS);
__DATA__
__[ header ]__
############################################################
# NON-PARAMETERIZED PART OF THE GRAMMAR #
############################################################
# This section is placed in front of the two instantiated copies of the
# parameterized section to form the complete grammar source.
#
# ::first makes a rule's value the value of its first right-hand-side symbol
# (Marpa SLIF semantics), so the parse value is the matched literal itself.
:default ::= action => ::first
:start ::= stringLiteralUnit
# A string literal is either double- or single-quoted; both alternatives are
# lexemes defined in the parameterized part of the grammar.
stringLiteralUnit ::= STRING_LITERAL_UNIT_DQUOTE
| STRING_LITERAL_UNIT_SQUOTE
# BS: one backslash.
BS ~ '\'
# H: one hexadecimal digit; H_many: one or more of them.
H ~ [a-fA-F0-9]
H_many ~ H+
# O: one octal digit.
O ~ [0-7]
# ES: an escape sequence, i.e. a backslash followed by one of the forms below.
ES ~ BS ES_AFTERBS
# After the backslash: a single escape character, one to three octal digits,
# or 'x' followed by one or more hexadecimal digits.
ES_AFTERBS ~ ["'\?\\abfnrtv]
| O
| O O
| O O O
| 'x' H_many
__[ parameterizedGrammar ]__
############################################################
# PARAMETERIZED PART OF THE GRAMMAR #
############################################################
# Template instantiated twice by the script, once per quote style. Before the
# grammar is compiled, the placeholder "dollar-a" is replaced by SQUOTE or
# DQUOTE and the placeholder "dollar-b" by the matching quote character.
# The placeholders are spelled out in words here because the substitution is
# purely textual and would rewrite them inside comments as well.
#
# A literal is: opening quote, any number of inside items, closing quote.
STRING_LITERAL_UNIT_$a ~ LEX_$a STRING_LITERAL_INSIDE_$a_any LEX_$a
STRING_LITERAL_INSIDE_$a_any ~ STRING_LITERAL_INSIDE_$a*
# An inside item is any character other than the delimiting quote and the
# backslash ...
STRING_LITERAL_INSIDE_$a ~ [^$b\\]
# ... or an escape sequence (ES is defined in the header section).
STRING_LITERAL_INSIDE_$a ~ ES
# The delimiting quote character itself.
LEX_$a ~ [$b]
__[ tests ]__
############################################################
# TESTS #
############################################################
# Perl source for a list of [expected state, input] pairs; the script
# evaluates this section and runs every pair through the grammar.
# 'OK' means the input must yield a parse value, 'Fail' means it must not.
# Inputs are written with q(), so a backslash followed by a quote or a letter
# reaches the grammar as two separate characters.
(
['OK', q("X Z")], # 1. Double-quoted text with an inner space.
['OK', q('X Z')], # 2. Single-quoted text with an inner space.
['OK', q(" Z ")], # 3. Double-quoted text with leading/trailing spaces.
['OK', q(' Z ')], # 4. Single-quoted text with leading/trailing spaces.
['OK', q("")], # 5. Double-quoted empty string.
['OK', q('')], # 6. Single-quoted empty string.
['OK', q("'")], # 7. One single quote inside double quotes.
['OK', q("''")], # 8. Two single quotes inside double quotes.
['OK', q('"')], # 9. One double quote inside single quotes.
['OK', q('""')], # 10. Two double quotes inside single quotes.
['OK', q("\'")], # 11. Escaped single quote inside double quotes.
['OK', q('\"')], # 12. Escaped double quote inside single quotes.
['OK', q("\"")], # 13. Escaped double quote inside double quotes.
['OK', q('\'')], # 14. Escaped single quote inside single quotes.
['OK', q("A\rB")], # 15. Escape sequence (backslash + r) inside double quotes.
['OK', q('A\rB')], # 16. Escape sequence (backslash + r) inside single quotes.
['Fail', q(Δ Lady)], # 17. Unquoted text containing a non-ASCII (UTF-8) character.
['Fail', q( )], # 18. Whitespace-only input (a single space).
['Fail', q()], # 19. Empty string.
['Fail', q(")], # 20. Unbalanced quotes.
['Fail', q(')], # 21. Unbalanced quotes.
['Fail', q(A B)], # 22. Unquoted string. Pre-preprocess by adding your own quotes, if possible.
)
@jddurand
Copy link
Author

Output:

TEST OK: Got 'OK' for input: "X Z". Grammar value is "X Z".
TEST OK: Got 'OK' for input: 'X Z'. Grammar value is 'X Z'.
TEST OK: Got 'OK' for input: " Z ". Grammar value is " Z ".
TEST OK: Got 'OK' for input: ' Z '. Grammar value is ' Z '.
TEST OK: Got 'OK' for input: "". Grammar value is "".
TEST OK: Got 'OK' for input: ''. Grammar value is ''.
TEST OK: Got 'OK' for input: "'". Grammar value is "'".
TEST OK: Got 'OK' for input: "''". Grammar value is "''".
TEST OK: Got 'OK' for input: '"'. Grammar value is '"'.
TEST OK: Got 'OK' for input: '""'. Grammar value is '""'.
TEST OK: Got 'OK' for input: "\'". Grammar value is "\'".
TEST OK: Got 'OK' for input: '\"'. Grammar value is '\"'.
TEST OK: Got 'OK' for input: "\"". Grammar value is "\"".
TEST OK: Got 'OK' for input: '\''. Grammar value is '\''.
TEST OK: Got 'OK' for input: "A\rB". Grammar value is "A\rB".
TEST OK: Got 'OK' for input: 'A\rB'. Grammar value is 'A\rB'.
TEST OK: Got 'Fail' for input: Δ Lady. Grammar value is undef.
TEST OK: Got 'Fail' for input:  . Grammar value is undef.
TEST OK: Got 'Fail' for input: . Grammar value is undef.
TEST OK: Got 'Fail' for input: ". Grammar value is undef.
TEST OK: Got 'Fail' for input: '. Grammar value is undef.
TEST OK: Got 'Fail' for input: A B. Grammar value is undef.

@jddurand
Copy link
Author

And just to be sure that UTF-8 is handled correctly, here is the result for a quoted string containing a UTF-8 character:

TEST OK: Got 'OK' for input: 'Δ Lady'. Grammar value is 'Δ Lady'.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment