Skip to content

Instantly share code, notes, and snippets.

@jddurand
Created May 28, 2014 22:24
Show Gist options
  • Save jddurand/de84beacf235c608ea10 to your computer and use it in GitHub Desktop.
Save jddurand/de84beacf235c608ea10 to your computer and use it in GitHub Desktop.
Marpa grammar of official EBNF ISO/IEC 14977:1996(E)
#!/usr/bin/env perl
# -----------------------------------------------------------------------------------------
# Marpa grammar for EBNF published by ISO/IEC 14977:1996(E)
#
# Extensions are minor, and ONLY because our keyboards are more likely to expose Latin-1
# characters rather than the Windows-1252 characters as per the spec, i.e.:
#
# Extension 1: Add ' to quoted strings (ISO published the special neighbour character ’)
# Extension 2: Add ^ to <other character> (ISO published the special neighbour character ˆ)
# Extension 3: Add ` to <other character> (ISO published the special neighbour character ‘)
# Extension 4: Add ~ to <other character> (ISO published the special neighbour character ˜)
# -----------------------------------------------------------------------------------------
#
use strict;
use warnings FATAL => 'all';
use Data::Section -setup;
use Marpa::R2;
use Data::Dumper;

# All __[ name ]__ sections found after __DATA__: section name => reference
# to the section text.
our $DATA = __PACKAGE__->local_section_data;

# The "grammar_source" section is the SLIF source of the EBNF grammar itself;
# every other section is an input to be parsed with it.
my $grammar = Marpa::R2::Scanless::G->new( { source => $DATA->{grammar_source} });

#
# Test grammars, grabbed from the web
#
# The names are sorted so that the output order is the same on every run:
# Perl does not guarantee any particular hash key order.
my @testNames = grep { $_ ne 'grammar_source' } keys %{$DATA};
foreach my $testName (sort @testNames) {
    test($testName, $grammar, $DATA->{$testName});
}
# -----------------------------------------------------------------------------------------
# test($testName, $grammar, $inputp)
#
# Parses one input with the given grammar and prints a one-line verdict on STDOUT.
#
#   $testName : label printed in front of the verdict
#   $grammar  : grammar object handed to Marpa::R2::Scanless::R->new()
#   $inputp   : reference to the input string, handed to the recognizer's read()
#
# When read() throws, prints "KO", the exception and the recognizer's
# show_progress(-1, -1) output.
#
# Otherwise asks the recognizer for at most two values (two is enough to know the
# parse is ambiguous). Each value is dumped with Data::Dumper into jdd<N>.txt in
# the temporary directory, and the verdict says whether there were zero, exactly
# one, or more than one value.
#
# Dies if a dump file cannot be opened or closed. Returns nothing.
# -----------------------------------------------------------------------------------------
sub test {
    my ($testName, $grammar, $inputp) = @_;

    use File::Spec;

    my $recognizer = Marpa::R2::Scanless::R->new({grammar => $grammar});

    # The trailing "1" makes the eval true whenever read() did not throw, so a
    # false-but-successful return value of read() is not mistaken for a failure,
    # and a failure is detected even if $@ ends up empty.
    my $readOk = eval { $recognizer->read($inputp); 1 };
    if (!$readOk) {
        my $error = $@ || "unknown error\n";
        printf "%20s: %s\n", $testName, 'KO';
        print $error;
        print $recognizer->show_progress(-1, -1);
        return;
    }

    my $nvalue = 0;
    while (defined(my $value = $recognizer->value())) {
        my $filename = File::Spec->catfile(File::Spec->tmpdir(), "jdd$nvalue.txt");
        open(my $fh, '>', $filename)
            or die "Cannot open $filename for writing: $!\n";
        print {$fh} Dumper($value);
        # Buffered write errors only surface at close time.
        close($fh)
            or die "Cannot close $filename: $!\n";
        print "... C.f. $filename\n";
        # A second value already proves ambiguity: no need to enumerate further.
        last if (++$nvalue > 1);
    }

    my $verdict = $nvalue <= 0 ? 'oups... no value ?'
                : $nvalue > 1  ? 'oups... more than one value !?'
                :                'good one value';
    printf "%20s: %s, %s\n", $testName, 'OK', $verdict;

    return;
}
__DATA__
__[ grammar_source ]__
#
# Structural (G1) rules. Every rule yields [ rule name, child values... ].
#
:start   ::= <syntax>
:default ::= action => [name,values]

#
# Marpa has no "minus" (exception) operator. The only exceptions needed here are
# character sets with a few members removed, so they are spelled out as explicit
# alternatives instead:
#   <terminal character 0> : the symbol characters shared by all the sets below
#   <terminal character 1> : the above plus letters, digits, comment symbols,
#                            the special sequence symbol and other characters
#                            (i.e. everything but the quote symbols)
#
<terminal character 0> ::=
      <concatenate symbol>
    | <defining symbol>
    | <definition separator symbol>
    | <end group symbol>
    | <end option symbol>
    | <end repeat symbol>
    | <except symbol>
    | <repetition symbol>
    | <start group symbol>
    | <start option symbol>
    | <start repeat symbol>
    | <terminator symbol>

<terminal character 1> ::=
      <terminal character 0>
    | <letter>
    | <decimal digit>
    | <end comment symbol>
    | <special sequence symbol>
    | <start comment symbol>
    | <other character>

#
# Gap-free symbols: a single non-quote character, or a quoted terminal string.
#
<gap free symbol> ::=
      <terminal character 1>
    | <terminal string>

<first terminal character many>  ::= <first terminal character>+
<second terminal character many> ::= <second terminal character>+
<third terminal character many>  ::= <third terminal character>+

# A terminal string is non-empty and closed by the same quote that opened it.
<terminal string> ::=
      <first quote symbol>  <first terminal character many>  <first quote symbol>
    | <second quote symbol> <second terminal character many> <second quote symbol>
    | <third quote symbol>  <third terminal character many>  <third quote symbol>

# Inside a string, any character is allowed except the enclosing quote itself.
<first terminal character> ::=
      <terminal character 1>
    | <second quote symbol>
    | <third quote symbol>

<second terminal character> ::=
      <terminal character 1>
    | <first quote symbol>
    | <third quote symbol>

<third terminal character> ::=
      <terminal character 1>
    | <first quote symbol>
    | <second quote symbol>

#
# First view of a syntax: gap-free symbols with optional gap separators
# before the first one and after each one.
#
<gap separator> ::=
      <space character>
    | <horizontal tabulation character>
    | <new line>
    | <vertical tabulation character>
    | <form feed>

<gap separator any>    ::= <gap separator>*
<syntax interior>      ::= <gap free symbol> <gap separator any>
<syntax interior many> ::= <syntax interior>+
<syntax>               ::= <gap separator any> <syntax interior many>

#
# Commentless symbols: what remains once comments are set aside.
#
<commentless symbol> ::=
      <terminal character 0>
    | <meta identifier>
    | <integer>
    | <terminal string>
    | <special sequence>

<integer> ::= <decimal digit>+

# A meta identifier starts with a letter, followed by letters and digits.
<meta identifier>               ::= <letter> <meta identifier character any>
<meta identifier character any> ::= <meta identifier character>*
<meta identifier character> ::=
      <letter>
    | <decimal digit>

# A special sequence is delimited by special sequence symbols and may contain
# any character but that symbol.
<special sequence>               ::= <special sequence symbol> <special sequence character any> <special sequence symbol>
<special sequence character any> ::= <special sequence character>*
<special sequence character> ::=
      <terminal character 0>
    | <letter>
    | <decimal digit>
    | <end comment symbol>
    | <start comment symbol>
    | <other character>
    | <first quote symbol>
    | <second quote symbol>
    | <third quote symbol>

#
# Comments: bracketed, and allowed to nest through <comment symbol>.
#
<comment symbol> ::=
      <bracketed textual comment>
    | <other character>
    | <commentless symbol>

<comment symbol any>            ::= <comment symbol>*
<bracketed textual comment>     ::= <start comment symbol> <comment symbol any> <end comment symbol>
<bracketed textual comment any> ::= <bracketed textual comment>*

#
# Second view of a syntax: commentless symbols with optional comments
# before the first one and after each one.
#
<syntax interior> ::= <commentless symbol> <bracketed textual comment any>
<syntax>          ::= <bracketed textual comment any> <syntax interior many>
#
# Third view of a syntax: the abstract syntax, one or more syntax rules.
#
<syntax rule many> ::= <syntax rule>+
<syntax> ::= <syntax rule many>
<syntax rule> ::= <meta identifier> <defining symbol> <definitions list> <terminator symbol>
#
# ISO/IEC 14977:
#   definitions list  = single definition, {definition separator symbol, single definition};
#   single definition = syntactic term, {concatenate symbol, syntactic term};
# i.e. items SEPARATED by the symbol. The recursive alternative therefore has to
# start with an item; starting it with the separator would only accept leading
# separators and would reject "a | b" and "a, b".
# Plain recursion is used here rather than a Marpa sequence rule.
#
<definitions list> ::= <single definition>
                     | <single definition> <definition separator symbol> <definitions list>
<single definition> ::= <syntactic term>
                      | <syntactic term> <concatenate symbol> <single definition>
<syntactic term> ::= <syntactic factor>
                   | <syntactic factor> <except symbol> <syntactic exception>
<syntactic exception> ::= <syntactic factor> # Most general form. In practice that could be replaced by a syntactic-factor containing no meta-identifiers ?
# An optional repetition count in front of a primary.
<syntactic factor> ::= <integer> <repetition symbol> <syntactic primary>
                     | <syntactic primary>
<syntactic primary> ::= <optional sequence>
                      | <repeated sequence>
                      | <grouped sequence>
                      | <meta identifier>
                      | <terminal string>
                      | <special sequence>
                      | <empty sequence>
<optional sequence> ::= <start option symbol> <definitions list> <end option symbol>
<repeated sequence> ::= <start repeat symbol> <definitions list> <end repeat symbol>
<grouped sequence> ::= <start group symbol> <definitions list> <end group symbol>
# The empty sequence matches nothing at all (empty right-hand side).
<empty sequence> ::=
#
# Lexemes
#
# Besides the primary symbols, ISO/IEC 14977 defines alternative representations:
#   '/' or '!' for '|',  '.' for ';',
#   '(/' and '/)' for '[' and ']',  '(:' and ':)' for '{' and '}'.
#
<letter> ~ [a-zA-Z]
<decimal digit> ~ [0-9]
<concatenate symbol> ~ ','
<defining symbol> ~ '='
<definition separator symbol> ~ [|/!]
<end comment symbol> ~ '*)'
<end group symbol> ~ ')'
# Two-character alternative '/)': a lone '/' is a definition separator, not an
# end option symbol.
<end option symbol> ~ ']' | '/)'
<end repeat symbol> ~ '}' | ':)'
<except symbol> ~ '-'
<first quote symbol> ~ '’'
<repetition symbol> ~ '*'
<second quote symbol> ~ '"'
<third quote symbol> ~ ['] # Extension 1
<special sequence symbol> ~ '?'
<start comment symbol> ~ '(*'
<start group symbol> ~ '('
# Two-character alternative '(/': a lone '/' is a definition separator, not a
# start option symbol.
<start option symbol> ~ '[' | '(/'
<start repeat symbol> ~ '{' | '(:'
<terminator symbol> ~ [;.]
#
# Other characters is a sort of fallback, so we decrease its priority
#
:lexeme ~ <other character> priority => -1
<other character> ~ [ :+_%@&#$<>\\^ˆ‘`˜~] # Extensions 2, 3, and 4
<space character> ~ ' '
<horizontal tabulation character> ~ [\x{0009}]
# A new line is one line feed, optionally surrounded by carriage returns.
<carriage return> ~ [\x{000D}]
<carriage return any> ~ <carriage return>*
<line feed> ~ [\x{000A}]
<new line> ~ <carriage return any> <line feed> <carriage return any>
<vertical tabulation character> ~ [\x{000B}]
<form feed> ~ [\x{000C}]
__[ poze.ebnf ]__
#
# Reference: http://home.hku.nl/~pieter.suurmond/SOFT/RESP/poze.ebnf
#
(*
ISO EBNF description of the PoZeTools input syntax,
using Extended Backus-Naur Form according to ISO/IEC 14977 : 1996(E).
See inputfile 'x.poze' which serves as a valid 'poze' example.
PoZeTools v 0.52, march 31, 2005. Copyright (c) 2002-2005 Pieter Suurmond.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the 'Software'), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
Any person wishing to distribute modifications to the Software is
requested to send the modifications to the original developer so that they
can be incorporated into the canonical version.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
*)
(*
A 'poze' may specify multiple poles and zeros, but not more than one gain
parameter. They may appear in any order. Each pole, zero and optional gain
takes one single line. Empty lines and lines starting with # are ignored.
*)
poze = {pole | zero | empty | remark}, [gain],
{pole | zero | empty | remark};
pole = {vwhite}, ('P'|'p'), specifier, complex, new line;
zero = {vwhite}, ('Z'|'z'), specifier, complex, new line;
gain = {vwhite}, ('G'|'g'), specifier, complex, new line;
empty = {vwhite}, new line;
remark = '#', anything, new line;
specifier = {vwhite}, '=', {vwhite};
anything = {terminal character | horizontal tabulation character};
(*
All poles, zeros and the optional gain may be supplied in either polar or
cartesian form. Note that a 'negative magnitude' may be specified in polar
notation!
*)
complex = ['-'], (cartesian | polar);
cartesian = (real, [('+'|'-'), imaginary]) |
(imaginary, [('+'|'-'), real ]);
polar = [real], exp, '(', {vwhite}, ['-'], arg, {vwhite},
')', {vwhite};
real = integer, ['.', integer], {vwhite};
imaginary = [real], i;
arg = ([real], ([pi], i) | (i, pi) ) |
([pi], ([real], i) | (i, real)) |
(i, ([real], [pi]) | (pi, real));
exp = ('e'|'E'), ('x'|'X'), ('p'|'P'), {vwhite};
pi = ('p'|'P'), ('i'|'I'), {vwhite};
i = ('j'|'J'), {vwhite};
vwhite = ' ' | horizontal tabulation character;
(*
As defined by ISO/IEC 14977 (the following 7 definitions are redundant):
*)
integer = decimal digit, {decimal digit};
decimal digit = '0' | '1' | '2' | '3' | '4' |
'5' | '6' | '7' | '8' | '9';
(* \t *)
horizontal tabulation character = ? ISO 6429 character Horizontal Tabulation ?;
(* \r *) new line = {? ISO 6429 character Carriage Return ?},
(* \n *) ? ISO 6429 character Line Feed ?,
{? ISO 6429 character Carriage Return ?};
terminal character = letter |
decimal digit |
',' | '=' | '|' |
'*)' | ')' | ']' | '}' | '-' |
"'" | '*' | '"' | '?' |
'(*' | '(' | '[' | '{' | ';' |
other character;
other character = ' ' | ':' | '+' | '_' | '%' | '@' |
'&' | '#' | '$' | '<' | '>' | '\' |
'^' | '`' | '~';
letter = 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' |
'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' |
'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' |
'V' | 'W' | 'X' | 'Y' | 'Z' |
'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' |
'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' |
'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' |
'v' | 'w' | 'x' | 'y' | 'z';
(*
End of poze definition.
*)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment