Skip to content

Instantly share code, notes, and snippets.

@ruz
Created January 6, 2014 21:46
Show Gist options
  • Save ruz/8290356 to your computer and use it in GitHub Desktop.
Save ruz/8290356 to your computer and use it in GitHub Desktop.
Problem with marpa's scanless interface
rules L0:
G0 R0 'BEGIN:VCARD' ::= [B] [E] [G] [I] [N] [\:] [V] [C] [A] [R] [D]
G0 R1 'VERSION:4.0' ::= [V] [E] [R] [S] [I] [O] [N] [\:] [4] [\.] [0]
G0 R2 'END:VCARD' ::= [E] [N] [D] [\:] [V] [C] [A] [R] [D]
G0 R3 ':' ::= [\:]
G0 R4 '.' ::= [\.]
G0 R5 group ::= A_D_D
G0 R6 name ::= A_D_D
G0 R7 ';' ::= [\;]
G0 R8 '=' ::= [\=]
G0 R9 any_param_name ::= A_D_D
G0 R10 boolean ::= [T] [R] [U] [E]
G0 R11 boolean ::= [F] [A] [L] [S] [E]
G0 R12 integer ::= SIGNED_DIGITS
G0 R13 float ::= SIGNED_DIGITS
G0 R14 float ::= SIGNED_DIGITS [\.] DIGITS
G0 R15 utc_offset ::= SIGN DIGITx2
G0 R16 utc_offset ::= SIGN DIGITx4
G0 R17 URI ::= NOT_IMPLEMENTED
G0 R18 Language_Tag ::= NOT_IMPLEMENTED
G0 R19 iana_valuespec ::= NOT_IMPLEMENTED
G0 R20 date ::= NOT_IMPLEMENTED
G0 R21 date_time ::= NOT_IMPLEMENTED
G0 R22 date_and_or_time ::= NOT_IMPLEMENTED
G0 R23 time ::= NOT_IMPLEMENTED
G0 R24 timestamp ::= NOT_IMPLEMENTED
G0 R25 DIGITS ::= DIGIT +
G0 R26 DIGIT ::= [0-9]
G0 R27 DIGITx2 ::= DIGIT DIGIT
G0 R28 DIGITx4 ::= DIGIT DIGIT DIGIT DIGIT
G0 R29 SIGNED_DIGITS ::= DIGITS
G0 R30 SIGNED_DIGITS ::= SIGN DIGITS
G0 R31 SIGN ::= [\+]
G0 R32 SIGN ::= [\-]
G0 R33 NOT_IMPLEMENTED ::= [\x00]
G0 R34 A_D_D ::= [A-Za-z0-9-] +
G0 R35 text ::= TEXT_CHAR +
G0 R36 safe ::= SAFE_CHAR +
G0 R37 dquoted ::= [\"] [\"]
G0 R38 dquoted ::= [\"] QSAFE_CHARS [\"]
G0 R39 SEMICOLON ::= [\;]
G0 R40 COMMA ::= [\,]
G0 R41 TEXT_CHAR ::= [\\] [\\n,;:]
G0 R42 TEXT_CHAR ::= WSP
G0 R43 TEXT_CHAR ::= NON_ASCII
G0 R44 TEXT_CHAR ::= [\x21-\x2B\x2D-\x5B\x5D-\x7E]
G0 R45 SAFE_CHAR ::= [!\x23-\x2B\x2D-\x39\x3C-\x7E]
G0 R46 SAFE_CHAR ::= WSP
G0 R47 SAFE_CHAR ::= NON_ASCII
G0 R48 QSAFE_CHARS ::= QSAFE_CHAR +
G0 R49 QSAFE_CHAR ::= [!\x23-\x7E]
G0 R50 QSAFE_CHAR ::= WSP
G0 R51 QSAFE_CHAR ::= NON_ASCII
G0 R52 NON_ASCII ::= [\xC2-\xDF] [\x80-\xBF]
G0 R53 NON_ASCII ::= [\xE0] [\xA0-\xBF] [\x80-\xBF]
G0 R54 NON_ASCII ::= [\xED] [\x80-\x9F] [\x80-\xBF]
G0 R55 NON_ASCII ::= [\xE1-\xEC] [\x80-\xBF] [\x80-\xBF]
G0 R56 NON_ASCII ::= [\xEE-\xEF] [\x80-\xBF] [\x80-\xBF]
G0 R57 NON_ASCII ::= [\xF0] [\x90-\xBF] [\x80-\xBF] [\x80-\xBF]
G0 R58 NON_ASCII ::= [\xF4] [\x80-\x8F] [\x80-\xBF] [\x80-\xBF]
G0 R59 NON_ASCII ::= [\xF1-\xF3] [\x80-\xBF] [\x80-\xBF] [\x80-\xBF]
G0 R60 WSP ::= [ \t]
G0 R61 CRLF ::= [\x0D] [\x0A]
G0 R62 CRLF ::= [\x0A]
G0 R63 :start_lex ::= COMMA
G0 R64 :start_lex ::= CRLF
G0 R65 :start_lex ::= Language_Tag
G0 R66 :start_lex ::= SEMICOLON
G0 R67 :start_lex ::= URI
G0 R68 :start_lex ::= 'BEGIN:VCARD'
G0 R69 :start_lex ::= 'VERSION:4.0'
G0 R70 :start_lex ::= 'END:VCARD'
G0 R71 :start_lex ::= ':'
G0 R72 :start_lex ::= '.'
G0 R73 :start_lex ::= ';'
G0 R74 :start_lex ::= '='
G0 R75 :start_lex ::= any_param_name
G0 R76 :start_lex ::= boolean
G0 R77 :start_lex ::= date
G0 R78 :start_lex ::= date_and_or_time
G0 R79 :start_lex ::= date_time
G0 R80 :start_lex ::= dquoted
G0 R81 :start_lex ::= float
G0 R82 :start_lex ::= group
G0 R83 :start_lex ::= iana_valuespec
G0 R84 :start_lex ::= integer
G0 R85 :start_lex ::= name
G0 R86 :start_lex ::= safe
G0 R87 :start_lex ::= text
G0 R88 :start_lex ::= time
G0 R89 :start_lex ::= timestamp
G0 R90 :start_lex ::= utc_offset
Setting trace_terminals option
Lexer "L0" rejected lexeme L1c1-11: text; value="BEGIN:VCARD"
Lexer "L0" accepted lexeme L1c1-11: 'BEGIN:VCARD'; value="BEGIN:VCARD"
Lexer "L0" accepted lexeme L1c12: CRLF; value="
"
Lexer "L0" rejected lexeme L2c1-11: text; value="VERSION:4.0"
Lexer "L0" accepted lexeme L2c1-11: 'VERSION:4.0'; value="VERSION:4.0"
Lexer "L0" accepted lexeme L2c12: CRLF; value="
"
Lexer "L0" rejected lexeme L3c1-49: text; value="UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1"
progress:
P0 @0-0 L1c1 vCards -> . vCard +
P1 @0-0 L1c1 vCard -> . 'BEGIN:VCARD' CRLF 'VERSION:4.0' CRLF content 'END:VCARD'
P33 @0-0 L1c1 :start -> . vCards
R1:1 @0-1 L1c1-11 vCard -> 'BEGIN:VCARD' . CRLF 'VERSION:4.0' CRLF content 'END:VCARD'
R1:2 @0-2 L1c1-12 vCard -> 'BEGIN:VCARD' CRLF . 'VERSION:4.0' CRLF content 'END:VCARD'
R1:3 @0-3 L1c1-L2c11 vCard -> 'BEGIN:VCARD' CRLF 'VERSION:4.0' . CRLF content 'END:VCARD'
R1:4 @0-4 L1c1-L2c12 vCard -> 'BEGIN:VCARD' CRLF 'VERSION:4.0' CRLF . content 'END:VCARD'
P2 @4-4 L2c12 content -> . content_line +
P3 @4-4 L2c12 content_line -> . content_name params ':' value CRLF
P4 @4-4 L2c12 content_name -> . name
P5 @4-4 L2c12 content_name -> . group '.' name
Error in SLIF parse: No lexemes accepted at line 3, column 1
Lexer "L0" rejected 1 lexeme(s)
Rejected lexeme #1: text; value="UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1"; length = 49
* String before error: BEGIN:VCARD\nVERSION:4.0\n
* The error was at line 3, column 1, and at character 0x0055 'U', ...
* here: UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1\n
Marpa::R2 exception at try-scanless.pl line 94.
use v5.10;
use strict;
use warnings;
use Marpa::R2;
my $syntax = <<'END';
:default ::= action => ::first
:start ::= vCards
vCards ::= vCard+ separator => CRLF
vCard ::= 'BEGIN:VCARD' CRLF 'VERSION:4.0' CRLF content 'END:VCARD'
content ::= content_line+
content_line ::= content_name params ':' value CRLF
content_name ::= name | group '.' name
group ~ A_D_D
name ~ A_D_D
params ::= ';' param_list | empty
param_list ::= param+ separator => SEMICOLON
param ::= any_param
any_param ::= any_param_name '=' param_values
any_param_name ~ A_D_D
param_values ::= param_value COMMA param_values | param_value
param_value ::= empty | safe | dquoted
value ::= text | value_list | boolean | URI | utc_offset | Language_Tag | iana_valuespec
value_list ::= value_listable+ separator => COMMA proper => 1
value_listable ::= text | date | time | date_time | date_and_or_time | timestamp | integer | float
boolean ~ 'TRUE' | 'FALSE'
integer ~ SIGNED_DIGITS
float ~ SIGNED_DIGITS | SIGNED_DIGITS '.' DIGITS
utc_offset ~ SIGN DIGITx2 | SIGN DIGITx4
URI ~ NOT_IMPLEMENTED
Language_Tag ~ NOT_IMPLEMENTED
iana_valuespec ~ NOT_IMPLEMENTED
date ~ NOT_IMPLEMENTED
date_time ~ NOT_IMPLEMENTED
date_and_or_time ~ NOT_IMPLEMENTED
time ~ NOT_IMPLEMENTED
timestamp ~ NOT_IMPLEMENTED
DIGITS ~ DIGIT+
DIGIT ~ [0-9]
DIGITx2 ~ DIGIT DIGIT
DIGITx4 ~ DIGIT DIGIT DIGIT DIGIT
SIGNED_DIGITS ~ DIGITS | SIGN DIGITS
SIGN ~ '+' | '-'
NOT_IMPLEMENTED ~ [\x00]
empty ::=
A_D_D ~ [A-Za-z0-9-]+
text ~ TEXT_CHAR+
safe ~ SAFE_CHAR+
dquoted ~ '""' | '"' QSAFE_CHARS '"'
SEMICOLON ~ ';'
COMMA ~ ','
TEXT_CHAR ~
[\\] [\\n,;:]
| WSP
| NON_ASCII
| [\x21-\x2B\x2D-\x5B\x5D-\x7E]
SAFE_CHAR ~
[!\x23-\x2B\x2D-\x39\x3C-\x7E]
| WSP
| NON_ASCII
QSAFE_CHARS ~ QSAFE_CHAR+
QSAFE_CHAR ~ [!\x23-\x7E] | WSP | NON_ASCII
NON_ASCII ~
[\xC2-\xDF][\x80-\xBF]
| [\xE0] [\xA0-\xBF][\x80-\xBF]
| [\xED] [\x80-\x9F][\x80-\xBF]
| [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
| [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
| [\xF0] [\x90-\xBF][\x80-\xBF][\x80-\xBF]
| [\xF4] [\x80-\x8F][\x80-\xBF][\x80-\xBF]
| [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
WSP ~ [ \t]
CRLF ~ [\x0D] [\x0A] | [\x0A]
END
my $grammar = Marpa::R2::Scanless::G->new( { source => \$syntax } );
say "rules L0:\n", $grammar->show_rules(1, 'G0');
my $recce = Marpa::R2::Scanless::R->new(
{ grammar => $grammar, semantics_package => 'Parse::vCard::Actions::v4', trace_terminals => 1 } );
my $input = do { local $/; <DATA> };
eval { $recce->read( \$input ); 1 }
or do { say "\nprogress:\n", $recce->show_progress( 0, -1 ); die $@ };
my $value_ref = $recce->value;
my $value = $value_ref ? ${$value_ref} : 'No Parse';
package Parse::vCard::Actions::v4;
package main;
__DATA__
BEGIN:VCARD
VERSION:4.0
UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1
FN:J. Doe
N:Doe;J.;;;
EMAIL;PID=1.1:jdoe@example.com
EMAIL;PID=2.1:boss@example.com
EMAIL;PID=2.2:ceo@example.com
TEL;PID=1.1;VALUE=uri:tel:+1-555-555-5555
TEL;PID=2.1,2.2;VALUE=uri:tel:+1-666-666-6666
CLIENTPIDMAP:1;urn:uuid:53e374d9-337e-4727-8803-a1e9c14e0556
CLIENTPIDMAP:2;urn:uuid:1f762d2b-03c4-4a83-9a03-75ff658a6eee
END:VCARD
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment