public
Last active

A JSON parser in Marpa.

  • Download Gist
MarpaX-JSON.pm
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
package MarpaX::JSON;
use strict;
use Marpa::R2 2.039_000;
 
sub new {
my ($class) = @_;
 
my $self = bless {}, $class;
 
$self->{grammar} = Marpa::R2::Scanless::G->new(
{
action_object => 'MarpaX::JSON::Actions',
default_action => 'do_first_arg',
source => \(<<'END_OF_SOURCE'),
 
:start ::= json
 
json ::= object
| array
 
object ::= '{' '}' action => do_empty_object
| '{' members '}' action => do_object
 
members ::= pair+ separator => <comma> action => do_list
 
pair ::= string ':' value action => do_pair
 
value ::= string
| object
| number
| array
| 'true' action => do_true
| 'false' action => do_true
| 'null' action => do_null
 
array ::= '[' ']' action => do_empty_array
| '[' elements ']' action => do_array
 
elements ::= value+ separator => <comma> action => do_list
 
number ~ int
| int frac
| int exp
| int frac exp
 
int ~ digits
| '-' digits
 
digits ~ [\d]+
 
frac ~ '.' digits
 
exp ~ e digits
 
e ~ 'e'
| 'e+'
| 'e-'
| 'E'
| 'E+'
| 'E-'
 
string ::= lstring action => do_string
 
lstring ~ quote in_string quote
quote ~ ["]
 
in_string ~ in_string_char*
 
in_string_char ~ [^"\\]
| '\' '"'
| '\' 'b'
| '\' 'f'
| '\' 't'
| '\' 'n'
| '\' 'r'
| '\' 'u' four_hex_digits
| '\' '/'
| '\\'
 
four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit
hex_digit ~ [0-9a-fA-F]
 
comma ~ ','
 
:discard ~ whitespace
whitespace ~ [\s]+
 
END_OF_SOURCE
 
}
);
return $self;
}
 
sub parse {
my ($self, $string) = @_;
my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } );
$re->read(\$string);
my $value_ref = $re->value();
return ${$value_ref};
}
 
sub parse_json {
my ($string) = @_;
my $parser = MarpaX::JSON->new();
return $parser->parse($string);
}
 
package MarpaX::JSON::Actions;
use strict;
 
sub new {
my ($class) = @_;
return bless {}, $class;
}
 
sub do_first_arg {
shift;
return $_[0];
}
 
sub do_empty_object {
return {};
}
 
sub do_object {
shift;
return { map { @$_ } @{$_[1]} };
}
 
sub do_empty_array {
return [];
}
 
sub do_array {
shift;
return $_[1];
}
 
sub do_list {
shift;
return \@_;
}
 
sub do_pair {
shift;
return [ $_[0], $_[2] ];
}
 
sub do_string {
shift;
my $s = $_[0];
 
$s =~ s/^"//;
$s =~ s/"$//;
 
$s =~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/eg;
 
$s =~ s/\\n/\n/g;
$s =~ s/\\r/\r/g;
$s =~ s/\\b/\b/g;
$s =~ s/\\f/\f/g;
$s =~ s/\\t/\t/g;
$s =~ s/\\\\/\\/g;
$s =~ s{\\/}{/}g;
$s =~ s{\\"}{"}g;
 
return $s;
}
 
sub do_true {
shift;
return $_[0] eq 'true';
}
 
sub do_null {
return undef;
}
 
sub do_join {
shift;
return join '', @_;
}
 
1;
test.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
use Test::More;
use Test::Exception;
use lib 'lib';
use MarpaX::JSON;
 
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$);
is($data->{test}, 1);
 
$data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$);
is_deeply($data->{test}, [1,2,3]);
 
$data = MarpaX::JSON::parse_json(q${"test":true}$);
is($data->{test}, 1);
 
$data = MarpaX::JSON::parse_json(q${"test":false}$);
is($data->{test}, '');
 
$data = MarpaX::JSON::parse_json(q${"test":null}$);
is($data->{test}, undef);
 
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$);
is($data->{test}, undef);
is($data->{test2}, "hello world");
 
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$);
is($data->{test}, '1.25');
 
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$);
is($data->{test}, '1.25e4');
 
$data = MarpaX::JSON::parse_json(q$[]$);
is_deeply($data, []);
 
$data = MarpaX::JSON::parse_json(<<'JSON');
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.026020,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
}
]
JSON
is_deeply($data, [
{ "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959",
"Country" => "US", Zip => 94107, Address => '',
City => "SAN FRANCISCO", State => 'CA' },
{ "precision" => "zip", Longitude => "-122.026020", Address => "",
City => "SUNNYVALE", Country => "US", Latitude => "37.371991",
Zip => 94085, State => "CA" }
]);
 
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": "100"
},
"IDs": [116, 943, 234, 38793]
}
}
JSON
is_deeply($data, {
"Image" => {
"Width" => 800, "Height" => 600,
"Title" => "View from 15th Floor",
"Thumbnail" => {
"Url" => "http://www.example.com/image/481989943",
"Height" => 125,
"Width" => 100,
},
"IDs" => [ 116, 943, 234, 38793 ],
}
});
 
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>",
"entities" : {
"user_mentions" : [ {
"name" : "James Governor",
"screen_name" : "moankchips",
"indices" : [ 0, 10 ],
"id_str" : "61233",
"id" : 61233
} ],
"media" : [ ],
"hashtags" : [ ],
"urls" : [ ]
},
"in_reply_to_status_id_str" : "281400879465238529",
"geo" : {
},
"id_str" : "281405942321532929",
"in_reply_to_user_id" : 61233,
"text" : "@monkchips Ouch. Some regrets are harsher than others.",
"id" : 281405942321532929,
"in_reply_to_status_id" : 281400879465238529,
"created_at" : "Wed Dec 19 14:29:39 +0000 2012",
"in_reply_to_screen_name" : "monkchips",
"in_reply_to_user_id_str" : "61233",
"user" : {
"name" : "Sarah Bourne",
"screen_name" : "sarahebourne",
"protected" : false,
"id_str" : "16010789",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg",
"id" : 16010789,
"verified" : false
}
}
JSON
 
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "\u2603" }
JSON
is($data->{test}, "\x{2603}");
 
dies_ok {
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "éáóüöï" }
JSON
}, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet';
 
done_testing();

Very cool, only it should be

json ::= object | array

At least according to the RFC: "A JSON text is a serialized object or array."

There are probably some more problems with it at the moment. This is just an half hour experiment that worked out really great.

I only mentioned it because as cool as this is, I don't doubt it's already on its way to be included in some production code. You totally rock.

"number" should be a G0 rule:

-number       ::= int
-               | int frac              action => do_join
-               | int exp               action => do_join
-               | int frac exp          action => do_join
+number       ~ int
+               | int frac
+               | int exp
+               | int frac exp

Also, since it needs 2.039_000 to work, it is probably best to change

use Marpa::R2;

to

use Marpa::R2 2.039_000;

A structural rule with a join is a bit of a red flag. I'll change it.

@uberbaud Thanks. And I guess you're totally right about being used in production.

~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for a half-hour work :)

BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.

I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)

It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.