Skip to content

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
A JSON parser in Marpa.
package MarpaX::JSON;
use strict;
use Marpa::R2 2.039_000;
sub new {
my ($class) = @_;
my $self = bless {}, $class;
$self->{grammar} = Marpa::R2::Scanless::G->new(
{
action_object => 'MarpaX::JSON::Actions',
default_action => 'do_first_arg',
source => \(<<'END_OF_SOURCE'),
:start ::= json
json ::= object
| array
object ::= '{' '}' action => do_empty_object
| '{' members '}' action => do_object
members ::= pair+ separator => <comma> action => do_list
pair ::= string ':' value action => do_pair
value ::= string
| object
| number
| array
| 'true' action => do_true
| 'false' action => do_true
| 'null' action => do_null
array ::= '[' ']' action => do_empty_array
| '[' elements ']' action => do_array
elements ::= value+ separator => <comma> action => do_list
number ~ int
| int frac
| int exp
| int frac exp
int ~ digits
| '-' digits
digits ~ [\d]+
frac ~ '.' digits
exp ~ e digits
e ~ 'e'
| 'e+'
| 'e-'
| 'E'
| 'E+'
| 'E-'
string ::= lstring action => do_string
lstring ~ quote in_string quote
quote ~ ["]
in_string ~ in_string_char*
in_string_char ~ [^"\\]
| '\' '"'
| '\' 'b'
| '\' 'f'
| '\' 't'
| '\' 'n'
| '\' 'r'
| '\' 'u' four_hex_digits
| '\' '/'
| '\\'
four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit
hex_digit ~ [0-9a-fA-F]
comma ~ ','
:discard ~ whitespace
whitespace ~ [\s]+
END_OF_SOURCE
}
);
return $self;
}
sub parse {
my ($self, $string) = @_;
my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } );
$re->read(\$string);
my $value_ref = $re->value();
return ${$value_ref};
}
sub parse_json {
my ($string) = @_;
my $parser = MarpaX::JSON->new();
return $parser->parse($string);
}
package MarpaX::JSON::Actions;
use strict;
sub new {
my ($class) = @_;
return bless {}, $class;
}
sub do_first_arg {
shift;
return $_[0];
}
sub do_empty_object {
return {};
}
sub do_object {
shift;
return { map { @$_ } @{$_[1]} };
}
sub do_empty_array {
return [];
}
sub do_array {
shift;
return $_[1];
}
sub do_list {
shift;
return \@_;
}
sub do_pair {
shift;
return [ $_[0], $_[2] ];
}
sub do_string {
shift;
my $s = $_[0];
$s =~ s/^"//;
$s =~ s/"$//;
$s =~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/eg;
$s =~ s/\\n/\n/g;
$s =~ s/\\r/\r/g;
$s =~ s/\\b/\b/g;
$s =~ s/\\f/\f/g;
$s =~ s/\\t/\t/g;
$s =~ s/\\\\/\\/g;
$s =~ s{\\/}{/}g;
$s =~ s{\\"}{"}g;
return $s;
}
sub do_true {
shift;
return $_[0] eq 'true';
}
sub do_null {
return undef;
}
sub do_join {
shift;
return join '', @_;
}
1;
use Test::More;
use Test::Exception;
use lib 'lib';
use MarpaX::JSON;
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$);
is($data->{test}, 1);
$data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$);
is_deeply($data->{test}, [1,2,3]);
$data = MarpaX::JSON::parse_json(q${"test":true}$);
is($data->{test}, 1);
$data = MarpaX::JSON::parse_json(q${"test":false}$);
is($data->{test}, '');
$data = MarpaX::JSON::parse_json(q${"test":null}$);
is($data->{test}, undef);
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$);
is($data->{test}, undef);
is($data->{test2}, "hello world");
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$);
is($data->{test}, '1.25');
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$);
is($data->{test}, '1.25e4');
$data = MarpaX::JSON::parse_json(q$[]$);
is_deeply($data, []);
$data = MarpaX::JSON::parse_json(<<'JSON');
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.026020,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
}
]
JSON
is_deeply($data, [
{ "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959",
"Country" => "US", Zip => 94107, Address => '',
City => "SAN FRANCISCO", State => 'CA' },
{ "precision" => "zip", Longitude => "-122.026020", Address => "",
City => "SUNNYVALE", Country => "US", Latitude => "37.371991",
Zip => 94085, State => "CA" }
]);
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": "100"
},
"IDs": [116, 943, 234, 38793]
}
}
JSON
is_deeply($data, {
"Image" => {
"Width" => 800, "Height" => 600,
"Title" => "View from 15th Floor",
"Thumbnail" => {
"Url" => "http://www.example.com/image/481989943",
"Height" => 125,
"Width" => 100,
},
"IDs" => [ 116, 943, 234, 38793 ],
}
});
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>",
"entities" : {
"user_mentions" : [ {
"name" : "James Governor",
"screen_name" : "moankchips",
"indices" : [ 0, 10 ],
"id_str" : "61233",
"id" : 61233
} ],
"media" : [ ],
"hashtags" : [ ],
"urls" : [ ]
},
"in_reply_to_status_id_str" : "281400879465238529",
"geo" : {
},
"id_str" : "281405942321532929",
"in_reply_to_user_id" : 61233,
"text" : "@monkchips Ouch. Some regrets are harsher than others.",
"id" : 281405942321532929,
"in_reply_to_status_id" : 281400879465238529,
"created_at" : "Wed Dec 19 14:29:39 +0000 2012",
"in_reply_to_screen_name" : "monkchips",
"in_reply_to_user_id_str" : "61233",
"user" : {
"name" : "Sarah Bourne",
"screen_name" : "sarahebourne",
"protected" : false,
"id_str" : "16010789",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg",
"id" : 16010789,
"verified" : false
}
}
JSON
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "\u2603" }
JSON
is($data->{test}, "\x{2603}");
dies_ok {
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "éáóüöï" }
JSON
}, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet';
done_testing();
@uberbaud

Very cool, only it should be

json ::= object | array

At least according to the RFC: "A JSON text is a serialized object or array."

@pstuifzand
Owner

There are probably some more problems with it at the moment. This is just an half hour experiment that worked out really great.

@uberbaud

I only mentioned it because as cool as this is, I don't doubt it's already on its way to be included in some production code. You totally rock.

@jeffreykegler

"number" should be a G0 rule:

-number       ::= int
-               | int frac              action => do_join
-               | int exp               action => do_join
-               | int frac exp          action => do_join
+number       ~ int
+               | int frac
+               | int exp
+               | int frac exp
@jeffreykegler

Also, since it needs 2.039_000 to work, it is probably best to change

use Marpa::R2;

to

use Marpa::R2 2.039_000;
@pstuifzand
Owner

A structural rule with a join is a bit of a red flag. I'll change it.

@pstuifzand
Owner

@uberbaud Thanks. And I guess you're totally right about being used in production.

@perlancar

~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for a half-hour work :)

BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.

@jeffreykegler

I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)

It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.