Skip to content

Instantly share code, notes, and snippets.

@pstuifzand
Last active December 10, 2015 14:28
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pstuifzand/4447349 to your computer and use it in GitHub Desktop.
Save pstuifzand/4447349 to your computer and use it in GitHub Desktop.
A JSON parser in Marpa.
package MarpaX::JSON;
use strict;
use Marpa::R2 2.039_000;
sub new {
my ($class) = @_;
my $self = bless {}, $class;
$self->{grammar} = Marpa::R2::Scanless::G->new(
{
action_object => 'MarpaX::JSON::Actions',
default_action => 'do_first_arg',
source => \(<<'END_OF_SOURCE'),
:start ::= json
json ::= object
| array
object ::= '{' '}' action => do_empty_object
| '{' members '}' action => do_object
members ::= pair+ separator => <comma> action => do_list
pair ::= string ':' value action => do_pair
value ::= string
| object
| number
| array
| 'true' action => do_true
| 'false' action => do_true
| 'null' action => do_null
array ::= '[' ']' action => do_empty_array
| '[' elements ']' action => do_array
elements ::= value+ separator => <comma> action => do_list
number ~ int
| int frac
| int exp
| int frac exp
int ~ digits
| '-' digits
digits ~ [\d]+
frac ~ '.' digits
exp ~ e digits
e ~ 'e'
| 'e+'
| 'e-'
| 'E'
| 'E+'
| 'E-'
string ::= lstring action => do_string
lstring ~ quote in_string quote
quote ~ ["]
in_string ~ in_string_char*
in_string_char ~ [^"\\]
| '\' '"'
| '\' 'b'
| '\' 'f'
| '\' 't'
| '\' 'n'
| '\' 'r'
| '\' 'u' four_hex_digits
| '\' '/'
| '\\'
four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit
hex_digit ~ [0-9a-fA-F]
comma ~ ','
:discard ~ whitespace
whitespace ~ [\s]+
END_OF_SOURCE
}
);
return $self;
}
sub parse {
my ($self, $string) = @_;
my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } );
$re->read(\$string);
my $value_ref = $re->value();
return ${$value_ref};
}
sub parse_json {
my ($string) = @_;
my $parser = MarpaX::JSON->new();
return $parser->parse($string);
}
package MarpaX::JSON::Actions;
use strict;
sub new {
my ($class) = @_;
return bless {}, $class;
}
sub do_first_arg {
shift;
return $_[0];
}
sub do_empty_object {
return {};
}
sub do_object {
shift;
return { map { @$_ } @{$_[1]} };
}
sub do_empty_array {
return [];
}
sub do_array {
shift;
return $_[1];
}
sub do_list {
shift;
return \@_;
}
sub do_pair {
shift;
return [ $_[0], $_[2] ];
}
sub do_string {
shift;
my $s = $_[0];
$s =~ s/^"//;
$s =~ s/"$//;
$s =~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/eg;
$s =~ s/\\n/\n/g;
$s =~ s/\\r/\r/g;
$s =~ s/\\b/\b/g;
$s =~ s/\\f/\f/g;
$s =~ s/\\t/\t/g;
$s =~ s/\\\\/\\/g;
$s =~ s{\\/}{/}g;
$s =~ s{\\"}{"}g;
return $s;
}
sub do_true {
shift;
return $_[0] eq 'true';
}
sub do_null {
return undef;
}
sub do_join {
shift;
return join '', @_;
}
1;
use Test::More;
use Test::Exception;
use lib 'lib';
use MarpaX::JSON;
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$);
is($data->{test}, 1);
$data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$);
is_deeply($data->{test}, [1,2,3]);
$data = MarpaX::JSON::parse_json(q${"test":true}$);
is($data->{test}, 1);
$data = MarpaX::JSON::parse_json(q${"test":false}$);
is($data->{test}, '');
$data = MarpaX::JSON::parse_json(q${"test":null}$);
is($data->{test}, undef);
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$);
is($data->{test}, undef);
is($data->{test2}, "hello world");
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$);
is($data->{test}, '1.25');
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$);
is($data->{test}, '1.25e4');
$data = MarpaX::JSON::parse_json(q$[]$);
is_deeply($data, []);
$data = MarpaX::JSON::parse_json(<<'JSON');
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.026020,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
}
]
JSON
is_deeply($data, [
{ "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959",
"Country" => "US", Zip => 94107, Address => '',
City => "SAN FRANCISCO", State => 'CA' },
{ "precision" => "zip", Longitude => "-122.026020", Address => "",
City => "SUNNYVALE", Country => "US", Latitude => "37.371991",
Zip => 94085, State => "CA" }
]);
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": "100"
},
"IDs": [116, 943, 234, 38793]
}
}
JSON
is_deeply($data, {
"Image" => {
"Width" => 800, "Height" => 600,
"Title" => "View from 15th Floor",
"Thumbnail" => {
"Url" => "http://www.example.com/image/481989943",
"Height" => 125,
"Width" => 100,
},
"IDs" => [ 116, 943, 234, 38793 ],
}
});
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>",
"entities" : {
"user_mentions" : [ {
"name" : "James Governor",
"screen_name" : "moankchips",
"indices" : [ 0, 10 ],
"id_str" : "61233",
"id" : 61233
} ],
"media" : [ ],
"hashtags" : [ ],
"urls" : [ ]
},
"in_reply_to_status_id_str" : "281400879465238529",
"geo" : {
},
"id_str" : "281405942321532929",
"in_reply_to_user_id" : 61233,
"text" : "@monkchips Ouch. Some regrets are harsher than others.",
"id" : 281405942321532929,
"in_reply_to_status_id" : 281400879465238529,
"created_at" : "Wed Dec 19 14:29:39 +0000 2012",
"in_reply_to_screen_name" : "monkchips",
"in_reply_to_user_id_str" : "61233",
"user" : {
"name" : "Sarah Bourne",
"screen_name" : "sarahebourne",
"protected" : false,
"id_str" : "16010789",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg",
"id" : 16010789,
"verified" : false
}
}
JSON
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "\u2603" }
JSON
is($data->{test}, "\x{2603}");
dies_ok {
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "éáóüöï" }
JSON
}, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet';
done_testing();
@uberbaud
Copy link

uberbaud commented Jan 4, 2013

Very cool, only it should be

json ::= object | array

At least according to the RFC: "A JSON text is a serialized object or array."

@pstuifzand
Copy link
Author

There are probably some more problems with it at the moment. This is just an half hour experiment that worked out really great.

@uberbaud
Copy link

uberbaud commented Jan 4, 2013

I only mentioned it because as cool as this is, I don't doubt it's already on its way to be included in some production code. You totally rock.

@jeffreykegler
Copy link

"number" should be a G0 rule:

-number       ::= int
-               | int frac              action => do_join
-               | int exp               action => do_join
-               | int frac exp          action => do_join
+number       ~ int
+               | int frac
+               | int exp
+               | int frac exp

@jeffreykegler
Copy link

Also, since it needs 2.039_000 to work, it is probably best to change

use Marpa::R2;

to

use Marpa::R2 2.039_000;

@pstuifzand
Copy link
Author

A structural rule with a join is a bit of a red flag. I'll change it.

@pstuifzand
Copy link
Author

@uberbaud Thanks. And I guess you're totally right about being used in production.

@perlancar
Copy link

~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for a half-hour work :)

BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.

@jeffreykegler
Copy link

I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)

It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment