Last active
December 10, 2015 14:28
-
-
Save pstuifzand/4447349 to your computer and use it in GitHub Desktop.
A JSON parser in Marpa.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package MarpaX::JSON; | |
use strict; | |
use Marpa::R2 2.039_000; | |
sub new { | |
my ($class) = @_; | |
my $self = bless {}, $class; | |
$self->{grammar} = Marpa::R2::Scanless::G->new( | |
{ | |
action_object => 'MarpaX::JSON::Actions', | |
default_action => 'do_first_arg', | |
source => \(<<'END_OF_SOURCE'), | |
:start ::= json | |
json ::= object | |
| array | |
object ::= '{' '}' action => do_empty_object | |
| '{' members '}' action => do_object | |
members ::= pair+ separator => <comma> action => do_list | |
pair ::= string ':' value action => do_pair | |
value ::= string | |
| object | |
| number | |
| array | |
| 'true' action => do_true | |
| 'false' action => do_true | |
| 'null' action => do_null | |
array ::= '[' ']' action => do_empty_array | |
| '[' elements ']' action => do_array | |
elements ::= value+ separator => <comma> action => do_list | |
number ~ int | |
| int frac | |
| int exp | |
| int frac exp | |
int ~ digits | |
| '-' digits | |
digits ~ [\d]+ | |
frac ~ '.' digits | |
exp ~ e digits | |
e ~ 'e' | |
| 'e+' | |
| 'e-' | |
| 'E' | |
| 'E+' | |
| 'E-' | |
string ::= lstring action => do_string | |
lstring ~ quote in_string quote | |
quote ~ ["] | |
in_string ~ in_string_char* | |
in_string_char ~ [^"\\] | |
| '\' '"' | |
| '\' 'b' | |
| '\' 'f' | |
| '\' 't' | |
| '\' 'n' | |
| '\' 'r' | |
| '\' 'u' four_hex_digits | |
| '\' '/' | |
| '\\' | |
four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit | |
hex_digit ~ [0-9a-fA-F] | |
comma ~ ',' | |
:discard ~ whitespace | |
whitespace ~ [\s]+ | |
END_OF_SOURCE | |
} | |
); | |
return $self; | |
} | |
sub parse { | |
my ($self, $string) = @_; | |
my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } ); | |
$re->read(\$string); | |
my $value_ref = $re->value(); | |
return ${$value_ref}; | |
} | |
sub parse_json { | |
my ($string) = @_; | |
my $parser = MarpaX::JSON->new(); | |
return $parser->parse($string); | |
} | |
package MarpaX::JSON::Actions; | |
use strict; | |
sub new { | |
my ($class) = @_; | |
return bless {}, $class; | |
} | |
sub do_first_arg { | |
shift; | |
return $_[0]; | |
} | |
sub do_empty_object { | |
return {}; | |
} | |
sub do_object { | |
shift; | |
return { map { @$_ } @{$_[1]} }; | |
} | |
sub do_empty_array { | |
return []; | |
} | |
sub do_array { | |
shift; | |
return $_[1]; | |
} | |
sub do_list { | |
shift; | |
return \@_; | |
} | |
sub do_pair { | |
shift; | |
return [ $_[0], $_[2] ]; | |
} | |
sub do_string { | |
shift; | |
my $s = $_[0]; | |
$s =~ s/^"//; | |
$s =~ s/"$//; | |
$s =~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/eg; | |
$s =~ s/\\n/\n/g; | |
$s =~ s/\\r/\r/g; | |
$s =~ s/\\b/\b/g; | |
$s =~ s/\\f/\f/g; | |
$s =~ s/\\t/\t/g; | |
$s =~ s/\\\\/\\/g; | |
$s =~ s{\\/}{/}g; | |
$s =~ s{\\"}{"}g; | |
return $s; | |
} | |
sub do_true { | |
shift; | |
return $_[0] eq 'true'; | |
} | |
sub do_null { | |
return undef; | |
} | |
sub do_join { | |
shift; | |
return join '', @_; | |
} | |
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use Test::More; | |
use Test::Exception; | |
use lib 'lib'; | |
use MarpaX::JSON; | |
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$); | |
is($data->{test}, 1); | |
$data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$); | |
is_deeply($data->{test}, [1,2,3]); | |
$data = MarpaX::JSON::parse_json(q${"test":true}$); | |
is($data->{test}, 1); | |
$data = MarpaX::JSON::parse_json(q${"test":false}$); | |
is($data->{test}, ''); | |
$data = MarpaX::JSON::parse_json(q${"test":null}$); | |
is($data->{test}, undef); | |
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$); | |
is($data->{test}, undef); | |
is($data->{test2}, "hello world"); | |
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$); | |
is($data->{test}, '1.25'); | |
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$); | |
is($data->{test}, '1.25e4'); | |
$data = MarpaX::JSON::parse_json(q$[]$); | |
is_deeply($data, []); | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
[ | |
{ | |
"precision": "zip", | |
"Latitude": 37.7668, | |
"Longitude": -122.3959, | |
"Address": "", | |
"City": "SAN FRANCISCO", | |
"State": "CA", | |
"Zip": "94107", | |
"Country": "US" | |
}, | |
{ | |
"precision": "zip", | |
"Latitude": 37.371991, | |
"Longitude": -122.026020, | |
"Address": "", | |
"City": "SUNNYVALE", | |
"State": "CA", | |
"Zip": "94085", | |
"Country": "US" | |
} | |
] | |
JSON | |
is_deeply($data, [ | |
{ "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959", | |
"Country" => "US", Zip => 94107, Address => '', | |
City => "SAN FRANCISCO", State => 'CA' }, | |
{ "precision" => "zip", Longitude => "-122.026020", Address => "", | |
City => "SUNNYVALE", Country => "US", Latitude => "37.371991", | |
Zip => 94085, State => "CA" } | |
]); | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ | |
"Image": { | |
"Width": 800, | |
"Height": 600, | |
"Title": "View from 15th Floor", | |
"Thumbnail": { | |
"Url": "http://www.example.com/image/481989943", | |
"Height": 125, | |
"Width": "100" | |
}, | |
"IDs": [116, 943, 234, 38793] | |
} | |
} | |
JSON | |
is_deeply($data, { | |
"Image" => { | |
"Width" => 800, "Height" => 600, | |
"Title" => "View from 15th Floor", | |
"Thumbnail" => { | |
"Url" => "http://www.example.com/image/481989943", | |
"Height" => 125, | |
"Width" => 100, | |
}, | |
"IDs" => [ 116, 943, 234, 38793 ], | |
} | |
}); | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ | |
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>", | |
"entities" : { | |
"user_mentions" : [ { | |
"name" : "James Governor", | |
"screen_name" : "moankchips", | |
"indices" : [ 0, 10 ], | |
"id_str" : "61233", | |
"id" : 61233 | |
} ], | |
"media" : [ ], | |
"hashtags" : [ ], | |
"urls" : [ ] | |
}, | |
"in_reply_to_status_id_str" : "281400879465238529", | |
"geo" : { | |
}, | |
"id_str" : "281405942321532929", | |
"in_reply_to_user_id" : 61233, | |
"text" : "@monkchips Ouch. Some regrets are harsher than others.", | |
"id" : 281405942321532929, | |
"in_reply_to_status_id" : 281400879465238529, | |
"created_at" : "Wed Dec 19 14:29:39 +0000 2012", | |
"in_reply_to_screen_name" : "monkchips", | |
"in_reply_to_user_id_str" : "61233", | |
"user" : { | |
"name" : "Sarah Bourne", | |
"screen_name" : "sarahebourne", | |
"protected" : false, | |
"id_str" : "16010789", | |
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg", | |
"id" : 16010789, | |
"verified" : false | |
} | |
} | |
JSON | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ "test": "\u2603" } | |
JSON | |
is($data->{test}, "\x{2603}"); | |
dies_ok { | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ "test": "éáóüöï" } | |
JSON | |
}, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet'; | |
done_testing(); |
I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)
It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for a half-hour work :)
BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.