-
-
Save pstuifzand/4447349 to your computer and use it in GitHub Desktop.
package MarpaX::JSON; | |
use strict;
use warnings;

use Marpa::R2 2.039_000;
# The SLIF grammar is immutable, so build it once and share it across
# instances.  parse_json() constructs a brand-new parser for every call,
# and re-deriving the grammar each time was pure overhead.
my $GRAMMAR;

# Construct a MarpaX::JSON parser.
#
# Takes no arguments beyond the class name; returns a blessed hashref
# whose only field, {grammar}, holds the shared Scanless grammar.
#
# Note: the grammar routes both 'true' and 'false' to do_true, which
# yields Perl's canonical truth values (1 and '') for them.
sub new {
    my ($class) = @_;
    my $self = bless {}, $class;
    $GRAMMAR ||= Marpa::R2::Scanless::G->new(
        {
            action_object  => 'MarpaX::JSON::Actions',
            default_action => 'do_first_arg',
            source         => \(<<'END_OF_SOURCE'),
:start ::= json

json ::= object
       | array

object ::= '{' '}'              action => do_empty_object
         | '{' members '}'      action => do_object

members ::= pair+               separator => <comma> action => do_list

pair ::= string ':' value       action => do_pair

value ::= string
        | object
        | number
        | array
        | 'true'                action => do_true
        | 'false'               action => do_true
        | 'null'                action => do_null

array ::= '[' ']'               action => do_empty_array
        | '[' elements ']'      action => do_array

elements ::= value+             separator => <comma> action => do_list

number ~ int
       | int frac
       | int exp
       | int frac exp

int ~ digits
    | '-' digits

digits ~ [\d]+

frac ~ '.' digits

exp ~ e digits

e ~ 'e'
  | 'e+'
  | 'e-'
  | 'E'
  | 'E+'
  | 'E-'

string ::= lstring              action => do_string

lstring ~ quote in_string quote
quote ~ ["]

in_string ~ in_string_char*

in_string_char ~ [^"\\]
              | '\' '"'
              | '\' 'b'
              | '\' 'f'
              | '\' 't'
              | '\' 'n'
              | '\' 'r'
              | '\' 'u' four_hex_digits
              | '\' '/'
              | '\\'

four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit
hex_digit ~ [0-9a-fA-F]

comma ~ ','

:discard ~ whitespace
whitespace ~ [\s]+
END_OF_SOURCE
        }
    );
    $self->{grammar} = $GRAMMAR;
    return $self;
}
# Parse a JSON text and return the corresponding Perl data structure.
#
# Dies with a descriptive message when the input is not valid JSON.
# (Previously a failed parse left $value_ref undefined and the final
# dereference crashed with "Can't use an undefined value as a SCALAR
# reference" — no indication of what went wrong.)
sub parse {
    my ( $self, $string ) = @_;
    my $recognizer =
        Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } );
    $recognizer->read( \$string );
    my $value_ref = $recognizer->value();
    if ( not defined $value_ref ) {
        die "MarpaX::JSON::parse: input is not valid JSON (no parse)\n";
    }
    return ${$value_ref};
}
# Convenience function: parse a JSON string with a throwaway parser.
# Note this builds a parser object per call, so it relies on the
# grammar being shared for reasonable performance.
sub parse_json {
    my ($json_text) = @_;
    return MarpaX::JSON->new()->parse($json_text);
}
package MarpaX::JSON::Actions; | |
use strict; | |
# Per-parse action object; Marpa instantiates one for each parse.
# Carries no state of its own.
sub new {
    my ($class) = @_;
    my $self = {};
    return bless $self, $class;
}
# Default action: pass the first RHS value through unchanged.
sub do_first_arg {
    my ( undef, $first ) = @_;
    return $first;
}
# '{' '}' — a fresh, empty hashref for every empty JSON object.
sub do_empty_object {
    my %none;
    return \%none;
}
# '{' members '}' — fold the list of [key, value] pairs into a hashref.
# On duplicate keys the last pair wins, matching hash-assignment order.
sub do_object {
    my ( undef, undef, $pairs ) = @_;
    my %object;
    for my $pair ( @{$pairs} ) {
        my ( $key, $value ) = @{$pair};
        $object{$key} = $value;
    }
    return \%object;
}
# '[' ']' — a fresh, empty arrayref for every empty JSON array.
sub do_empty_array {
    my @none;
    return \@none;
}
# '[' elements ']' — the elements list (already an arrayref from
# do_list) IS the array value; pass it through.
sub do_array {
    my ( undef, undef, $elements ) = @_;
    return $elements;
}
# Sequence action for members/elements: collect the item values into
# an arrayref.
#
# Returns a copy rather than \@_: elements of @_ alias the caller's
# values, so returning \@_ would hand out an arrayref whose slots are
# live aliases into Marpa's internals — a latent action-at-a-distance
# bug if anything later mutated them.
sub do_list {
    my ( undef, @items ) = @_;
    return \@items;
}
# string ':' value — represent one object member as a [key, value]
# arrayref; do_object assembles these into the hash.
sub do_pair {
    my ( undef, $key, undef, $value ) = @_;
    return [ $key, $value ];
}
# Decode a JSON string lexeme (still wrapped in its double quotes)
# into a Perl string.
#
# Uses a single-pass substitution.  The previous implementation ran a
# chain of independent s///g passes, which decoded escapes in the wrong
# order: input \\n (backslash, backslash, 'n' — i.e. an escaped
# backslash followed by a literal n) was corrupted into backslash +
# newline because the \n pass ran before the \\ pass.  A single pass
# consumes each escape exactly once, so that cannot happen.
#
# NOTE(review): UTF-16 surrogate pairs (\uD83D\uDE00 etc.) are not
# combined — each half becomes its own codepoint.  That matches the
# original behavior; flagging rather than changing it.
sub do_string {
    my ( undef, $lexeme ) = @_;

    # Strip the enclosing quotes guaranteed by the lstring lexeme.
    my $s = substr $lexeme, 1, -1;

    my %simple_escape = (
        '"'  => '"',
        '\\' => '\\',
        '/'  => '/',
        'b'  => "\b",
        'f'  => "\f",
        'n'  => "\n",
        'r'  => "\r",
        't'  => "\t",
    );

    # One pass over the text: \uXXXX becomes its codepoint, every other
    # escape comes from the table above.  The grammar only admits these
    # escapes, so $simple_escape{$2} is always defined when $2 matches.
    $s =~ s{ \\ (?: u ([0-9A-Fa-f]{4}) | (["\\/bfnrt]) ) }
           { defined $1 ? chr( hex $1 ) : $simple_escape{$2} }egx;

    return $s;
}
# Handles BOTH the 'true' and 'false' literals (the grammar routes
# 'false' here too): 'true' maps to 1, 'false' to '' — Perl's
# canonical truth values.
sub do_true {
    my ( undef, $literal ) = @_;
    return $literal eq 'true' ? 1 : '';
}
# A JSON null becomes Perl's undef.
sub do_null {
    # Deliberately 'return undef;' rather than bare 'return;' so that
    # list-context callers (e.g. collecting array elements) still
    # receive exactly one element instead of an empty list.
    return undef;
}
# Concatenate all RHS values into one string.  Currently unreferenced
# by the grammar (the number rules that used it became lexical rules).
sub do_join {
    my ( undef, @parts ) = @_;
    return join q{}, @parts;
}
1; |
use Test::More; | |
use Test::Exception; | |
use lib 'lib'; | |
use MarpaX::JSON; | |
# --- Scalar values round-trip through the parser -----------------------
# (q$...$ is used so braces and quotes need no escaping.)
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$);
is($data->{test}, 1);
$data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$);
is_deeply($data->{test}, [1,2,3]);
# JSON true/false map to Perl's 1 and '' (see do_true in the parser).
$data = MarpaX::JSON::parse_json(q${"test":true}$);
is($data->{test}, 1);
$data = MarpaX::JSON::parse_json(q${"test":false}$);
is($data->{test}, '');
# null maps to undef.
$data = MarpaX::JSON::parse_json(q${"test":null}$);
is($data->{test}, undef);
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$);
is($data->{test}, undef);
is($data->{test2}, "hello world");
# Numeric-looking JSON *strings* must come back verbatim.
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$);
is($data->{test}, '1.25');
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$);
is($data->{test}, '1.25e4');
# Empty array at the top level.
$data = MarpaX::JSON::parse_json(q$[]$);
is_deeply($data, []);
# --- Array of objects (geodata fixture) --------------------------------
# Exercises nested hashes, negative and fractional numbers, and empty
# string values.  Expected hash below lists keys in a different order
# on purpose: is_deeply compares structure, not ordering.
$data = MarpaX::JSON::parse_json(<<'JSON');
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.026020,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
}
]
JSON
is_deeply($data, [
{ "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959",
"Country" => "US", Zip => 94107, Address => '',
City => "SAN FRANCISCO", State => 'CA' },
{ "precision" => "zip", Longitude => "-122.026020", Address => "",
City => "SUNNYVALE", Country => "US", Latitude => "37.371991",
Zip => 94085, State => "CA" }
]);
# --- Deeply nested object (RFC 4627 "Image" example) -------------------
# Mixes numeric and string scalars at several nesting levels plus an
# array of integers.
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": "100"
},
"IDs": [116, 943, 234, 38793]
}
}
JSON
is_deeply($data, {
"Image" => {
"Width" => 800, "Height" => 600,
"Title" => "View from 15th Floor",
"Thumbnail" => {
"Url" => "http://www.example.com/image/481989943",
"Height" => 125, # JSON string "100" compares eq to 100 below
"Width" => 100,
},
"IDs" => [ 116, 943, 234, 38793 ],
}
});
# --- Realistic payload (a tweet) ---------------------------------------
# Exercises escaped quotes (\"), empty arrays, empty objects, large
# integers, and booleans in one document.
# NOTE(review): there is no assertion after this parse — success is
# implicit (a parse failure would die and abort the script before
# done_testing).  Consider wrapping in lives_ok for a proper test.
$data = MarpaX::JSON::parse_json(<<'JSON');
{
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>",
"entities" : {
"user_mentions" : [ {
"name" : "James Governor",
"screen_name" : "moankchips",
"indices" : [ 0, 10 ],
"id_str" : "61233",
"id" : 61233
} ],
"media" : [ ],
"hashtags" : [ ],
"urls" : [ ]
},
"in_reply_to_status_id_str" : "281400879465238529",
"geo" : {
},
"id_str" : "281405942321532929",
"in_reply_to_user_id" : 61233,
"text" : "@monkchips Ouch. Some regrets are harsher than others.",
"id" : 281405942321532929,
"in_reply_to_status_id" : 281400879465238529,
"created_at" : "Wed Dec 19 14:29:39 +0000 2012",
"in_reply_to_screen_name" : "monkchips",
"in_reply_to_user_id_str" : "61233",
"user" : {
"name" : "Sarah Bourne",
"screen_name" : "sarahebourne",
"protected" : false,
"id_str" : "16010789",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg",
"id" : 16010789,
"verified" : false
}
}
JSON
# --- Unicode handling --------------------------------------------------
# A \uXXXX escape decodes to the corresponding codepoint (U+2603 SNOWMAN).
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "\u2603" }
JSON
is($data->{test}, "\x{2603}");
# Raw (unescaped) non-ASCII characters are expected to be rejected;
# this pins the known engine limitation rather than desired behavior.
dies_ok {
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "éáóüöï" }
JSON
}, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet';
done_testing();
I only mentioned it because as cool as this is, I don't doubt it's already on its way to be included in some production code. You totally rock.
"number" should be a G0 rule:
-number ::= int
- | int frac action => do_join
- | int exp action => do_join
- | int frac exp action => do_join
+number ~ int
+ | int frac
+ | int exp
+ | int frac exp
Also, since it needs 2.039_000 to work, it is probably best to change
use Marpa::R2;
to
use Marpa::R2 2.039_000;
A structural rule with a join is a bit of a red flag. I'll change it.
@uberbaud Thanks. And I guess you're totally right about being used in production.
~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for half an hour's work :)
BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.
I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)
It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.
There are probably some more problems with it at the moment. This is just a half-hour experiment that worked out really well.