Skip to content

Instantly share code, notes, and snippets.

@chansen
Created February 3, 2011 21:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chansen/810296 to your computer and use it in GitHub Desktop.
Save chansen/810296 to your computer and use it in GitHub Desktop.
PP json parser
#!/usr/bin/perl
use strict;
use warnings;
use Carp qw[];
my $WS = '[\x20\x09\x0A\x0D]*';
sub err {
m/ \G $WS /xogc;
my $msg = 'Malformed JSON: ' . shift;
$msg .= m/ \G \z /xgc ? ' before end of data' : ' at character offset ' . pos;
@_ = ($msg);
goto \&Carp::croak;
}
my %Escapes = (
'"' => '"',
'\\' => '\\',
'/' => '/',
'b' => "\x07",
'f' => "\x0C",
'n' => "\x0A",
'r' => "\x0D",
't' => "\x09",
);
sub decode_string {
my $pos = pos;
m! \G ((?:[^\x00-\x1F\\"]|\\(?:["\\/bfnrt]|u[A-Fa-f0-9]{4}))*) !xgc;
my $str = $1;
unless (m! \G " !xgc) {
err('unexpected character or invalid escape while parsing string')
if m! \G [\x00-\x1F\\] !x;
err('unterminated string');
}
if (index($str, '\\u') < 0) {
$str =~ s!\\(["\\/bfnrt])!$Escapes{$1}!gs;
return $str;
}
my $res = '';
while ($str =~ m!\G ([^\\]*) \\ (?:([^u])|u(.{4}))!xgc) {
$res .= $1;
if ($2) {
$res .= $Escapes{$2};
}
else {
my $ord = hex $3;
if (($ord & 0xF800) == 0xD800) {
($ord & 0xFC00) == 0xD800
or pos($_) = $pos + pos($str), err('missing high-surrogate');
$str =~ m!\G \\u ([Dd][C-Fc-f]..)!xgc
or pos($_) = $pos + pos($str), err('missing low-surrogate');
$ord = 0x10000 + ($ord - 0xD800) * 0x400 + (hex($1) - 0xDC00);
}
$res .= pack('U', $ord);
}
}
$res .= substr($str, pos($str), length($str));
return $res;
}
sub decode_array {
my @a;
until (m/ \G $WS \] /xogc) {
push @a, decode_value();
redo if m/ \G $WS , /xogc;
last if m/ \G $WS \] /xogc;
err(q/expected comma or right square bracket while parsing array/);
}
return \@a;
}
sub decode_object {
my %h;
until (m/ \G $WS \} /xogc) {
m/ \G $WS " /xogc
or err("expected string while parsing object");
my $k = decode_string();
m/ \G $WS : /xogc
or err("expected colon while parsing object");
$h{$k} = decode_value();
redo if m/ \G $WS , /xogc;
last if m/ \G $WS \} /xogc;
err(q/expected comma or right curly bracket while parsing object/);
}
return \%h;
}
sub decode_value {
m/ \G $WS /xogc;
if (m/ \G " /xgc) {
return decode_string();
}
if (m/ \G \[ /xgc) {
return decode_array();
}
if (m/ \G \{ /xgc) {
return decode_object();
}
if (m/ \G ([-]? (?: 0 | [1-9][0-9]*) (?: \. [0-9]*)? (?: [eE][+-]? [0-9]+)?)/xgc) {
return $1;
}
if (m/ \G true /xgc) {
return !!1;
}
if (m/ \G false /xgc) {
return !!0;
}
if (m/ \G null /xgc) {
return undef;
}
err('expected string, array, object, number, boolean or null');
}
sub decode_json {
@_ == 1 || Carp::croak(q/Usage: decode_json(octets)/);
local $_ = shift;
no warnings 'uninitialized';
utf8::downgrade($_ .= '', 1)
or Carp::croak(q/Wide character in decode_json()/);
utf8::decode($_)
or Carp::croak(q/Malformed UTF-8 in decode_json()/);
m/ \G $WS /xogc;
my $ref = do {
if (m/ \G \[ /xgc) {
decode_array();
}
elsif (m/ \G \{ /xgc) {
decode_object();
}
else {
err('expected array or object');
}
};
unless (m/ \G $WS \z /xogc) {
my $got = ref $ref eq 'ARRAY' ? 'array' : 'object';
err("unexpected data after $got");
}
return $ref;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment