public
Last active

PP json parser

  • Download Gist
json.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
#!/usr/bin/perl
 
use strict;
use warnings;
 
use Carp qw[];
 
my $WS = '[\x20\x09\x0A\x0D]*';
 
sub err {
m/ \G $WS /xogc;
my $msg = 'Malformed JSON: ' . shift;
$msg .= m/ \G \z /xgc ? ' before end of data' : ' at character offset ' . pos;
@_ = ($msg);
goto \&Carp::croak;
}
 
my %Escapes = (
'"' => '"',
'\\' => '\\',
'/' => '/',
'b' => "\x07",
'f' => "\x0C",
'n' => "\x0A",
'r' => "\x0D",
't' => "\x09",
);
 
sub decode_string {
my $pos = pos;
m! \G ((?:[^\x00-\x1F\\"]|\\(?:["\\/bfnrt]|u[A-Fa-f0-9]{4}))*) !xgc;
my $str = $1;
unless (m! \G " !xgc) {
err('unexpected character or invalid escape while parsing string')
if m! \G [\x00-\x1F\\] !x;
err('unterminated string');
}
if (index($str, '\\u') < 0) {
$str =~ s!\\(["\\/bfnrt])!$Escapes{$1}!gs;
return $str;
}
my $res = '';
while ($str =~ m!\G ([^\\]*) \\ (?:([^u])|u(.{4}))!xgc) {
$res .= $1;
if ($2) {
$res .= $Escapes{$2};
}
else {
my $ord = hex $3;
if (($ord & 0xF800) == 0xD800) {
($ord & 0xFC00) == 0xD800
or pos($_) = $pos + pos($str), err('missing high-surrogate');
$str =~ m!\G \\u ([Dd][C-Fc-f]..)!xgc
or pos($_) = $pos + pos($str), err('missing low-surrogate');
$ord = 0x10000 + ($ord - 0xD800) * 0x400 + (hex($1) - 0xDC00);
}
$res .= pack('U', $ord);
}
}
$res .= substr($str, pos($str), length($str));
return $res;
}
 
sub decode_array {
my @a;
until (m/ \G $WS \] /xogc) {
push @a, decode_value();
redo if m/ \G $WS , /xogc;
last if m/ \G $WS \] /xogc;
err(q/expected comma or right square bracket while parsing array/);
}
return \@a;
}
 
sub decode_object {
my %h;
until (m/ \G $WS \} /xogc) {
m/ \G $WS " /xogc
or err("expected string while parsing object");
my $k = decode_string();
m/ \G $WS : /xogc
or err("expected colon while parsing object");
$h{$k} = decode_value();
redo if m/ \G $WS , /xogc;
last if m/ \G $WS \} /xogc;
err(q/expected comma or right curly bracket while parsing object/);
}
return \%h;
}
 
sub decode_value {
m/ \G $WS /xogc;
 
if (m/ \G " /xgc) {
return decode_string();
}
if (m/ \G \[ /xgc) {
return decode_array();
}
if (m/ \G \{ /xgc) {
return decode_object();
}
if (m/ \G ([-]? (?: 0 | [1-9][0-9]*) (?: \. [0-9]*)? (?: [eE][+-]? [0-9]+)?)/xgc) {
return $1;
}
if (m/ \G true /xgc) {
return !!1;
}
if (m/ \G false /xgc) {
return !!0;
}
if (m/ \G null /xgc) {
return undef;
}
err('expected string, array, object, number, boolean or null');
}
 
sub decode_json {
@_ == 1 || Carp::croak(q/Usage: decode_json(octets)/);
local $_ = shift;
no warnings 'uninitialized';
utf8::downgrade($_ .= '', 1)
or Carp::croak(q/Wide character in decode_json()/);
utf8::decode($_)
or Carp::croak(q/Malformed UTF-8 in decode_json()/);
m/ \G $WS /xogc;
my $ref = do {
if (m/ \G \[ /xgc) {
decode_array();
}
elsif (m/ \G \{ /xgc) {
decode_object();
}
else {
err('expected array or object');
}
};
unless (m/ \G $WS \z /xogc) {
my $got = ref $ref eq 'ARRAY' ? 'array' : 'object';
err("unexpected data after $got");
}
return $ref;
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.