PP json parser
| #!/usr/bin/perl | |
| use strict; | |
| use warnings; | |
| use Carp qw[]; | |
| my $WS = '[\x20\x09\x0A\x0D]*'; | |
| sub err { | |
| m/ \G $WS /xogc; | |
| my $msg = 'Malformed JSON: ' . shift; | |
| $msg .= m/ \G \z /xgc ? ' before end of data' : ' at character offset ' . pos; | |
| @_ = ($msg); | |
| goto \&Carp::croak; | |
| } | |
| my %Escapes = ( | |
| '"' => '"', | |
| '\\' => '\\', | |
| '/' => '/', | |
| 'b' => "\x07", | |
| 'f' => "\x0C", | |
| 'n' => "\x0A", | |
| 'r' => "\x0D", | |
| 't' => "\x09", | |
| ); | |
| sub decode_string { | |
| my $pos = pos; | |
| m! \G ((?:[^\x00-\x1F\\"]|\\(?:["\\/bfnrt]|u[A-Fa-f0-9]{4}))*) !xgc; | |
| my $str = $1; | |
| unless (m! \G " !xgc) { | |
| err('unexpected character or invalid escape while parsing string') | |
| if m! \G [\x00-\x1F\\] !x; | |
| err('unterminated string'); | |
| } | |
| if (index($str, '\\u') < 0) { | |
| $str =~ s!\\(["\\/bfnrt])!$Escapes{$1}!gs; | |
| return $str; | |
| } | |
| my $res = ''; | |
| while ($str =~ m!\G ([^\\]*) \\ (?:([^u])|u(.{4}))!xgc) { | |
| $res .= $1; | |
| if ($2) { | |
| $res .= $Escapes{$2}; | |
| } | |
| else { | |
| my $ord = hex $3; | |
| if (($ord & 0xF800) == 0xD800) { | |
| ($ord & 0xFC00) == 0xD800 | |
| or pos($_) = $pos + pos($str), err('missing high-surrogate'); | |
| $str =~ m!\G \\u ([Dd][C-Fc-f]..)!xgc | |
| or pos($_) = $pos + pos($str), err('missing low-surrogate'); | |
| $ord = 0x10000 + ($ord - 0xD800) * 0x400 + (hex($1) - 0xDC00); | |
| } | |
| $res .= pack('U', $ord); | |
| } | |
| } | |
| $res .= substr($str, pos($str), length($str)); | |
| return $res; | |
| } | |
| sub decode_array { | |
| my @a; | |
| until (m/ \G $WS \] /xogc) { | |
| push @a, decode_value(); | |
| redo if m/ \G $WS , /xogc; | |
| last if m/ \G $WS \] /xogc; | |
| err(q/expected comma or right square bracket while parsing array/); | |
| } | |
| return \@a; | |
| } | |
| sub decode_object { | |
| my %h; | |
| until (m/ \G $WS \} /xogc) { | |
| m/ \G $WS " /xogc | |
| or err("expected string while parsing object"); | |
| my $k = decode_string(); | |
| m/ \G $WS : /xogc | |
| or err("expected colon while parsing object"); | |
| $h{$k} = decode_value(); | |
| redo if m/ \G $WS , /xogc; | |
| last if m/ \G $WS \} /xogc; | |
| err(q/expected comma or right curly bracket while parsing object/); | |
| } | |
| return \%h; | |
| } | |
| sub decode_value { | |
| m/ \G $WS /xogc; | |
| if (m/ \G " /xgc) { | |
| return decode_string(); | |
| } | |
| if (m/ \G \[ /xgc) { | |
| return decode_array(); | |
| } | |
| if (m/ \G \{ /xgc) { | |
| return decode_object(); | |
| } | |
| if (m/ \G ([-]? (?: 0 | [1-9][0-9]*) (?: \. [0-9]*)? (?: [eE][+-]? [0-9]+)?)/xgc) { | |
| return $1; | |
| } | |
| if (m/ \G true /xgc) { | |
| return !!1; | |
| } | |
| if (m/ \G false /xgc) { | |
| return !!0; | |
| } | |
| if (m/ \G null /xgc) { | |
| return undef; | |
| } | |
| err('expected string, array, object, number, boolean or null'); | |
| } | |
| sub decode_json { | |
| @_ == 1 || Carp::croak(q/Usage: decode_json(octets)/); | |
| local $_ = shift; | |
| no warnings 'uninitialized'; | |
| utf8::downgrade($_ .= '', 1) | |
| or Carp::croak(q/Wide character in decode_json()/); | |
| utf8::decode($_) | |
| or Carp::croak(q/Malformed UTF-8 in decode_json()/); | |
| m/ \G $WS /xogc; | |
| my $ref = do { | |
| if (m/ \G \[ /xgc) { | |
| decode_array(); | |
| } | |
| elsif (m/ \G \{ /xgc) { | |
| decode_object(); | |
| } | |
| else { | |
| err('expected array or object'); | |
| } | |
| }; | |
| unless (m/ \G $WS \z /xogc) { | |
| my $got = ref $ref eq 'ARRAY' ? 'array' : 'object'; | |
| err("unexpected data after $got"); | |
| } | |
| return $ref; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment