Skip to content

Instantly share code, notes, and snippets.

@brzuchal
Created August 26, 2022 10:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brzuchal/37e888d9b13937891c3e05fead5042bc to your computer and use it in GitHub Desktop.
Save brzuchal/37e888d9b13937891c3e05fead5042bc to your computer and use it in GitHub Desktop.
<?php declare(strict_types=1);
/* JSON_checker.c */
namespace JsonChecker;
/* 2016-11-11 */
/*
Copyright (c) 2005 JSON.org
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The Software shall be used for Good, not Evil.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <stdlib.h>
#include "JSON_checker.h"
/*
Characters are mapped into these 31 character classes. This allows for
a significant reduction in the size of the state transition table.
*/
/**
 * Character classes fed to the JSON state machine.
 *
 * Every input character is reduced to one of these classes before the
 * transition table is consulted. The backing value of a case is its ordinal,
 * i.e. its position in declaration order, and that ordinal is the column index
 * used with `state_transition_table`. Do not reorder or renumber the cases
 * without updating the table.
 */
enum Classes: int
{
    case C_SPACE = 0;     /* space */
    case C_WHITE = 1;     /* other whitespace */
    case C_LCURB = 2;     /* { */
    case C_RCURB = 3;     /* } */
    case C_LSQRB = 4;     /* [ */
    case C_RSQRB = 5;     /* ] */
    case C_COLON = 6;     /* : */
    case C_COMMA = 7;     /* , */
    case C_QUOTE = 8;     /* " */
    case C_BACKS = 9;     /* \ */
    case C_SLASH = 10;    /* / */
    case C_PLUS = 11;     /* + */
    case C_MINUS = 12;    /* - */
    case C_POINT = 13;    /* . */
    case C_ZERO = 14;     /* 0 */
    case C_DIGIT = 15;    /* 123456789 */
    case C_LOW_A = 16;    /* a */
    case C_LOW_B = 17;    /* b */
    case C_LOW_C = 18;    /* c */
    case C_LOW_D = 19;    /* d */
    case C_LOW_E = 20;    /* e */
    case C_LOW_F = 21;    /* f */
    case C_LOW_L = 22;    /* l */
    case C_LOW_N = 23;    /* n */
    case C_LOW_R = 24;    /* r */
    case C_LOW_S = 25;    /* s */
    case C_LOW_T = 26;    /* t */
    case C_LOW_U = 27;    /* u */
    case C_ABCDF = 28;    /* ABCDF */
    case C_E = 29;        /* E */
    case C_ETC = 30;      /* everything else */

    /* Sentinel kept from the C original: its ordinal equals the number of
       real character classes above. It is not a character class itself. */
    case NR_CLASSES = 31;

    /**
     * Zero-based ordinal of this case (its position in declaration order).
     *
     * Returned straight from the backing value, so the lookup is constant
     * time and allocates nothing.
     */
    public function ord(): int
    {
        return $this->value;
    }
}
/**
 * Lookup table from a 7-bit ASCII code (0-127) to its character class.
 *
 * A null entry marks a control character that is not allowed in JSON text:
 * JsonChecker::char() rejects the input when it looks one up. Only tab, line
 * feed and carriage return are classified as whitespace (C_WHITE). Codes of
 * 128 and above are not stored here; char() maps them to Classes::C_ETC.
 */
const ascii_class = [
// 0x00-0x07: control characters, rejected.
null, null, null, null, null, null, null, null,
// 0x08-0x0F: only TAB (0x09), LF (0x0A) and CR (0x0D) are accepted, as whitespace.
null, Classes::C_WHITE, Classes::C_WHITE, null, null, Classes::C_WHITE, null, null,
// 0x10-0x1F: control characters, rejected.
null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null,
// 0x20-0x27: space, then punctuation; the double quote (0x22) has its own class.
Classes::C_SPACE, Classes::C_ETC, Classes::C_QUOTE, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC,
// 0x28-0x2F: plus, comma, minus, point and slash occupy 0x2B-0x2F.
Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_PLUS, Classes::C_COMMA, Classes::C_MINUS, Classes::C_POINT, Classes::C_SLASH,
// 0x30-0x37: digits 0-7; zero is separate from the digits 1-9.
Classes::C_ZERO, Classes::C_DIGIT, Classes::C_DIGIT, Classes::C_DIGIT, Classes::C_DIGIT, Classes::C_DIGIT, Classes::C_DIGIT, Classes::C_DIGIT,
// 0x38-0x3F: digits 8 and 9, the colon (0x3A), then other punctuation.
Classes::C_DIGIT, Classes::C_DIGIT, Classes::C_COLON, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC,
// 0x40-0x47: upper-case hex letters A-D and F share C_ABCDF; E has its own class.
Classes::C_ETC, Classes::C_ABCDF, Classes::C_ABCDF, Classes::C_ABCDF, Classes::C_ABCDF, Classes::C_E, Classes::C_ABCDF, Classes::C_ETC,
// 0x48-0x57: upper-case letters H-W, no special meaning.
Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC,
Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC,
// 0x58-0x5F: X Y Z, then left bracket, backslash, right bracket (0x5B-0x5D).
Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_LSQRB, Classes::C_BACKS, Classes::C_RSQRB, Classes::C_ETC, Classes::C_ETC,
// 0x60-0x67: lower-case a-f each get their own class.
Classes::C_ETC, Classes::C_LOW_A, Classes::C_LOW_B, Classes::C_LOW_C, Classes::C_LOW_D, Classes::C_LOW_E, Classes::C_LOW_F, Classes::C_ETC,
// 0x68-0x6F: of h-o only l (0x6C) and n (0x6E) are distinguished.
Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_LOW_L, Classes::C_ETC, Classes::C_LOW_N, Classes::C_ETC,
// 0x70-0x77: of p-w only r, s, t and u (0x72-0x75) are distinguished.
Classes::C_ETC, Classes::C_ETC, Classes::C_LOW_R, Classes::C_LOW_S, Classes::C_LOW_T, Classes::C_LOW_U, Classes::C_ETC, Classes::C_ETC,
// 0x78-0x7F: x y z, left brace (0x7B), bar, right brace (0x7D), tilde, DEL.
Classes::C_ETC, Classes::C_ETC, Classes::C_ETC, Classes::C_LCURB, Classes::C_ETC, Classes::C_RCURB, Classes::C_ETC, Classes::C_ETC
];
/*
The state codes.
*/
/**
 * States of the JSON-checking automaton.
 *
 * The backing value of a case is its ordinal, i.e. its position in
 * declaration order, and that ordinal is the row index used with
 * `state_transition_table`. Do not reorder or renumber the cases without
 * updating the table.
 */
enum States: int
{
    case GO = 0;    /* start: waiting for the opening { or [ */
    case OK = 1;    /* a complete value has just been read */
    case OB = 2;    /* just after {: first key or } expected */
    case KE = 3;    /* after a comma inside an object: key expected */
    case CO = 4;    /* after a key: colon expected */
    case VA = 5;    /* value expected */
    case AR = 6;    /* just after [: first value or ] expected */
    case ST = 7;    /* inside a string */
    case ES = 8;    /* after a backslash inside a string */
    case U1 = 9;    /* after \u: first hex digit expected */
    case U2 = 10;   /* second hex digit expected */
    case U3 = 11;   /* third hex digit expected */
    case U4 = 12;   /* fourth hex digit expected */
    case MI = 13;   /* after a leading minus sign */
    case ZE = 14;   /* after a leading zero */
    case IN = 15;   /* inside the integer digits */
    case FR = 16;   /* after the decimal point: a digit is required */
    case FS = 17;   /* inside the fraction digits */
    case E1 = 18;   /* after e or E: sign or digit expected */
    case E2 = 19;   /* after the exponent sign: digit expected */
    case E3 = 20;   /* inside the exponent digits */
    case T1 = 21;   /* after "t": r expected */
    case T2 = 22;   /* after "tr": u expected */
    case T3 = 23;   /* after "tru": e expected */
    case F1 = 24;   /* after "f": a expected */
    case F2 = 25;   /* after "fa": l expected */
    case F3 = 26;   /* after "fal": s expected */
    case F4 = 27;   /* after "fals": e expected */
    case N1 = 28;   /* after "n": u expected */
    case N2 = 29;   /* after "nu": l expected */
    case N3 = 30;   /* after "nul": l expected */

    /* Sentinel kept from the C original: its ordinal equals the number of
       real states above. It has no row in the transition table. */
    case NR_STATES = 31;

    /**
     * Zero-based ordinal of this case (its position in declaration order).
     *
     * Returned straight from the backing value, so the lookup is constant
     * time and allocates nothing.
     */
    public function ord(): int
    {
        return $this->value;
    }
}
/**
 * The state transition table, indexed as state_transition_table[state][class].
 *
 * Rows follow the declaration order of States and columns follow the
 * declaration order of Classes; JsonChecker::char() indexes both with ord().
 * Each cell is one of:
 *   - a States case: the automaton simply moves to that state;
 *   - a negative integer: an action that also updates the mode stack
 *       -2  colon after a key          -6  opening brace
 *       -3  comma                      -7  closing bracket
 *       -4  closing quote of a string  -8  closing brace
 *       -5  opening bracket            -9  closing brace of an empty object
 *   - null: no legal transition; the text is rejected.
 * A JSON text is accepted if at the end of the text the state is OK and the
 * mode on top of the stack is MODE_DONE.
 *
 * Column order (index, then the characters in that class):
 *    0 space    1 other whitespace    2 {     3 }     4 [     5 ]
 *    6 :        7 ,                   8 "     9 \    10 /    11 +
 *   12 -       13 .                  14 0    15 1-9
 *   16 a   17 b   18 c   19 d   20 e   21 f
 *   22 l   23 n   24 r   25 s   26 t   27 u
 *   28 ABCDF   29 E   30 everything else
 */
const state_transition_table = [
/*start States::GO*/ [States::GO,States::GO,-6,null,-5,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*ok States::OK*/ [States::OK,States::OK,null,-8,null,-7,null,-3,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*object States::OB*/ [States::OB,States::OB,null,-9,null,null,null,null,States::ST,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*key States::KE*/ [States::KE,States::KE,null,null,null,null,null,null,States::ST,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*colon States::CO*/ [States::CO,States::CO,null,null,null,null,-2,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*value States::VA*/ [States::VA,States::VA,-6,null,-5,null,null,null,States::ST,null,null,null,States::MI,null,States::ZE,States::IN,null,null,null,null,null,States::F1,null,States::N1,null,null,States::T1,null,null,null,null],
/*array States::AR*/ [States::AR,States::AR,-6,null,-5,-7,null,null,States::ST,null,null,null,States::MI,null,States::ZE,States::IN,null,null,null,null,null,States::F1,null,States::N1,null,null,States::T1,null,null,null,null],
/*string States::ST*/ [States::ST,null,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,-4,States::ES,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST],
/*escape States::ES*/ [null,null,null,null,null,null,null,null,States::ST,States::ST,States::ST,null,null,null,null,null,null,States::ST,null,null,null,States::ST,null,States::ST,States::ST,null,States::ST,States::U1,null,null,null],
/*u1 States::U1*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::U2,States::U2,States::U2,States::U2,States::U2,States::U2,States::U2,States::U2,null,null,null,null,null,null,States::U2,States::U2,null],
/*u2 States::U2*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::U3,States::U3,States::U3,States::U3,States::U3,States::U3,States::U3,States::U3,null,null,null,null,null,null,States::U3,States::U3,null],
/*u3 States::U3*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::U4,States::U4,States::U4,States::U4,States::U4,States::U4,States::U4,States::U4,null,null,null,null,null,null,States::U4,States::U4,null],
/*u4 States::U4*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,States::ST,null,null,null,null,null,null,States::ST,States::ST,null],
/*minus States::MI*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::ZE,States::IN,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*zero States::ZE*/ [States::OK,States::OK,null,-8,null,-7,null,-3,null,null,null,null,null,States::FR,null,null,null,null,null,null,States::E1,null,null,null,null,null,null,null,null,States::E1,null],
/*int States::IN*/ [States::OK,States::OK,null,-8,null,-7,null,-3,null,null,null,null,null,States::FR,States::IN,States::IN,null,null,null,null,States::E1,null,null,null,null,null,null,null,null,States::E1,null],
/*frac States::FR*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::FS,States::FS,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*fracs States::FS*/ [States::OK,States::OK,null,-8,null,-7,null,-3,null,null,null,null,null,null,States::FS,States::FS,null,null,null,null,States::E1,null,null,null,null,null,null,null,null,States::E1,null],
/*e States::E1*/ [null,null,null,null,null,null,null,null,null,null,null,States::E2,States::E2,null,States::E3,States::E3,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*ex States::E2*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::E3,States::E3,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*exp States::E3*/ [States::OK,States::OK,null,-8,null,-7,null,-3,null,null,null,null,null,null,States::E3,States::E3,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*tr States::T1*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::T2,null,null,null,null,null,null],
/*tru States::T2*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::T3,null,null,null],
/*true States::T3*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::OK,null,null,null,null,null,null,null,null,null,null],
/*fa States::F1*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::F2,null,null,null,null,null,null,null,null,null,null,null,null,null,null],
/*fal States::F2*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::F3,null,null,null,null,null,null,null,null],
/*fals States::F3*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::F4,null,null,null,null,null],
/*false States::F4*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::OK,null,null,null,null,null,null,null,null,null,null],
/*nu States::N1*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::N2,null,null,null],
/*nul States::N2*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::N3,null,null,null,null,null,null,null,null],
/*null States::N3*/ [null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,States::OK,null,null,null,null,null,null,null,null]
];
/*
These modes can be pushed on the stack.
*/
/**
 * Container contexts kept on JsonChecker's mode stack. The entry on top of
 * the stack tells the checker how to interpret a comma, a colon, a closing
 * quote and a closing bracket or brace.
 */
enum Modes {
case MODE_ARRAY; // inside an array: pushed on '[', popped on ']'
case MODE_DONE; // bottom-of-stack marker pushed by the constructor and by reset()
case MODE_KEY; // inside an object where a key is due: pushed on '{' and after ',', popped on ':' or on the '}' of an empty object
case MODE_OBJECT; // inside an object after ':': popped on ',' (back to MODE_KEY) or on '}'
};
/**
 * Syntax-only JSON validator: a pushdown automaton ported from JSON.org's
 * JSON_checker.
 *
 * Each input byte is reduced to a character class through `ascii_class`, the
 * pair (current state, class) is looked up in `state_transition_table`, and a
 * bounded stack of Modes records the enclosing arrays and objects. Nothing is
 * decoded; the checker only answers "is this text well-formed JSON?".
 *
 * The start state only accepts whitespace, '{' or '[', so a text whose top
 * level is a bare scalar is rejected.
 */
class JsonChecker
{
    /** Value held by $valid for as long as no character has been rejected. */
    protected const GOOD = 0xBABAB00E;

    /** Equals self::GOOD until a character is rejected; restored by reset(). */
    protected int $valid = self::GOOD;

    /** Current automaton state. */
    protected States $state = States::GO;

    /** Index of the top entry of $stack, or -1 while the stack is empty. */
    protected int $top = -1;

    /** @var array<int, Modes> Mode stack; only indexes 0..$top are live. */
    protected array $stack = [];

    /**
     * @param int $depth Capacity of the mode stack. The MODE_DONE marker at the
     *                   bottom uses one slot, so a depth of N allows N - 1
     *                   levels of nested arrays/objects; with a depth below 2
     *                   every text is rejected.
     */
    public function __construct(protected int $depth)
    {
        $this->push(Modes::MODE_DONE);
    }

    /**
     * Push a mode onto the stack.
     *
     * @return bool false if the stack is already at capacity; the stack is
     *              left untouched in that case.
     */
    protected function push(Modes $mode): bool
    {
        // Test before moving $top so that a failed push cannot leave it
        // pointing past the last stored entry.
        if ($this->top + 1 >= $this->depth) {
            return false;
        }
        $this->top += 1;
        $this->stack[$this->top] = $mode;

        return true;
    }

    /**
     * Pop the stack, provided the mode on top is the expected one.
     *
     * @return bool false on underflow or if the top mode differs from $mode.
     */
    protected function pop(Modes $mode): bool
    {
        if ($this->top < 0 || $this->stack[$this->top] !== $mode) {
            return false;
        }
        $this->top -= 1;

        return true;
    }

    /**
     * Return the checker to its freshly constructed condition so the same
     * instance can validate another text.
     */
    protected function reset(): void
    {
        $this->valid = self::GOOD;
        $this->state = States::GO;
        $this->top = -1;
        $this->stack = [];
        $this->push(Modes::MODE_DONE);
    }

    /**
     * Feed the next character to the automaton.
     *
     * @param int $next_char Code of the next character; validateString()
     *                       passes one byte at a time. Codes of 128 and above
     *                       are all treated as Classes::C_ETC.
     *
     * @return bool true if the text is still acceptable so far. After the
     *              first false, every later call also returns false until
     *              reset() (or done(), which resets) has run.
     */
    protected function char(int $next_char): bool
    {
        if ($this->valid !== self::GOOD) {
            return false;
        }
        if ($next_char < 0) {
            return $this->reject();
        }

        /* Determine the character's class. */
        $next_class = $next_char >= 128 ? Classes::C_ETC : ascii_class[$next_char];
        if ($next_class === null) {
            // Control character that may not appear anywhere in JSON text.
            return $this->reject();
        }

        /* Get the next state from the state transition table. */
        $next_state = state_transition_table[$this->state->ord()][$next_class->ord()];
        if ($next_state instanceof States) {
            $this->state = $next_state;

            return true;
        }

        /* Otherwise the cell is an action code, or null for "no transition". */
        $accepted = match ($next_state) {
            /* empty } */ -9 => $this->closeContainer(Modes::MODE_KEY),
            /* } */       -8 => $this->closeContainer(Modes::MODE_OBJECT),
            /* ] */       -7 => $this->closeContainer(Modes::MODE_ARRAY),
            /* { */       -6 => $this->openContainer(Modes::MODE_KEY, States::OB),
            /* [ */       -5 => $this->openContainer(Modes::MODE_ARRAY, States::AR),
            /* " */       -4 => $this->endString(),
            /* , */       -3 => $this->comma(),
            /* : */       -2 => $this->colon(),
            /* bad action or null */
            default => false,
        };

        return $accepted || $this->reject();
    }

    /**
     * Finish a validation run and reset the checker.
     *
     * @return bool true only if no character was rejected, the automaton is
     *              in the OK state, and MODE_DONE is the mode left on top of
     *              the stack.
     */
    protected function done(): bool
    {
        $accepted = $this->valid === self::GOOD
            && $this->state === States::OK
            && $this->pop(Modes::MODE_DONE);
        $this->reset();

        return $accepted;
    }

    /**
     * Check whether $json is a syntactically valid JSON array or object.
     *
     * The string is consumed byte by byte. Checking stops at the first
     * rejected byte and the result is then false, whatever state the
     * automaton had reached before that byte. The checker is reset before
     * returning, so the instance can be reused.
     */
    public function validateString(string $json): bool
    {
        $length = \strlen($json);
        for ($i = 0; $i < $length; $i++) {
            if (!$this->char(\ord($json[$i]))) {
                // A rejection is final: never ask done() for a verdict here,
                // because the state before the bad byte may well have been OK.
                $this->reset();

                return false;
            }
        }

        return $this->done();
    }

    /** Mark the current text as rejected; always returns false. */
    private function reject(): bool
    {
        $this->valid = 0;

        return false;
    }

    /** Handle '}' or ']': pop the expected mode and move to the OK state. */
    private function closeContainer(Modes $expected): bool
    {
        if (!$this->pop($expected)) {
            return false;
        }
        $this->state = States::OK;

        return true;
    }

    /** Handle '{' or '[': push the container's mode and enter its first state. */
    private function openContainer(Modes $mode, States $first): bool
    {
        if (!$this->push($mode)) {
            return false;
        }
        $this->state = $first;

        return true;
    }

    /** Handle a closing quote: a key is followed by ':', a value is complete. */
    private function endString(): bool
    {
        $next = match ($this->stack[$this->top] ?? null) {
            Modes::MODE_KEY => States::CO,
            Modes::MODE_ARRAY, Modes::MODE_OBJECT => States::OK,
            default => null,
        };
        if ($next === null) {
            return false;
        }
        $this->state = $next;

        return true;
    }

    /** Handle ',': next key inside an object, next value inside an array. */
    private function comma(): bool
    {
        switch ($this->stack[$this->top] ?? null) {
            case Modes::MODE_OBJECT:
                /* A comma causes a flip from object mode to key mode. */
                if (!$this->pop(Modes::MODE_OBJECT) || !$this->push(Modes::MODE_KEY)) {
                    return false;
                }
                $this->state = States::KE;

                return true;
            case Modes::MODE_ARRAY:
                $this->state = States::VA;

                return true;
            default:
                return false;
        }
    }

    /** Handle ':': a colon causes a flip from key mode to object mode. */
    private function colon(): bool
    {
        if (!$this->pop(Modes::MODE_KEY) || !$this->push(Modes::MODE_OBJECT)) {
            return false;
        }
        $this->state = States::VA;

        return true;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment