Skip to content

Instantly share code, notes, and snippets.

@Tasiro
Last active October 7, 2019 16:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Tasiro/d312f216b537ab1b43b6f38a2a0a0acb to your computer and use it in GitHub Desktop.
Save Tasiro/d312f216b537ab1b43b6f38a2a0a0acb to your computer and use it in GitHub Desktop.
A proof of concept json parser.
// Json format:
// type jsonStrings = string[]
// type jsonArrays = value[]
// type jsonObjects = [keyStringIndex, ...value][]
// type value = [-1, 0] (for parsing error) | [0, 0] (for null) | [1, 0 or 1] (for true (1) and false (0)) | [jsonValueType, indexInCorrespondingArray]
// type jsonValueType = -1 (for parsing error) | 0 (for null) | 1 (for boolean) | 2 (for number) | 3 (for string) | 4 (for array) | 5 (for object)
let JsonTypeNull = 0;
let JsonTypeBoolean = 1;
let JsonTypeNumber = 2;
let JsonTypeString = 3;
let JsonTypeArray = 4;
let JsonTypeObject = 5;
let JsonParseError = -1;
let JsonParseErrorValue = [JsonParseError, 0];
let jsonResultStrings = [];
let jsonResultArrays = [];
let jsonResultObjects = [];
let jsonParseErrorIndex = -1;
function ResetJsonResults () {
jsonResultStrings = [];
jsonResultArrays = [];
jsonResultObjects = [];
jsonParseErrorIndex = -1;
}
function JsonParse (text) {
function ArrayOfSize (size, value) {
if (size == 0) {
return [];
}
let array = [value];
while (length (array) < size) {
array = array ~ array;
}
return array [0 .. size];
}
// Exponential growth is noticeably faster than linear growth.
let jsonStrings = [""];
let jsonStringsSize = 0;
let jsonArrays = [[0]];
let jsonArraysSize = 0;
let jsonObjects = [[0, 0, 0]];
let jsonObjectsSize = 0;
let index = -1;
function IsAtEndOfInput () {
return index >= length (text);
}
let currentChar = 'a';
function SkipChar () {
assert (index <= length (text), "");
index++;
if (IsAtEndOfInput ()) {
currentChar = 'a';
}
else {
currentChar = text [index];
}
}
SkipChar ();
function SkipChars (count) {
ascent (i in 0..count) {
SkipChar ();
}
}
function NextIs (string) {
if (index + length (string) > length (text)) {
return false;
}
return text [index .. index + length (string)] == string;
}
let parseError = false;
let parseErrorIndex = -1;
function SetParseError () {
parseError = true;
parseErrorIndex = index;
}
function ParseValue () {
SkipWhitespace ();
if (NextIsNull ()) {
ParseNull ();
return [JsonTypeNull, 0];
}
else if (NextIsBoolean ()) {
let value = ParseBoolean ();
return [JsonTypeBoolean, value];
}
else if (NextIsNumber ()) {
let number = ParseNumber ();
if (parseError) {
return JsonParseErrorValue;
}
return [JsonTypeNumber, number];
}
else if (NextIsString ()) {
let string = ParseString ();
if (parseError) {
return JsonParseErrorValue;
}
let index = jsonStringsSize;
if (jsonStringsSize == length (jsonStrings)) {
jsonStrings = jsonStrings ~ ArrayOfSize (jsonStringsSize, "");
}
jsonStrings [jsonStringsSize] = string;
jsonStringsSize++;
return [JsonTypeString, index];
}
else if (NextIsArray ()) {
let array = ParseArray ();
if (parseError) {
return JsonParseErrorValue;
}
let index = jsonArraysSize;
if (jsonArraysSize == length (jsonArrays)) {
jsonArrays = jsonArrays ~ ArrayOfSize (jsonArraysSize, JsonParseErrorValue);
}
jsonArrays [jsonArraysSize] = array;
jsonArraysSize++;
return [JsonTypeString, index];
}
else if (NextIsObject ()) {
let object = ParseObject ();
if (parseError) {
return JsonParseErrorValue;
}
let index = jsonObjectsSize;
if (jsonObjectsSize == length (jsonObjects)) {
jsonObjects = jsonObjects ~ ArrayOfSize (jsonObjectsSize, [0] ~ JsonParseErrorValue);
}
jsonObjects [jsonObjectsSize] = object;
jsonObjectsSize++;
return [JsonTypeString, index];
}
else {
SetParseError ();
return JsonParseErrorValue;
}
}
function NextIsWhitespace () {
return ! IsAtEndOfInput ()
&& (currentChar == ' ' || currentChar == \t || currentChar == \r || currentChar == \n);
}
function SkipWhitespace () {
while (NextIsWhitespace ()) {
SkipChar ();
}
}
function NextIsNull () {
return NextIs ("null");
}
function ParseNull () {
SkipChars (4);
}
function NextIsBoolean () {
return NextIs ("true")
|| NextIs ("false");
}
function ParseBoolean () {
if (NextIs ("true")) {
SkipChars (4);
return 1;
}
else {
SkipChars (5);
return 0;
}
}
function NextIsNumber () {
return ! IsAtEndOfInput ()
&& (currentChar == '+' || currentChar == '-' || (currentChar >= '0' && currentChar <= '9'));
}
function ParseNumber () {
function ParseNumberSign () {
if (IsAtEndOfInput ()) {
SetParseError ();
return 0;
}
if (currentChar == '-') {
SkipChar ();
return -1;
}
if (currentChar == '+') {
SkipChar ();
return 1;
}
return 1;
}
// Parses a non-empty digit list.
function ParseDigitListAsString () {
if (! NextIsNumber ()) {
SetParseError ();
return "0";
}
let startIndex = index;
SkipChar ();
while (NextIsNumber ()) {
SkipChar ();
}
return text [startIndex..index];
}
function ParseDigitList () {
return atoi (ParseDigitListAsString ());
}
// Parses either "0", or a number without a leading 0.
function ParseDigitListWithoutLeadingNull () {
let numberString = ParseDigitListAsString ();
if (length (numberString) > 1 && numberString [0] == '0') {
SetParseError ();
}
return atoi (numberString);
}
function ParseDigitListAfterPoint () {
let numberString = ParseDigitListAsString ();
return atoi (numberString) / 10 ^ length (numberString);
}
let signFactor = ParseNumberSign ();
if (parseError) {
return 0;
}
let number = signFactor * ParseDigitListWithoutLeadingNull ();
if (parseError) {
return 0;
}
if (currentChar == '.') {
SkipChar ();
number += signFactor * ParseDigitListAfterPoint ();
if (parseError) {
return 0;
}
}
if (currentChar == 'e' || currentChar == 'E') {
SkipChar ();
let exponentSignFactor = ParseNumberSign ();
if (parseError) {
return 0;
}
let exponent = exponentSignFactor * ParseDigitList ();
if (parseError) {
return 0;
}
number *= 10 ^ exponent;
}
return number;
}
function NextIsString () {
return ! IsAtEndOfInput ()
&& currentChar == '"';
}
function ParseString () {
function ParseChar () {
if (currentChar != '\\') {
let char = currentChar;
SkipChar ();
return char;
}
else {
SkipChar ();
if (IsAtEndOfInput ()) {
SetParseError ();
return 'a';
}
let char = currentChar;
SkipChar ();
// Implementing \uXXXX is basically impossible, because any mathematical operation converts to number,
// and there is no way to get a char from a number without a big array.
alternative (char)
case ('"') {
return '"';
}
case ('/') {
return '/';
}
case ('\\') {
return '\\';
}
case ('b') {
return \x8;
}
case ('f') {
return \xc;
}
case ('t') {
return \t;
}
case ('r') {
return \r;
}
case ('n') {
return \n;
}
others {
SetParseError ();
return 'a';
}
}
}
if (! NextIsString ()) {
SetParseError ();
return "";
}
SkipChar ();
let value = "";
while (currentChar != '"') {
if (IsAtEndOfInput ()) {
SetParseError ();
return "";
}
value = value ~ [ParseChar ()];
if (parseError) {
return "";
}
}
SkipChar ();
return value;
}
function NextIsArray () {
return currentChar == '[';
}
function ParseArray () {
let elements = [];
function ParseElement () {
let element = ParseValue ();
if (parseError) {
return;
}
elements = elements ~ [element];
}
function ParseCommaThenElement () {
SkipWhitespace ();
if (currentChar != ',') {
SetParseError ();
return;
}
SkipChar ();
ParseElement ();
}
SkipChar ();
SkipWhitespace ();
if (currentChar != ']') {
ParseElement ();
if (parseError) {
return [];
}
SkipWhitespace ();
while (currentChar != ']') {
ParseCommaThenElement ();
if (parseError) {
return [];
}
SkipWhitespace ();
}
}
SkipChar ();
return elements;
}
function NextIsObject () {
return currentChar == '{';
}
function ParseObject () {
let entries = [];
function ParseEntry () {
if (! NextIsString ()) {
SetParseError ();
return;
}
let key = ParseValue ();
if (parseError) {
return;
}
let keyIndex = key [1];
SkipWhitespace ();
if (currentChar != ':') {
SetParseError ();
return;
}
SkipChar ();
let value = ParseValue ();
if (parseError) {
return;
}
entries = entries ~ [[keyIndex] ~ value];
SkipWhitespace ();
}
function ParseCommaThenEntry () {
SkipWhitespace ();
if (currentChar != ',') {
SetParseError ();
return;
}
SkipChar ();
SkipWhitespace ();
ParseEntry ();
}
SkipChar ();
SkipWhitespace ();
if (currentChar != '}') {
ParseEntry ();
if (parseError) {
return [];
}
SkipWhitespace ();
while (currentChar != '}') {
ParseCommaThenEntry ();
if (parseError) {
return [];
}
}
}
SkipChar ();
return entries;
}
let value = ParseValue ();
if (parseError) {
jsonParseErrorIndex = parseErrorIndex;
return JsonParseErrorValue;
}
SkipWhitespace ();
if (index != length (text)) {
jsonParseErrorIndex = index;
return JsonParseErrorValue;
}
if (jsonStringsSize == 0) {
jsonResultStrings = [];
}
else {
jsonResultStrings = jsonStrings [0..jsonStringsSize];
}
if (jsonArraysSize == 0) {
jsonResultArrays = [];
}
else {
jsonResultArrays = jsonArrays [0..jsonArraysSize];
}
if (jsonObjectsSize == 0) {
jsonResultObjects = [];
}
else {
jsonResultObjects = jsonObjects [0..jsonObjectsSize];
}
jsonParseErrorIndex = -1;
return value;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment