Last active
October 7, 2019 16:40
-
-
Save Tasiro/d312f216b537ab1b43b6f38a2a0a0acb to your computer and use it in GitHub Desktop.
A proof of concept json parser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Json format: | |
// type jsonStrings = string[] | |
// type jsonArrays = value[] | |
// type jsonObjects = [keyStringIndex, ...value][] | |
// type value = [-1, 0] (for parsing error) | [0, 0] (for null) | [1, 0 or 1] (for true (1) and false (0)) | [jsonValueType, indexInCorrespondingArray] | |
// type jsonValueType = -1 (for parsing error) | 0 (for null) | 1 (for boolean) | 2 (for number) | 3 (for string) | 4 (for array) | 5 (for object) | |
let JsonTypeNull = 0; | |
let JsonTypeBoolean = 1; | |
let JsonTypeNumber = 2; | |
let JsonTypeString = 3; | |
let JsonTypeArray = 4; | |
let JsonTypeObject = 5; | |
let JsonParseError = -1; | |
let JsonParseErrorValue = [JsonParseError, 0]; | |
let jsonResultStrings = []; | |
let jsonResultArrays = []; | |
let jsonResultObjects = []; | |
let jsonParseErrorIndex = -1; | |
function ResetJsonResults () { | |
jsonResultStrings = []; | |
jsonResultArrays = []; | |
jsonResultObjects = []; | |
jsonParseErrorIndex = -1; | |
} | |
function JsonParse (text) { | |
function ArrayOfSize (size, value) { | |
if (size == 0) { | |
return []; | |
} | |
let array = [value]; | |
while (length (array) < size) { | |
array = array ~ array; | |
} | |
return array [0 .. size]; | |
} | |
// Exponential growth is noticeably faster than linear growth. | |
let jsonStrings = [""]; | |
let jsonStringsSize = 0; | |
let jsonArrays = [[0]]; | |
let jsonArraysSize = 0; | |
let jsonObjects = [[0, 0, 0]]; | |
let jsonObjectsSize = 0; | |
let index = -1; | |
function IsAtEndOfInput () { | |
return index >= length (text); | |
} | |
let currentChar = 'a'; | |
function SkipChar () { | |
assert (index <= length (text), ""); | |
index++; | |
if (IsAtEndOfInput ()) { | |
currentChar = 'a'; | |
} | |
else { | |
currentChar = text [index]; | |
} | |
} | |
SkipChar (); | |
function SkipChars (count) { | |
ascent (i in 0..count) { | |
SkipChar (); | |
} | |
} | |
function NextIs (string) { | |
if (index + length (string) > length (text)) { | |
return false; | |
} | |
return text [index .. index + length (string)] == string; | |
} | |
let parseError = false; | |
let parseErrorIndex = -1; | |
function SetParseError () { | |
parseError = true; | |
parseErrorIndex = index; | |
} | |
function ParseValue () { | |
SkipWhitespace (); | |
if (NextIsNull ()) { | |
ParseNull (); | |
return [JsonTypeNull, 0]; | |
} | |
else if (NextIsBoolean ()) { | |
let value = ParseBoolean (); | |
return [JsonTypeBoolean, value]; | |
} | |
else if (NextIsNumber ()) { | |
let number = ParseNumber (); | |
if (parseError) { | |
return JsonParseErrorValue; | |
} | |
return [JsonTypeNumber, number]; | |
} | |
else if (NextIsString ()) { | |
let string = ParseString (); | |
if (parseError) { | |
return JsonParseErrorValue; | |
} | |
let index = jsonStringsSize; | |
if (jsonStringsSize == length (jsonStrings)) { | |
jsonStrings = jsonStrings ~ ArrayOfSize (jsonStringsSize, ""); | |
} | |
jsonStrings [jsonStringsSize] = string; | |
jsonStringsSize++; | |
return [JsonTypeString, index]; | |
} | |
else if (NextIsArray ()) { | |
let array = ParseArray (); | |
if (parseError) { | |
return JsonParseErrorValue; | |
} | |
let index = jsonArraysSize; | |
if (jsonArraysSize == length (jsonArrays)) { | |
jsonArrays = jsonArrays ~ ArrayOfSize (jsonArraysSize, JsonParseErrorValue); | |
} | |
jsonArrays [jsonArraysSize] = array; | |
jsonArraysSize++; | |
return [JsonTypeString, index]; | |
} | |
else if (NextIsObject ()) { | |
let object = ParseObject (); | |
if (parseError) { | |
return JsonParseErrorValue; | |
} | |
let index = jsonObjectsSize; | |
if (jsonObjectsSize == length (jsonObjects)) { | |
jsonObjects = jsonObjects ~ ArrayOfSize (jsonObjectsSize, [0] ~ JsonParseErrorValue); | |
} | |
jsonObjects [jsonObjectsSize] = object; | |
jsonObjectsSize++; | |
return [JsonTypeString, index]; | |
} | |
else { | |
SetParseError (); | |
return JsonParseErrorValue; | |
} | |
} | |
function NextIsWhitespace () { | |
return ! IsAtEndOfInput () | |
&& (currentChar == ' ' || currentChar == \t || currentChar == \r || currentChar == \n); | |
} | |
function SkipWhitespace () { | |
while (NextIsWhitespace ()) { | |
SkipChar (); | |
} | |
} | |
function NextIsNull () { | |
return NextIs ("null"); | |
} | |
function ParseNull () { | |
SkipChars (4); | |
} | |
function NextIsBoolean () { | |
return NextIs ("true") | |
|| NextIs ("false"); | |
} | |
function ParseBoolean () { | |
if (NextIs ("true")) { | |
SkipChars (4); | |
return 1; | |
} | |
else { | |
SkipChars (5); | |
return 0; | |
} | |
} | |
function NextIsNumber () { | |
return ! IsAtEndOfInput () | |
&& (currentChar == '+' || currentChar == '-' || (currentChar >= '0' && currentChar <= '9')); | |
} | |
function ParseNumber () { | |
function ParseNumberSign () { | |
if (IsAtEndOfInput ()) { | |
SetParseError (); | |
return 0; | |
} | |
if (currentChar == '-') { | |
SkipChar (); | |
return -1; | |
} | |
if (currentChar == '+') { | |
SkipChar (); | |
return 1; | |
} | |
return 1; | |
} | |
// Parses a non-empty digit list. | |
function ParseDigitListAsString () { | |
if (! NextIsNumber ()) { | |
SetParseError (); | |
return "0"; | |
} | |
let startIndex = index; | |
SkipChar (); | |
while (NextIsNumber ()) { | |
SkipChar (); | |
} | |
return text [startIndex..index]; | |
} | |
function ParseDigitList () { | |
return atoi (ParseDigitListAsString ()); | |
} | |
// Parses either "0", or a number without a leading 0. | |
function ParseDigitListWithoutLeadingNull () { | |
let numberString = ParseDigitListAsString (); | |
if (length (numberString) > 1 && numberString [0] == '0') { | |
SetParseError (); | |
} | |
return atoi (numberString); | |
} | |
function ParseDigitListAfterPoint () { | |
let numberString = ParseDigitListAsString (); | |
return atoi (numberString) / 10 ^ length (numberString); | |
} | |
let signFactor = ParseNumberSign (); | |
if (parseError) { | |
return 0; | |
} | |
let number = signFactor * ParseDigitListWithoutLeadingNull (); | |
if (parseError) { | |
return 0; | |
} | |
if (currentChar == '.') { | |
SkipChar (); | |
number += signFactor * ParseDigitListAfterPoint (); | |
if (parseError) { | |
return 0; | |
} | |
} | |
if (currentChar == 'e' || currentChar == 'E') { | |
SkipChar (); | |
let exponentSignFactor = ParseNumberSign (); | |
if (parseError) { | |
return 0; | |
} | |
let exponent = exponentSignFactor * ParseDigitList (); | |
if (parseError) { | |
return 0; | |
} | |
number *= 10 ^ exponent; | |
} | |
return number; | |
} | |
function NextIsString () { | |
return ! IsAtEndOfInput () | |
&& currentChar == '"'; | |
} | |
function ParseString () { | |
function ParseChar () { | |
if (currentChar != '\\') { | |
let char = currentChar; | |
SkipChar (); | |
return char; | |
} | |
else { | |
SkipChar (); | |
if (IsAtEndOfInput ()) { | |
SetParseError (); | |
return 'a'; | |
} | |
let char = currentChar; | |
SkipChar (); | |
// Implementing \uXXXX is basically impossible, because any mathematical operation converts to number, | |
// and there is no way to get a char from a number without a big array. | |
alternative (char) | |
case ('"') { | |
return '"'; | |
} | |
case ('/') { | |
return '/'; | |
} | |
case ('\\') { | |
return '\\'; | |
} | |
case ('b') { | |
return \x8; | |
} | |
case ('f') { | |
return \xc; | |
} | |
case ('t') { | |
return \t; | |
} | |
case ('r') { | |
return \r; | |
} | |
case ('n') { | |
return \n; | |
} | |
others { | |
SetParseError (); | |
return 'a'; | |
} | |
} | |
} | |
if (! NextIsString ()) { | |
SetParseError (); | |
return ""; | |
} | |
SkipChar (); | |
let value = ""; | |
while (currentChar != '"') { | |
if (IsAtEndOfInput ()) { | |
SetParseError (); | |
return ""; | |
} | |
value = value ~ [ParseChar ()]; | |
if (parseError) { | |
return ""; | |
} | |
} | |
SkipChar (); | |
return value; | |
} | |
function NextIsArray () { | |
return currentChar == '['; | |
} | |
function ParseArray () { | |
let elements = []; | |
function ParseElement () { | |
let element = ParseValue (); | |
if (parseError) { | |
return; | |
} | |
elements = elements ~ [element]; | |
} | |
function ParseCommaThenElement () { | |
SkipWhitespace (); | |
if (currentChar != ',') { | |
SetParseError (); | |
return; | |
} | |
SkipChar (); | |
ParseElement (); | |
} | |
SkipChar (); | |
SkipWhitespace (); | |
if (currentChar != ']') { | |
ParseElement (); | |
if (parseError) { | |
return []; | |
} | |
SkipWhitespace (); | |
while (currentChar != ']') { | |
ParseCommaThenElement (); | |
if (parseError) { | |
return []; | |
} | |
SkipWhitespace (); | |
} | |
} | |
SkipChar (); | |
return elements; | |
} | |
function NextIsObject () { | |
return currentChar == '{'; | |
} | |
function ParseObject () { | |
let entries = []; | |
function ParseEntry () { | |
if (! NextIsString ()) { | |
SetParseError (); | |
return; | |
} | |
let key = ParseValue (); | |
if (parseError) { | |
return; | |
} | |
let keyIndex = key [1]; | |
SkipWhitespace (); | |
if (currentChar != ':') { | |
SetParseError (); | |
return; | |
} | |
SkipChar (); | |
let value = ParseValue (); | |
if (parseError) { | |
return; | |
} | |
entries = entries ~ [[keyIndex] ~ value]; | |
SkipWhitespace (); | |
} | |
function ParseCommaThenEntry () { | |
SkipWhitespace (); | |
if (currentChar != ',') { | |
SetParseError (); | |
return; | |
} | |
SkipChar (); | |
SkipWhitespace (); | |
ParseEntry (); | |
} | |
SkipChar (); | |
SkipWhitespace (); | |
if (currentChar != '}') { | |
ParseEntry (); | |
if (parseError) { | |
return []; | |
} | |
SkipWhitespace (); | |
while (currentChar != '}') { | |
ParseCommaThenEntry (); | |
if (parseError) { | |
return []; | |
} | |
} | |
} | |
SkipChar (); | |
return entries; | |
} | |
let value = ParseValue (); | |
if (parseError) { | |
jsonParseErrorIndex = parseErrorIndex; | |
return JsonParseErrorValue; | |
} | |
SkipWhitespace (); | |
if (index != length (text)) { | |
jsonParseErrorIndex = index; | |
return JsonParseErrorValue; | |
} | |
if (jsonStringsSize == 0) { | |
jsonResultStrings = []; | |
} | |
else { | |
jsonResultStrings = jsonStrings [0..jsonStringsSize]; | |
} | |
if (jsonArraysSize == 0) { | |
jsonResultArrays = []; | |
} | |
else { | |
jsonResultArrays = jsonArrays [0..jsonArraysSize]; | |
} | |
if (jsonObjectsSize == 0) { | |
jsonResultObjects = []; | |
} | |
else { | |
jsonResultObjects = jsonObjects [0..jsonObjectsSize]; | |
} | |
jsonParseErrorIndex = -1; | |
return value; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment