Last active
January 19, 2021 12:49
-
-
Save timw4mail/d34c01d65a5f7e5af62a17b3112aec1c to your computer and use it in GitHub Desktop.
Pure JS JSON Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Pure JS JSON Parser
 *
 * A hand-written recursive-descent parser. A single cursor (`i`) walks over
 * `str`; each nested `parseX` helper either consumes the construct it
 * recognises and returns its value, or returns `undefined` to signal
 * "not my construct" without moving the cursor.
 *
 * @see https://lihautan.com/json-parser-with-javascript/
 * @param {string} str - JSON text to parse
 * @returns {*} The parsed value (object, array, string, number, boolean or null)
 * @throws {Error} If `str` is not valid JSON. For most errors a snippet
 *   pointing at the offending position is written with `console.log` first.
 */
function parseJSON(str) {
  // Cursor into `str`, shared by every nested helper below.
  let i = 0;

  // Single-character escapes and the character each one stands for.
  // Declared before the first parse call: unlike the hoisted function
  // declarations below, a `const` is not usable before its declaration runs.
  const simpleEscapes = {
    '"': '"',
    "\\": "\\",
    "/": "/",
    b: "\b",
    f: "\f",
    n: "\n",
    r: "\r",
    t: "\t",
  };

  const value = parseValue();
  expectEndOfInput();
  return value;

  // The helpers are function declarations, so they are hoisted and may
  // legally appear after the `return` above.

  /** Parses `{ ... }` at the cursor; returns undefined if there is no `{`. */
  function parseObject() {
    if (str[i] !== '{') {
      return undefined;
    }
    i++;
    skipWhitespace();

    const result = {};
    let initial = true;
    // if it is not '}',
    // we take the path of string -> whitespace -> ':' -> value -> ...
    while (i < str.length && str[i] !== '}') {
      if (!initial) {
        eatChar(',');
        skipWhitespace();
      }
      const key = parseString();
      if (key === undefined) {
        expectObjectKey();
      }
      skipWhitespace();
      eatChar(':');
      const value = parseValue();
      // defineProperty instead of `result[key] = value`: a "__proto__" key
      // must become an ordinary own property (as with JSON.parse), not
      // replace the object's prototype.
      Object.defineProperty(result, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
      initial = false;
    }
    expectNotEndOfInput('}');
    // move to the character after '}'
    i++;
    return result;
  }

  /** Parses `[ ... ]` at the cursor; returns undefined if there is no `[`. */
  function parseArray() {
    if (str[i] !== '[') {
      return undefined;
    }
    i++;
    skipWhitespace();

    const result = [];
    let initial = true;
    while (i < str.length && str[i] !== ']') {
      if (!initial) {
        eatChar(',');
      }
      // parseValue throws when nothing follows the comma, so a trailing
      // comma is rejected instead of pushing `undefined`.
      result.push(parseValue());
      initial = false;
    }
    expectNotEndOfInput("]");
    // move to the character after ']'
    i++;
    return result;
  }

  /**
   * Parses any JSON value, skipping whitespace on both sides.
   * Throws if no value starts at the cursor.
   */
  function parseValue() {
    skipWhitespace();
    // Try each parser in turn; the first result that is not `undefined`
    // wins. `null` is a legitimate result, which is why the check is
    // against `undefined` specifically. (A `??` chain would express this
    // more directly but is deliberately avoided for wider engine support.)
    const candidates = [
      [parseString, []],
      [parseNumber, []],
      [parseObject, []],
      [parseArray, []],
      [parseKeyword, ['true', true]],
      [parseKeyword, ['false', false]],
      [parseKeyword, ['null', null]],
    ];
    let value;
    for (let n = 0; n < candidates.length; n++) {
      const [fn, args] = candidates[n];
      value = fn(...args);
      if (value !== undefined) {
        break;
      }
    }
    if (value === undefined) {
      expectValue();
    }
    skipWhitespace();
    return value;
  }

  /** Consumes the literal `name` and returns `value`, or returns undefined. */
  function parseKeyword(name, value) {
    if (str.slice(i, i + name.length) === name) {
      i += name.length;
      return value;
    }
    return undefined;
  }

  /** Advances the cursor past spaces, newlines, tabs and carriage returns. */
  function skipWhitespace() {
    while (
      str[i] === ' ' ||
      str[i] === "\n" ||
      str[i] === "\t" ||
      str[i] === "\r"
    ) {
      i++;
    }
  }

  /** Parses a double-quoted string; returns undefined if there is no `"`. */
  function parseString() {
    if (str[i] !== '"') {
      return undefined;
    }
    i++;
    let result = "";
    while (i < str.length && str[i] !== '"') {
      if (str[i] === "\\") {
        const char = str[i + 1];
        // Non-visible characters need to get parsed as their escape
        // sequence, not the letter of the escape sequence.
        const replacement = simpleEscapes[char];
        if (replacement !== undefined) {
          result += replacement;
          i++;
        } else if (char === "u") {
          if (
            isHexadecimal(str[i + 2]) &&
            isHexadecimal(str[i + 3]) &&
            isHexadecimal(str[i + 4]) &&
            isHexadecimal(str[i + 5])
          ) {
            result += String.fromCharCode(
              parseInt(str.slice(i + 2, i + 6), 16)
            );
            // skip 'u' and the four hex digits; the shared i++ below
            // skips the backslash itself
            i += 5;
          } else {
            i += 2;
            expectEscapeUnicode(result);
          }
        } else {
          expectEscapeCharacter(result);
        }
      } else {
        result += str[i];
      }
      i++;
    }
    expectNotEndOfInput('"');
    i++;
    return result;
  }

  /**
   * True only for a single hex digit. The typeof guard matters: indexing
   * past the end of `str` yields `undefined`, and a regex test would coerce
   * that to the string "undefined", which contains the hex letters d/e/f.
   */
  function isHexadecimal(char) {
    return typeof char === "string" && /^[0-9a-f]$/i.test(char);
  }

  /** Parses a JSON number; returns undefined if none starts at the cursor. */
  function parseNumber() {
    const start = i;
    if (str[i] === "-") {
      i++;
      expectDigit(str.slice(start, i));
    }
    if (str[i] === "0") {
      i++;
    } else if (str[i] >= "1" && str[i] <= "9") {
      i++;
      while (str[i] >= "0" && str[i] <= "9") {
        i++;
      }
    } else {
      // No integer part: inputs such as ".5" or "e5" are not JSON numbers.
      // (After a '-', expectDigit has already guaranteed a digit, so the
      // cursor is still at `start` here.)
      return undefined;
    }
    if (str[i] === ".") {
      i++;
      expectDigit(str.slice(start, i));
      while (str[i] >= "0" && str[i] <= "9") {
        i++;
      }
    }
    if (str[i] === "e" || str[i] === "E") {
      i++;
      if (str[i] === "-" || str[i] === "+") {
        i++;
      }
      expectDigit(str.slice(start, i));
      while (str[i] >= "0" && str[i] <= "9") {
        i++;
      }
    }
    return Number(str.slice(start, i));
  }

  /** Consumes `char` at the cursor or throws. */
  function eatChar(char) {
    if (str[i] !== char) {
      throw new Error(`Expected "${char}".`);
    }
    i++;
  }

  /** Throws if the cursor is at or beyond the end of the input. */
  function expectNotEndOfInput(expected) {
    // `>=` rather than `===`: the escape handling can move the cursor
    // more than one position at a time.
    if (i >= str.length) {
      printCodeSnippet(`Expecting a \`${expected}\` here`);
      throw new Error("JSON_ERROR_0001 Unexpected End of Input");
    }
  }

  /** Throws if unparsed input remains after the top-level value. */
  function expectEndOfInput() {
    if (i < str.length) {
      printCodeSnippet("Expected to end here");
      throw new Error("JSON_ERROR_0002 Expected End of Input");
    }
  }

  /** Reports a missing object key; always throws. */
  function expectObjectKey() {
    printCodeSnippet(`Expecting object key here
For example:
{ "foo": "bar" }
  ^^^^^`);
    throw new Error("JSON_ERROR_0003 Expecting JSON Key");
  }

  /** Throws unless the character at the cursor is a decimal digit. */
  function expectDigit(numSoFar) {
    if (!(str[i] >= "0" && str[i] <= "9")) {
      printCodeSnippet(`JSON_ERROR_0005 Expecting a digit here
For example:
${numSoFar}5
${" ".repeat(numSoFar.length)}^`);
      throw new Error("JSON_ERROR_0006 Expecting a digit");
    }
  }

  /** Reports an unknown `\x` escape; always throws. */
  function expectEscapeCharacter(strSoFar) {
    printCodeSnippet(`JSON_ERROR_0007 Expecting escape character
For example:
"${strSoFar}\\n"
${" ".repeat(strSoFar.length + 1)}^^
List of escape characters are: \\", \\\\, \\/, \\b, \\f, \\n, \\r, \\t, \\u`);
    throw new Error("JSON_ERROR_0008 Expecting an escape character");
  }

  /** Reports a malformed `\uXXXX` escape; always throws. */
  function expectEscapeUnicode(strSoFar) {
    printCodeSnippet(`Expect escape unicode
For example:
"${strSoFar}\\u0123
${" ".repeat(strSoFar.length + 1)}^^^^^^`);
    throw new Error("JSON_ERROR_0009 Expecting an escape unicode");
  }

  /** Reports that no JSON value starts at the cursor; always throws. */
  function expectValue() {
    printCodeSnippet("Expecting a JSON value here");
    throw new Error("JSON_ERROR_0010 Expecting a value");
  }

  /**
   * Logs up to 10 characters of context before the cursor, a caret under
   * the cursor position, and `message` aligned with the caret.
   */
  function printCodeSnippet(message) {
    const from = Math.max(0, i - 10);
    const trimmed = from > 0;
    const padding = (trimmed ? 4 : 0) + (i - from);
    const snippet = [
      (trimmed ? "... " : "") + str.slice(from, i + 1),
      " ".repeat(padding) + "^",
      " ".repeat(padding) + message
    ].join("\n");
    console.log(snippet);
  }
}
// Basic tests ----------------------------------------------------------------

// Fixture mixing numbers (including a hex literal), nested containers, null,
// and one string for every escape form the parser handles.
const testJS = [
  {
    a: 1,
    b: 0x1f,
    c: [1, 2, 3],
    d: 'foo',
    e: { f: { g: { h: null } } },
    i: ['"', '\\', '/', '\b', '\f', '\n', '\r', '\t', '\u0001', '\uface'],
  },
];
const testJson = JSON.stringify(testJS, null, 4);
const testParseJSON = parseJSON(testJson);

// Round trip: the hand-written parser and the built-in one must produce
// values that serialize to the same text.
const expectedRoundTrip = JSON.stringify(JSON.parse(testJson));
const actualRoundTrip = JSON.stringify(testParseJSON);
console.assert(
  expectedRoundTrip === actualRoundTrip,
  "The parser does not work the same as JSON.parse"
);

// String literal
const literalResult = parseJSON('"foo"');
console.assert(
  literalResult === 'foo',
  'The parser failed on a single string literal'
);
// Error tests ---------------------------------------------------------------- | |
// console.log("Try uncommenting the fail cases and see their error message"); | |
// console.log("↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓"); | |
// Fail cases: | |
// printFailCase("-"); | |
// printFailCase("-1."); | |
// printFailCase("1e"); | |
// printFailCase("-1e-2.2"); | |
// printFailCase("{"); | |
// printFailCase("{}{"); | |
// printFailCase('{"a"'); | |
// printFailCase('{"a": "b",'); | |
// printFailCase('{"a":"b""c"'); | |
// printFailCase('{"a":"foo\\}'); | |
// printFailCase('{"a":"foo\\u"}'); | |
// printFailCase("["); | |
// printFailCase("[]["); | |
// printFailCase("[[]"); | |
// printFailCase('["]'); | |
// function printFailCase(json) { | |
// try { | |
// console.log(`parseJSON('${json}')`); | |
// parseJSON(json); | |
// } catch (error) { | |
// console.error(error); | |
// } | |
// } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment