Last active
November 7, 2020 21:42
-
-
Save Igosuki/5019810c974343b4e467ff6c6f19d9e1 to your computer and use it in GitHub Desktop.
JS JSON parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const _ = require('lodash'); | |
// Structural characters of the JSON grammar used by the parser functions below.
const OPEN_BRACE = '{';
const CLOSE_BRACE = '}';
const OPEN_BRACKET = '[';
const CLOSE_BRACKET = ']';
const DOUBLE_QUOTE = '"';
const BACKSLASH = '\\';
const COMMA = ',';
// NOTE: despite its name this constant holds the colon that separates a key
// from its value. The name is kept because parseBlock refers to it.
const SEMICOLON = ':';
// Error thrown by the parser functions when the input cannot be parsed.
// `context` is the parser state object (it carries `pos`) that was active when
// the error was raised, so a caller can point at the offending position.
class ParseError extends Error {
  constructor(message, context) {
    super(message);
    this.name = 'ParseError';
    this.context = context;
  }
}
// Returns true when `pos` is the last index of `input` (or beyond it), i.e.
// when there is no further character to advance to.
function isAtEnd(input, pos) {
  return pos + 1 >= input.length;
}
// Maps the character that follows a backslash inside a JSON string to the
// character it stands for:
//   \b backspace, \f form feed, \n newline, \r carriage return, \t tab,
//   \" double quote, \\ backslash, \/ forward slash.
// Returns null when `char` does not name a single-character escape
// (\uXXXX is handled by parseString itself).
function getEscapeSequence(char) {
  switch (char) {
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    case '"': return '"';
    case '\\': return '\\';
    case '/': return '/';
    default: return null;
  }
}

// Parses a double-quoted JSON string.
// Expects context.pos to be on the opening quote; on success context.pos is
// left on the closing quote and the decoded content is returned.
// Throws ParseError when the string does not start with a quote, contains an
// invalid escape sequence, or is not terminated before the end of the input.
function parseString(input, context) {
  if (input.charAt(context.pos) !== DOUBLE_QUOTE) {
    throw new ParseError(`string at ${context.pos} did not start with '"'`, context);
  }
  const fourHexDigits = /^[0-9a-fA-F]{4}$/;
  let string = '';
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const nextChar = input.charAt(context.pos);
    if (nextChar === DOUBLE_QUOTE) {
      return string;
    }
    if (nextChar !== BACKSLASH) {
      string += nextChar;
      continue;
    }
    // A backslash as the very last character cannot start an escape.
    if (isAtEnd(input, context.pos)) {
      break;
    }
    // Consume the character after the backslash right away so that an escaped
    // quote is never mistaken for the end of the string.
    context.pos += 1;
    const escapeChar = input.charAt(context.pos);
    if (escapeChar === 'u') {
      const hex = input.slice(context.pos + 1, context.pos + 5);
      if (!fourHexDigits.test(hex)) {
        throw new ParseError(`invalid unicode escape \\u${hex} at ${context.pos}`, context);
      }
      string += String.fromCharCode(parseInt(hex, 16));
      context.pos += 4;
      continue;
    }
    const decoded = getEscapeSequence(escapeChar);
    if (decoded === null) {
      throw new ParseError(`invalid escape sequence \\${escapeChar} at ${context.pos}`, context);
    }
    string += decoded;
  }
  throw new ParseError(`unterminated string at ${context.pos}`, context);
}
// Returns true for the four characters treated as insignificant whitespace:
// space, newline, tab and carriage return.
function isWhitespace(nextChar) {
  return nextChar === ' ' || nextChar === '\n' || nextChar === '\t' || nextChar === '\r';
}
// Parses a JSON object.
// Expects context.pos to be on the opening brace; on success context.pos is
// left on the matching closing brace and the parsed object is returned.
// Throws ParseError on any deviation from `{ "key" : value , ... }`:
// missing colon, missing value, missing or misplaced comma, trailing comma,
// unquoted key, or end of input before the closing brace.
function parseBlock(input, context) {
  if (input.charAt(context.pos) !== OPEN_BRACE) {
    throw new ParseError(`block at ${context.pos} did not start with '${OPEN_BRACE}'`, context);
  }
  const obj = {};
  let key = null;
  // What the grammar allows next:
  // keyOrClose -> colon -> value -> commaOrClose -> key -> colon -> ...
  let expecting = 'keyOrClose';
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const nextChar = input.charAt(context.pos);
    if (isWhitespace(nextChar)) {
      continue;
    }
    switch (expecting) {
      case 'keyOrClose':
      case 'key':
        if (nextChar === CLOSE_BRACE) {
          if (expecting === 'key') {
            // A comma was seen but no pair followed it.
            throw new ParseError(`Expected next key value pair at ${context.pos}`, context);
          }
          return obj;
        }
        if (nextChar === COMMA) {
          throw new ParseError(`unexpected comma at ${context.pos}`, context);
        }
        if (nextChar !== DOUBLE_QUOTE) {
          throw new ParseError(`expected key with double quotes at ${context.pos}, got '${nextChar}'`, context);
        }
        key = parseString(input, context);
        expecting = 'colon';
        break;
      case 'colon':
        if (nextChar !== SEMICOLON) {
          throw new ParseError(`expected '${SEMICOLON}' after key "${key}" at ${context.pos}, got '${nextChar}'`, context);
        }
        expecting = 'value';
        break;
      case 'value':
        if (nextChar === COMMA || nextChar === CLOSE_BRACE) {
          throw new ParseError(`expected value for key "${key}" at ${context.pos}, got '${nextChar}'`, context);
        }
        // defineProperty instead of obj[key] = ... so that a "__proto__" key
        // becomes an ordinary own property rather than changing the prototype.
        Object.defineProperty(obj, key, {
          value: parseNext(input, context),
          writable: true,
          enumerable: true,
          configurable: true,
        });
        expecting = 'commaOrClose';
        break;
      default:
        // 'commaOrClose': a pair was just completed.
        if (nextChar === CLOSE_BRACE) {
          return obj;
        }
        if (nextChar !== COMMA) {
          throw new ParseError(`expected '${COMMA}' or '${CLOSE_BRACE}' at ${context.pos}, got '${nextChar}'`, context);
        }
        expecting = 'key';
    }
  }
  throw new ParseError(`unterminated block at ${context.pos}`, context);
}
// Parses a JSON array.
// Expects context.pos to be on the opening bracket; on success context.pos is
// left on the matching closing bracket and the parsed array is returned.
// Throws ParseError when values are not separated by exactly one comma, when a
// comma is not followed by a value, or when the input ends before the closing
// bracket.
function parseArray(input, context) {
  if (input.charAt(context.pos) !== OPEN_BRACKET) {
    throw new ParseError(`array at ${context.pos} did not start with '${OPEN_BRACKET}'`, context);
  }
  const items = [];
  // What the grammar allows next:
  // valueOrClose -> commaOrClose -> value -> commaOrClose -> ...
  let expecting = 'valueOrClose';
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const nextChar = input.charAt(context.pos);
    if (isWhitespace(nextChar)) {
      continue;
    }
    if (expecting === 'commaOrClose') {
      if (nextChar === CLOSE_BRACKET) {
        return items;
      }
      if (nextChar !== COMMA) {
        throw new ParseError(`expected '${COMMA}' or '${CLOSE_BRACKET}' in array at ${context.pos}, got '${nextChar}'`, context);
      }
      expecting = 'value';
      continue;
    }
    if (nextChar === CLOSE_BRACKET) {
      if (expecting === 'value') {
        // A comma was seen but no value followed it.
        throw new ParseError(`Expected next value in array at ${context.pos}`, context);
      }
      return items;
    }
    if (nextChar === COMMA) {
      if (items.length === 0) {
        throw new ParseError(`unexpected comma in array at ${context.pos}, arrays require at least one value`, context);
      }
      throw new ParseError(`unexpected comma in array at ${context.pos}`, context);
    }
    items.push(parseNext(input, context));
    expecting = 'commaOrClose';
  }
  throw new ParseError(`unterminated array at ${context.pos}`, context);
}
// Returns true when `char` may continue an unquoted token (a number or one of
// the keywords null/true/false): digits, lowercase letters, '.', the signs
// '+' and '-', and the uppercase exponent marker 'E'.
// Whitespace is deliberately not accepted: it terminates the token.
function isValidInLiteral(char) {
  return (char >= '0' && char <= '9')
    || (char >= 'a' && char <= 'z')
    || char === '.'
    || char === '-'
    || char === '+'
    || char === 'E';
}

// Parses an unquoted JSON value: null, true, false or a number.
// Expects context.pos to be on the first character of the token; on success
// context.pos is left on the last character of the token.
// Throws ParseError (with context.pos on the start of the token) when the
// token is neither one of the three keywords nor a well-formed JSON number.
function parseLiteral(input, context) {
  const jsonNumber = /^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?$/;
  const start = context.pos;
  // Find the extent of the token first, then classify it as a whole so that
  // prefixes such as "truex" or "1.2.3" are rejected instead of half-accepted.
  let end = start;
  while (!isAtEnd(input, end) && isValidInLiteral(input.charAt(end + 1))) {
    end += 1;
  }
  const literal = input.slice(start, end + 1);
  if (literal === 'null' || literal === 'true' || literal === 'false' || jsonNumber.test(literal)) {
    context.pos = end;
    if (literal === 'null') {
      return null;
    }
    if (literal === 'true') {
      return true;
    }
    if (literal === 'false') {
      return false;
    }
    return Number(literal);
  }
  throw new ParseError(`unexpected literal sequence '${literal}' at ${context.pos}`, context);
}
// Parses the value that starts at context.pos and returns it.
// `context` must contain the field `pos`, the index of the first character of
// the value. The character at that index selects the parser: '{' -> parseBlock,
// '[' -> parseArray, '"' -> parseString, anything else -> parseLiteral.
function parseNext(input, context) {
  switch (input.charAt(context.pos)) {
    case OPEN_BRACE:
      return parseBlock(input, context);
    case OPEN_BRACKET:
      return parseArray(input, context);
    case DOUBLE_QUOTE:
      return parseString(input, context);
    default:
      return parseLiteral(input, context);
  }
}
// Entry point: parses `input` as a single JSON value and returns it.
// Whitespace before and after the value is ignored; anything else after the
// value raises a ParseError instead of being silently dropped.
function parseJson(input) {
  const context = { pos: 0 };
  // Skip leading whitespace so the value parser starts on a real character.
  while (!isAtEnd(input, context.pos) && isWhitespace(input.charAt(context.pos))) {
    context.pos += 1;
  }
  const value = parseNext(input, context);
  // The value parsers leave pos on the last character they consumed, so
  // everything after it must be whitespace.
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const trailing = input.charAt(context.pos);
    if (!isWhitespace(trailing)) {
      throw new ParseError(`unexpected character '${trailing}' after end of value at ${context.pos}`, context);
    }
  }
  return value;
}
// Returns true only for a plain object (constructor === Object) that has no
// own enumerable keys. null and undefined yield false instead of throwing.
function isEmpty(obj) {
  if (obj === null || obj === undefined) {
    return false;
  }
  return obj.constructor === Object && Object.keys(obj).length === 0;
}
// Self-test: parse a sample document with parseJson and with the built-in
// JSON.parse, and fail loudly when the two results differ.
const testString = '{"k": "v", "foo":"bar", "array": ["thefoo", {"thebar":"thecat"}], "thebool": false, "thefloat": 0.1, "theint": 1}';

let testObject = null;
try {
  testObject = parseJson(testString);
} catch (e) {
  if (e.name === 'ParseError') {
    // Print the message, the input, and a caret under the failing position.
    console.error(e.message);
    console.error(testString);
    console.error("^".padStart(e.context.pos + 1, ' '));
  } else {
    console.error(e);
  }
}

let parsed = null;
try {
  parsed = JSON.parse(testString);
} catch (e) {
  console.error(`Invalid json`);
}

if (!_.isEqual(parsed, testObject)) {
  throw new Error(`Wrong object returned for '${testString}' : ${JSON.stringify(testObject)}`);
}
console.log(testObject);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "parserjs",
  "version": "1.0.0",
  "main": "index.js",
  "license": "MIT",
  "dependencies": {
    "lodash": "^4.17.20"
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import _ from "lodash"; | |
// Structural characters of the JSON grammar.
enum Structure {
  OPEN_BRACE = '{',
  CLOSE_BRACE = '}',
  OPEN_BRACKET = '[',
  CLOSE_BRACKET = ']',
  DOUBLE_QUOTE = '"',
  BACKSLASH = '\\',
  COMMA = ',',
  // NOTE: despite its name this member holds the colon that separates a key
  // from its value. The name is kept because parseBlock refers to it.
  SEMICOLON = ':',
}
// Error thrown by the parser functions when the input cannot be parsed.
// `cursor` is the Cursor that was being read when the error was raised; it is
// readable (and typed) so a catch site can report `cursor.pos`.
class ParseError extends Error {
  readonly cursor: Cursor;

  constructor(message: string, cursor: Cursor) {
    super(message);
    this.name = 'ParseError';
    this.cursor = cursor;
  }
}
// Maps the character that follows a backslash inside a JSON string to the
// character it stands for:
//   \b backspace, \f form feed, \n newline, \r carriage return, \t tab,
//   \" double quote, \\ backslash, \/ forward slash.
// Returns null when `char` does not name a single-character escape
// (\uXXXX is handled by parseString itself).
function getEscapeSequence(char: string): string | null {
  switch (char) {
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    case '"': return '"';
    case '\\': return '\\';
    case '/': return '/';
    default: return null;
  }
}

// Parses a double-quoted JSON string.
// Expects the cursor to be on the opening quote; on success the cursor is left
// on the closing quote and the decoded content is returned.
// Throws ParseError when the string does not start with a quote, contains an
// invalid escape sequence, or is not terminated before the end of the input.
function parseString(input: Cursor): string {
  if (input.current() !== Structure.DOUBLE_QUOTE) {
    throw new ParseError(`string at ${input.pos} did not start with '"'`, input);
  }
  const fourHexDigits = /^[0-9a-fA-F]{4}$/;
  let result = '';
  while (!input.isAtEnd()) {
    const nextChar = input.next();
    if (nextChar === Structure.DOUBLE_QUOTE) {
      return result;
    }
    if (nextChar !== Structure.BACKSLASH) {
      result += nextChar;
      continue;
    }
    // A backslash as the very last character cannot start an escape.
    if (input.isAtEnd()) {
      break;
    }
    // Consume the character after the backslash right away so that an escaped
    // quote is never mistaken for the end of the string.
    const escapeChar = input.next();
    if (escapeChar === 'u') {
      let hex = '';
      while (hex.length < 4 && !input.isAtEnd()) {
        hex += input.next();
      }
      if (!fourHexDigits.test(hex)) {
        throw new ParseError(`invalid unicode escape \\u${hex} at ${input.pos}`, input);
      }
      result += String.fromCharCode(parseInt(hex, 16));
      continue;
    }
    const decoded = getEscapeSequence(escapeChar);
    if (decoded === null) {
      throw new ParseError(`invalid escape sequence \\${escapeChar} at ${input.pos}`, input);
    }
    result += decoded;
  }
  throw new ParseError(`unterminated string at ${input.pos}`, input);
}
// Returns true for the four characters treated as insignificant whitespace:
// space, newline, tab and carriage return.
function isWhitespace(nextChar: string): boolean {
  return nextChar === ' ' || nextChar === '\n' || nextChar === '\t' || nextChar === '\r';
}
// A read position over an input string. `pos` is the index of the current
// character and starts at 0.
class Cursor {
  private readonly input: string;
  pos: number;

  constructor(input: string) {
    this.input = input;
    this.pos = 0;
  }

  // The character at `pos`, or '' when `pos` is outside the input.
  current(): string {
    return this.input.charAt(this.pos);
  }

  // Advances `pos` by one and returns the character now under it.
  next(): string {
    this.pos += 1;
    return this.current();
  }

  // The character after `pos`, without moving; '' when there is none.
  peek(): string {
    return this.input.charAt(this.pos + 1);
  }

  // True when `pos` is the last index of the input or beyond it, i.e. there is
  // no further character to advance to.
  isAtEnd(): boolean {
    return this.pos + 1 >= this.input.length;
  }
}
// Parses a JSON object.
// Expects the cursor to be on the opening brace; on success the cursor is left
// on the matching closing brace and the parsed object is returned.
// Throws ParseError on any deviation from `{ "key" : value , ... }`:
// missing colon, missing value, missing or misplaced comma, trailing comma,
// unquoted key, or end of input before the closing brace.
// The return type stays `any`, as it was before, so existing callers that read
// properties off the result keep compiling.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function parseBlock(input: Cursor): any {
  if (input.current() !== Structure.OPEN_BRACE) {
    throw new ParseError(`block at ${input.pos} did not start with '${Structure.OPEN_BRACE}'`, input);
  }
  const obj: Record<string, unknown> = {};
  let key = '';
  // What the grammar allows next:
  // keyOrClose -> colon -> value -> commaOrClose -> key -> colon -> ...
  let expecting: 'keyOrClose' | 'key' | 'colon' | 'value' | 'commaOrClose' = 'keyOrClose';
  while (!input.isAtEnd()) {
    const nextChar = input.next();
    if (isWhitespace(nextChar)) {
      continue;
    }
    switch (expecting) {
      case 'keyOrClose':
      case 'key':
        if (nextChar === Structure.CLOSE_BRACE) {
          if (expecting === 'key') {
            // A comma was seen but no pair followed it.
            throw new ParseError(`Expected next key value pair at ${input.pos}`, input);
          }
          return obj;
        }
        if (nextChar === Structure.COMMA) {
          throw new ParseError(`unexpected comma at ${input.pos}`, input);
        }
        if (nextChar !== Structure.DOUBLE_QUOTE) {
          throw new ParseError(`expected key with double quotes at ${input.pos}, got '${nextChar}'`, input);
        }
        key = parseString(input);
        expecting = 'colon';
        break;
      case 'colon':
        if (nextChar !== Structure.SEMICOLON) {
          throw new ParseError(`expected '${Structure.SEMICOLON}' after key "${key}" at ${input.pos}, got '${nextChar}'`, input);
        }
        expecting = 'value';
        break;
      case 'value':
        if (nextChar === Structure.COMMA || nextChar === Structure.CLOSE_BRACE) {
          throw new ParseError(`expected value for key "${key}" at ${input.pos}, got '${nextChar}'`, input);
        }
        // defineProperty instead of obj[key] = ... so that a "__proto__" key
        // becomes an ordinary own property rather than changing the prototype.
        Object.defineProperty(obj, key, {
          value: parseNext(input),
          writable: true,
          enumerable: true,
          configurable: true,
        });
        expecting = 'commaOrClose';
        break;
      default:
        // 'commaOrClose': a pair was just completed.
        if (nextChar === Structure.CLOSE_BRACE) {
          return obj;
        }
        if (nextChar !== Structure.COMMA) {
          throw new ParseError(`expected '${Structure.COMMA}' or '${Structure.CLOSE_BRACE}' at ${input.pos}, got '${nextChar}'`, input);
        }
        expecting = 'key';
    }
  }
  throw new ParseError(`unterminated block at ${input.pos}`, input);
}
// Parses a JSON array.
// Expects the cursor to be on the opening bracket; on success the cursor is
// left on the matching closing bracket and the parsed array is returned.
// Throws ParseError when values are not separated by exactly one comma, when a
// comma is not followed by a value, or when the input ends before the closing
// bracket.
// The element type stays `any`, as it was before, so existing callers keep
// compiling.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function parseArray(input: Cursor): any[] {
  if (input.current() !== Structure.OPEN_BRACKET) {
    throw new ParseError(`array at ${input.pos} did not start with '${Structure.OPEN_BRACKET}'`, input);
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const items: any[] = [];
  // What the grammar allows next:
  // valueOrClose -> commaOrClose -> value -> commaOrClose -> ...
  let expecting: 'valueOrClose' | 'value' | 'commaOrClose' = 'valueOrClose';
  while (!input.isAtEnd()) {
    const nextChar = input.next();
    if (isWhitespace(nextChar)) {
      continue;
    }
    if (expecting === 'commaOrClose') {
      if (nextChar === Structure.CLOSE_BRACKET) {
        return items;
      }
      if (nextChar !== Structure.COMMA) {
        throw new ParseError(`expected '${Structure.COMMA}' or '${Structure.CLOSE_BRACKET}' in array at ${input.pos}, got '${nextChar}'`, input);
      }
      expecting = 'value';
      continue;
    }
    if (nextChar === Structure.CLOSE_BRACKET) {
      if (expecting === 'value') {
        // A comma was seen but no value followed it.
        throw new ParseError(`Expected next value in array at ${input.pos}`, input);
      }
      return items;
    }
    if (nextChar === Structure.COMMA) {
      if (items.length === 0) {
        throw new ParseError(`unexpected comma in array at ${input.pos}, arrays require at least one value`, input);
      }
      throw new ParseError(`unexpected comma in array at ${input.pos}`, input);
    }
    items.push(parseNext(input));
    expecting = 'commaOrClose';
  }
  throw new ParseError(`unterminated array at ${input.pos}`, input);
}
// Returns true when `char` may continue an unquoted token (a number or one of
// the keywords null/true/false): digits, lowercase letters, '.', the signs
// '+' and '-', and the uppercase exponent marker 'E'.
// Whitespace is deliberately not accepted: it terminates the token.
function isValidInLiteral(char: string): boolean {
  return (char >= '0' && char <= '9')
    || (char >= 'a' && char <= 'z')
    || char === '.'
    || char === '-'
    || char === '+'
    || char === 'E';
}

// Parses an unquoted JSON value: null, true, false or a number.
// Expects the cursor to be on the first character of the token; on success the
// cursor is left on the last character of the token.
// Throws ParseError (with the cursor moved back to the start of the token)
// when the token is neither one of the three keywords nor a well-formed JSON
// number.
function parseLiteral(input: Cursor): number | boolean | null {
  const jsonNumber = /^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?$/;
  const start = input.pos;
  // Collect the whole token first, then classify it so that prefixes such as
  // "truex" or "1.2.3" are rejected instead of half-accepted.
  let literal = input.current();
  while (!input.isAtEnd() && isValidInLiteral(input.peek())) {
    literal += input.next();
  }
  if (literal === 'null') {
    return null;
  }
  if (literal === 'true') {
    return true;
  }
  if (literal === 'false') {
    return false;
  }
  if (jsonNumber.test(literal)) {
    return Number(literal);
  }
  input.pos = start;
  throw new ParseError(`unexpected literal sequence '${literal}' at ${input.pos}`, input);
}
// Parses the value that starts at the cursor's current position and returns it.
// The current character selects the parser: '{' -> parseBlock,
// '[' -> parseArray, '"' -> parseString, anything else -> parseLiteral.
// The return type stays `any`, as it was before, so existing callers keep
// compiling.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function parseNext(input: Cursor): any {
  switch (input.current()) {
    case Structure.OPEN_BRACE:
      return parseBlock(input);
    case Structure.OPEN_BRACKET:
      return parseArray(input);
    case Structure.DOUBLE_QUOTE:
      return parseString(input);
    default:
      return parseLiteral(input);
  }
}
// Entry point: parses `input` as a single JSON value and returns it.
// Whitespace before and after the value is ignored; anything else after the
// value raises a ParseError instead of being silently dropped.
// The return type stays `any`, as it was before, so existing callers keep
// compiling.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const parseJson = (input: string): any => {
  const cursor = new Cursor(input);
  // Skip leading whitespace so the value parser starts on a real character.
  while (!cursor.isAtEnd() && isWhitespace(cursor.current())) {
    cursor.next();
  }
  const value = parseNext(cursor);
  // The value parsers leave the cursor on the last character they consumed,
  // so everything after it must be whitespace.
  while (!cursor.isAtEnd()) {
    const trailing = cursor.next();
    if (!isWhitespace(trailing)) {
      throw new ParseError(`unexpected character '${trailing}' after end of value at ${cursor.pos}`, cursor);
    }
  }
  return value;
}
// Re-export lodash's deep equality check alongside the parser.
const isEqual = _.isEqual;

export {
  parseJson,
  isEqual,
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment