Skip to content

Instantly share code, notes, and snippets.

@Igosuki
Last active November 7, 2020 21:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Igosuki/5019810c974343b4e467ff6c6f19d9e1 to your computer and use it in GitHub Desktop.
Save Igosuki/5019810c974343b4e467ff6c6f19d9e1 to your computer and use it in GitHub Desktop.
JS JSON parser
const _ = require('lodash');
// Structural characters the parser compares input characters against.
const OPEN_BRACE = '{';
const CLOSE_BRACE = '}';
const OPEN_BRACKET = '[';
const CLOSE_BRACKET = ']';
const DOUBLE_QUOTE = '"';
const BACKSLASH = '\\';
const COMMA = ',';
// NOTE: despite its name this constant holds the colon (':'), not a semicolon.
const SEMICOLON = ':';
/**
 * Error type thrown by the parsing functions.
 *
 * Besides the message it keeps the `context` object it was given, so a
 * handler can inspect it after the failure.
 */
class ParseError extends Error {
  constructor(message, context) {
    super(message);
    // Same two own properties as before, assigned in the same order.
    Object.assign(this, { name: 'ParseError', context });
  }
}
/**
 * Tells whether `pos` sits on (or past) the last character of `input`,
 * i.e. whether advancing by one would leave the string.
 *
 * @param {string} input text being scanned
 * @param {number} pos current index into `input`
 * @returns {boolean} true when there is no character after `pos`
 */
function isAtEnd(input, pos) {
  const lastIndex = input.length - 1;
  return pos >= lastIndex;
}
// Escape letters recognised after a backslash:
//   b  backspace
//   f  form feed
//   n  newline
//   r  carriage return
//   t  tab
//   "  double quote
/**
 * Checks whether `char` is one of the recognised escape letters.
 *
 * The carriage-return letter 'r' was listed in the comment above but was
 * missing from the switch; it is now accepted like the others.
 * The backslash itself is not part of this table.
 *
 * @param {string} char the character that follows a backslash
 * @returns {string|null} `char` unchanged when it is recognised, otherwise null
 */
function getEscapeSequence(char) {
  switch (char) {
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
    case '"':
      return char;
    default:
      return null;
  }
}
/**
 * Parses a double-quoted JSON string starting at `context.pos`.
 *
 * On entry `context.pos` must point at the opening quote; on successful
 * return it points at the closing quote. Escape sequences are decoded
 * (\" \\ \/ \b \f \n \r \t and \uXXXX) instead of being rejected or copied
 * verbatim.
 *
 * @param {string} input the full text being parsed
 * @param {{pos: number}} context parse position, advanced in place
 * @returns {string} the decoded string contents
 * @throws {ParseError} when the string does not start with a quote, contains
 *   an unknown escape sequence, or is not closed before the input ends
 */
function parseString(input, context) {
  if (input.charAt(context.pos) !== DOUBLE_QUOTE) {
    throw new ParseError(`string at ${context.pos} did not start with '"'`, context)
  }
  // Escape letter -> character it stands for.
  const simpleEscapes = new Map([
    ['"', '"'],
    ['\\', '\\'],
    ['/', '/'],
    ['b', '\b'],
    ['f', '\f'],
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
  ]);
  let string = "";
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const nextChar = input.charAt(context.pos);
    if (nextChar === DOUBLE_QUOTE) {
      // An escaped quote never reaches this point: it is consumed below.
      return string;
    }
    if (nextChar !== BACKSLASH) {
      string += nextChar;
      continue;
    }
    if (isAtEnd(input, context.pos)) {
      // Backslash is the very last character: fall through to the unterminated error.
      break;
    }
    context.pos += 1;
    const escaped = input.charAt(context.pos);
    if (escaped === 'u') {
      const hex = input.slice(context.pos + 1, context.pos + 5);
      if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
        throw new ParseError(`invalid escape sequence \\u${hex} at ${context.pos}`, context)
      }
      string += String.fromCharCode(parseInt(hex, 16));
      // Leave pos on the last hex digit so the next iteration reads what follows.
      context.pos += 4;
      continue;
    }
    if (!simpleEscapes.has(escaped)) {
      throw new ParseError(`invalid escape sequence \\${escaped} at ${context.pos}`, context)
    }
    string += simpleEscapes.get(escaped);
  }
  throw new ParseError(`unterminated string at ${context.pos}`, context)
}
/**
 * Tells whether `nextChar` is one of the four whitespace characters the
 * parser skips: newline, space, tab or carriage return.
 *
 * @param {string} nextChar character to classify
 * @returns {boolean}
 */
function isWhitespace(nextChar) {
  return nextChar === '\n'
    || nextChar === ' '
    || nextChar === '\t'
    || nextChar === '\r';
}
/**
 * Parses a JSON object starting at `context.pos`.
 *
 * On entry `context.pos` must point at '{'; on successful return it points
 * at the matching '}'. The grammar is enforced with a small state machine:
 * key, then ':', then value, then either ',' (another pair must follow) or
 * '}'. Missing colons, missing commas, keys without a value, trailing
 * commas and unterminated objects are all rejected.
 *
 * @param {string} input the full text being parsed
 * @param {{pos: number}} context parse position, advanced in place
 * @returns {object} the parsed object
 * @throws {ParseError} on any structural error
 */
function parseBlock(input, context) {
  if (input.charAt(context.pos) !== OPEN_BRACE) {
    throw new ParseError(`block at ${context.pos} did not start with '${OPEN_BRACE}'`, context)
  }
  const obj = {};
  // What the next significant character must be:
  // 'first-key' | 'key' | 'colon' | 'value' | 'separator'
  let expecting = 'first-key';
  let key = null;
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const nextChar = input.charAt(context.pos);
    if (isWhitespace(nextChar)) {
      continue;
    }
    switch (expecting) {
      case 'first-key':
      case 'key':
        if (nextChar === CLOSE_BRACE) {
          if (expecting === 'key') {
            // A comma was seen, so another pair is mandatory.
            throw new ParseError(`Expected next key value pair at ${context.pos}`, context)
          }
          return obj;
        }
        if (nextChar !== DOUBLE_QUOTE) {
          throw new ParseError(`expected key with double quotes at ${context.pos}, got '${nextChar}'`, context)
        }
        key = parseString(input, context);
        expecting = 'colon';
        break;
      case 'colon':
        if (nextChar !== SEMICOLON) {
          const message = nextChar === COMMA
            ? `unexpected comma at ${context.pos}`
            : `expected ':' after key '${key}' at ${context.pos}, got '${nextChar}'`;
          throw new ParseError(message, context)
        }
        expecting = 'value';
        break;
      case 'value':
        if (nextChar === COMMA) {
          throw new ParseError(`unexpected comma at ${context.pos}`, context)
        }
        if (nextChar === CLOSE_BRACE) {
          throw new ParseError(`expected value for key '${key}' at ${context.pos}`, context)
        }
        // defineProperty creates an own property even for the key "__proto__",
        // where a plain assignment would change the object's prototype instead.
        Object.defineProperty(obj, key, {
          value: parseNext(input, context),
          writable: true,
          enumerable: true,
          configurable: true,
        });
        expecting = 'separator';
        break;
      default: // 'separator'
        if (nextChar === CLOSE_BRACE) {
          return obj;
        }
        if (nextChar !== COMMA) {
          throw new ParseError(`expected ',' or '${CLOSE_BRACE}' at ${context.pos}, got '${nextChar}'`, context)
        }
        expecting = 'key';
    }
  }
  throw new ParseError(`unterminated block at ${context.pos}, expected '${CLOSE_BRACE}'`, context)
}
/**
 * Parses a JSON array starting at `context.pos`.
 *
 * On entry `context.pos` must point at '['; on successful return it points
 * at the matching ']'. Values must be separated by exactly one comma;
 * missing commas, doubled commas, trailing commas and unterminated arrays
 * are rejected.
 *
 * @param {string} input the full text being parsed
 * @param {{pos: number}} context parse position, advanced in place
 * @returns {Array} the parsed array
 * @throws {ParseError} on any structural error (including a wrong first
 *   character, which previously surfaced as a plain Error)
 */
function parseArray(input, context) {
  if (input.charAt(context.pos) !== OPEN_BRACKET) {
    throw new ParseError(`array at ${context.pos} did not start with '${OPEN_BRACKET}'`, context)
  }
  const thearray = [];
  // What the next significant character must be:
  // 'first-value' | 'value' | 'separator'
  let expecting = 'first-value';
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const nextChar = input.charAt(context.pos);
    if (isWhitespace(nextChar)) {
      continue;
    }
    if (expecting === 'separator') {
      if (nextChar === CLOSE_BRACKET) {
        return thearray;
      }
      if (nextChar !== COMMA) {
        throw new ParseError(`expected ',' or '${CLOSE_BRACKET}' in array at ${context.pos}, got '${nextChar}'`, context)
      }
      expecting = 'value';
      continue;
    }
    if (nextChar === CLOSE_BRACKET) {
      if (expecting === 'value') {
        // A comma was seen, so another value is mandatory.
        throw new ParseError(`Expected next value in array at ${context.pos}`, context)
      }
      return thearray;
    }
    if (nextChar === COMMA) {
      const message = thearray.length === 0
        ? `unexpected comma in array at ${context.pos}, arrays require at least one value`
        : `unexpected comma in array at ${context.pos}, expected a value`;
      throw new ParseError(message, context)
    }
    thearray.push(parseNext(input, context));
    expecting = 'separator';
  }
  throw new ParseError(`unterminated array at ${context.pos}, expected '${CLOSE_BRACKET}'`, context)
}
/**
 * Tells whether `char` may appear while scanning a bare literal: a decimal
 * digit, a lowercase ASCII letter, a dot, or one of the whitespace
 * characters newline, space, tab and carriage return.
 *
 * @param {string} char character to classify
 * @returns {boolean}
 */
function isValidInLiteral(char) {
  const isDigit = char >= '0' && char <= '9';
  const isLowercaseLetter = char >= 'a' && char <= 'z';
  const isBlank = char === '\n' || char === ' ' || char === '\t' || char === '\r';
  const isDot = char === '.';
  return isDigit || isLowercaseLetter || isBlank || isDot;
}
/**
 * Parses a bare JSON literal starting at `context.pos`: a number, `true`,
 * `false` or `null`.
 *
 * On entry `context.pos` points at the first character of the literal; on
 * successful return it points at its last character. The whole token is
 * collected first and then validated, so trailing whitespace no longer
 * breaks numbers, negative numbers and exponents are accepted, and
 * malformed tokens such as `1.2.3`, `01` or `truex` are rejected.
 *
 * @param {string} input the full text being parsed
 * @param {{pos: number}} context parse position, advanced in place
 * @returns {number|boolean|null} the literal's value
 * @throws {ParseError} when no valid literal starts at `context.pos`
 */
function parseLiteral(input, context) {
  const firstChar = input.charAt(context.pos);
  const startsNumber = firstChar === '-' || (firstChar >= '0' && firstChar <= '9');
  const startsWord = firstChar >= 'a' && firstChar <= 'z';
  if (!startsNumber && !startsWord) {
    const message = firstChar === ''
      ? `unexpected end at ${context.pos}`
      : `unexpected character '${firstChar}' at ${context.pos}`;
    throw new ParseError(message, context)
  }
  // Characters that can continue the token; the exact shape is checked afterwards.
  const tokenChars = startsNumber ? /[0-9+\-.eE]/ : /[a-z]/;
  let literal = firstChar;
  while (!isAtEnd(input, context.pos) && tokenChars.test(input.charAt(context.pos + 1))) {
    context.pos += 1;
    literal += input.charAt(context.pos);
  }
  if (startsNumber) {
    // JSON number grammar: optional minus, integer part without leading
    // zeros, optional fraction, optional exponent.
    if (!/^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?$/.test(literal)) {
      throw new ParseError(`invalid number '${literal}' at ${context.pos}`, context)
    }
    return Number(literal);
  }
  switch (literal) {
    case 'null':
      return null;
    case 'true':
      return true;
    case 'false':
      return false;
    default:
      throw new ParseError(`unexpected literal sequence '${literal}' at ${context.pos}`, context)
  }
}
/**
 * Parses the value that starts at `context.pos` by dispatching on its first
 * character: '{' goes to parseBlock, '[' to parseArray, '"' to parseString
 * and anything else to parseLiteral.
 *
 * @param {string} input the full text being parsed
 * @param {{pos: number}} context must carry a `pos` field; it is passed on
 *   to the chosen parser
 * @returns {*} whatever the chosen parser returns
 */
function parseNext(input, context) {
  const lead = input.charAt(context.pos);
  if (lead === OPEN_BRACE) {
    return parseBlock(input, context);
  }
  if (lead === OPEN_BRACKET) {
    return parseArray(input, context);
  }
  if (lead === DOUBLE_QUOTE) {
    return parseString(input, context);
  }
  return parseLiteral(input, context);
}
/**
 * Parses a complete JSON document.
 *
 * Whitespace before the value is skipped, and anything other than
 * whitespace after the value is rejected instead of being silently ignored.
 *
 * @param {string} input JSON text
 * @returns {*} the parsed value
 * @throws {ParseError} when the text is malformed or has trailing content
 */
function parseJson(input) {
  const context = {pos: 0};
  // charAt returns '' past the end, which is not whitespace, so this stops.
  while (isWhitespace(input.charAt(context.pos))) {
    context.pos += 1;
  }
  const value = parseNext(input, context);
  // The parsers leave pos on the last character of the value they consumed.
  while (!isAtEnd(input, context.pos)) {
    context.pos += 1;
    const trailing = input.charAt(context.pos);
    if (!isWhitespace(trailing)) {
      throw new ParseError(`unexpected trailing character '${trailing}' at ${context.pos}`, context)
    }
  }
  return value;
}
/**
 * Tells whether `obj` has no own enumerable keys and was built by the
 * `Object` constructor.
 *
 * @param {object} obj value to inspect
 * @returns {boolean}
 */
function isEmpty(obj) {
  if (Object.keys(obj).length !== 0) {
    return false;
  }
  return obj.constructor === Object;
}
// Smoke test: parse a sample document with parseJson and compare the result
// against what the built-in JSON.parse produces for the same text.
const testString = '{"k": "v", "foo":"bar", "array": ["thefoo", {"thebar":"thecat"}], "thebool": false, "thefloat": 0.1, "theint": 1}';
let testObject = null;
try {
testObject = parseJson(testString);
} catch (e) {
if (e.name === 'ParseError') {
// Print the message, the input, and a caret placed under the failing position.
console.error(e.message);
console.error(testString);
console.error("^".padStart(e.context.pos + 1, ' '));
} else {
console.error(e);
}
}
// Reference result from the built-in parser; stays null if the text is invalid.
let parsed = null;
try {
parsed = JSON.parse(testString)
} catch(e) {
console.error(`Invalid json`);
}
// Deep comparison via lodash; a mismatch aborts the script.
if (!_.isEqual(parsed, testObject)) {
throw new Error(`Wrong object returned for '${testString}' : ${JSON.stringify(testObject)}`);
}
console.log(testObject);
{
"name": "parserjs",
"version": "1.0.0",
"main": "index.js",
"license": "MIT",
"dependencies": {
"lodash": "^4.17.20"
}
}
import _ from "lodash";
/** Structural characters the parser compares input characters against. */
enum Structure {
OPEN_BRACE = '{',
CLOSE_BRACE = '}',
OPEN_BRACKET = '[',
CLOSE_BRACKET = ']',
DOUBLE_QUOTE = '"',
BACKSLASH = '\\',
COMMA = ',',
// NOTE: despite its name this member holds the colon (':'), not a semicolon.
SEMICOLON = ':',
}
/**
 * Error type thrown by the parsing functions.
 *
 * The cursor that was active when the error was raised is kept on the
 * error. It is exposed as a typed, read-only field (it used to be
 * `private` and `any`) so that a handler can read `error.cursor.pos` to
 * locate the failure.
 */
class ParseError extends Error {
  readonly cursor: Cursor;

  constructor(message: string, cursor: Cursor) {
    super(message);
    this.name = 'ParseError';
    this.cursor = cursor;
  }
}
// Escape letters recognised after a backslash:
//   b  backspace
//   f  form feed
//   n  newline
//   r  carriage return
//   t  tab
//   "  double quote
/**
 * Checks whether `char` is one of the recognised escape letters.
 *
 * The carriage-return letter 'r' was listed in the comment above but was
 * missing from the switch; it is now accepted like the others.
 * The backslash itself is not part of this table.
 *
 * @param char the character that follows a backslash
 * @returns `char` unchanged when it is recognised, otherwise null
 */
function getEscapeSequence(char: string): string | null {
  switch (char) {
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
    case '"':
      return char;
    default:
      return null;
  }
}
/**
 * Parses a double-quoted JSON string at the cursor.
 *
 * On entry the cursor must be on the opening quote; on successful return it
 * is on the closing quote. Escape sequences are decoded (\" \\ \/ \b \f \n
 * \r \t and \uXXXX) instead of being rejected or copied verbatim.
 *
 * @param input cursor over the text being parsed, advanced in place
 * @returns the decoded string contents
 * @throws ParseError when the string does not start with a quote, contains
 *   an unknown escape sequence, or is not closed before the input ends
 */
function parseString(input: Cursor): string {
  if (input.current() !== Structure.DOUBLE_QUOTE) {
    throw new ParseError(`string at ${input.pos} did not start with '"'`, input)
  }
  // Escape letter -> character it stands for.
  const simpleEscapes = new Map<string, string>([
    ['"', '"'],
    ['\\', '\\'],
    ['/', '/'],
    ['b', '\b'],
    ['f', '\f'],
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
  ]);
  let result = "";
  while (!input.isAtEnd()) {
    const nextChar = input.next();
    if (nextChar === Structure.DOUBLE_QUOTE) {
      // An escaped quote never reaches this point: it is consumed below.
      return result;
    }
    if (nextChar !== Structure.BACKSLASH) {
      result += nextChar;
      continue;
    }
    if (input.isAtEnd()) {
      // Backslash is the very last character: fall through to the unterminated error.
      break;
    }
    const escaped = input.next();
    if (escaped === 'u') {
      let hex = '';
      while (hex.length < 4 && !input.isAtEnd()) {
        hex += input.next();
      }
      if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
        throw new ParseError(`invalid escape sequence \\u${hex} at ${input.pos}`, input)
      }
      result += String.fromCharCode(parseInt(hex, 16));
      continue;
    }
    const decoded = simpleEscapes.get(escaped);
    if (decoded === undefined) {
      throw new ParseError(`invalid escape sequence \\${escaped} at ${input.pos}`, input)
    }
    result += decoded;
  }
  throw new ParseError(`unterminated string at ${input.pos}`, input)
}
/**
 * Tells whether `nextChar` is one of the four whitespace characters the
 * parser skips: newline, space, tab or carriage return.
 */
function isWhitespace(nextChar: string) {
  return nextChar === '\n'
    || nextChar === ' '
    || nextChar === '\t'
    || nextChar === '\r';
}
/**
 * A read position over an input string.
 *
 * `pos` starts at 0 and only moves when `next()` is called. Reads past the
 * end of the input yield the empty string.
 */
class Cursor {
  pos = 0;

  constructor(private input: string) {}

  /** Character at the current position. */
  current(): string {
    return this.input.charAt(this.pos);
  }

  /** Advances by one and returns the character now under the cursor. */
  next(): string {
    this.pos++;
    return this.current();
  }

  /** Character right after the current position, without moving. */
  peek(): string {
    return this.input.charAt(this.pos + 1);
  }

  /** True when the cursor is on (or past) the last character of the input. */
  isAtEnd(): boolean {
    return this.pos + 1 >= this.input.length;
  }
}
/**
 * Parses a JSON object at the cursor.
 *
 * On entry the cursor must be on '{'; on successful return it is on the
 * matching '}'. The grammar is enforced with a small state machine: key,
 * then ':', then value, then either ',' (another pair must follow) or '}'.
 * Missing colons, missing commas, keys without a value, trailing commas and
 * unterminated objects are all rejected.
 *
 * @param input cursor over the text being parsed, advanced in place
 * @returns the parsed object
 * @throws ParseError on any structural error
 */
function parseBlock(input: Cursor) {
  if (input.current() !== Structure.OPEN_BRACE) {
    throw new ParseError(`block at ${input.pos} did not start with '${Structure.OPEN_BRACE}'`, input)
  }
  // Kept as `any` so the function's return type stays what callers already see.
  const obj: any = {};
  // What the next significant character must be.
  let expecting: 'first-key' | 'key' | 'colon' | 'value' | 'separator' = 'first-key';
  let key = '';
  while (!input.isAtEnd()) {
    const nextChar = input.next();
    if (isWhitespace(nextChar)) {
      continue;
    }
    switch (expecting) {
      case 'first-key':
      case 'key':
        if (nextChar === Structure.CLOSE_BRACE) {
          if (expecting === 'key') {
            // A comma was seen, so another pair is mandatory.
            throw new ParseError(`Expected next key value pair at ${input.pos}`, input)
          }
          return obj;
        }
        if (nextChar !== Structure.DOUBLE_QUOTE) {
          throw new ParseError(`expected key with double quotes at ${input.pos}, got '${nextChar}'`, input)
        }
        key = parseString(input);
        expecting = 'colon';
        break;
      case 'colon':
        if (nextChar !== Structure.SEMICOLON) {
          const message = nextChar === Structure.COMMA
            ? `unexpected comma at ${input.pos}`
            : `expected ':' after key '${key}' at ${input.pos}, got '${nextChar}'`;
          throw new ParseError(message, input)
        }
        expecting = 'value';
        break;
      case 'value':
        if (nextChar === Structure.COMMA) {
          throw new ParseError(`unexpected comma at ${input.pos}`, input)
        }
        if (nextChar === Structure.CLOSE_BRACE) {
          throw new ParseError(`expected value for key '${key}' at ${input.pos}`, input)
        }
        // defineProperty creates an own property even for the key "__proto__",
        // where a plain assignment would change the object's prototype instead.
        Object.defineProperty(obj, key, {
          value: parseNext(input),
          writable: true,
          enumerable: true,
          configurable: true,
        });
        expecting = 'separator';
        break;
      default: // 'separator'
        if (nextChar === Structure.CLOSE_BRACE) {
          return obj;
        }
        if (nextChar !== Structure.COMMA) {
          throw new ParseError(`expected ',' or '${Structure.CLOSE_BRACE}' at ${input.pos}, got '${nextChar}'`, input)
        }
        expecting = 'key';
    }
  }
  throw new ParseError(`unterminated block at ${input.pos}, expected '${Structure.CLOSE_BRACE}'`, input)
}
/**
 * Parses a JSON array at the cursor.
 *
 * On entry the cursor must be on '['; on successful return it is on the
 * matching ']'. Values must be separated by exactly one comma; missing
 * commas, doubled commas, trailing commas and unterminated arrays are
 * rejected.
 *
 * @param input cursor over the text being parsed, advanced in place
 * @returns the parsed array
 * @throws ParseError on any structural error (including a wrong first
 *   character, which previously surfaced as a plain Error)
 */
function parseArray(input: Cursor) {
  if (input.current() !== Structure.OPEN_BRACKET) {
    throw new ParseError(`array at ${input.pos} did not start with '${Structure.OPEN_BRACKET}'`, input)
  }
  // Element type stays `any`, matching what `new Array()` produced before.
  const thearray: any[] = [];
  // What the next significant character must be.
  let expecting: 'first-value' | 'value' | 'separator' = 'first-value';
  while (!input.isAtEnd()) {
    const nextChar = input.next();
    if (isWhitespace(nextChar)) {
      continue;
    }
    if (expecting === 'separator') {
      if (nextChar === Structure.CLOSE_BRACKET) {
        return thearray;
      }
      if (nextChar !== Structure.COMMA) {
        throw new ParseError(`expected ',' or '${Structure.CLOSE_BRACKET}' in array at ${input.pos}, got '${nextChar}'`, input)
      }
      expecting = 'value';
      continue;
    }
    if (nextChar === Structure.CLOSE_BRACKET) {
      if (expecting === 'value') {
        // A comma was seen, so another value is mandatory.
        throw new ParseError(`Expected next value in array at ${input.pos}`, input)
      }
      return thearray;
    }
    if (nextChar === Structure.COMMA) {
      const message = thearray.length === 0
        ? `unexpected comma in array at ${input.pos}, arrays require at least one value`
        : `unexpected comma in array at ${input.pos}, expected a value`;
      throw new ParseError(message, input)
    }
    thearray.push(parseNext(input));
    expecting = 'separator';
  }
  throw new ParseError(`unterminated array at ${input.pos}, expected '${Structure.CLOSE_BRACKET}'`, input)
}
/**
 * Tells whether `char` may appear while scanning a bare literal: a decimal
 * digit, a lowercase ASCII letter, a dot, or one of the whitespace
 * characters newline, space, tab and carriage return.
 */
function isValidInLiteral(char: string) {
  const isDigit = char >= '0' && char <= '9';
  const isLowercaseLetter = char >= 'a' && char <= 'z';
  const isBlank = char === '\n' || char === ' ' || char === '\t' || char === '\r';
  const isDot = char === '.';
  return isDigit || isLowercaseLetter || isBlank || isDot;
}
/**
 * Parses a bare JSON literal at the cursor: a number, `true`, `false` or
 * `null`.
 *
 * On entry the cursor is on the first character of the literal; on
 * successful return it is on its last character. The whole token is
 * collected first and then validated, so trailing whitespace no longer
 * breaks numbers, negative numbers and exponents are accepted, and
 * malformed tokens such as `1.2.3`, `01` or `truex` are rejected.
 *
 * @param input cursor over the text being parsed, advanced in place
 * @returns the literal's value
 * @throws ParseError when no valid literal starts at the cursor
 */
function parseLiteral(input: Cursor): number | boolean | null {
  const firstChar = input.current();
  const startsNumber = firstChar === '-' || (firstChar >= '0' && firstChar <= '9');
  const startsWord = firstChar >= 'a' && firstChar <= 'z';
  if (!startsNumber && !startsWord) {
    const message = firstChar === ''
      ? `unexpected end at ${input.pos}`
      : `unexpected character '${firstChar}' at ${input.pos}`;
    throw new ParseError(message, input)
  }
  // Characters that can continue the token; the exact shape is checked afterwards.
  const tokenChars = startsNumber ? /[0-9+\-.eE]/ : /[a-z]/;
  let literal = firstChar;
  while (!input.isAtEnd() && tokenChars.test(input.peek())) {
    literal += input.next();
  }
  if (startsNumber) {
    // JSON number grammar: optional minus, integer part without leading
    // zeros, optional fraction, optional exponent.
    if (!/^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?$/.test(literal)) {
      throw new ParseError(`invalid number '${literal}' at ${input.pos}`, input)
    }
    return Number(literal);
  }
  switch (literal) {
    case 'null':
      return null;
    case 'true':
      return true;
    case 'false':
      return false;
    default:
      throw new ParseError(`unexpected literal sequence '${literal}' at ${input.pos}`, input)
  }
}
/**
 * Parses the value under the cursor by dispatching on its first character:
 * '{' goes to parseBlock, '[' to parseArray, '"' to parseString and anything
 * else to parseLiteral. The cursor is handed to the chosen parser as is.
 */
function parseNext(input: Cursor) {
  const lead = input.current();
  if (lead === Structure.OPEN_BRACE) {
    return parseBlock(input);
  }
  if (lead === Structure.OPEN_BRACKET) {
    return parseArray(input);
  }
  if (lead === Structure.DOUBLE_QUOTE) {
    return parseString(input);
  }
  return parseLiteral(input);
}
/**
 * Parses a complete JSON document.
 *
 * Whitespace before the value is skipped, and anything other than
 * whitespace after the value is rejected instead of being silently ignored.
 *
 * @param input JSON text
 * @returns the parsed value
 * @throws ParseError when the text is malformed or has trailing content
 */
const parseJson = (input: string) => {
  const cursor = new Cursor(input);
  // current() yields '' past the end, which is not whitespace, so this stops.
  while (isWhitespace(cursor.current())) {
    cursor.next();
  }
  const value = parseNext(cursor);
  // The parsers leave the cursor on the last character of the value they consumed.
  while (!cursor.isAtEnd()) {
    const trailing = cursor.next();
    if (!isWhitespace(trailing)) {
      throw new ParseError(`unexpected trailing character '${trailing}' at ${cursor.pos}`, cursor)
    }
  }
  return value;
}
// Alias of lodash's `isEqual`, exported alongside the parser.
const isEqual = _.isEqual;
// Names exported by this module.
export {
parseJson,
isEqual,
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment