Skip to content

Instantly share code, notes, and snippets.

@kalda341
Created October 19, 2021 20:18
Show Gist options
  • Save kalda341/647f936aefa2a49b7707d4fbf6768fb9 to your computer and use it in GitHub Desktop.
Save kalda341/647f936aefa2a49b7707d4fbf6768fb9 to your computer and use it in GitHub Desktop.
type JSONValue =
| string
| number
| null
| boolean
| JSONValue[]
// Record<string, JSONValue> is a circular value, so doesn't work.
// This is semantically equivalent.
| { [K: string]: JSONValue | undefined };
export const decode = (data: string): JSONValue => {
// Canonical JSON is a subset of regualar JSON, so the regular
// decode will work correctly.
return JSON.parse(data);
};
export const encode = (data: JSONValue, maxDepth: number = 100): string =>
encodeToArray(data, maxDepth).join("");
// If you're really fussed about performance you'd replace this
// with a more performant implmentation.
// Slice removes the final element, as we only want it between
// others.
const intersperse = <T>(element: T, array: readonly T[]): T[] =>
array.flatMap((x) => [x, element]).slice(0, -1);
// We could return a string from here, however there is a performance
// benefit to doing one large string join at the end compared to many
// along the way.
// Note: This is a fully functional implmentation, you may get some
// performance benefits to implementing in a non functional way.
// Note: Rather than using the spread operator, and .flat(), we could allow for
// any depth nested lists of chars and then flatten these all at once.
// (Like Erlang does with it's IOList). This could have a minor performance advantage.
// Typescript makes it difficult to specify such an operation, so we will leave
// it as a regular string[];
const encodeToArray = (
node: JSONValue,
maxDepth: number
): readonly string[] => {
// Avoid encoding recursive structures
if (maxDepth === 0) {
throw new Error("Max depth exceeded.");
}
if (node === null) {
return ["null"];
} else if (typeof node === "boolean") {
return [node.toString()];
} else if (typeof node === "number" && Number.isInteger(node)) {
// Integer numbers are desplayed in regular decimal notation
return [node.toString()];
} else if (typeof node === "number") {
return [encodeExponential(node)];
} else if (typeof node === "string") {
return ['"', sanitiseString(node), '"'];
} else if (Array.isArray(node)) {
return [
"[",
...intersperse(
",",
// We need to recursively encode the contents
node.flatMap((x) => encodeToArray(x, maxDepth - 1))
),
"]",
];
} else if (typeof node === "object") {
return [
"{",
...intersperse(
[","],
Object.keys(node)
// Keys must be sorted before encoding.
.sort()
.map((k): [string, JSONValue | undefined] => [k, node[k]])
// JSON.encode removes undefined values - we will too
.filter(valueIsNotUndefined)
// We need to recursively encode the contents
.map(([k, v]) => [
...encodeToArray(k, maxDepth - 1),
// Keys and values are separated by :
":",
...encodeToArray(v, maxDepth - 1),
])
).flat(),
"}",
];
} else {
throw new Error("Node is invalid JSONValue.");
}
};
const valueIsNotUndefined = <T>(
pair: [string, T | undefined]
): pair is [string, T] => pair[1] !== undefined;
const encodeExponential = (node: number): string => {
// If we're dealing with a number but not an integer we should encode
// in exponential notation
const exponent = Math.floor(Math.log10(Math.abs(node)));
const exponentString = exponent.toString();
const mantissa = node / Math.pow(10, exponent);
// Ensure decimal point exists
const mantissaString = Number.isInteger(mantissa)
? `${mantissa}.0`
: mantissa.toString();
return `${mantissaString}E${exponentString}`;
};
// Escape character | quote | backslash | high surrogate not followed by low, or low surrogate not following
// a high surrogate
// eslint-disable-next-line no-control-regex
const replaceRegex =
/[\u0000-\u001F]|"|\\|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/g;
const backslashReplacementMatch: Record<string, string> = {
"\b": "\\b",
"\t": "\\t",
"\n": "\\n",
"\f": "\\f",
"\r": "\\r",
'"': '\\"',
"\\": "\\\\",
};
const escapeCharacter = (character: string): string => {
const slashEscaped = backslashReplacementMatch[character];
if (slashEscaped !== undefined) {
return slashEscaped;
} else {
// If the character is marked for replacement and we can't slash escape it, we'll encode it
// with a six-character \u00xx uppercase hexadecimal escape sequence
const charCode = character
.charCodeAt(0)
.toString(16)
.toUpperCase()
.padStart(4, "0");
return "\\u" + charCode;
}
};
const sanitiseString = (node: string): string =>
node.replace(replaceRegex, escapeCharacter);
// A nice easy test case
const testString =
'{"-0":0,"-1":-1,"0.1":1.0E-1,"1":1,"10.1":1.01E1,"emoji":"😃","escape":"\\u001B","lone surrogate":"\\uDEAD","whitespace":" \\t\\n\\r"}';
console.log(encode(decode(testString)) === testString);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment