Skip to content

Instantly share code, notes, and snippets.

@nicolo-ribaudo
Created June 4, 2023 13:14
Show Gist options
  • Star 17 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save nicolo-ribaudo/c5b09c43a10a99fd92c90e81e60732ec to your computer and use it in GitHub Desktop.
Save nicolo-ribaudo/c5b09c43a10a99fd92c90e81e60732ec to your computer and use it in GitHub Desktop.
// Code points of the characters the tokenizer compares against.
// A `const` initialised with a numeric literal already has a literal type,
// so no `as const` assertion is needed for use in `switch` cases.
const CH_BRACE_L = 0x7b; // {
const CH_BRACE_R = 0x7d; // }
const CH_SQUARE_L = 0x5b; // [
const CH_SQUARE_R = 0x5d; // ]
const CH_QUOTE_D = 0x22; // "
const CH_ESCAPE = 0x5c; // \
const CH_COMMA = 0x2c; // ,
const CH_COLON = 0x3a; // :
const CH_DOT = 0x2e; // .
const CH_MINUS = 0x2d; // -
const CH_PLUS = 0x2b; // +
const CH_UPPER_E = 0x45; // E

/**
 * Tells whether `cp` may appear inside a keyword (`true`, `false`, `null`)
 * or number token.
 *
 * This is a deliberately permissive character class used only to find where
 * a token ends; it does not validate the token. It accepts lowercase ASCII
 * letters (keywords and the `e` exponent marker), ASCII digits, `.`, `-`,
 * and also `+` and `E` so that exponent forms such as `1e+5` and `1E5` are
 * kept in one token instead of being cut at the sign or the marker.
 *
 * @param cp Code point to classify.
 * @returns `true` when the code point belongs to the token character class.
 */
function isKwdOrNum(cp: number) {
  return (
    (0x61 <= cp && cp <= 0x7a) || // a-z
    (0x30 <= cp && cp <= 0x39) || // 0-9
    cp === CH_DOT ||
    cp === CH_MINUS ||
    cp === CH_PLUS ||
    cp === CH_UPPER_E
  );
}
/**
 * Tells whether `cp` is an ordinary character inside a string literal,
 * i.e. anything except the backslash that starts an escape sequence and
 * the double quote that terminates the string.
 */
function isSTRChar(cp: number) {
  const endsPlainRun = cp === CH_ESCAPE || cp === CH_QUOTE_D;
  return !endsPlainRun;
}
/**
 * Parses a sync or async iterable yielding chunks of a single JSON-encoded
 * value and resolves to the decoded value.
 *
 * Chunks may be split anywhere, including inside strings, escape sequences,
 * numbers and keywords. Scalars are accumulated as raw text and decoded with
 * `JSON.parse`; containers are assembled by a small state machine.
 *
 * It should parse valid JSON, but it is not guaranteed to reject invalid
 * JSON: structural validation is minimal, and the end of the stream is not
 * checked for unterminated containers or strings.
 *
 * Number support depends on the characters accepted by `isKwdOrNum`.
 *
 * @param stream Chunks of JSON text, in order.
 * @returns The decoded value (`undefined` when the stream holds no value).
 * @throws Error on a character that is unexpected in the current state or on
 *   a mismatched closing bracket; `SyntaxError` from `JSON.parse` when a
 *   string, number or keyword token is malformed.
 */
export async function parseJSONStreaming(
  stream: AsyncIterable<string> | Iterable<string>
): Promise<unknown> {
  // Values under construction; `value` is the logical top of this stack.
  const stack: unknown[] = [];
  let value: unknown;
  // States to return to; `state` is the active one.
  const stateStack: string[] = [];
  let state = "VAL";
  // Cursor into the current chunk.
  let i = 0;
  let chunk = "";
  let chunkLen = 0;
  // Raw text of the string/number/keyword token currently being read.
  let currRaw = "";

  /** Skips JSON whitespace; returns true when the chunk is exhausted. */
  function skipSpaces() {
    while (i < chunkLen) {
      const ch = chunk[i];
      // All four JSON whitespace characters, including carriage return.
      if (ch !== " " && ch !== "\t" && ch !== "\n" && ch !== "\r") break;
      i++;
    }
    return i === chunkLen;
  }

  /** Advances while `test` accepts the code unit at the cursor; returns the start index. */
  function takeWhile(test: (cp: number) => boolean) {
    const start = i;
    while (i < chunkLen && test(chunk.codePointAt(i)!)) i++;
    return start;
  }

  function enter(s: string) {
    stateStack.push(state);
    state = s;
  }

  function exit(expected: string) {
    if (state !== expected) throw new Error("Popped invalid state");
    // Leaving the outermost "VAL" empties the stack: the value is complete.
    state = stateStack.pop() ?? "END";
  }

  function pushVal(val: unknown) {
    stack.push(value);
    value = val;
  }

  function popVal() {
    const old = value;
    value = stack.pop();
    return old;
  }

  /** Builds the error for the character at the cursor. */
  function unexpected() {
    return new Error(
      `Unexpected ${JSON.stringify(chunk[i])} (${i}) in "${state}"`
    );
  }

  for await (chunk of stream) {
    chunkLen = chunk.length;
    i = 0;
    loop: while (i < chunkLen) {
      switch (state) {
        case "VAL": {
          if (skipSpaces()) break loop;
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_BRACE_L:
              enter("OBJ");
              pushVal({});
              // Expect the first key (or the closing brace).
              enter("VAL");
              i++;
              continue loop;
            case CH_BRACE_R:
              // Closing brace where a value was expected: empty object.
              // Unwind the pending "VAL", the "OBJ", and the "VAL" that
              // introduced the object.
              exit("VAL");
              exit("OBJ");
              exit("VAL");
              i++;
              continue loop;
            case CH_SQUARE_L:
              enter("ARR");
              pushVal([]);
              enter("VAL");
              i++;
              continue loop;
            case CH_SQUARE_R:
              // Closing bracket where a value was expected: empty array.
              exit("VAL");
              exit("ARR");
              exit("VAL");
              i++;
              continue loop;
            case CH_QUOTE_D:
              enter("STR");
              i++;
              currRaw = '"';
              continue loop;
            default:
              if (!isKwdOrNum(cp)) throw unexpected();
              enter("KWN");
              currRaw = chunk.slice(takeWhile(isKwdOrNum), i);
              continue loop;
          }
        }
        case "KWN":
          currRaw += chunk.slice(takeWhile(isKwdOrNum), i);
          // The token is only known to be finished when a following
          // character is visible; otherwise it may continue in the next chunk.
          if (i < chunkLen) {
            pushVal(JSON.parse(currRaw));
            exit("KWN");
            exit("VAL");
          }
          continue loop;
        case "STR": {
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_QUOTE_D:
              currRaw += '"';
              i++;
              // Let JSON.parse decode the escapes of the raw literal.
              pushVal(JSON.parse(currRaw));
              exit("STR");
              exit("VAL");
              continue loop;
            case CH_ESCAPE:
              enter("ESC");
              currRaw += "\\";
              i++;
              continue loop;
            default:
              currRaw += chunk.slice(takeWhile(isSTRChar), i);
              continue loop;
          }
        }
        case "ESC":
          // Copy the character after the backslash verbatim so that an
          // escaped quote does not terminate the string.
          currRaw += chunk[i];
          i++;
          exit("ESC");
          continue loop;
        case "ARR": {
          if (skipSpaces()) break loop;
          const cp = chunk.codePointAt(i)!;
          if (cp !== CH_SQUARE_R && cp !== CH_COMMA) throw unexpected();
          // The finished element sits on top of the array; append it.
          const el = popVal();
          (value as unknown[]).push(el);
          i++;
          if (cp === CH_COMMA) {
            enter("VAL");
          } else {
            exit("ARR");
            exit("VAL");
          }
          continue loop;
        }
        case "OBJ": {
          if (skipSpaces()) break loop;
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_BRACE_R:
            case CH_COMMA: {
              const val = popVal();
              const key = popVal() as string;
              // Define an own data property instead of assigning, so that a
              // "__proto__" key becomes a regular member (as with JSON.parse)
              // rather than replacing the object's prototype.
              Object.defineProperty(value as object, key, {
                value: val,
                writable: true,
                enumerable: true,
                configurable: true,
              });
              i++;
              if (cp === CH_COMMA) {
                enter("VAL");
              } else {
                exit("OBJ");
                exit("VAL");
              }
              continue loop;
            }
            case CH_COLON:
              // The key is on the value stack; now read the member's value.
              enter("VAL");
              i++;
              continue loop;
            default:
              throw unexpected();
          }
        }
        default:
          // The value is complete: only trailing whitespace is allowed.
          if (!skipSpaces()) throw unexpected();
      }
    }
  }

  // A number/keyword that runs to the very end of the stream has no
  // terminating character, so it is decoded here.
  if (state === "KWN") {
    value = JSON.parse(currRaw);
    exit("KWN");
    exit("VAL");
  }
  return value;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment