Skip to content

Instantly share code, notes, and snippets.

@webstrand
Created December 11, 2023 19:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save webstrand/885a3c5fcb608fa4209d230da54a55c9 to your computer and use it in GitHub Desktop.
Save webstrand/885a3c5fcb608fa4209d230da54a55c9 to your computer and use it in GitHub Desktop.
JSON string parser that generates minimal garbage by avoiding regex match objects
/**
 * Decodes the contents of a JSON string literal embedded in `json`.
 *
 * Scanning is done with sticky regexes driven through `test()` and
 * `lastIndex`, so no regex match objects are allocated while parsing.
 *
 * @param json - Text containing the string literal.
 * @param startIndex - Index of the first character after the opening quote.
 * @returns The decoded string contents, up to (not including) the closing quote.
 * @throws Error if the literal is unterminated, contains an unrecognised
 *   escape, or contains a `\u` escape not followed by four hex digits.
 */
function parseString(json: string, startIndex: number): string {
    // `*` rather than `+`: a text run may be empty (empty string, string that
    // starts with an escape, two adjacent escapes, escape right before the
    // closing quote). With `+` those valid inputs failed to match and threw.
    const Text = /[^\\"]*/y;
    const Escapes = /\\+/y;
    const Unicode = /[\dA-Fa-f]{4}/y;
    let result = "";
    for (let textStart = Text.lastIndex = startIndex;;) {
        // Scan forward from lastIndex until we encounter:
        // 1. closing quote
        // 2. end-of-input
        // 3. escape
        // The pattern can only fail when lastIndex lies beyond the end of the input.
        if (!Text.test(json)) throw new Error("unterminated string literal");
        const opStart = Text.lastIndex;
        if (json.charCodeAt(opStart) === 34 /* `"` */) {
            // 1. We encountered a closing quote
            return result + json.slice(textStart, opStart);
        }
        if (opStart === json.length) {
            // 2. We encountered the end-of-input
            throw new Error("Unterminated string");
        }
        // 3. We encountered an escape
        assert(json.charCodeAt(opStart) === 92 /* `\\` */, "LOGIC: We did not encounter an escape");
        // Scan forward consuming the whole run of backslashes
        Escapes.lastIndex = opStart;
        if (!Escapes.test(json)) {
            // Build the diagnostic only on failure so the hot path allocates nothing.
            assert(false, `LOGIC: Escapes failed to match after Op: ${json.slice(opStart)}`);
        }
        const seqEnd = Escapes.lastIndex;
        const seqLen = seqEnd - opStart;
        // Every pair of backslashes decodes to one literal backslash. Append the
        // pending text plus floor(seqLen / 2) of the backslashes in one slice.
        result += json.slice(textStart, opStart + (seqLen >> 1));
        if (seqLen % 2 === 0) {
            // Only escaped backslashes: the next character is ordinary input.
            if (json.charCodeAt(seqEnd) === 34 /* `"` */) return result;
            textStart = Text.lastIndex = seqEnd;
            continue;
        }
        // Odd run: the last backslash escapes the character at seqEnd.
        switch (json.charCodeAt(seqEnd)) {
            case 34: // `"`
            case 47: // `/`
                // The escaped character stands for itself: make it the first
                // character of the next text segment, but resume scanning after
                // it so an escaped quote is not mistaken for the closing quote.
                textStart = seqEnd;
                Text.lastIndex = seqEnd + 1;
                break;
            case 98: // `b`
                result += "\b";
                textStart = Text.lastIndex = seqEnd + 1;
                break;
            case 102: // `f`
                result += "\f";
                textStart = Text.lastIndex = seqEnd + 1;
                break;
            case 110: // `n`
                result += "\n";
                textStart = Text.lastIndex = seqEnd + 1;
                break;
            case 114: // `r`
                result += "\r";
                textStart = Text.lastIndex = seqEnd + 1;
                break;
            case 116: // `t`
                result += "\t";
                textStart = Text.lastIndex = seqEnd + 1;
                break;
            case 117: { // `u`
                Unicode.lastIndex = seqEnd + 1;
                // `test` instead of `exec`: validates the four hex digits
                // without allocating a match array.
                if (!Unicode.test(json)) {
                    throw new Error(`Invalid unicode escape sequence \\${json.slice(seqEnd, seqEnd + 5)}`);
                }
                result += String.fromCharCode(parseInt(json.slice(seqEnd + 1, seqEnd + 5), 16));
                textStart = Text.lastIndex = seqEnd + 5;
                break;
            }
            default:
                // A dangling backslash at the very end of the input is an
                // unterminated literal, not an unknown escape.
                if (seqEnd >= json.length) throw new Error("Unterminated string");
                throw new Error(`Unrecognizable escape sequence \\${json[seqEnd]}`);
        }
    }
}
{
    // Demo: decode the string literal embedded in a larger piece of text.
    // Index 12 is the character right after the opening quote.
    const sample = String.raw`foobarbaz: "hello \"world\" \this is (\u0072) a \\q test\\", contentThatComeAfter`;
    const firstCharAfterQuote = 12;
    console.log(parseString(sample, firstCharAfterQuote));
}
/**
 * Throws when `condition` is falsy and narrows it to truthy otherwise.
 *
 * @param condition - Value that is expected to be truthy.
 * @param message - Error text, or a function producing it; the function is
 *   only invoked when the assertion fails.
 * @param cons - Error constructor used to build the thrown error (defaults to `Error`).
 * @throws An instance of `cons` carrying the resolved message when `condition` is falsy.
 */
function assert(
    condition: unknown,
    message: string | (() => string),
    cons: new (message: string) => Error = Error,
): asserts condition {
    if (condition) return;
    const text = typeof message === "function" ? message() : message;
    throw new cons(text);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment