webstrand/json-string-parser.ts

## json-string-parser.ts
/**
 * Decodes the body of a JSON string literal embedded in `json`.
 *
 * Scanning starts at `startIndex` and stops at the first unescaped `"`; the
 * text after that closing quote is ignored. Raw characters other than `\` and
 * `"` are copied through unchanged (control characters are not rejected).
 *
 * @param json - Text containing the string literal.
 * @param startIndex - Index of the first character after the opening quote.
 * @returns The decoded contents, excluding the closing quote.
 * @throws Error if the literal is not terminated before the end of `json`,
 *   or if it contains an unknown escape or a malformed `\uXXXX` escape.
 */
function parseString(json: string, startIndex: number): string {
  // All three regexes are sticky: a match is anchored exactly at `lastIndex`,
  // which is positioned by hand before every use.
  // `Text` uses `*` (not `+`) so that an empty run of plain text still matches;
  // this is what makes `""`, back-to-back escapes (`\n\t`, surrogate pairs) and
  // an escape directly followed by the closing quote parse correctly.
  const Text = /[^\\"]*/y;
  const Escapes = /\\+/y;
  const Unicode = /[\dA-Fa-f]{4}/y;

  let result = "";
  for(let textStart = Text.lastIndex = startIndex;;) {
    // Scan forward from the lastIndex until we encounter:
    // 1. closing quote
    // 2. end-of-string
    // 3. escape
    // The match can only fail when lastIndex lies beyond the end of `json`.
    if(!Text.test(json)) throw new Error("unterminated string literal");
    const opStart = Text.lastIndex;

    if(json.charCodeAt(opStart) === 34 /* `"` */) {
      // 1. We encountered a closing quote
      return result + json.slice(textStart, opStart);
    }
    else if(opStart === json.length) {
      // 2. We encountered the end-of-string
      throw new Error("Unterminated string");
    }

    // 3. We encountered an escape
    assert(json.charCodeAt(opStart) === 92 /* `\\` */, "LOGIC: We did not encounter an escape");
    // Scan forward consuming the whole run of backslashes
    Escapes.lastIndex = opStart;
    const foundSeq = Escapes.test(json);
    assert(foundSeq, `LOGIC: Escapes failed to match after Op: ${json.slice(opStart)}`);
    const seqEnd = Escapes.lastIndex;
    const seqLen = seqEnd - opStart;

    // Every pair of backslashes decodes to one literal backslash. Append the
    // pending text plus floor(seqLen / 2) backslashes; when the run is odd the
    // left-over backslash introduces the escape handled below.
    result += json.slice(textStart, opStart + (seqLen >> 1));

    if(seqLen % 2 === 0) {
      if(json.charCodeAt(seqEnd) === 34 /* `"` */) return result;
      textStart = Text.lastIndex = seqEnd;
    }
    else {
      // A dangling backslash at the very end of the input is an unterminated
      // literal, not an escape of "undefined".
      if(seqEnd === json.length) throw new Error("Unterminated string");

      switch(json.charCodeAt(seqEnd)) {
        case 34:  // `"`
        case 47:  // `/`
          // The escaped character stands for itself: make it the first
          // character of the next text segment and resume scanning after it.
          textStart = seqEnd;
          Text.lastIndex = seqEnd + 1;
          break;
        case 98:  // `b`
          result += `\b`;
          textStart = Text.lastIndex = seqEnd + 1;
          break;
        case 102: // `f`
          result += `\f`;
          textStart = Text.lastIndex = seqEnd + 1;
          break;
        case 110: // `n`
          result += `\n`;
          textStart = Text.lastIndex = seqEnd + 1;
          break;
        case 114: // `r`
          result += `\r`;
          textStart = Text.lastIndex = seqEnd + 1;
          break;
        case 116: // `t`
          result += `\t`;
          textStart = Text.lastIndex = seqEnd + 1;
          break;
        case 117: { // `u`
          // Exactly four hex digits must follow; each escape yields one UTF-16
          // code unit, so consecutive escapes can form a surrogate pair.
          Unicode.lastIndex = seqEnd + 1;
          const match = Unicode.exec(json);
          if(!match) throw new Error(`Invalid unicode escape sequence \\${json.slice(seqEnd, seqEnd + 5)}`);
          result += String.fromCharCode(parseInt(match[0], 16));
          textStart = Text.lastIndex = seqEnd + 5;
          break;
        }
        default: throw new Error(`Unrecognizable escape sequence \\${json[seqEnd]}`);
      }
    }
  }
}

// Demo: decode the string literal embedded in a larger text and print it.
// Index 12 is the first character after the opening quote.
{
  const demoSource = String.raw`foobarbaz: "hello \"world\" \this is (\u0072) a \\q test\\", contentThatComeAfter`;
  const decoded = parseString(demoSource, 12);
  console.log(decoded);
}

/**
 * Throws when `condition` is falsy and narrows it to truthy otherwise.
 *
 * @param condition - Value that must be truthy.
 * @param message - Error text, or a function producing it; the function is
 *   only called when the assertion fails.
 * @param cons - Error constructor used for the thrown error (defaults to `Error`).
 * @throws An instance of `cons` carrying the resolved message.
 */
function assert(
  condition: unknown,
  message: string | (() => string),
  cons: new (message: string) => Error = Error
): asserts condition {
  if (condition) return;

  const text = typeof message === "function" ? message() : message;
  throw new cons(text);
}
	/**
	 * Decodes the body of a JSON string literal embedded in `json`.
	 *
	 * Scanning starts at `startIndex` and stops at the first unescaped `"`; the
	 * text after that closing quote is ignored. Raw characters other than `\` and
	 * `"` are copied through unchanged (control characters are not rejected).
	 *
	 * @param json - Text containing the string literal.
	 * @param startIndex - Index of the first character after the opening quote.
	 * @returns The decoded contents, excluding the closing quote.
	 * @throws Error if the literal is not terminated before the end of `json`,
	 *   or if it contains an unknown escape or a malformed `\uXXXX` escape.
	 */
	function parseString(json: string, startIndex: number): string {
	  // All three regexes are sticky: a match is anchored exactly at `lastIndex`,
	  // which is positioned by hand before every use.
	  // `Text` uses `*` (not `+`) so that an empty run of plain text still matches;
	  // this is what makes `""`, back-to-back escapes (`\n\t`, surrogate pairs) and
	  // an escape directly followed by the closing quote parse correctly.
	  const Text = /[^\\"]*/y;
	  const Escapes = /\\+/y;
	  const Unicode = /[\dA-Fa-f]{4}/y;

	  let result = "";
	  for(let textStart = Text.lastIndex = startIndex;;) {
	    // Scan forward from the lastIndex until we encounter:
	    // 1. closing quote
	    // 2. end-of-string
	    // 3. escape
	    // The match can only fail when lastIndex lies beyond the end of `json`.
	    if(!Text.test(json)) throw new Error("unterminated string literal");
	    const opStart = Text.lastIndex;

	    if(json.charCodeAt(opStart) === 34 /* `"` */) {
	      // 1. We encountered a closing quote
	      return result + json.slice(textStart, opStart);
	    }
	    else if(opStart === json.length) {
	      // 2. We encountered the end-of-string
	      throw new Error("Unterminated string");
	    }

	    // 3. We encountered an escape
	    assert(json.charCodeAt(opStart) === 92 /* `\\` */, "LOGIC: We did not encounter an escape");
	    // Scan forward consuming the whole run of backslashes
	    Escapes.lastIndex = opStart;
	    const foundSeq = Escapes.test(json);
	    assert(foundSeq, `LOGIC: Escapes failed to match after Op: ${json.slice(opStart)}`);
	    const seqEnd = Escapes.lastIndex;
	    const seqLen = seqEnd - opStart;

	    // Every pair of backslashes decodes to one literal backslash. Append the
	    // pending text plus floor(seqLen / 2) backslashes; when the run is odd the
	    // left-over backslash introduces the escape handled below.
	    result += json.slice(textStart, opStart + (seqLen >> 1));

	    if(seqLen % 2 === 0) {
	      if(json.charCodeAt(seqEnd) === 34 /* `"` */) return result;
	      textStart = Text.lastIndex = seqEnd;
	    }
	    else {
	      // A dangling backslash at the very end of the input is an unterminated
	      // literal, not an escape of "undefined".
	      if(seqEnd === json.length) throw new Error("Unterminated string");

	      switch(json.charCodeAt(seqEnd)) {
	        case 34:  // `"`
	        case 47:  // `/`
	          // The escaped character stands for itself: make it the first
	          // character of the next text segment and resume scanning after it.
	          textStart = seqEnd;
	          Text.lastIndex = seqEnd + 1;
	          break;
	        case 98:  // `b`
	          result += `\b`;
	          textStart = Text.lastIndex = seqEnd + 1;
	          break;
	        case 102: // `f`
	          result += `\f`;
	          textStart = Text.lastIndex = seqEnd + 1;
	          break;
	        case 110: // `n`
	          result += `\n`;
	          textStart = Text.lastIndex = seqEnd + 1;
	          break;
	        case 114: // `r`
	          result += `\r`;
	          textStart = Text.lastIndex = seqEnd + 1;
	          break;
	        case 116: // `t`
	          result += `\t`;
	          textStart = Text.lastIndex = seqEnd + 1;
	          break;
	        case 117: { // `u`
	          // Exactly four hex digits must follow; each escape yields one UTF-16
	          // code unit, so consecutive escapes can form a surrogate pair.
	          Unicode.lastIndex = seqEnd + 1;
	          const match = Unicode.exec(json);
	          if(!match) throw new Error(`Invalid unicode escape sequence \\${json.slice(seqEnd, seqEnd + 5)}`);
	          result += String.fromCharCode(parseInt(match[0], 16));
	          textStart = Text.lastIndex = seqEnd + 5;
	          break;
	        }
	        default: throw new Error(`Unrecognizable escape sequence \\${json[seqEnd]}`);
	      }
	    }
	  }
	}

	// Demo: decode the string literal embedded in a larger text and print it.
	// Index 12 is the first character after the opening quote.
	{
	  const demoSource = String.raw`foobarbaz: "hello \"world\" \this is (\u0072) a \\q test\\", contentThatComeAfter`;
	  const decoded = parseString(demoSource, 12);
	  console.log(decoded);
	}

	/**
	 * Throws when `condition` is falsy and narrows it to truthy otherwise.
	 *
	 * @param condition - Value that must be truthy.
	 * @param message - Error text, or a function producing it; the function is
	 *   only called when the assertion fails.
	 * @param cons - Error constructor used for the thrown error (defaults to `Error`).
	 * @throws An instance of `cons` carrying the resolved message.
	 */
	function assert(
	  condition: unknown,
	  // The union must use a bare `|`; a backslash-escaped pipe is not valid TypeScript.
	  message: string | (() => string),
	  cons: new (message: string) => Error = Error
	): asserts condition {
	  if (!condition) throw new cons(typeof message === "function" ? message() : message);
	}