Skip to content

Instantly share code, notes, and snippets.

@btakita
Last active April 26, 2023 00:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save btakita/2c33bab8ed078aef5b1a22006904fec3 to your computer and use it in GitHub Desktop.
Save btakita/2c33bab8ed078aef5b1a22006904fec3 to your computer and use it in GitHub Desktop.
Streaming JSON parser generated by OpenAI
/**
 * Incremental (push) parser for a single JSON document.
 *
 * Text is fed in arbitrary pieces through `parseChunk()`; every character is
 * dispatched to the handler stored in `this.state`. Call `end()` once all
 * input has been supplied to obtain the parsed value, or use `parse()` for
 * the one-shot case.
 *
 * Invariants maintained by the handlers:
 * - `stack` holds every container that is currently open, outermost first;
 *   `top` is the innermost one (or `null` at the document root).
 * - Containers are attached to their parent as soon as they are opened, so a
 *   single pending `currentKey` is enough for objects.
 * - `result` is the root value; it is only complete once `done` is true.
 */
class JSONStreamParser {
  constructor() {
    this.reset();
  }

  /** Returns the parser to its initial state so it can parse a new document. */
  reset() {
    this.buffer = '';
    this.currentToken = '';
    this.currentKey = '';
    this.unicodeDigits = '';
    this.stringState = null;
    this.stack = [];
    this.top = null;
    this.state = this.parseValue;
    this.result = null;
    this.done = false;
  }

  /**
   * Feeds the next piece of JSON text to the parser.
   *
   * @param {string} chunk - Text to consume; may split tokens anywhere.
   * @throws {Error} On the first character that violates the JSON grammar.
   */
  parseChunk(chunk) {
    const input = this.buffer + chunk;
    this.buffer = '';
    // Index-based iteration avoids re-slicing the buffer for every character.
    for (let i = 0; i < input.length; i++) {
      this.state(input.charAt(i));
    }
  }

  /**
   * Parses a complete JSON document in one call.
   *
   * @param {string} json - The whole document.
   * @returns {*} The parsed value.
   * @throws {Error} If the text is not a single valid JSON value.
   */
  parse(json) {
    this.reset();
    this.parseChunk(json);
    return this.end();
  }

  /**
   * Signals that no more input will arrive.
   *
   * @returns {*} The parsed root value.
   * @throws {Error} `Unexpected end of input` if the document is incomplete.
   */
  end() {
    // A bare number has no terminator of its own, so finish it here.
    const inNumber =
      this.state === this.parseNumber ||
      this.state === this.parseNumberFraction ||
      this.state === this.parseNumberExponentDigits;
    if (inNumber) {
      this.finishNumber();
    }
    if (!this.done) {
      throw new Error('Unexpected end of input');
    }
    return this.result;
  }

  /** Stores `value` at the root, in the open array, or under the pending key. */
  attachValue(value) {
    if (this.top === null) {
      this.result = value;
    } else if (Array.isArray(this.top)) {
      this.top.push(value);
    } else {
      // defineProperty keeps a "__proto__" key an ordinary own property.
      Object.defineProperty(this.top, this.currentKey, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
      this.currentKey = '';
    }
  }

  /** Moves to the "after a value" state and flags completion at the root. */
  afterValue() {
    if (this.stack.length === 0) {
      this.done = true;
    }
    this.state = this.parseEndValue;
  }

  /** Records a completed scalar value. */
  finishValue(value) {
    this.attachValue(value);
    this.currentToken = '';
    this.afterValue();
  }

  /** Attaches a new array/object to its parent and makes it the open container. */
  openContainer(container, nextState) {
    this.attachValue(container);
    this.stack.push(container);
    this.top = container;
    this.state = nextState;
  }

  /** Closes the innermost container and resumes its parent. */
  closeContainer() {
    this.stack.pop();
    this.top = this.stack.length > 0 ? this.stack[this.stack.length - 1] : null;
    this.afterValue();
  }

  /** State: expecting the first character of a value. */
  parseValue(char) {
    if (char === '{') {
      this.openContainer({}, this.parseObjectKey);
    } else if (char === '[') {
      this.openContainer([], this.parseArrayValue);
    } else if (char === '"') {
      this.currentToken = '';
      this.stringState = this.parseString;
      this.state = this.parseString;
    } else if (char === '-' || /\d/.test(char)) {
      this.currentToken = char;
      this.state = this.parseNumber;
    } else if (char === 't') {
      this.currentToken = char;
      this.state = this.parseTrue;
    } else if (char === 'f') {
      this.currentToken = char;
      this.state = this.parseFalse;
    } else if (char === 'n') {
      this.currentToken = char;
      this.state = this.parseNull;
    } else if (/\s/.test(char)) {
      // skip whitespace between tokens
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: just after `{` — a key or an immediate `}` may follow. */
  parseObjectKey(char) {
    if (char === '"') {
      this.startKey();
    } else if (char === '}') {
      this.closeContainer();
    } else if (/\s/.test(char)) {
      // skip whitespace
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: after `,` inside an object — a key is mandatory (no trailing comma). */
  parseNextObjectKey(char) {
    if (char === '"') {
      this.startKey();
    } else if (/\s/.test(char)) {
      // skip whitespace
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** Begins accumulating an object key. */
  startKey() {
    this.currentToken = '';
    this.stringState = this.parseKeyString;
    this.state = this.parseKeyString;
  }

  /** State: inside an object key string. */
  parseKeyString(char) {
    if (char === '"') {
      this.currentKey = this.currentToken;
      this.currentToken = '';
      this.state = this.parseObjectColon;
    } else {
      this.readStringChar(char);
    }
  }

  /** State: between a key and its value. */
  parseObjectColon(char) {
    if (/\s/.test(char)) {
      // skip whitespace
    } else if (char === ':') {
      this.state = this.parseValue;
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: just after `[` — a value or an immediate `]` may follow. */
  parseArrayValue(char) {
    if (/\s/.test(char)) {
      // skip whitespace
    } else if (char === ']') {
      this.closeContainer();
    } else {
      this.state = this.parseValue;
      this.parseValue(char);
    }
  }

  /** State: a value has just ended; expect `,`, the matching closer, or whitespace. */
  parseEndValue(char) {
    if (/\s/.test(char)) {
      return;
    }
    if (this.top !== null) {
      const inArray = Array.isArray(this.top);
      if (char === ',') {
        this.state = inArray ? this.parseValue : this.parseNextObjectKey;
        return;
      }
      if (char === (inArray ? ']' : '}')) {
        this.closeContainer();
        return;
      }
    }
    throw new Error(`Unexpected character ${char}`);
  }

  /** State: inside a string value. */
  parseString(char) {
    if (char === '"') {
      this.finishValue(this.currentToken);
    } else {
      this.readStringChar(char);
    }
  }

  /** Handles one non-terminating character of a key or value string. */
  readStringChar(char) {
    if (char === '\\') {
      this.state = this.parseEscapeSequence;
    } else if (char < ' ') {
      // JSON forbids raw control characters inside strings.
      throw new Error(`Unexpected character ${char}`);
    } else {
      this.currentToken += char;
    }
  }

  /** State: the character following a backslash inside a string. */
  parseEscapeSequence(char) {
    if (char === 'u') {
      this.unicodeDigits = '';
      this.state = this.parseUnicodeEscape;
      return;
    }
    // Position i in the first string decodes to position i in the second.
    const index = '"\\/bfnrt'.indexOf(char);
    if (index === -1) {
      throw new Error(`Unexpected character ${char}`);
    }
    this.currentToken += '"\\/\b\f\n\r\t'.charAt(index);
    this.state = this.stringState;
  }

  /** State: collecting the four hex digits of a `\uXXXX` escape. */
  parseUnicodeEscape(char) {
    if (!/[0-9a-fA-F]/.test(char)) {
      throw new Error(`Unexpected character ${char}`);
    }
    this.unicodeDigits += char;
    if (this.unicodeDigits.length === 4) {
      this.currentToken += String.fromCharCode(Number.parseInt(this.unicodeDigits, 16));
      this.state = this.stringState;
    }
  }

  /** State: integer part of a number (after an optional leading `-`). */
  parseNumber(char) {
    if (/\d/.test(char)) {
      this.currentToken += char;
    } else if (char === '.') {
      this.currentToken += char;
      this.state = this.parseNumberFraction;
    } else if (/[eE]/.test(char)) {
      this.currentToken += char;
      this.state = this.parseNumberExponentSign;
    } else {
      this.finishNumber(char);
    }
  }

  /** State: digits after the decimal point. */
  parseNumberFraction(char) {
    if (/\d/.test(char)) {
      this.currentToken += char;
    } else if (/[eE]/.test(char)) {
      this.currentToken += char;
      this.state = this.parseNumberExponentSign;
    } else {
      this.finishNumber(char);
    }
  }

  /** State: right after `e`/`E` — a sign or the first exponent digit. */
  parseNumberExponentSign(char) {
    if (/[+\-]/.test(char)) {
      this.currentToken += char;
      this.state = this.parseNumberExponent;
    } else if (/\d/.test(char)) {
      this.currentToken += char;
      this.state = this.parseNumberExponentDigits;
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: after the exponent sign — a digit is mandatory. */
  parseNumberExponent(char) {
    if (/\d/.test(char)) {
      this.currentToken += char;
      this.state = this.parseNumberExponentDigits;
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: remaining exponent digits. */
  parseNumberExponentDigits(char) {
    if (/\d/.test(char)) {
      this.currentToken += char;
    } else {
      this.finishNumber(char);
    }
  }

  /**
   * Validates and records the accumulated number, then re-dispatches the
   * character that terminated it (if any) to the new state.
   */
  finishNumber(char) {
    const token = this.currentToken;
    // The per-state checks are permissive; this enforces the exact grammar
    // (no leading zeros, no bare "-", digits required after ".").
    if (!/^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$/.test(token)) {
      throw new Error(`Invalid number ${token}`);
    }
    this.finishValue(Number(token));
    if (char !== undefined) {
      this.state(char);
    }
  }

  /** Shared matcher for the `true` / `false` / `null` keywords. */
  parseLiteral(char, word, value) {
    this.currentToken += char;
    if (this.currentToken === word) {
      this.finishValue(value);
    } else if (!word.startsWith(this.currentToken)) {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: inside the literal `true`. */
  parseTrue(char) {
    this.parseLiteral(char, 'true', true);
  }

  /** State: inside the literal `false`. */
  parseFalse(char) {
    this.parseLiteral(char, 'false', false);
  }

  /** State: inside the literal `null`. */
  parseNull(char) {
    this.parseLiteral(char, 'null', null);
  }
}
/**
 * Transformer (for `new TransformStream(new JSONStreamParser())`) that turns
 * a stream of JSON text into a stream of parsed values.
 *
 * The input may contain any number of top-level JSON values, optionally
 * separated by whitespace. The transformer only tracks enough structure to
 * find where each top-level value ends (bracket depth and string/escape
 * state); the raw text of that value is then decoded with `JSON.parse`, which
 * also performs full validation. The text of the value currently being read
 * is held in `this.buffer`.
 *
 * Chunks must be strings (e.g. pipe bytes through a `TextDecoderStream`
 * first).
 */
class JSONStreamParser {
  constructor() {
    this.buffer = '';
    this.depth = 0;
    this.state = this.parseValue;
    this.controller = null;
  }

  /**
   * Transformer `start` hook.
   * @param {{enqueue: Function, error: Function}} controller
   */
  start(controller) {
    this.controller = controller;
  }

  /**
   * Transformer `transform` hook: consumes one chunk of JSON text.
   *
   * @param {string} chunk - Text to scan; may split values anywhere.
   * @param {{enqueue: Function, error: Function}} controller
   * @throws {Error} On a character that cannot start a value, or when a
   *   completed value is not valid JSON.
   */
  transform(chunk, controller) {
    // Also accept the controller here so the object works when driven by hand.
    this.controller = controller;
    for (let i = 0; i < chunk.length; i++) {
      this.state(chunk.charAt(i));
    }
  }

  /**
   * Transformer `flush` hook: emits a trailing scalar and reports truncated
   * input through `controller.error`.
   *
   * @param {{enqueue: Function, error: Function}} controller
   */
  flush(controller) {
    this.controller = controller;
    try {
      // Numbers and keywords have no closing delimiter; end of input ends them.
      if (this.state === this.parseScalar) {
        this.emitValue();
      }
      if (this.state !== this.parseValue) {
        throw new Error('Unexpected end of input');
      }
    } catch (error) {
      controller.error(error);
    }
  }

  /** Decodes the buffered text and enqueues the resulting value. */
  emitValue() {
    const text = this.buffer;
    // Reset first so the parser is in a clean state even if decoding fails.
    this.buffer = '';
    this.depth = 0;
    this.state = this.parseValue;
    let value;
    try {
      value = JSON.parse(text);
    } catch (error) {
      throw new Error(`Failed to parse JSON: ${error.message}`, { cause: error });
    }
    this.controller.enqueue(value);
  }

  /** State: between top-level values. */
  parseValue(char) {
    if (/\s/.test(char)) {
      // ignore whitespace
    } else if (char === '{' || char === '[') {
      this.buffer = char;
      this.depth = 1;
      this.state = this.parseContainer;
    } else if (char === '"') {
      this.buffer = char;
      this.state = this.parseString;
    } else if (char === '-' || /[\dtfn]/.test(char)) {
      this.buffer = char;
      this.state = this.parseScalar;
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: inside an object or array, outside any string. */
  parseContainer(char) {
    this.buffer += char;
    if (char === '"') {
      this.state = this.parseString;
    } else if (char === '{' || char === '[') {
      this.depth += 1;
    } else if (char === '}' || char === ']') {
      this.depth -= 1;
      if (this.depth === 0) {
        this.emitValue();
      }
    }
  }

  /** State: inside a string (either top-level or nested in a container). */
  parseString(char) {
    this.buffer += char;
    if (char === '\\') {
      this.state = this.parseEscape;
    } else if (char === '"') {
      if (this.depth === 0) {
        this.emitValue();
      } else {
        this.state = this.parseContainer;
      }
    }
  }

  /** State: the character after a backslash; it can never end the string. */
  parseEscape(char) {
    this.buffer += char;
    this.state = this.parseString;
  }

  /** State: inside a top-level number or keyword. */
  parseScalar(char) {
    if (/[0-9A-Za-z+\-.]/.test(char)) {
      this.buffer += char;
    } else {
      this.emitValue();
      // The terminating character belongs to whatever comes next.
      this.state(char);
    }
  }
}
/**
 * Factory that builds a fresh parser instance.
 *
 * NOTE(review): `JSONParser` is not defined anywhere in the visible source —
 * confirm it exists elsewhere, or whether `JSONStreamParser` was intended.
 *
 * @returns {JSONParser} A newly constructed parser.
 */
function createJSONParser() {
  const parser = new JSONParser();
  return parser;
}
/**
 * Thin adapter exposing a parser through `transform` / `flush` methods.
 * All work is delegated to the parser obtained from `createJSONParser()`.
 */
class JSONTransformer {
  constructor() {
    const parser = createJSONParser();
    this.parser = parser;
  }

  /**
   * Hands one chunk to the wrapped parser.
   * @param {*} chunk - Forwarded unchanged to `parser.parse`.
   * @param {*} controller - Forwarded unchanged to `parser.parse`.
   */
  transform(chunk, controller) {
    const { parser } = this;
    parser.parse(chunk, controller);
  }

  /**
   * Tells the wrapped parser that input has ended.
   * @param {*} controller - Forwarded unchanged to `parser.flush`.
   */
  flush(controller) {
    const { parser } = this;
    parser.flush(controller);
  }
}
/**
 * Factory that builds a fresh `JSONTransformer`.
 *
 * @returns {JSONTransformer} A newly constructed transformer.
 */
function createJSONTransformer() {
  const transformer = new JSONTransformer();
  return transformer;
}
/**
 * Character-at-a-time JSON parser that pushes every completed top-level
 * value to a stream controller.
 *
 * Usage: `start(controller)`, then `parse(char)` for each character, then
 * `close()`. The input may hold several top-level values in sequence.
 *
 * Invariants maintained by the handlers:
 * - `stack` holds every container that is currently open, outermost first;
 *   `top` is the innermost one, or `null` when at the top level.
 * - Nested containers are attached to their parent when they are opened, so
 *   a single pending `currentKey` is sufficient; top-level containers are
 *   enqueued when they close.
 */
class JSONStreamParser {
  constructor() {
    this.top = null;
    this.stack = [];
    this.state = this.parseValue;
    this.currentToken = '';
    this.currentKey = '';
    this.unicodeDigits = '';
    this.readingKey = false;
    this.failed = false;
    this.controller = null;
  }

  /**
   * Sets the controller that receives values, errors and the close signal.
   * @param {{enqueue: Function, error: Function, close: Function}} controller
   */
  start(controller) {
    this.controller = controller;
  }

  /**
   * Delivers a finished value: to the controller at the top level, otherwise
   * into the open array or under the pending key of the open object.
   * @param {*} value
   */
  enqueueValue(value) {
    if (this.top === null) {
      this.controller.enqueue(value);
    } else if (Array.isArray(this.top)) {
      this.top.push(value);
    } else {
      // defineProperty keeps a "__proto__" key an ordinary own property.
      Object.defineProperty(this.top, this.currentKey, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
      this.currentKey = '';
    }
  }

  /** Records a finished scalar and selects the follow-up state. */
  completeValue(value) {
    this.enqueueValue(value);
    this.currentToken = '';
    this.state = this.top === null ? this.parseValue : this.parseEndValue;
  }

  /** Makes `container` the innermost open container. */
  openContainer(container, nextState) {
    if (this.top !== null) {
      // Attach now, while the parent's pending key is still available.
      this.enqueueValue(container);
    }
    this.stack.push(container);
    this.top = container;
    this.state = nextState;
  }

  /** Closes the innermost container; emits it if it was a top-level value. */
  closeContainer() {
    const closed = this.stack.pop();
    this.top = this.stack.length > 0 ? this.stack[this.stack.length - 1] : null;
    if (this.top === null) {
      this.controller.enqueue(closed);
      this.state = this.parseValue;
    } else {
      this.state = this.parseEndValue;
    }
  }

  /** State: inside a string (an object key when `readingKey` is set). */
  parseString(char) {
    if (char === '"') {
      if (this.readingKey) {
        this.readingKey = false;
        this.currentKey = this.currentToken;
        this.currentToken = '';
        this.state = this.parseObjectColon;
      } else {
        this.completeValue(this.currentToken);
      }
    } else if (char === '\\') {
      this.state = this.parseEscapedCharacter;
    } else if (char < ' ') {
      // JSON forbids raw control characters inside strings.
      throw new Error(`Unexpected character ${char}`);
    } else {
      this.currentToken += char;
    }
  }

  /** State: the character following a backslash inside a string. */
  parseEscapedCharacter(char) {
    if (char === 'u') {
      this.unicodeDigits = '';
      this.state = this.parseUnicodeEscape;
      return;
    }
    // Position i in the first string decodes to position i in the second.
    const index = '"\\/bfnrt'.indexOf(char);
    if (index === -1) {
      throw new Error(`Unexpected character ${char}`);
    }
    this.currentToken += '"\\/\b\f\n\r\t'.charAt(index);
    this.state = this.parseString;
  }

  /** State: collecting the four hex digits of a `\uXXXX` escape. */
  parseUnicodeEscape(char) {
    if (!/[0-9a-fA-F]/.test(char)) {
      throw new Error(`Unexpected character ${char}`);
    }
    this.unicodeDigits += char;
    if (this.unicodeDigits.length === 4) {
      this.currentToken += String.fromCharCode(Number.parseInt(this.unicodeDigits, 16));
      this.state = this.parseString;
    }
  }

  /** State: inside a number; gathers characters until a non-number one. */
  parseNumber(char) {
    if (/[0-9+\-.eE]/.test(char)) {
      this.currentToken += char;
    } else {
      this.finishNumber();
      // The terminating character belongs to the following state.
      this.state(char);
    }
  }

  /** Validates the gathered number text against the JSON grammar and records it. */
  finishNumber() {
    const token = this.currentToken;
    if (!/^-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?$/.test(token)) {
      throw new Error(`Invalid number ${token}`);
    }
    this.completeValue(Number(token));
  }

  /** Shared matcher for the `true` / `false` / `null` keywords. */
  parseLiteral(char, word, value) {
    this.currentToken += char;
    if (this.currentToken === word) {
      this.completeValue(value);
    } else if (!word.startsWith(this.currentToken)) {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: inside the literal `true`. */
  parseTrue(char) {
    this.parseLiteral(char, 'true', true);
  }

  /** State: inside the literal `false`. */
  parseFalse(char) {
    this.parseLiteral(char, 'false', false);
  }

  /** State: inside the literal `null`. */
  parseNull(char) {
    this.parseLiteral(char, 'null', null);
  }

  /** State: just after `[` — a value or an immediate `]` may follow. */
  parseArrayValue(char) {
    if (char === ']') {
      this.closeContainer();
    } else if (/\s/.test(char)) {
      // ignore whitespace
    } else {
      this.state = this.parseValue;
      this.parseValue(char);
    }
  }

  /** State: just after `{` — a key or an immediate `}` may follow. */
  parseObjectKey(char) {
    if (char === '"') {
      this.startKey();
    } else if (/\s/.test(char)) {
      // ignore whitespace
    } else if (char === '}') {
      this.closeContainer();
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: after `,` inside an object — a key is mandatory (no trailing comma). */
  parseNextObjectKey(char) {
    if (char === '"') {
      this.startKey();
    } else if (/\s/.test(char)) {
      // ignore whitespace
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** Begins accumulating an object key. */
  startKey() {
    this.readingKey = true;
    this.currentToken = '';
    this.state = this.parseString;
  }

  /** State: between a key and its value. */
  parseObjectColon(char) {
    if (/\s/.test(char)) {
      // ignore whitespace
    } else if (char === ':') {
      this.state = this.parseValue;
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: a nested value has just ended; expect `,` or the matching closer. */
  parseEndValue(char) {
    if (/\s/.test(char)) {
      return;
    }
    const inArray = Array.isArray(this.top);
    if (char === ',') {
      this.state = inArray ? this.parseValue : this.parseNextObjectKey;
    } else if (char === (inArray ? ']' : '}')) {
      this.closeContainer();
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /** State: expecting the first character of a value. */
  parseValue(char) {
    if (char === '{') {
      this.openContainer({}, this.parseObjectKey);
    } else if (char === '[') {
      this.openContainer([], this.parseArrayValue);
    } else if (char === '"') {
      this.readingKey = false;
      this.currentToken = '';
      this.state = this.parseString;
    } else if (char === '-' || /\d/.test(char)) {
      this.currentToken = char;
      this.state = this.parseNumber;
    } else if (char === 't') {
      this.currentToken = char;
      this.state = this.parseTrue;
    } else if (char === 'f') {
      this.currentToken = char;
      this.state = this.parseFalse;
    } else if (char === 'n') {
      this.currentToken = char;
      this.state = this.parseNull;
    } else if (/\s/.test(char)) {
      // ignore whitespace
    } else {
      throw new Error(`Unexpected character ${char}`);
    }
  }

  /**
   * Feeds one character to the current state. Grammar errors are reported
   * through `controller.error`; after the first error all further input is
   * ignored.
   * @param {string} char - A single character.
   */
  parse(char) {
    if (this.failed) {
      return;
    }
    try {
      this.state(char);
    } catch (error) {
      this.failed = true;
      this.controller.error(error);
    }
  }

  /**
   * Signals end of input and closes the controller.
   * @throws {Error} `Unexpected end of input` if a value is still incomplete,
   *   or `Invalid number …` if a trailing number is malformed.
   */
  close() {
    if (this.failed) {
      // The controller has already been errored; there is nothing to close.
      return;
    }
    // A trailing number has no terminator of its own, so finish it here.
    if (this.state === this.parseNumber) {
      this.finishNumber();
    }
    if (this.stack.length > 0 || this.state !== this.parseValue) {
      throw new Error('Unexpected end of input');
    }
    this.controller.close();
  }
}
/**
 * Drains a readable stream and resolves with all chunks concatenated into a
 * single string (each chunk is appended with `+=`, i.e. string-coerced).
 *
 * @param {{getReader: Function}} stream - Stream to read to completion.
 * @returns {Promise<string>} The concatenated chunk text.
 */
function streamToPromise(stream) {
  const reader = stream.getReader();
  let text = '';

  // Appends the chunk just read and schedules the next read until done.
  const pump = (step) => {
    if (!step.done) {
      text += step.value;
      return reader.read().then(pump);
    }
    return text;
  };

  return reader.read().then(pump);
}
/**
 * Wraps a stream of JSON text in a `ReadableStream` of whatever the parser
 * enqueues.
 *
 * Each chunk read from `stream` is fed to a `JSONStreamParser` one element at
 * a time via `parser.parse(chunk[i])`; the parser is given the new stream's
 * controller through `parser.start(controller)` and is told about end of
 * input through `parser.close()`.
 *
 * Failures while reading the source, or thrown by the parser (including from
 * `close()`), error the returned stream instead of being lost as unhandled
 * rejections. Cancelling the returned stream cancels the source.
 *
 * @param {ReadableStream} stream - Source of indexable chunks (e.g. strings).
 * @returns {ReadableStream} Stream fed by the parser.
 */
function parseJSONStream(stream) {
  const parser = new JSONStreamParser();
  const reader = stream.getReader();
  return new ReadableStream({
    start(controller) {
      parser.start(controller);

      // Reads the source to exhaustion, one chunk per step.
      const pump = () =>
        reader.read().then(({ done, value }) => {
          if (done) {
            parser.close();
            return undefined;
          }
          for (let i = 0; i < value.length; i++) {
            parser.parse(value[i]);
          }
          return pump();
        });

      // Not returned from start(): the stream should be usable while the
      // source is still being read. controller.error() is a no-op once the
      // stream is already closed or errored.
      pump().catch((error) => {
        controller.error(error);
      });
    },
    cancel(reason) {
      return reader.cancel(reason);
    },
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment