Skip to content

Instantly share code, notes, and snippets.

@bjsi
Created September 13, 2023 00:49
Show Gist options
  • Save bjsi/3ad6297345aec91460cbce5aecac939c to your computer and use it in GitHub Desktop.
Save bjsi/3ad6297345aec91460cbce5aecac939c to your computer and use it in GitHub Desktop.
Partially parse a JSON object. Useful for streaming function calls from OpenAI.
// copied from here, converted to TS, created a `field` completion mode which is more granular: https://www.npmjs.com/package/partial-json-parser?activeTab=code
/** Lexical category assigned to each token produced by `tokenize`. */
type TokenType =
| 'brace' // `{` or `}`
| 'paren' // `[` or `]`
| 'separator' // `:`
| 'delimiter' // `,`
| 'string' // contents of a double-quoted string, stored without the surrounding quotes
| 'number' // run of digits and `.`, optionally preceded by `-`
| 'name'; // bare-word literal such as `true` or `false`
/** A single lexical unit: its category plus the source text it stands for. */
interface Token {
type: TokenType;
value: string;
}
/**
 * How aggressively truncated input is completed.
 * The tokenizer keeps an unterminated trailing string only when this is `'field'`;
 * with `'object'` such a string is discarded.
 */
export type PartialParseJsonGranularity = 'object' | 'field';
/**
 * Split a (possibly truncated) JSON text into a flat list of tokens.
 *
 * Characters that do not start any known token are skipped silently, as are
 * backslashes that appear outside of a string.
 *
 * @param input JSON text; it may stop at any point mid-document.
 * @param granularity When `'field'`, a string whose closing quote has not
 *   arrived yet is still emitted with the text received so far. When
 *   `'object'`, such an unterminated string is discarded.
 * @returns The tokens in source order. String token values exclude the
 *   surrounding quotes but keep escape sequences verbatim.
 * @throws Error if a bare word is neither `true`, `false`, `null`, nor a
 *   prefix of one of them cut off by the end of the input.
 */
const tokenize = (
  input: string,
  granularity: PartialParseJsonGranularity,
): Token[] => {
  // Built once per call instead of once per scanned character.
  const WHITESPACE = /\s/;
  const DIGIT = /[0-9]/;
  const LETTER = /[a-z]/i;
  const HEX_DIGITS = /^[0-9a-f]*$/i;
  const LITERALS = ['true', 'false', 'null'];
  const PUNCTUATION = new Map<string, TokenType>([
    ['{', 'brace'],
    ['}', 'brace'],
    ['[', 'paren'],
    [']', 'paren'],
    [':', 'separator'],
    [',', 'delimiter'],
  ]);

  const tokens: Token[] = [];
  let current = 0;

  while (current < input.length) {
    const char = input[current];

    // A backslash outside of a string carries no meaning; ignore it.
    if (char === '\\') {
      current++;
      continue;
    }

    const punctuation = PUNCTUATION.get(char);
    if (punctuation !== undefined) {
      tokens.push({ type: punctuation, value: char });
      current++;
      continue;
    }

    if (char === '"') {
      let value = '';
      let danglingQuote = false;
      current++; // step over the opening quote
      for (;;) {
        if (current >= input.length) {
          danglingQuote = true;
          break;
        }
        const inner = input[current];
        if (inner === '"') {
          break;
        }
        if (inner === '\\') {
          // A lone trailing backslash is an escape that has not arrived yet.
          if (current + 1 >= input.length) {
            danglingQuote = true;
            break;
          }
          const escaped = input[current + 1];
          if (escaped === 'u') {
            // `\u` needs exactly four hex digits. If the input ends before
            // they all arrive, drop the partial escape: keeping it would make
            // the regenerated string invalid JSON.
            const digits = input.slice(current + 2, current + 6);
            if (digits.length < 4 && HEX_DIGITS.test(digits)) {
              danglingQuote = true;
              current = input.length;
              break;
            }
          }
          // Keep the escape sequence verbatim so it is re-emitted unchanged.
          value += inner + escaped;
          current += 2;
        } else {
          value += inner;
          current++;
        }
      }
      current++; // step over the closing quote (or past the end of input)
      if (!danglingQuote || granularity === 'field') {
        tokens.push({ type: 'string', value });
      }
      continue;
    }

    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    if (DIGIT.test(char) || char === '-' || char === '.') {
      let value = '';
      if (char === '-') {
        value += char;
        current++;
      }
      while (
        current < input.length &&
        (DIGIT.test(input[current]) || input[current] === '.')
      ) {
        value += input[current];
        current++;
      }
      tokens.push({ type: 'number', value });
      continue;
    }

    if (LETTER.test(char)) {
      let value = '';
      while (current < input.length && LETTER.test(input[current])) {
        value += input[current];
        current++;
      }
      if (LITERALS.includes(value)) {
        tokens.push({ type: 'name', value });
      } else if (
        current === input.length &&
        LITERALS.some((literal) => literal.startsWith(value))
      ) {
        // A literal cut off by the end of the input (e.g. `tru`): treat it as
        // not yet received rather than as an error.
      } else {
        throw new Error('Invalid token: ' + value + ' is not a valid token!');
      }
      continue;
    }

    // Anything else is not part of a recognised token; skip it.
    current++;
  }

  return tokens;
};
/**
 * Remove trailing tokens that cannot be completed into valid JSON merely by
 * appending closing brackets.
 *
 * Working backwards from the end, the following are dropped repeatedly:
 * - a trailing `:` or `,`;
 * - a number whose text ends in `.` or `-`;
 * - a string or number that directly follows `,` or `{` (presumably because
 *   it may be an object key without a value, or a value still being
 *   streamed).
 *
 * @param tokens Tokens in source order. The array is not mutated; a shortened
 *   copy is returned whenever something is dropped.
 * @returns The longest leading run of `tokens` that ends in a safe position.
 */
const strip = (tokens: Token[]): Token[] => {
  if (tokens.length === 0) {
    return tokens;
  }

  const lastToken = tokens[tokens.length - 1];
  const dropLast = (): Token[] => strip(tokens.slice(0, tokens.length - 1));

  // True when the last token sits right after `,` or `{`. A lone token has no
  // predecessor, so it is kept; previously this case dereferenced `undefined`.
  const followsCommaOrOpenBrace = (): boolean => {
    const previous = tokens[tokens.length - 2];
    if (previous === undefined) {
      return false;
    }
    return (
      previous.type === 'delimiter' ||
      (previous.type === 'brace' && previous.value === '{')
    );
  };

  switch (lastToken.type) {
    case 'separator':
    case 'delimiter':
      return dropLast();
    case 'number': {
      const lastChar = lastToken.value[lastToken.value.length - 1];
      if (lastChar === '.' || lastChar === '-') {
        return dropLast();
      }
      // Otherwise a number is positioned exactly like a string.
      return followsCommaOrOpenBrace() ? dropLast() : tokens;
    }
    case 'string':
      return followsCommaOrOpenBrace() ? dropLast() : tokens;
    default:
      return tokens;
  }
};
/**
 * Append the closing brackets needed to balance every `{` and `[` that is
 * still open at the end of the token list.
 *
 * The closers are pushed onto the array passed in, and that same array is
 * returned.
 *
 * @param tokens Tokens in source order; mutated in place.
 * @returns `tokens`, with the missing `}` / `]` tokens appended innermost first.
 */
const unstrip = (tokens: Token[]) => {
  // Closers still owed, in the order their openers were seen.
  const pending: string[] = [];

  for (const { type, value } of tokens) {
    if (type !== 'brace' && type !== 'paren') {
      continue;
    }
    const opener = type === 'brace' ? '{' : '[';
    const closer = type === 'brace' ? '}' : ']';
    if (value === opener) {
      pending.push(closer);
    } else {
      // Cancel the most recently owed closer of this kind.
      pending.splice(pending.lastIndexOf(closer), 1);
    }
  }

  // Walk from the most recently opened bracket outwards.
  for (let i = pending.length - 1; i >= 0; i--) {
    if (pending[i] === '}') {
      tokens.push({ type: 'brace', value: '}' });
    } else if (pending[i] === ']') {
      tokens.push({ type: 'paren', value: ']' });
    }
  }

  return tokens;
};
/**
 * Serialize tokens back into JSON text.
 *
 * String tokens are wrapped in double quotes again; every other token is
 * emitted as its raw `value`. No whitespace is inserted between tokens.
 *
 * @param tokens Tokens in output order.
 * @returns The concatenated text.
 */
const generate = (tokens: Token[]) =>
  tokens
    .map(({ type, value }) => (type === 'string' ? '"' + value + '"' : value))
    .join('');
/**
 * Parse JSON text that may have been cut off mid-stream.
 *
 * The input is tokenized, trailing tokens that cannot be completed are
 * removed, any brackets still open are closed, and the repaired text is
 * handed to `JSON.parse`.
 *
 * @param input The JSON text received so far.
 * @param granularity `field` is more granular than `object`: use `field` to
 *   complete partially received fields, and `object` to complete partially
 *   received objects.
 * @returns The parsed value, or `undefined` when `input` is empty or
 *   contains only whitespace.
 * @throws Error when the tokenizer meets an invalid bare word, and whatever
 *   `JSON.parse` throws if the repaired text is still not valid JSON.
 */
export const partialParseJson = (
  input: string,
  granularity: PartialParseJsonGranularity,
): Record<string, any> | undefined => {
  const trimmed = input.trim();
  if (trimmed === '') {
    return undefined;
  }
  const usableTokens = strip(tokenize(trimmed, granularity));
  const repairedJson = generate(unstrip(usableTokens));
  return JSON.parse(repairedJson);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment