Skip to content

Instantly share code, notes, and snippets.

@Mr0grog
Created April 26, 2022 03:23
Show Gist options
  • Save Mr0grog/7ff53ffd0f7021e2235bc132362e3770 to your computer and use it in GitHub Desktop.
Save Mr0grog/7ff53ffd0f7021e2235bc132362e3770 to your computer and use it in GitHub Desktop.
Parse Airtable’s ConstantPooledData format.
/**
* Parse Airtable's "ConstantPooledData" format. They recently started using
* this format to compress some API responses, and it appears to be a
* home-grown format.
*
* Call `parseData()` if you have an object with data (e.g. a JSON-parsed API
* response body).
*
* Call `parseString()` if you have a raw string of data (e.g. an API response
* body).
*
* ---------------
*
* The basic format is two lists:
* - `pool` is a list of primitive JS values that can be keys or values of an
* object.
* - `pointerList` is a list of numbers, most of which are indexes into `pool`.
*
* Parse by reading `pointerList` from start to end. The first item is a code
* for what type of data the current value is, or the index of a value in
* `pool`, as follows:
*
* - If the code is 0, it represents an array. The next value is the length of
* the array. Subsequent values should be parsed the same as the basic parsing
* of `pointerList` (so if the pointer is `0`, it's an array, etc.). That is,
* `pointerList` looks like:
*
* 0 <-- Array
* N <-- Number of items in the array
* V1 <-- Data type or pointer to first value in the array
* V2 <-- Data type or pointer to second value in the array
* ...etc... <-- And so on until you have N items
*
* - If the code is 6, it represents an object. The next value is the number of
* keys in the object, followed by pointers to the key names. After that,
* each entry represents the value of one key, in the opposite order of the
* keys. Values should be parsed the same as the basic parsing of
* `pointerList`. So `pointerList` should look like:
*
* 6 <-- Object
* N <-- Number of keys in the object
* K1 <-- Pointer to first key.
* K2 <-- Pointer to second key.
* ...etc... <-- And so on until you have N keys.
* V2 <-- Data type or pointer to second key's value.
* V1 <-- Data type or pointer to first key's value.
*
* - If the code is 2 or 3, the value is `true` or `false`, respectively.
*
* - If the code is 4 or 5, the value is `null` or `undefined`, respectively.
*
* - Any other code is a pointer to a value in `pool`.
*/
import assert from 'node:assert/strict';
/**
 * Reserved codes in the pointer list. An entry that matches one of these keys
 * names the kind of value to produce rather than a position in the pool, and
 * (for arrays and objects) changes how the entries that follow are read.
 *
 * The table is frozen and has no prototype, so a lookup can only ever hit one
 * of the codes listed here and never an inherited name such as `constructor`.
 */
const DATA_TYPES = Object.freeze({
  __proto__: null,
  0: 'ARRAY',
  2: 'TRUE',
  3: 'FALSE',
  4: 'NULL',
  5: 'UNDEFINED',
  6: 'OBJECT',
});
/**
 * Decode an array whose item count sits at `index` in the pointer list. The
 * items follow the count and are each decoded with `readValue()`, so they may
 * themselves be arrays or objects.
 * @param {Array} pointerList The full list of type codes and pool pointers.
 * @param {Array} pool The pool of values that pointers refer to.
 * @param {number} index Position in `pointerList` of the array's item count.
 * @returns {{value: Array, index: number}} The decoded array and the position
 *   of the first entry after it.
 * @throws {AssertionError} If the item count is not a non-negative integer or
 *   the items run past the end of `pointerList`.
 */
function readArray (pointerList, pool, index) {
  const itemCount = pointerList[index];
  // A missing, negative or fractional count would otherwise be accepted
  // silently and produce an array of the wrong size.
  assert.ok(
    Number.isInteger(itemCount) && itemCount >= 0,
    `Invalid array length: ${itemCount} at index ${index}`
  );

  const value = [];
  let currentIndex = index + 1;
  for (let remaining = itemCount; remaining > 0; remaining--) {
    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');
    const parsed = readValue(pointerList, pool, currentIndex);
    value.push(parsed.value);
    // Nested values consume a variable number of entries; continue after them.
    currentIndex = parsed.index;
  }
  return { value, index: currentIndex };
}
/**
 * Decode an object whose key count sits at `index` in the pointer list. The
 * count is followed by one pool pointer per key, then by one value per key.
 * Values are stored in the opposite order of the keys and are each decoded
 * with `readValue()`.
 * @param {Array} pointerList The full list of type codes and pool pointers.
 * @param {Array} pool The pool of values that pointers refer to.
 * @param {number} index Position in `pointerList` of the object's key count.
 * @returns {{value: Object, index: number}} The decoded object and the
 *   position of the first entry after it.
 * @throws {AssertionError} If the key count or a key pointer is invalid, or
 *   the keys or values run past the end of `pointerList`.
 */
function readObject (pointerList, pool, index) {
  const keyCount = pointerList[index];
  assert.ok(
    Number.isInteger(keyCount) && keyCount >= 0,
    `Invalid object key count: ${keyCount} at index ${index}`
  );

  const firstKeyIndex = index + 1;
  const firstValueIndex = firstKeyIndex + keyCount;
  const keyPointers = pointerList.slice(firstKeyIndex, firstValueIndex);
  assert.equal(keyPointers.length, keyCount, 'Could not read expected number of object keys');

  const keys = keyPointers.map((pointer, offset) => {
    // Without this check an out-of-range pointer becomes the key "undefined".
    assert.ok(
      Number.isInteger(pointer) && pointer >= 0 && pointer < pool.length,
      `Invalid key pointer: ${pointer} at index ${firstKeyIndex + offset}`
    );
    return pool[pointer];
  });

  const value = {};
  let currentIndex = firstValueIndex;
  while (keys.length > 0) {
    assert.ok(currentIndex < pointerList.length, 'Tried to read past end of pointerList');
    // Values are stored last-key-first, so take keys from the end.
    const key = keys.pop();
    const parsed = readValue(pointerList, pool, currentIndex);
    // Define the property rather than assigning it: assigning to a key named
    // `__proto__` would replace the object's prototype instead of storing data.
    Object.defineProperty(value, key, {
      value: parsed.value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    currentIndex = parsed.index;
  }
  return { value, index: currentIndex };
}
/**
 * Decode the value that starts at `index` in the pointer list. The entry at
 * `index` is either one of the reserved codes in `DATA_TYPES` or a pointer to
 * a value in `pool`.
 * @param {Array} pointerList The full list of type codes and pool pointers.
 * @param {Array} pool The pool of values that pointers refer to.
 * @param {number} index Position in `pointerList` where the value starts.
 * @returns {{value: any, index: number}} The decoded value and the position of
 *   the first entry after it.
 * @throws {AssertionError} If the entry is neither a reserved code nor a valid
 *   index into `pool`.
 */
function readValue (pointerList, pool, index) {
  const code = pointerList[index];
  switch (DATA_TYPES[code]) {
    case 'ARRAY':
      return readArray(pointerList, pool, index + 1);
    case 'OBJECT':
      return readObject(pointerList, pool, index + 1);
    case 'TRUE':
      return { value: true, index: index + 1 };
    case 'FALSE':
      return { value: false, index: index + 1 };
    case 'NULL':
      return { value: null, index: index + 1 };
    case 'UNDEFINED':
      return { value: undefined, index: index + 1 };
    default:
      // Not a reserved code, so it must be a pool pointer. Checking only
      // `code < pool.length` would let negative or fractional pointers through
      // and quietly decode them as `undefined`.
      assert.ok(
        Number.isInteger(code) && code >= 0 && code < pool.length,
        `Invalid pointer: ${code} at index ${index}`
      );
      return { value: pool[code], index: index + 1 };
  }
}
/**
 * Parse an Airtable ConstantPooledData object into an actual value. This could
 * return any type of JS value, but will usually be an object.
 *
 * The pooled data may be passed directly or wrapped in an object under a
 * `data` property.
 * @param {any} input A JS object with Airtable ConstantPooledData data.
 * @returns {any}
 * @throws {TypeError} If `input` does not have the ConstantPooledData v1 shape.
 * @throws {AssertionError} If the pointer list is malformed or is not consumed
 *   entirely by the top-level value.
 */
export function parseData (input) {
  const raw = input?.data ?? input;
  // Optional chaining all the way down: a missing `pooledData` must lead to
  // the descriptive error below, not to a property-access failure.
  const pooledData = raw?.pooledData;
  const pointerList = pooledData?.pointerList;
  const pool = pooledData?.pool;

  const isValid = Boolean(raw?.isConstantPooledData) &&
    pooledData?.v === 1 &&
    Array.isArray(pointerList) &&
    Array.isArray(pool);
  if (!isValid) {
    throw new TypeError(
      'Input is not Airtable ConstantPooledData v1! ' +
      'It should be an object like: ' +
      '{ isConstantPooledData: true, pooledData: { v: 1, pointerList: [array], pool: [array] } }'
    );
  }

  const parsed = readValue(pointerList, pool, 0);
  // Entries left over after the top-level value mean the list was misread.
  assert.equal(parsed.index, pointerList.length, 'Did not read entire pointerList');
  return parsed.value;
}
/**
 * Decode a raw string of Airtable ConstantPooledData. The format is carried as
 * JSON, so the string is JSON-decoded and the result is handed to
 * `parseData()`.
 * @param {string} rawString String with JSON-encoded ConstantPooledData.
 * @returns {any} Whatever `parseData()` returns for the decoded JSON.
 */
export function parseString (rawString) {
  return parseData(JSON.parse(rawString));
}
#!/usr/bin/env node
import { readFileSync } from 'node:fs';
import { inspect } from 'node:util';
import { parseString } from './constant-pooled-data.mjs';
// Command-line entry point: decode the ConstantPooledData file named by the
// first argument and pretty-print the result.
const filePath = process.argv[2];
if (!filePath) {
  console.error(`
Please specify a path to a file to read. Usage:
./read-constant-pooled-data.mjs path/to/airtable/data.json
`);
  // Report the usage error to the calling shell instead of exiting with 0.
  process.exitCode = 1;
} else {
  const text = readFileSync(filePath, { encoding: 'utf-8' });
  const data = parseString(text);
  // Print nested data up to 20 levels deep, with colors.
  console.log(inspect(data, { showHidden: false, depth: 20, colors: true }));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment