Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Last active March 1, 2024 08:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lifthrasiir/1c7f9c5a421ad39c1af19a9c4f060743 to your computer and use it in GitHub Desktop.
Save lifthrasiir/1c7f9c5a421ad39c1af19a9c4f060743 to your computer and use it in GitHub Desktop.
Brotli bootstrapping based on WOFF2 font
// Brotli bootstrapping based on WOFF2 font
// Kang Seonghoon, 2024-03-01, public domain
// Based on my earlier work for JS1024: https://js1024.fun/demos/2022/18
// Only vaguely tested, YMMV
import { argv, stderr } from 'node:process';
import fs from 'node:fs';
import zlib from 'node:zlib';
const generateBrotliParams = function* () {
for (const mode of [
zlib.constants.BROTLI_MODE_GENERIC,
zlib.constants.BROTLI_MODE_TEXT,
zlib.constants.BROTLI_MODE_FONT
]) {
// further options don't seem to help that well
yield {
[zlib.constants.BROTLI_PARAM_QUALITY]: zlib.constants.BROTLI_MAX_QUALITY,
[zlib.constants.BROTLI_PARAM_MODE]: mode,
[zlib.constants.BROTLI_PARAM_LGWIN]: 16, // force the shortest WBITS encoding possible
};
}
};
const compressBrotliWithEmptyMetadata = (input, params) => {
const c = zlib.createBrotliCompress({ params });
c.flush(zlib.constants.BROTLI_OPERATION_EMIT_METADATA);
c.write(input);
c.end();
return new Promise((resolve, reject) => {
const chunks = [];
c.on('data', chunk => chunks.push(chunk));
c.on('end', () => resolve(Buffer.concat(chunks)));
c.on('error', e => reject(e));
});
};
const fillBrotliMetadata = (b, metadata) => {
if (metadata.length === 0) return b;
const l = metadata.length - 1;
if (l >= 256) throw 'metadata too long';
// the first metablock should have been empty,
// so it can only depend on the stream header, i.e. the window size.
//
// [keys] W: WBITS, L: ISLAST, N: MNIBBLES, S: MSKIPBYTES, s: MSKIPLEN-1
if (b[0] == 0b0001100) {
// SS NNLW sSS NNLW sssssss
// .0001100 -> y0101100 .yyyyyyy
return [(l & 1) << 7 | 0b0101100, l >> 1, ...metadata, ...b.slice(1)];
} else if ((b[0] & 0b11110001) == 0b01100001 && b[1] == 0b00) {
// NNLWWWW SS NNLWWWW ssssssSS ss
// 0110xxx1 ......00 -> 0110xxx1 yyyyyy01 ......yy
return [b[0], (l & 63) << 2 | 0b01, l >> 6, ...metadata, ...b.slice(2)];
} else if ((b[0] & 0b10001111) == 0b00000001 && b[1] == 0b00011) {
// LWWWWWWW SS NN LWWWWWWW sssSS NN sssss
// 0xxx0001 ...00011 -> 0xxx0001 yyy01011 ...yyyyy
return [b[0], (l & 7) << 5 | 0b01011, l >> 3, ...metadata, ...b.slice(2)];
} else {
throw 'unexpected brotli stream';
}
};
const problematicInBase128 = n => {
return n % 128 === 62 /*>*/;
};
const makeFont = async (input, options = {}) => {
const ascii = s => Array.from(s, c => c.charCodeAt(0));
const two = v => [v >>> 8 & 255, v & 255];
const four = v => [v >>> 24 & 255, v >>> 16 & 255, v >>> 8 & 255, v & 255];
const base128 = v => {
const b = [];
while (v > 127) b.unshift(128 | v & 127), v >>>= 7;
b.unshift(128 | v);
b[b.length - 1] &= 127;
return b;
};
const repeat = (n, f) => {
const r = [];
for (let i = 0; i < n; ++i) r.push(...f(i));
return r;
};
// the first glyph has a known advance width and used to scale subsequent glyphs.
// note that we can't really use excess glyphs, which are NOT actually mapped.
//
// normally this is not required, but Firefox seems to do a less accurate calculation
// during the metric calculation, so all advance widths are effectively scaled by (1 +/- epsilon).
// the scale factor seems to be uniform for almost all cases except for small glyphs,
// so we need to use a large enough power-of-two advance glyph for this purpose.
// the "small glyphs" depend on devices; 256 seems to be good enough for mobiles.
const refAdvance = Number(options['reference-advance'] ?? 256);
if (typeof input === 'string') input = ascii(input);
input = [...refAdvance ? two(refAdvance) : [], ...input];
if (!input.every(c => c < 0x80)) throw 'ASCII only';
if (input.length % 2 !== 0) input.push(32);
input.push(...two(0)); // sentinel for the bootstrap loop
// the font file, when reinterpreted as HTML, should look like this:
//
// +--------------------------------+--- file or table sizes
// +-|-+ +--------+ +-|-+ +-----------
// +---------|---|---|--------|--+------------|---|-+---------|-----------
// | header | | |><body | | table dir. | | | brotli | onload="..
// +---------|---|---|--------|--+------------|---|-+---------|-----------
// +---+ +---|----+ +---+ +----|------
// unused header fields brotli metadata
//
// we want that `<body ` and ` onload="` are contained in the same tag.
// in order to do this, we need to ensure that:
//
// 1. size fields in the header should not prevent `<body` to start a new tag.
//
// this is largely done by putting an additional `>` before `<body`,
// so for example a size of 0x3c41 (`\x00\x00<A`) can start a new tag,
// but that tag will be closed before `<body` appears.
//
// it is possible that the attribute value may have started via `<A B="`,
// or a comment started via `<!--`, but both require at least 4 bytes of
// size bytes to match the pattern, and some sizes should exceed at least
// 500 MB, which should be impossible in our usages.
//
// 2. size fields in the table directory should not close the prior `<body` tag,
// or start a new attribute value in that tag.
//
// this is only possible via the lowermost bits of table sizes (both
// original or transformed). they are encoded in base 128, where all but
// last bytes have an MSB set, so only the last byte, i.e. `size % 128`,
// can ever become problematic, i.e. when it encodes `>` (0x3e).
//
// the longest run of problematic bytes in this case is 2, where both
// the original length and transformed length contain the problematic
// last byte in the same table, and the transformed length is less than 128.
// then a sequence like `="` or `='` may occur. but that's a very strong
// requirement, so we can easily show that we never generate such tables.
//
// the following code covers the second requirement by checking for potentially
// problematic table sizes, and increase `excessGlyphs` to avoid them.
// such sizes are uncommon enough (about 3%) so this won't loop forever.
let excessGlyphs = 1; // should be at least 1
let numGlyphs = input.length / 2 + excessGlyphs;
while (
problematicInBase128(2 + 2 * numGlyphs) || // loca size
problematicInBase128(40 + numGlyphs * 2 + ((numGlyphs + 31) >>> 5) * 4) // glyf size
) {
++excessGlyphs;
++numGlyphs;
}
const startCodeStr = String(options['start-code'] ?? '3e4');
const startCode = +startCodeStr;
const endCode = startCode + input.length / 2 - 1;
const maxWidth = 0x7f00; // this is signed and can't be 0xffff
// https://docs.microsoft.com/en-us/typography/opentype/spec/head
const head = [
...two(1), // majorVersion
...two(0), // minorVersion
...four(0), // fontRevision
...four(0), // checksumAdjustment (WOFF2 recomputes this, so we don't have to be correct)
...four(0x5f0f3cf5), // magicNumber
...two(1<<0 | 1<<1 | 1<<3), // flags
...two(16), // unitsPerEm (the minimum possible)
0,0,0,0,0,0,0,0, // created
0,0,0,0,0,0,0,0, // modified
...two(0), ...two(0), // xMin, yMin
...two(maxWidth), ...two(0), // xMax, yMax (excess xMax is okay, as long as it's positive)
...two(0), // macStyle
...two(0), // lowestRectPPEM (0 is probably okay)
...two(0), // fontDirectionHint (deprecated)
...two(0), // indexToLocFormat
...two(0), // glyphDataFormat
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/hhea
const hhea = [
...two(1), // majorVersion
...two(0), // minorVersion
...two(0), // ascender
...two(0), // descender
...two(0), // lineGap
...two(maxWidth), // advanceWidthmax
...two(0), // minLeftSideBearing
...two(0), // minRightSideBearing
...two(maxWidth), // xMaxExtent
...two(1), // caretSlopeRise
...two(0), // caretSlopeRun
...two(0), // caretOffset
...two(0), ...two(0), ...two(0), ...two(0), // reserved0..3
...two(0), // metricDataFormat
...two(numGlyphs), // numberOfHMetrics
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/maxp
const maxp = [
...four(0x00010000), // version
...two(numGlyphs), // numGlyphs
...two(1), // maxPoints (at least 1)
...two(1), // maxContours (at least 1)
...two(0), // maxCompositePoints
...two(0), // maxCompositeContours
...two(2), // maxZones
...two(0), // maxTwilightPoints
...two(0), // maxStorage
...two(0), // maxFunctionDefs
...two(0), // maxInstructionDefs
...two(0), // maxStackElements
...two(0), // maxSizeOfInstructions
...two(0), // maxComponentElements
...two(0), // maxComponentDepth
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/os2
const os2 = [
...two(0), // version
...two(0), // xAvgCharWidth
...two(500), // usWeightClass (anything from 100 to 900 will work)
...two(5), // usWidthClass (normal)
...two(0), // fsType
...two(0), // ySubscriptXSize
...two(0), // ySubscriptYSize
...two(0), // ySubscriptXOffset
...two(0), // ySubscriptYOffset
...two(0), // ySuperscriptXSize
...two(0), // ySuperscriptYSize
...two(0), // ySuperscriptXOffset
...two(0), // ySuperscriptYOffset
...two(0), // yStrikeoutSize
...two(0), // yStrikeoutPosition
...two(0), // sFamilyClass
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // panose (matches any spec)
...four(0), ...four(0), ...four(0), ...four(0), // ulUnicodeRange1..4
...four(0x00000000), // achVendorId
...two(0x0040), // fsSelection (regular)
...two(0), // usFirstCharIndex (OTS does fix this so leave it as 0)
...two(0), // usLastCharIndex (ditto)
...two(0), // sTypoAscender
...two(0), // sTypoDescender
...two(0), // sTypoLineGap
...two(0), // usWinAscent
...two(0), // usWinDescent
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/glyf
const glyfBboxBitmapSize = ((numGlyphs + 31) >>> 5) * 4;
const glyf = [
...two(0), // reserved
...two(0), // optionFlags
...two(numGlyphs), // numGlyphs
...two(0), // indexFormat (= head.indexToLocFormat)
...four(2 * numGlyphs), // nContourStreamSize
...four(1), // nPointsStreamSize
...four(1), // flagStreamSize
...four(2), // glyphStreamSize
...four(0), // compositeStreamSize
...four(glyfBboxBitmapSize), // bboxStreamSize
...four(0), // instructionStreamSize
// nContourStream = [1, 0...]
...repeat(numGlyphs, i => two(i === 0 ? 1 : 0)),
// nPointsStream = [1]
1,
// flagStream = [0]
// flag 0: x = 0, y = 0 - (8 additional bits)
0,
// glyphStream
0b00000000, // additional bits for flag 0
0, // instruction length for the glyph
// bboxBitmap = [0...]
...repeat(glyfBboxBitmapSize, _ => [0]),
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/name
const name = [
...two(0), // version
...two(0), // count
...two(6), // storageOffset (can't be zero, OTS will complain)
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/post
const post = [
...four(0x00030000), // version
...four(0), // italicAngle
...two(0), // underlinePosition
...two(0), // underlineThickness
...four(0), // isFixedPitch
...four(0), ...four(0), // min/maxMemType42 (suggestive, can be left as 0)
...four(0), ...four(0), // min/maxMemType1 (ditto)
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
const cmap = [
...two(0), // version
...two(1), // numTables
...two(3), // platformID (Microsoft)
...two(10), // encodingID (Unicode full repertoire)
...four(12), // subtableOffset
// cmap format 12 subtable:
...two(12), // format
...two(0), // reserved
...four(28), // length
...four(0), // language (default)
...four(1), // numGroups
...four(startCode - excessGlyphs), // startCharCode
...four(startCode - excessGlyphs + numGlyphs - 1), // endCharCode
...four(0), // startGlyphId
];
// https://docs.microsoft.com/en-us/typography/opentype/spec/hmtx
const hmtx = [
0x03, // flags (both lsb and leftSideBearing inferred)
...repeat(excessGlyphs, i => two(0)),
...input, // advanceWidth[numberOfHMetrics]
];
let tables = [
{ tag: 0, transform: 0, data: cmap },
{ tag: 1, transform: 0, data: head },
{ tag: 2, transform: 0, data: hhea },
{ tag: 4, transform: 0, data: maxp },
{ tag: 5, transform: 0, data: name },
{ tag: 6, transform: 0, data: os2 },
{ tag: 7, transform: 0, data: post },
// unlike other tables, the glyf table can be reconstructed in multiple ways
// and WOFF2 specifically ignores the original size of the glyf table.
{ tag: 10, transform: 0, data: glyf, originalSize: 0 },
{ tag: 3, transform: 1, data: hmtx, originalSize: 4 * numGlyphs },
];
// basic tabu search, because full search is too slow and I didn't bother
// to improve that beyond any simple algorithm, good enough nevertheless
const shuffleSeen = new Set();
let bestShuffleSize = Infinity;
let bestShuffle = tables.map((_, i) => i);
let shuffle = tables.map((_, i) => i);
let moreJumps = Number(options.jumps ?? Infinity);
const numPasses = Number(options.passes ?? 60);
for (let pass = 0; pass < numPasses; ++pass) {
let bestSwap = null;
let saved;
let moreNeighbor = false;
for (let i = 0; i < shuffle.length; shuffle[i++] = saved) {
saved = shuffle[i];
next: for (let j = 0; j < i; shuffle[j++] = shuffle[i]) {
shuffle[i] = shuffle[j];
shuffle[j] = saved;
const shuffleKey = shuffle.toString();
if (shuffleSeen.has(shuffleKey)) continue;
shuffleSeen.add(shuffleKey);
moreNeighbor = true;
let seen_glyf = false, seen_hmtx = false, seen_hhea = false;
let uncompressed = [];
for (const k of shuffle) {
// for some reason, hhea and glyf should precede hmtx
// in addition to glyf preceding loca...
if (tables[k].tag === 2) { // hhea
if (seen_hmtx) continue next;
seen_hhea = true;
}
if (tables[k].tag === 3) { // hmtx
if (!seen_glyf || !seen_hhea) continue next;
seen_hmtx = true;
}
if (tables[k].tag === 10) { // glyf
if (seen_hmtx) continue next;
seen_glyf = true;
}
uncompressed.push(...tables[k].data);
}
uncompressed = new Uint8Array(uncompressed);
for (const params of generateBrotliParams()) {
const compressed = zlib.brotliCompressSync(uncompressed, params);
if (bestShuffleSize > compressed.length) {
bestShuffleSize = compressed.length;
bestSwap = [i, j];
}
}
}
}
if (bestSwap) {
stderr.write('!');
const [i, j] = bestSwap;
const temp = shuffle[i];
shuffle[i] = shuffle[j];
shuffle[j] = temp;
bestShuffle = [...shuffle];
} else {
if (moreNeighbor) {
stderr.write('.');
} else if (--moreJumps > 0) {
stderr.write('?');
} else {
break;
}
for (let k = 0; k < 2; ++k) {
const i = Math.random() * (shuffle.length - 1) + 1 | 0;
const j = Math.random() * i | 0;
const temp = shuffle[i];
shuffle[i] = shuffle[j];
shuffle[j] = temp;
}
}
}
const shuffled = [];
for (const k of bestShuffle) {
shuffled.push(tables[k]);
if (tables[k].tag === 10) { // glyf follows loca
shuffled.push({ tag: 11, transform: 0, originalSize: 2 + 2 * numGlyphs });
}
}
tables = shuffled;
const out = [
...ascii('wOF2'), // signature
...four(0x00010000), // flavor (sfnt version)
...four(0), // total size (patched later)
...two(tables.length), // # tables
...two(0), // reserved
...four(0), // total uncompressed size (unused but should be "reasonable", patched later)
...four(0), // total compressed size (patched later)
...two(0), // font major version (unused)
...two(0), // font minor version (unused)
...four(0), // metadata block offset
// encoded lengths may accidentally match `</?[a-zA-Z]`, so we have to close it
...ascii('><bo'), // metadata compressed size (current impl doesn't check them if offset=0)
...ascii('dy '), // metadata uncompressed size (ditto)
...four(0), // private data block offset (may be patched later)
...four(0), // private data block size (may be patched later)
];
for (const { tag, transform, data, originalSize } of tables) {
out.push(tag | transform << 6);
if (typeof originalSize === 'number') out.push(...base128(originalSize));
out.push(...base128(data ? data.length : 0));
}
const headerSize = out.length;
// this goes into the first (empty) metablock in the Brotli stream,
// which gets placed much earlier so can be safely placed without much interference.
const write = (options.func === 'document.write');
const bootstrapInMetadata = ' onload="' +
"C=$.getContext`2d`;" +
"new FontFace('x','url(#')" +
".load(K=String.fromCharCode)" +
".then(F=>{" +
(write ? "_=document;_" : "document") + ".fonts.add(F);" +
"C.font='1pc x';" +
(refAdvance ?
"for(" +
"A=I='';" +
`W=C.measureText(K(${startCodeStr}+I)).width;` +
"I++?" +
"A+=K(V>>8,V&255):" +
`F=W/${refAdvance}` +
")" +
"V=W/F+.1;"
:
"for(" +
"A=I='';" +
`W=C.measureText(K(${startCodeStr}+I++)).width;` +
"A+=K(W>>8,W&255)" +
")" +
";"
) +
(write ? "_.write" : "eval") + "(A)" +
"})" +
'"><canvas id=$>';
// this goes into the private data following the entire font data.
const bootstrapInPrivate = ``;
let uncompressed = [];
for (const { data } of tables) {
if (data) uncompressed.push(...data);
}
uncompressed = new Uint8Array(uncompressed);
let bestCompressed = null;
let bestCompressedSize = Infinity;
for (const params of generateBrotliParams()) {
let compressed;
if (bootstrapInMetadata.length > 0) {
compressed = await compressBrotliWithEmptyMetadata(uncompressed, params);
} else {
compressed = zlib.brotliCompressSync(uncompressed, params);
}
if (bestCompressedSize > compressed.length) {
bestCompressedSize = compressed.length;
bestCompressed = compressed;
}
}
if (bootstrapInMetadata.length > 0) {
bestCompressed = fillBrotliMetadata(bestCompressed, ascii(bootstrapInMetadata));
}
out.push(...bestCompressed);
while (out.length % 4 !== 0) out.push(0);
const fontSize = out.length;
out.push(...ascii(bootstrapInPrivate));
const totalSize = out.length;
out.splice(8, 4, ...four(totalSize)); // total size
out.splice(16, 4, ...four(totalSize + 1)); // total uncompressed size (faked)
out.splice(20, 4, ...four(fontSize - headerSize)); // total compressed size
if (bootstrapInPrivate.length > 0) {
out.splice(40, 4, ...four(fontSize)); // private data block offset
out.splice(44, 4, ...four(bootstrapInPrivate.length)); // private data block size
}
console.log(out.length);
return new Uint8Array(out);
};
const args = argv.slice(2);
const options = {};
while (args.length > 0 && args[0].startsWith('--')) {
const m = args.shift().match(/^--([^=]*)(=(.*))?$/);
options[m[1]] = m[2] ? m[3] : true;
}
if (args.length < 2) {
console.log(`\
Usage: ${argv[0]} ${argv[1]} {options} infile outfile
--passes=# Relative amount of search done.
--jumps=# Relative amount of pertubartion when stuck.
--func=document.write Assumes \`infile\` is an HTML file.
--func=eval Assumes \`infile\` is a JavaScript file.
--start-code=### First code point to be used, as a JS token.
--reference-advance=# If set to zero, sacrifice some Firefox compat.
`);
} else {
const path = args[0];
const input = [...fs.readFileSync(path)];
options.func ??= path.match(/\.html?$/i) ? 'document.write' : 'eval';
const out = await makeFont(input, options);
fs.writeFileSync(args[1], out);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment