Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Last active September 10, 2021 08:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lifthrasiir/e25fdb17dc88b6235739f8bdbf827473 to your computer and use it in GitHub Desktop.
Save lifthrasiir/e25fdb17dc88b6235739f8bdbf827473 to your computer and use it in GitHub Desktop.
Brotli preset dictionary recovery experiment
import * as fs from 'fs';
import * as crypto from 'crypto';
import * as zlib from 'zlib';
function makeBrotliPreset() {
const buf = [];
let bits = 0;
let nbits = 0;
const write = (v, n=1) => {
if ((v >>> 0) >= (1 << n)) throw 'write failed';
bits |= v << nbits;
nbits += n;
while (nbits >= 8) {
buf.push(bits & 0xff);
bits >>= 8;
nbits -= 8;
}
};
const flush = () => {
if (nbits > 0) {
buf.push(bits);
bits = nbits = 0;
}
};
const ndbits = [0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5];
const totalLength = ndbits.reduce((a, b, i) => a + (b ? i << b : 0), 0);
const nibbles = totalLength < (1 << 16) ? 4 : totalLength < (1 << 20) ? 5 : 6;
write(0b0011, 4); // WBITS: 18 (should be larger than totalLength)
write(1); // ISLAST: true
write(0); // ISLASTEMPTY: false
write(nibbles - 4, 2); // MNIBBLES
write(totalLength - 1, 4 * nibbles); // MLEN
write(0); // NBLTYPESL: 1
write(0); // NBLTYPESI: 1
write(0); // NBLTYPESD: 1
write(0, 2); // NPOSTFIX: 0
write(0, 4); // NDIRECT: 0<<NPOSTFIX = 0
write(0, 2); // context mode for literal block 0 (unused)
write(0); // NTREESL: 1
write(0); // NTREESD: 1
// literal tree
if (false) {
write(1, 2); // HSKIP: 1
write(1 - 1, 2); // NSYM: 1
write(0, 8); // literal 0 (unused)
} else {
// since we don't use literals it is relatively free to adjust this tree
// so that the beginning of block commands is aligned to the byte boundary
write(0, 2); // HSKIP: 0
write(0b0111, 4); // code 1: length 1 (0)
if (nibbles % 2 != 0) {
write(0b00, 2); // length 0
write(0b00, 2); // length 0
}
write(0b0111, 4); // length 1 (1, unused)
write(0b0, 1); // literal 0: length 1 (0, ununsed)
write(0b0, 1); // literal 1: length 1 (1, ununsed)
}
// insert-and-copy (IaC) length code tree
// - 130: literal code 0 = length 0, copy code 2 = length 4
// ...
// - 135: literal code 0 = length 0, copy code 7 = length 9
// - 192: literal code 0 = length 0, copy code 8 = length 10..11 (1 extra bit)
// - 193: literal code 0 = length 0, copy code 9 = length 12..13 (1 extra bit)
// - 194: literal code 0 = length 0, copy code 10 = length 14..17 (2 extra bits)
// - 195: literal code 0 = length 0, copy code 11 = length 18..21 (2 extra bits)
// - 196: literal code 0 = length 0, copy code 12 = length 22..29 (3 extra bits)
// the code length is set to (5 - # extra bits), so the entire code is fixed length.
write(0, 2); // HSKIP: 0
write(0b00, 2); // code 1: length 0
write(0b01, 2); // code 2: length 4 (0111)
write(0b10, 2); // code 3: length 3 (001)
write(0b10, 2); // code 4: length 3 (101)
write(0b00, 2); // code 0: length 0
write(0b01, 2); // code 5: length 4 (1111)
write(0b0111, 4); // code 17: length 1 (0)
write(0b00, 2); // code 6: length 0
write(0b10, 2); // code 16: length 3 (011)
{ // IaC code 0..129: length 0
write(0b0, 1); write(3 - 3, 3); // repeat count = 3
write(0b0, 1); write(9 - 3, 3); // repeat count = 8 * (3 - 2) + 9 = 17
write(0b0, 1); write(10 - 3, 3); // repeat count = 8 * (17 - 2) + 10 = 130
}
{ // IaC code 130..141: length 5 (00101..11111 in reverse)
write(0b1111, 4);
write(0b011, 3); write(4 - 3, 2); // repeat count = 4
write(0b011, 3); write(3 - 3, 2); // repeat count = 4 * (4 - 2) + 3 = 11
}
{ // IaC code 142..191: length 0
write(0b0, 1); write(7 - 3, 3); // repeat count = 7
write(0b0, 1); write(10 - 3, 3); // repeat count = 8 * (7 - 2) + 10 = 50
}
write(0b101, 3); // IaC code 192: length 4 (0001)
write(0b101, 3); // IaC code 193: length 4 (1001)
write(0b001, 3); // IaC code 194: length 3 (010)
write(0b001, 3); // IaC code 195: length 3 (110)
write(0b0111, 4); // IaC code 196: length 2 (00)
const iacCodes = [
-1, -1, -1, -1,
// prefix code is reversed but extra bits are not, so the lookup table is simpler
0b00101, // length 4 (code 130)
0b10101, // length 5 (code 131)
0b01101, // length 6 (code 132)
0b11101, // length 7 (code 133)
0b00011, // length 8 (code 134)
0b10011, // length 9 (code 135)
0b00001, // length 10 (code 192 + extra 0)
0b10001, // length 11 (code 192 + extra 1)
0b01001, // length 12 (code 193 + extra 0)
0b11001, // length 13 (code 193 + extra 1)
0b00010, // length 14 (code 194 + extra 00)
0b01010, // length 15 (code 194 + extra 01)
0b10010, // length 16 (code 194 + extra 10)
0b11010, // length 17 (code 194 + extra 11)
0b00110, // length 18 (code 195 + extra 00)
0b01110, // length 19 (code 195 + extra 01)
0b10110, // length 20 (code 195 + extra 10)
0b11110, // length 21 (code 195 + extra 11)
0b00000, // length 22 (code 196 + extra 000)
0b00100, // length 23 (code 196 + extra 001)
0b01000, // length 24 (code 196 + extra 010)
];
// distance code tree
// - 0..15: special codes (unused)
// - no direct distances
// - 16, 17: 2 x 2^1 distances starting from 1
// - 18, 19: 2 x 2^2 distances starting from 5
// - 20, 21: 2 x 2^3 distances starting from 13
// - 22, 23: 2 x 2^4 distances starting from 29
// ...
// - 44, 45: 2 x 2^15 distances starting from 131069
// - 46, 47: 2 x 2^16 distances starting from 262141
write(3, 2); // HSKIP: 3
write(0b00, 2); // code 4: length 0
write(0b00, 2); // code 0: length 0
write(0b011, 3); // code 5: length 2 (01)
write(0b011, 3); // code 17: length 2 (11)
write(0b00, 2); // code 6: length 0
write(0b0111, 4); // code 16: length 1 (0)
{ // distance code 0..15: length 0
write(0b11, 2); write(3 - 3, 3); // repeat count = 3
write(0b11, 2); write(8 - 3, 3); // repeat count = 8 * (3 - 2) + 8 = 16
}
{ // distance code 16..47: length 5 (00000..11111 in reverse)
write(0b01, 2);
write(0b0, 1); write(3 - 3, 2); // repeat count = 3
write(0b0, 1); write(5 - 3, 2); // repeat count = 4 * (3 - 2) + 5 = 9
write(0b0, 1); write(3 - 3, 2); // repeat count = 4 * (9 - 2) + 3 = 31
}
// bit reversal
// also possible with: (x * 0b1_00001_00001_00001_00001_00001 & 0b1_001000_000010_010000_000100) % 63
const distCodes = [
0b00000, 0b10000, 0b01000, 0b11000, 0b00100, 0b10100, 0b01100, 0b11100,
0b00010, 0b10010, 0b01010, 0b11010, 0b00110, 0b10110, 0b01110, 0b11110,
0b00001, 0b10001, 0b01001, 0b11001, 0b00101, 0b10101, 0b01101, 0b11101,
0b00011, 0b10011, 0b01011, 0b11011, 0b00111, 0b10111, 0b01111, 0b11111,
];
if (nbits === 0) {
console.log(btoa(String.fromCharCode(...buf)));
} else {
console.log(`warning: ${nbits} bits not yet flushed`);
}
let nbytes = 0;
for (let wordLen = 0; wordLen < ndbits.length; ++wordLen) {
if (!ndbits[wordLen]) continue;
const nwords = 1 << ndbits[wordLen];
for (let wordIdx = 0; wordIdx < nwords; ++wordIdx) {
let dist = nbytes + wordIdx;
nbytes += wordLen;
let code = 0;
let extraLen;
for (extraLen = 1; dist >= (2 << extraLen); ++extraLen) {
code += 2;
dist -= 2 << extraLen;
}
if (dist >= (1 << extraLen)) {
code += 1;
dist -= 1 << extraLen;
}
write(iacCodes[wordLen], 5);
write(distCodes[code], 5);
write(dist, extraLen);
}
}
flush();
return new Uint8Array(buf);
}
const zpreset = makeBrotliPreset();
//fs.writeFileSync('brotli-preset.br', zpreset);
console.log(zpreset.length, zpreset);
const preset = zlib.brotliDecompressSync(zpreset);
if (true) {
const hash = crypto.createHash('sha256').update(preset).digest('hex');
if (hash !== '20e42eb1b511c21806d4d227d07e5dd06877d8ce7b3a817f378f313653f35c70') throw 'preset recovery failed';
console.log(preset.length, '(verified)');
} else {
console.log(preset.length, preset.toString('utf-8'));
}
// greatly minified version
function makeBrotliPreset_COMPACT() {
let r, b, n, z, w, d, e, f;
r=[];
b=n=z=0;
f=i=>{for(n+=i;n>7;n-=8)r.push(b&255),b>>=8};
[40,152,97,209,26,138,12,124,68,179,18,73,128,184,45,100,156,211,1,28,56].map((v,i)=>{
for(w=0;w<(32<<v%7);z+=4+i) {
d=z+w++;
for(e=0;d>>e+2;d-=2<<++e);
b|=(e*8738&4740)%63*32+v/7<<n;f(9);
b|=d%(2<<e)*2+(d>>e+1)<<n;f(e+2);
}
});
return'U5/fAQA4OCClQ2D/NdDb5IaNGzcQ'+btoa(String.fromCharCode(...r,b));
}
const zpreset2 = atob(makeBrotliPreset_COMPACT());
console.log('compact verification:', zpreset.length === zpreset2.length && [...zpreset2].every((c, i) => c.charCodeAt(0) === zpreset[i]));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment