Last active
September 10, 2021 08:16
-
-
Save lifthrasiir/e25fdb17dc88b6235739f8bdbf827473 to your computer and use it in GitHub Desktop.
Brotli preset dictionary recovery experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as fs from 'fs'; | |
import * as crypto from 'crypto'; | |
import * as zlib from 'zlib'; | |
function makeBrotliPreset() { | |
const buf = []; | |
let bits = 0; | |
let nbits = 0; | |
const write = (v, n=1) => { | |
if ((v >>> 0) >= (1 << n)) throw 'write failed'; | |
bits |= v << nbits; | |
nbits += n; | |
while (nbits >= 8) { | |
buf.push(bits & 0xff); | |
bits >>= 8; | |
nbits -= 8; | |
} | |
}; | |
const flush = () => { | |
if (nbits > 0) { | |
buf.push(bits); | |
bits = nbits = 0; | |
} | |
}; | |
const ndbits = [0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5]; | |
const totalLength = ndbits.reduce((a, b, i) => a + (b ? i << b : 0), 0); | |
const nibbles = totalLength < (1 << 16) ? 4 : totalLength < (1 << 20) ? 5 : 6; | |
write(0b0011, 4); // WBITS: 18 (should be larger than totalLength) | |
write(1); // ISLAST: true | |
write(0); // ISLASTEMPTY: false | |
write(nibbles - 4, 2); // MNIBBLES | |
write(totalLength - 1, 4 * nibbles); // MLEN | |
write(0); // NBLTYPESL: 1 | |
write(0); // NBLTYPESI: 1 | |
write(0); // NBLTYPESD: 1 | |
write(0, 2); // NPOSTFIX: 0 | |
write(0, 4); // NDIRECT: 0<<NPOSTFIX = 0 | |
write(0, 2); // context mode for literal block 0 (unused) | |
write(0); // NTREESL: 1 | |
write(0); // NTREESD: 1 | |
// literal tree | |
if (false) { | |
write(1, 2); // HSKIP: 1 | |
write(1 - 1, 2); // NSYM: 1 | |
write(0, 8); // literal 0 (unused) | |
} else { | |
// since we don't use literals it is relatively free to adjust this tree | |
// so that the beginning of block commands is aligned to the byte boundary | |
write(0, 2); // HSKIP: 0 | |
write(0b0111, 4); // code 1: length 1 (0) | |
if (nibbles % 2 != 0) { | |
write(0b00, 2); // length 0 | |
write(0b00, 2); // length 0 | |
} | |
write(0b0111, 4); // length 1 (1, unused) | |
write(0b0, 1); // literal 0: length 1 (0, ununsed) | |
write(0b0, 1); // literal 1: length 1 (1, ununsed) | |
} | |
// insert-and-copy (IaC) length code tree | |
// - 130: literal code 0 = length 0, copy code 2 = length 4 | |
// ... | |
// - 135: literal code 0 = length 0, copy code 7 = length 9 | |
// - 192: literal code 0 = length 0, copy code 8 = length 10..11 (1 extra bit) | |
// - 193: literal code 0 = length 0, copy code 9 = length 12..13 (1 extra bit) | |
// - 194: literal code 0 = length 0, copy code 10 = length 14..17 (2 extra bits) | |
// - 195: literal code 0 = length 0, copy code 11 = length 18..21 (2 extra bits) | |
// - 196: literal code 0 = length 0, copy code 12 = length 22..29 (3 extra bits) | |
// the code length is set to (5 - # extra bits), so the entire code is fixed length. | |
write(0, 2); // HSKIP: 0 | |
write(0b00, 2); // code 1: length 0 | |
write(0b01, 2); // code 2: length 4 (0111) | |
write(0b10, 2); // code 3: length 3 (001) | |
write(0b10, 2); // code 4: length 3 (101) | |
write(0b00, 2); // code 0: length 0 | |
write(0b01, 2); // code 5: length 4 (1111) | |
write(0b0111, 4); // code 17: length 1 (0) | |
write(0b00, 2); // code 6: length 0 | |
write(0b10, 2); // code 16: length 3 (011) | |
{ // IaC code 0..129: length 0 | |
write(0b0, 1); write(3 - 3, 3); // repeat count = 3 | |
write(0b0, 1); write(9 - 3, 3); // repeat count = 8 * (3 - 2) + 9 = 17 | |
write(0b0, 1); write(10 - 3, 3); // repeat count = 8 * (17 - 2) + 10 = 130 | |
} | |
{ // IaC code 130..141: length 5 (00101..11111 in reverse) | |
write(0b1111, 4); | |
write(0b011, 3); write(4 - 3, 2); // repeat count = 4 | |
write(0b011, 3); write(3 - 3, 2); // repeat count = 4 * (4 - 2) + 3 = 11 | |
} | |
{ // IaC code 142..191: length 0 | |
write(0b0, 1); write(7 - 3, 3); // repeat count = 7 | |
write(0b0, 1); write(10 - 3, 3); // repeat count = 8 * (7 - 2) + 10 = 50 | |
} | |
write(0b101, 3); // IaC code 192: length 4 (0001) | |
write(0b101, 3); // IaC code 193: length 4 (1001) | |
write(0b001, 3); // IaC code 194: length 3 (010) | |
write(0b001, 3); // IaC code 195: length 3 (110) | |
write(0b0111, 4); // IaC code 196: length 2 (00) | |
const iacCodes = [ | |
-1, -1, -1, -1, | |
// prefix code is reversed but extra bits are not, so the lookup table is simpler | |
0b00101, // length 4 (code 130) | |
0b10101, // length 5 (code 131) | |
0b01101, // length 6 (code 132) | |
0b11101, // length 7 (code 133) | |
0b00011, // length 8 (code 134) | |
0b10011, // length 9 (code 135) | |
0b00001, // length 10 (code 192 + extra 0) | |
0b10001, // length 11 (code 192 + extra 1) | |
0b01001, // length 12 (code 193 + extra 0) | |
0b11001, // length 13 (code 193 + extra 1) | |
0b00010, // length 14 (code 194 + extra 00) | |
0b01010, // length 15 (code 194 + extra 01) | |
0b10010, // length 16 (code 194 + extra 10) | |
0b11010, // length 17 (code 194 + extra 11) | |
0b00110, // length 18 (code 195 + extra 00) | |
0b01110, // length 19 (code 195 + extra 01) | |
0b10110, // length 20 (code 195 + extra 10) | |
0b11110, // length 21 (code 195 + extra 11) | |
0b00000, // length 22 (code 196 + extra 000) | |
0b00100, // length 23 (code 196 + extra 001) | |
0b01000, // length 24 (code 196 + extra 010) | |
]; | |
// distance code tree | |
// - 0..15: special codes (unused) | |
// - no direct distances | |
// - 16, 17: 2 x 2^1 distances starting from 1 | |
// - 18, 19: 2 x 2^2 distances starting from 5 | |
// - 20, 21: 2 x 2^3 distances starting from 13 | |
// - 22, 23: 2 x 2^4 distances starting from 29 | |
// ... | |
// - 44, 45: 2 x 2^15 distances starting from 131069 | |
// - 46, 47: 2 x 2^16 distances starting from 262141 | |
write(3, 2); // HSKIP: 3 | |
write(0b00, 2); // code 4: length 0 | |
write(0b00, 2); // code 0: length 0 | |
write(0b011, 3); // code 5: length 2 (01) | |
write(0b011, 3); // code 17: length 2 (11) | |
write(0b00, 2); // code 6: length 0 | |
write(0b0111, 4); // code 16: length 1 (0) | |
{ // distance code 0..15: length 0 | |
write(0b11, 2); write(3 - 3, 3); // repeat count = 3 | |
write(0b11, 2); write(8 - 3, 3); // repeat count = 8 * (3 - 2) + 8 = 16 | |
} | |
{ // distance code 16..47: length 5 (00000..11111 in reverse) | |
write(0b01, 2); | |
write(0b0, 1); write(3 - 3, 2); // repeat count = 3 | |
write(0b0, 1); write(5 - 3, 2); // repeat count = 4 * (3 - 2) + 5 = 9 | |
write(0b0, 1); write(3 - 3, 2); // repeat count = 4 * (9 - 2) + 3 = 31 | |
} | |
// bit reversal | |
// also possible with: (x * 0b1_00001_00001_00001_00001_00001 & 0b1_001000_000010_010000_000100) % 63 | |
const distCodes = [ | |
0b00000, 0b10000, 0b01000, 0b11000, 0b00100, 0b10100, 0b01100, 0b11100, | |
0b00010, 0b10010, 0b01010, 0b11010, 0b00110, 0b10110, 0b01110, 0b11110, | |
0b00001, 0b10001, 0b01001, 0b11001, 0b00101, 0b10101, 0b01101, 0b11101, | |
0b00011, 0b10011, 0b01011, 0b11011, 0b00111, 0b10111, 0b01111, 0b11111, | |
]; | |
if (nbits === 0) { | |
console.log(btoa(String.fromCharCode(...buf))); | |
} else { | |
console.log(`warning: ${nbits} bits not yet flushed`); | |
} | |
let nbytes = 0; | |
for (let wordLen = 0; wordLen < ndbits.length; ++wordLen) { | |
if (!ndbits[wordLen]) continue; | |
const nwords = 1 << ndbits[wordLen]; | |
for (let wordIdx = 0; wordIdx < nwords; ++wordIdx) { | |
let dist = nbytes + wordIdx; | |
nbytes += wordLen; | |
let code = 0; | |
let extraLen; | |
for (extraLen = 1; dist >= (2 << extraLen); ++extraLen) { | |
code += 2; | |
dist -= 2 << extraLen; | |
} | |
if (dist >= (1 << extraLen)) { | |
code += 1; | |
dist -= 1 << extraLen; | |
} | |
write(iacCodes[wordLen], 5); | |
write(distCodes[code], 5); | |
write(dist, extraLen); | |
} | |
} | |
flush(); | |
return new Uint8Array(buf); | |
} | |
const zpreset = makeBrotliPreset(); | |
//fs.writeFileSync('brotli-preset.br', zpreset); | |
console.log(zpreset.length, zpreset); | |
const preset = zlib.brotliDecompressSync(zpreset); | |
if (true) { | |
const hash = crypto.createHash('sha256').update(preset).digest('hex'); | |
if (hash !== '20e42eb1b511c21806d4d227d07e5dd06877d8ce7b3a817f378f313653f35c70') throw 'preset recovery failed'; | |
console.log(preset.length, '(verified)'); | |
} else { | |
console.log(preset.length, preset.toString('utf-8')); | |
} | |
// greatly minified version | |
function makeBrotliPreset_COMPACT() { | |
let r, b, n, z, w, d, e, f; | |
r=[]; | |
b=n=z=0; | |
f=i=>{for(n+=i;n>7;n-=8)r.push(b&255),b>>=8}; | |
[40,152,97,209,26,138,12,124,68,179,18,73,128,184,45,100,156,211,1,28,56].map((v,i)=>{ | |
for(w=0;w<(32<<v%7);z+=4+i) { | |
d=z+w++; | |
for(e=0;d>>e+2;d-=2<<++e); | |
b|=(e*8738&4740)%63*32+v/7<<n;f(9); | |
b|=d%(2<<e)*2+(d>>e+1)<<n;f(e+2); | |
} | |
}); | |
return'U5/fAQA4OCClQ2D/NdDb5IaNGzcQ'+btoa(String.fromCharCode(...r,b)); | |
} | |
const zpreset2 = atob(makeBrotliPreset_COMPACT()); | |
console.log('compact verification:', zpreset.length === zpreset2.length && [...zpreset2].every((c, i) => c.charCodeAt(0) === zpreset[i])); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment