Skip to content

Instantly share code, notes, and snippets.

@corwin-of-amber
Created September 15, 2021 18:01
Show Gist options
  • Save corwin-of-amber/a855b14b7608447d6e67b9315382fa80 to your computer and use it in GitHub Desktop.
Save corwin-of-amber/a855b14b7608447d6e67b9315382fa80 to your computer and use it in GitHub Desktop.
Reading lzma2 information from an xz container
/**
* Based on:
* + https://tukaani.org/xz/xz-file-format-1.0.4.txt
* + https://github.com/Rogdham/python-xz/tree/master/src/xz
*/
import assert from 'assert';
import fs from 'fs';
import struct from 'python-struct';
// From https://github.com/SortaCore/lzma2-js
import lzma from './lzma2-js/lzma2_worker';
/**
 * Minimal reader for an `.xz` container held entirely in memory.
 *
 * Everything is located relative to the END of `stream`: the 12-byte Stream
 * Footer, the Index directly before it, and the block(s) directly before the
 * Index. Only a single block with a single LZMA2 filter is supported.
 */
class Xz {
    stream: Uint8Array

    /**
     * @param stream the complete contents of the `.xz` file
     */
    constructor(stream: Uint8Array) {
        this.stream = stream;
    }

    /**
     * Parses the 12-byte Stream Footer at the end of the stream:
     * CRC32 (4) | Backward Size (4) | Stream Flags (2) | magic "YZ" (2).
     *
     * @returns `bwSz`, the size of the Index in bytes (the stored value is
     *   `size / 4 - 1`), and `check`, the second Stream Flags byte.
     * @throws AssertionError if the trailing magic bytes are not "YZ"
     */
    get footer() {
        var [crc, bwSz, _, check, magic] =
            struct.unpack("<LIBB2s", this.stream.slice(-12));
        assert(magic == 'YZ');
        return {bwSz: (bwSz + 1) * 4, check};
    }

    /**
     * Parses the Index, which occupies the `footer.bwSz` bytes immediately
     * before the Stream Footer.
     *
     * @returns `crc` (the last four bytes of the Index, little-endian),
     *   `mbis` (the partially consumed multibyte-integer generator) and
     *   `blocks` (one record per block listed in the Index).
     * @throws Error if the Index ends before all announced records are read
     */
    get index() {
        // Stop at -12 so the Stream Footer is NOT part of `buf`; otherwise the
        // "last four bytes" would be the footer's flags + magic, not the CRC32.
        var buf = this.stream.slice(-12 - this.footer.bwSz, -12),
            [crc] = struct.unpack("<L", buf.slice(-4)),
            // skip the one-byte Index Indicator in front and the CRC32 at the back
            mbis = decodeMbis(buf.slice(1, -4)),
            blocks = [] as BlockRecord[], [nblocks, _] = iget(mbis);
        for (let i = 0; i < nblocks; i++) {
            let [unpaddedSz] = iget(mbis), [uncompressedSz] = iget(mbis);
            blocks.push({unpaddedSz, uncompressedSz});
        }
        return {crc, mbis, blocks};
    }

    /**
     * Locates block `idx` and extracts the LZMA2 dictionary-size byte from
     * its header together with the bytes that follow the header.
     *
     * @param idx zero-based block number
     * @returns the dictionary-size byte and the block bytes after the header.
     *   The data runs to the end of the padded block, so per the xz format it
     *   still carries any Block Padding and Check field after the payload.
     * @throws Error if the stream has more than one block, if `idx` does not
     *   name a block, or if the block does not use exactly one LZMA2 filter
     * @throws AssertionError if the LZMA2 filter properties are not one byte
     */
    block(idx: number): Block {
        // @todo
        var index = this.index;
        if (index.blocks.length > 1) throw new Error('not implemented: |blocks| > 1');
        var end = -12 - this.footer.bwSz,
            record = index.blocks[idx];
        if (!record) throw new Error(`no such block: ${idx}`);
        // blocks are padded to a multiple of four bytes; the Index starts right after
        var raw = this.stream.slice(end - align4(record.unpaddedSz), end),
            headerSz = (raw[0] + 1) * 4,
            header = raw.slice(0, headerSz),
            flags = header[1],
            // only the two low bits of Block Flags encode the filter count
            nFilters = (flags & 0x03) + 1;
        // Filter flags start after the optional Compressed Size (0x40) and
        // Uncompressed Size (0x80) fields, each a multibyte integer.
        var pos = 2;
        if (flags & 0x40) pos += decodeMbi(header.slice(pos))[1];
        if (flags & 0x80) pos += decodeMbi(header.slice(pos))[1];
        // reject unless there is exactly one filter AND it is LZMA2
        if (nFilters !== 1 || header[pos] !== 0x21 /* lzma2 */)
            throw new Error('not implemented: only single lzma2 filter supported');
        assert(header[pos + 1] == 1);  /* size of lzma2 filter field */
        var dictSzByte = header[pos + 2];
        return {header: {dictSzByte}, data: raw.slice(headerSz)};
    }

    /**
     * Decompresses a block obtained from `block()`.
     *
     * The data is prefixed with a 9-byte header: the dictionary-size byte
     * followed by eight 0xFF bytes (presumably "uncompressed size unknown"
     * for lzma2-js; confirm against that library).
     *
     * @param block a block as returned by `block()`
     * @returns whatever `lzma.LZMA.lzma2_decompress` returns
     */
    decompressBlock(block: Block) {
        let header = new Uint8Array(9);
        header[0] = block.header.dictSzByte;
        header.fill(255, 1);
        return lzma.LZMA.lzma2_decompress(concat(header, block.data));
    }
}
/** One Index record: the two sizes stored for a block, in the order they are read. */
type BlockRecord = {
    unpaddedSz: number;
    uncompressedSz: number;
};

/** A located block: the LZMA2 dictionary-size byte from its header, plus the bytes after the header. */
type Block = {
    header: {dictSzByte: number};
    data: Uint8Array;
};
/**
 * Decodes one little-endian multibyte integer from the start of `buf`.
 *
 * Each byte contributes its low seven bits; a clear high bit marks the last
 * byte. Bytes after the terminating byte are ignored.
 *
 * The value is accumulated with arithmetic rather than `<<` / `|`, because
 * bitwise operators truncate to 32-bit signed integers and would corrupt any
 * value of 2^31 or more. Results are exact up to `Number.MAX_SAFE_INTEGER`.
 *
 * @param buf bytes beginning with the encoded integer
 * @returns `[value, bytesConsumed]`
 * @throws Error("invalid mbi") if `buf` is empty, ends before a terminating
 *   byte, or the encoding exceeds the 9-byte maximum
 */
function decodeMbi(buf: Uint8Array): [number, number] {
    const MAX_BYTES = 9;  // 9 * 7 = 63 bits, the largest encodable value
    var i = 0, v = 0, scale = 1;
    for (let b of buf) {
        if (i >= MAX_BYTES) break;
        v += (b & 0x7f) * scale;  // little-endian: later bytes are more significant
        scale *= 128;
        i++;
        if ((b & 0x80) === 0) return [v, i];
    }
    throw new Error("invalid mbi");
}
/**
 * Lazily decodes consecutive multibyte integers from `buf`.
 *
 * Walks the buffer with an offset and hands `decodeMbi` a `subarray` view,
 * so the remaining bytes are not copied for every integer produced.
 *
 * @param buf bytes holding back-to-back multibyte integers
 * @yields `[value, bytesConsumed]` for each integer, in order
 * @throws Error("invalid mbi") (from `decodeMbi`) when the next integer is
 *   malformed; this only surfaces when that element is requested
 */
function* decodeMbis(buf: Uint8Array) {
    var pos = 0;
    while (pos < buf.length) {
        var [v, sz] = decodeMbi(buf.subarray(pos));
        yield [v, sz];
        pos += sz;
    }
}
/**
 * Rounds `sz` up to the next multiple of four.
 *
 * @param sz a size in bytes
 * @returns `sz` itself when it is already a multiple of four, otherwise the
 *   next larger multiple
 */
function align4(sz: number) {
    const excess = sz % 4;
    if (!excess) return sz;
    return sz - excess + 4;
}
/**
 * Pulls the next element out of a generator, treating exhaustion as an error.
 *
 * @param gen the generator to advance by one step
 * @returns the yielded value
 * @throws Error("stream ended prematurely") if the generator is already done
 */
function iget<T>(gen: Generator<T, void>) {
    const step = gen.next();
    if (step.done) {
        throw new Error("stream ended prematurely");
    }
    return step.value as T;
}
/**
 * Joins any number of byte arrays into one new `Uint8Array`.
 *
 * The result is allocated once at its final size and each input is copied
 * in with `set`, so no intermediate plain arrays are built.
 *
 * @param arrays the chunks to join, in order; they are not modified
 * @returns a new array holding all the bytes (empty when called with no arguments)
 */
function concat(...arrays: Uint8Array[]) {
    var total = 0;
    for (let a of arrays) total += a.length;
    var out = new Uint8Array(total), offset = 0;
    for (let a of arrays) {
        out.set(a, offset);
        offset += a.length;
    }
    return out;
}
// Demo entry point: synchronously reads a hard-coded sample archive when the
// module is loaded and wraps its bytes in an Xz reader.
var xz = new Xz(fs.readFileSync('/tmp/index.ts.xz'));
// Disabled debugging aid: would expose `xz` and `concat` on `window`.
//Object.assign(window, {xz, concat});
@xenova
Copy link

xenova commented Sep 16, 2021

This is awesome! Are you perhaps able to provide a pure JavaScript implementation (so that it can run in the browser, not node)?

@corwin-of-amber
Copy link
Author

Definitely, I have been using the above in the browser using Webpack. I have collected it into a package here: https://github.com/corwin-of-amber/Web.OS.Shell/tree/master/packages/xz-extract

I plan to publish this package to NPM. It's very preliminary, and lzma2-js seems to have a bug with data > 16MB; but I've already put it to work in a small web app.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment