Created
September 15, 2021 18:01
-
-
Save corwin-of-amber/a855b14b7608447d6e67b9315382fa80 to your computer and use it in GitHub Desktop.
Reading lzma2 information from an xz container
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Based on:
 *  + https://tukaani.org/xz/xz-file-format-1.0.4.txt
 *  + https://github.com/Rogdham/python-xz/tree/master/src/xz
 */
import assert from 'assert'; | |
import fs from 'fs'; | |
import struct from 'python-struct'; | |
// From https://github.com/SortaCore/lzma2-js | |
import lzma from './lzma2-js/lzma2_worker'; | |
/**
 * Read-only view over a complete `.xz` stream held in memory.
 *
 * Parses the stream footer and the index from the tail of the buffer, locates
 * the block, and hands the block payload to the lzma2 decoder.  Only streams
 * containing one block with a single lzma2 filter are supported.
 */
class Xz {
    stream: Uint8Array

    /** @param stream the entire contents of the `.xz` file */
    constructor(stream: Uint8Array) {
        this.stream = stream;
    }

    /**
     * Decodes the 12-byte stream footer at the very end of the buffer.
     *
     * @returns `bwSz`, the size of the index in bytes (the stored field holds
     *   `size / 4 - 1`), and `check`, the second stream-flags byte.
     * @throws AssertionError if the footer does not end with the magic "YZ".
     */
    get footer() {
        // Layout: CRC32 (u32), backward size (u32), two flag bytes, magic "YZ".
        const [, bwSz, , check, magic] =
            struct.unpack("<LIBB2s", this.stream.slice(-12));
        assert(magic == 'YZ');
        return {bwSz: (bwSz + 1) * 4, check};
    }

    /**
     * Decodes the index, which sits immediately before the stream footer.
     *
     * @returns `crc` — the CRC32 stored in the last four bytes of the index;
     *   `mbis` — the (partially consumed) integer generator over the index
     *   body; `blocks` — one record per block listed in the index.
     * @throws Error("stream ended prematurely") if the index lists more
     *   records than it contains.
     */
    get index() {
        // Stop at -12 so the footer is excluded; otherwise the "last four
        // bytes" would be the footer's flags and magic instead of the CRC32.
        const buf = this.stream.slice(-12 - this.footer.bwSz, -12);
        const [crc] = struct.unpack("<L", buf.slice(-4));
        // Skip the leading index-indicator byte and the trailing CRC32.  Any
        // padding left in between is never read because decoding is lazy.
        const mbis = decodeMbis(buf.slice(1, -4));
        const blocks: BlockRecord[] = [];
        const [nblocks] = iget(mbis);
        for (let i = 0; i < nblocks; i++) {
            const [unpaddedSz] = iget(mbis), [uncompressedSz] = iget(mbis);
            blocks.push({unpaddedSz, uncompressedSz});
        }
        return {crc, mbis, blocks};
    }

    /**
     * Extracts block number `idx` from the stream.
     *
     * @param idx zero-based position of the block in the index
     * @returns the lzma2 dictionary-size byte and every byte of the block
     *   that follows the block header
     * @throws Error if the stream has more than one block, if `idx` is not
     *   listed in the index, or if the block does not use exactly one lzma2
     *   filter; AssertionError if the filter properties are not one byte long.
     */
    block(idx: number): Block {
        const {blocks} = this.index;   // parse the index once per call
        // @todo
        if (blocks.length > 1) throw new Error('not implemented: |blocks| > 1');
        const end = -12 - this.footer.bwSz,   // the block ends where the index starts
              record = blocks[idx];
        if (!record) throw new Error(`no such block: ${idx}`);
        // The unpadded size excludes the padding that aligns the block to 4 bytes.
        const raw = this.stream.slice(end - align4(record.unpaddedSz), end),
              headerSz = (raw[0] + 1) * 4,
              header = raw.slice(0, headerSz),
              flags = header[1],
              nFilters = (flags & 0x03) + 1;   // only the low two bits count filters
        // Flag bits 0x40 / 0x80 announce optional compressed / uncompressed
        // size fields that precede the filter list; step over them.
        let pos = 2;
        for (const bit of [0x40, 0x80]) {
            if (flags & bit) {
                const [, width] = decodeMbi(header.subarray(pos));
                pos += width;
            }
        }
        if (nFilters != 1 || header[pos] !== 0x21 /* lzma2 */)
            throw new Error('not implemented: only single lzma2 filter supported');
        assert(header[pos + 1] == 1);  /* size of lzma2 filter field */
        const dictSzByte = header[pos + 2];
        return {header: {dictSzByte}, data: raw.slice(headerSz)};
    }

    /**
     * Decompresses a block obtained from `block()`.
     *
     * Prepends a 9-byte header — the dictionary-size byte followed by eight
     * 0xFF bytes — and passes the result to `lzma2_decompress`.
     */
    decompressBlock(block: Block) {
        const header = new Uint8Array(9);
        header[0] = block.header.dictSzByte;
        header.fill(255, 1);
        return lzma.LZMA.lzma2_decompress(concat(header, block.data));
    }
}
/** One index record: the two sizes stored for a single block. */
type BlockRecord = {unpaddedSz: number, uncompressedSz: number};
/** An extracted block: the lzma2 dictionary-size byte plus the bytes following the block header. */
type Block = {header: {dictSzByte: number}, data: Uint8Array};
/**
 * Decodes one variable-length "multibyte integer" from the start of `buf`.
 *
 * Each byte contributes its low 7 bits, least-significant group first; a byte
 * whose high bit (0x80) is clear terminates the integer.  Bytes after the
 * terminator are ignored.
 *
 * @param buf bytes beginning at the encoded integer
 * @returns `[value, length]` — the decoded value and the number of bytes used
 * @throws Error("invalid mbi") if no terminating byte is found within the
 *   buffer or within 9 bytes (the maximum encoded length in the xz format);
 *   Error("mbi out of range") if the value cannot be represented exactly.
 */
function decodeMbi(buf: Uint8Array): [number, number] {
    const maxBytes = Math.min(buf.length, 9);
    let value = 0;
    let scale = 1;   // 128 ** i
    for (let i = 0; i < maxBytes; i++) {
        const b = buf[i];
        // Accumulate arithmetically: `<<` and `|` operate on signed 32-bit
        // integers, which corrupts any value of 2^31 or more.
        value += (b & 0x7f) * scale;
        if ((b & 0x80) === 0) {
            if (!Number.isSafeInteger(value)) throw new Error("mbi out of range");
            return [value, i + 1];
        }
        scale *= 128;
    }
    throw new Error("invalid mbi");
}
/**
 * Lazily decodes consecutive multibyte integers from `buf`.
 *
 * Yields one `[value, length]` pair per integer until the buffer is
 * exhausted.  Nothing is decoded until the consumer asks for the next item,
 * so trailing bytes that are not valid integers are harmless as long as they
 * are never requested.
 *
 * @param buf bytes holding back-to-back encoded integers
 * @throws whatever `decodeMbi` throws when the bytes at the current position
 *   are not a valid integer
 */
function* decodeMbis(buf: Uint8Array) {
    // Advance an offset over views of the buffer instead of re-copying the
    // remainder after every integer.
    let offset = 0;
    while (offset < buf.length) {
        const [value, size] = decodeMbi(buf.subarray(offset));
        yield [value, size];
        offset += size;
    }
}
/**
 * Rounds `sz` up to the next multiple of 4.
 *
 * @param sz a size in bytes
 * @returns `sz` itself when it is already a multiple of 4, otherwise the
 *   next larger multiple
 */
function align4(sz: number) {
    const rem = sz % 4;
    return rem ? sz - rem + 4 : sz;
}
/**
 * Pulls the next item from a generator, treating exhaustion as an error.
 *
 * @param gen the generator to advance by one step
 * @returns the value yielded by that step
 * @throws Error("stream ended prematurely") if the generator has finished
 */
function iget<T>(gen: Generator<T, void>) {
    const step = gen.next();
    if (step.done) throw new Error("stream ended prematurely");
    return step.value as T;
}
/**
 * Joins byte arrays end to end into a newly allocated `Uint8Array`.
 *
 * @param arrays the pieces to join, in order; may be empty
 * @returns a fresh array whose length is the sum of the input lengths
 */
function concat(...arrays: Uint8Array[]) {
    let total = 0;
    for (const piece of arrays) total += piece.length;
    // Allocate once and copy each piece into place, rather than expanding
    // every byte into an intermediate JavaScript array.
    const joined = new Uint8Array(total);
    let offset = 0;
    for (const piece of arrays) {
        joined.set(piece, offset);
        offset += piece.length;
    }
    return joined;
}
// Example usage: read an .xz file from a fixed path when the module loads.
var xz = new Xz(fs.readFileSync('/tmp/index.ts.xz'));
// Uncomment to expose the reader when experimenting in a browser console.
//Object.assign(window, {xz, concat});
Definitely, I have been using the above in the browser using Webpack. I have collected it into a package here: https://github.com/corwin-of-amber/Web.OS.Shell/tree/master/packages/xz-extract
I plan to publish this package to NPM. It's very preliminary, and lzma2-js seems to have a bug with data > 16MB; but I've already put it to work in a small web app.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is awesome! Are you perhaps able to provide a pure JavaScript implementation (so that it can run in the browser, not node)?