Skip to content

Instantly share code, notes, and snippets.

@sma
Created March 12, 2024 22:23
Show Gist options
  • Save sma/fbd6ae87330b77a014f094b6ae48dbd4 to your computer and use it in GitHub Desktop.
Save sma/fbd6ae87330b77a014f094b6ae48dbd4 to your computer and use it in GitHub Desktop.
a library and utility to unpack BG3 pak files
import 'dart:io';
import 'dart:math';
import 'package:unpak/unpak.dart';
/// Command-line front end for [Unpak].
///
/// * `unpak -l <file.pak>` prints one line per archived file: its name, its
///   uncompressed size and its compression method.
/// * `unpak -x <file.pak> <name> [<output>]` extracts the entry whose name
///   equals `<name>`, writing it to `<output>` if given and to stdout
///   otherwise.
///
/// Terminates with exit status 1 on usage errors or when no entry with the
/// requested name exists.
void main(List<String> arguments) {
  if (arguments.length < 2 || !<String>{'-l', '-x'}.contains(arguments[0])) {
    stderr.writeln('usage: unpak -l <file.pak>');
    stderr.writeln(' unpak -x <file.pak> <name> [<output>]');
    exit(1);
  }
  if (arguments[0] == '-x' && arguments.length < 3) {
    stderr.writeln('missing file name');
    exit(1);
  }

  final pak = Unpak(File(arguments[1]));
  try {
    final entries = pak.read();
    switch (arguments[0]) {
      case '-l':
        // Pad every name to the longest one so the columns line up.
        var width = 0;
        for (final entry in entries) {
          width = max(width, entry.name.length);
        }
        for (final entry in entries) {
          stdout.writeln(
              '${entry.name.padRight(width)} ${'${entry.uncompressedSize}'.padLeft(9)} ${entry.compressionMethod}');
        }
      case '-x':
        final index = entries.indexWhere((entry) => entry.name == arguments[2]);
        if (index == -1) {
          stderr.writeln('unknown file to extract');
          // exit() would terminate immediately and skip the `finally` below,
          // so report the failure through exitCode and return instead.
          exitCode = 1;
          return;
        }
        final data = pak.readFile(entries[index]);
        if (arguments.length >= 4) {
          File(arguments[3]).writeAsBytesSync(data);
        } else {
          stdout.add(data);
        }
    }
  } finally {
    // Release the file handle even if reading or extracting throws.
    pak.close();
  }
}
import 'dart:convert';
import 'dart:io';
import 'dart:typed_data';
/// Provides access to _Baldur's Gate_ `PAK` files (version 18).
///
/// PAK files start with a Header struct that points to a FileList struct. That
/// struct contains compressed FileEntry structs which each have a name and a
/// reference to the compressed or uncompressed file data. Everything is
/// compressed using [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)).
///
/// ```
/// Header (40 bytes)
/// signature: u32 ("LSPK")
/// version: u32 (18)
/// fileListOffset: u64 points to FileList struct
/// fileListSize: u32 size of FileList struct
/// flags: u8 (0)
/// priority: u8 ??
/// md5: [16]u8 (don't know what needs to be hashed)
/// numParts: u16 ??
///
/// FileList (8+X bytes)
/// numFiles: u32 number of FileEntry structs
/// compressedSize: u32 size in bytes of compressedData (as stored, still compressed)
/// compressedData: [_]u8 compressed FileEntry structs
///
/// FileEntry (272 bytes)
/// name: [256]u8 name, utf8, padded with 0
/// offsetInFile1: u32 lower part of u48
/// offsetInFile2: u16 higher part of u48
/// archivePart: u8 ??
/// flags: u8 lower 4 bits: see CompressionMethod
/// diskSize: u32 size in bytes of the (possibly compressed) data stored in the file
/// uncompressedSize: u32 size of uncompressed data at offsetInFile1/2
///
/// CompressionMethod
/// none = 0
/// zlib = 1
/// lz4 = 2
/// ```
///
/// To extract data, create an [Unpak] and call [read] to get a list of [Entry]
/// objects. Use [readFile] to read the given entry and get a [Uint8List].
/// When done, call [close].
///
class Unpak {
  /// Opens [path] for synchronous random-access reading.
  ///
  /// The file stays open until [close] is called.
  Unpak(File path) : _r = path.openSync();

  /// Size in bytes of one FileEntry record in the decompressed file list.
  static const _entrySize = 272;

  /// Size in bytes of the zero-padded name field that starts each record.
  static const _nameFieldSize = 256;

  /// Handle of the underlying PAK file.
  final RandomAccessFile _r;

  /// Closes the underlying file.
  void close() => _r.closeSync();

  /// Parses the header and the file list and returns one [Entry] per record.
  ///
  /// Throws an [UnpakException] if the signature or version is not the
  /// expected one, if the file list sizes are inconsistent, or if the file
  /// list cannot be read or decompressed completely.
  List<Entry> read() {
    // Header: signature ("LSPK" read as a little-endian u32) and version.
    _r.setPositionSync(0);
    final signature = _r.readUint32();
    if (signature != 0x4b50534c) {
      throw UnpakException('invalid signature');
    }
    final version = _r.readUint32();
    if (version != 18) {
      throw UnpakException('invalid version');
    }

    final listOffset = _r.readUint64();
    final listSize = _r.readUint32();
    // flags (u8), priority (u8), md5 ([16]u8) and numParts (u16) are unused.
    _r.skip(1 + 1 + 16 + 2);

    // FileList: record count, size of the compressed payload, payload.
    _r.setPositionSync(listOffset);
    final fileCount = _r.readUint32();
    final packedSize = _r.readUint32();
    if (packedSize + 8 != listSize) {
      throw UnpakException('file list size mismatch');
    }
    final packed = _r.readSync(packedSize);
    if (packed.length != packedSize) {
      throw UnpakException('not enough data in file list');
    }

    final tableSize = fileCount * _entrySize;
    final table = lz4Uncompress(packed, tableSize);
    if (table.length != tableSize) {
      throw UnpakException('cannot decompress file list');
    }

    final view = table.buffer.asByteData();
    return [
      for (var index = 0; index < fileCount; index++)
        _entryAt(table, view, index * _entrySize),
    ];
  }

  /// Decodes the FileEntry record that starts at byte [base] of [table].
  ///
  /// [view] must be a [ByteData] over the same bytes as [table].
  Entry _entryAt(Uint8List table, ByteData view, int base) {
    // The name ends at the first 0 byte, or fills the whole field if none.
    final nameField = Uint8List.sublistView(table, base, base + _nameFieldSize);
    final terminator = nameField.indexOf(0);
    final nameBytes = terminator < 0
        ? nameField
        : Uint8List.sublistView(nameField, 0, terminator);
    final name = utf8.decode(nameBytes);

    // The 48-bit offset is stored as a u32 low part followed by a u16 high part.
    final low = view.getUint32(base + 256, Endian.little);
    final high = view.getUint16(base + 260, Endian.little);

    return Entry(
      name: name,
      offset: (high << 32) | low,
      compressionMethod: view.getUint8(base + 263) & 15,
      diskSize: view.getUint32(base + 264, Endian.little),
      uncompressedSize: view.getUint32(base + 268, Endian.little),
      unpak: this,
    );
  }

  /// Reads the data of [entry] and returns it uncompressed.
  ///
  /// Throws an [UnpakException] if fewer than `entry.diskSize` bytes can be
  /// read, if the decompressed size differs from `entry.uncompressedSize`, or
  /// if the compression method is not 0 (none), 1 (zlib) or 2 (lz4).
  Uint8List readFile(Entry entry) {
    _r.setPositionSync(entry.offset);
    final stored = _r.readSync(entry.diskSize);
    if (stored.length != entry.diskSize) {
      throw UnpakException("couldn't read file data completely");
    }

    final method = entry.compressionMethod;
    if (method == 0) {
      // Stored without compression: hand back the bytes as read.
      return stored;
    }
    if (method != 1 && method != 2) {
      throw UnpakException('unknown compression method');
    }

    final unpacked = method == 1
        ? Uint8List.fromList(zlib.decode(stored))
        : lz4Uncompress(stored, entry.uncompressedSize);
    if (unpacked.length != entry.uncompressedSize) {
      throw UnpakException("couldn't uncompress file data completely");
    }
    return unpacked;
  }
}
/// Signals that a PAK file is malformed or could not be read completely.
final class UnpakException extends IOException {
  /// Creates an exception carrying a human-readable [message].
  UnpakException(this.message);

  /// Description of what went wrong.
  final String message;

  /// Returns the class name followed by [message].
  @override
  String toString() => 'UnpakException: $message';
}
/// Describes one file stored in a PAK archive.
final class Entry {
  /// The file's name (decoded from a field of up to 256 bytes).
  final String name;

  /// Position in the archive file at which this entry's data starts.
  final int offset;

  /// How the stored data is compressed: 0 = uncompressed, 1 = zlib, 2 = lz4.
  final int compressionMethod;

  /// Number of bytes the entry's data occupies in the archive file.
  final int diskSize;

  /// Number of bytes of the entry's data once uncompressed.
  final int uncompressedSize;

  /// The archive reader used by [readBytes] to load this entry.
  final Unpak unpak;

  /// Creates an entry from its already decoded fields.
  const Entry({
    required this.name,
    required this.offset,
    required this.compressionMethod,
    required this.diskSize,
    required this.uncompressedSize,
    required this.unpak,
  });

  /// Reads this entry's data through [unpak].
  Uint8List readBytes() {
    return unpak.readFile(this);
  }

  /// Reads this entry's data and decodes it as UTF-8 text.
  String readString() {
    final bytes = readBytes();
    return utf8.decode(bytes);
  }

  /// Returns name, offset, disk size and uncompressed size, comma separated.
  @override
  String toString() {
    return '$name, $offset, $diskSize, $uncompressedSize';
  }
}
/// Helpers for reading little-endian integers from a [RandomAccessFile].
extension on RandomAccessFile {
  /// Moves the file position forward by [n] bytes without reading them.
  void skip(int n) {
    setPositionSync(positionSync() + n);
  }

  /// Reads 8 bytes at the current position as a little-endian integer.
  ///
  /// Throws a [FileSystemException] if fewer than 8 bytes are available.
  int readUint64() => _readExactly(8).getUint64(0, Endian.little);

  /// Reads 4 bytes at the current position as a little-endian integer.
  ///
  /// Throws a [FileSystemException] if fewer than 4 bytes are available.
  int readUint32() => _readExactly(4).getUint32(0, Endian.little);

  /// Reads exactly [count] bytes and returns a view over them.
  ///
  /// `readSync` returns fewer bytes at end of file; that is reported here as
  /// a [FileSystemException] instead of failing later with a range error.
  ByteData _readExactly(int count) {
    final data = readSync(count);
    if (data.length != count) {
      throw FileSystemException(
        'unexpected end of file: wanted $count bytes, got ${data.length}',
        path,
      );
    }
    // sublistView honours the list's own offset and length within its buffer.
    return ByteData.sublistView(data);
  }

  // int readUint16() {
  //   final data = readSync(2);
  //   return ByteData.view(data.buffer).getUint16(0, Endian.little);
  // }
  // int readUint8() {
  //   return readByteSync();
  // }
}
/// Decompresses a single raw LZ4 block.
///
/// [data] is the compressed block and [uncompressedLength] is the expected
/// size of the decoded output.
///
/// Returns a list of exactly [uncompressedLength] bytes when the block decodes
/// to that size. If the block decodes to fewer bytes, only the bytes actually
/// produced are returned, so callers can detect the mismatch by comparing
/// lengths. Empty [data] decodes to an empty list.
///
/// Throws a [FormatException] if [data] is truncated, if a match refers to
/// bytes before the start of the output, or if the block would decode to more
/// than [uncompressedLength] bytes.
Uint8List lz4Uncompress(Uint8List data, int uncompressedLength) {
  final dest = Uint8List(uncompressedLength);
  var ip = 0; // read position in data
  var op = 0; // write position in dest

  Never malformed(String reason) =>
      throw FormatException('invalid LZ4 data at offset $ip: $reason');

  // A 4-bit length of 15 is continued by extra bytes that are added up; the
  // first extra byte below 255 ends the sequence.
  int readLength(int nibble) {
    var length = nibble;
    if (nibble == 15) {
      int extra;
      do {
        if (ip >= data.length) malformed('truncated length');
        extra = data[ip++];
        length += extra;
      } while (extra == 255);
    }
    return length;
  }

  while (ip < data.length) {
    final token = data[ip++];

    // Literals: copied verbatim from the input.
    final literalLength = readLength(token >> 4);
    if (literalLength > data.length - ip) malformed('truncated literals');
    if (literalLength > dest.length - op) {
      malformed('output exceeds $uncompressedLength bytes');
    }
    dest.setRange(op, op + literalLength, data, ip);
    ip += literalLength;
    op += literalLength;

    // The last sequence of a block consists of literals only.
    if (ip >= data.length) break;

    // Match: a 16-bit little-endian distance back into the output.
    if (data.length - ip < 2) malformed('truncated match offset');
    final offset = data[ip] | (data[ip + 1] << 8);
    ip += 2;
    final matchLength = readLength(token & 15) + 4;
    if (offset == 0 || offset > op) malformed('match offset out of range');
    if (matchLength > dest.length - op) {
      malformed('output exceeds $uncompressedLength bytes');
    }
    // Copy byte by byte: source and target overlap when offset < matchLength,
    // which is how LZ4 encodes repeated runs.
    for (var from = op - offset, end = op + matchLength; op < end;) {
      dest[op++] = dest[from++];
    }
  }

  return op == dest.length ? dest : Uint8List.sublistView(dest, 0, op);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment