Skip to content

Instantly share code, notes, and snippets.

@Gemba
Last active January 5, 2022 08:38
Show Gist options
  • Save Gemba/c793590c32a6c121445fa14ed361e51d to your computer and use it in GitHub Desktop.
Save Gemba/c793590c32a6c121445fa14ed361e51d to your computer and use it in GitHub Desktop.
lab_copier.py: Enhanced Implementation of LABcopy in Python for 'Grim Fandango' and 'Escape From Monkey Island'
#! /usr/bin/env python3
# lab_copier.py: Modern implementation of labcopy in Python
#
# Rewrites size-forged LAB files to represent their actual size.
# E.g., for usage and proper game detection in SCUMMVM.
#
# Primary use is for "Grim Fandango" LAB files, but does handle
# "Escape from Monkey Island" files too.
#
# (LAB files are encoded in the LucasArts Binary archive format)
#
# Sample:
# Note: integers are LE encoded
#
# 00000000: 4c41 424e LABN ; 'LABN' file id
# 00000004: 4141 4141 AAAA ; reserved (copy as is)
# 00000008: 0300 0000 .... ; count of nested files
# 0000000c: 3200 0000 2... ; length of concatenated nested filename strings
# 00000010: 0000 0000 .... ; offset to file A filename (Grim only)
# 00000014: 7200 0000 r... ; offset to file A data
# 00000018: 0400 0000 .... ; bytesize of file A
# 0000001c: 4242 4242 BBBB ; reserved (usually 0x00, copy as is)
# 00000020: 0a00 0000 .... ; offset to file B filename
# 00000024: 7600 0000 v... ; offset to file B data
# 00000028: 0800 0000 .... ; bytesize of file B
# 0000002c: 4242 4242 BBBB ; reserved
# 00000030: 1b00 0000 .... ; offset to file C filename
# 00000034: 7e00 0000 ~... ; offset to file C data
# 00000038: 0c00 0000 .... ; bytesize of file C
# 0000003c: 4242 4242 BBBB ; reserved
# 00000040: 6669 6c65 file ; filenames concatenated. filename A from pos 0x40
# 00000044: 5f41 2e77 _A.w ; (0x40 = 16 + 3 files * 16)
# 00000048: 6176 0061 av.a ; 0x40+0x0a: B filename
# 0000004c: 6e6f 7468 noth
# 00000050: 6572 6669 erfi
# 00000054: 6c65 5f42 le_B
# 00000058: 2e6a 7067 .jpg
# 0000005c: 0079 6574 .yet ; 0x40+0x1b: C filename
# 00000060: 616e 6f74 anot
# 00000064: 6865 7266 herf
# 00000068: 696c 655f ile_
# 0000006c: 432e 7478 C.tx
# 00000070: 7400 4343 t.CC ; 0x72 start of file data A file
# 00000074: 4343 4444 CCDD ; 0x76 start of file data B file
# 00000078: 4444 4444 DDDD
# 0000007c: 4444 4545 DDEE ; 0x7e start of file data C file
# 00000080: 4545 4545 EEEE
# 00000084: 4545 4545 EEEE
# 00000088: 4545 EE
#
# Inspired by discontinued labcopy C++ implementation:
# https://github.com/klusark/residual-tools/blob/master/tools/labcopy.cpp
#
# (C) 2021 Gemba
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import shutil
import struct
import sys
from collections import namedtuple
from hashlib import md5
# Suffix appended to the input filename to form the default output filename.
OUTFILE_SUFFIX = "_rewritten"
# Names of nested files whose data is not copied into the rewritten archive.
OFFENDING_NESTED_FILES = ["cp_0_intha.bm"] # Grim
# Value subtracted from the filename table offset stored in an EMI LAB file
# to obtain the position that is actually seeked to in the file.
EMI_FN_TABLE_OFFSET = 0x13d0f
def copy_lab(in_data, list_content=False):
    """Read a LAB archive and assemble its content for the output file.

    Parses the 16-byte header, the filename table and the 16-byte entry
    table of ``in_data``, then builds a byte array that holds the header,
    the entry table, the filename table and the data of every nested file
    at the offset recorded in its entry. Files listed in
    ``OFFENDING_NESTED_FILES`` keep their entry but their data is not copied.

    Args:
        in_data: Seekable binary file object of the LAB file.
        list_content: If true, print a size/name table of the nested files.

    Returns:
        A ``bytearray`` with the data for the output file.

    Exits the process with status 1 if the file id is not ``LABN`` or if the
    number of decoded filenames differs from the entry count in the header.
    """
    hdr = in_data.read(16)
    # Compare the raw bytes: decoding them first would raise a
    # UnicodeDecodeError on arbitrary binary input instead of reporting it.
    if hdr[:4] != b"LABN":
        print("[!] Not a LAB file")
        sys.exit(1)

    num_entries = int.from_bytes(hdr[8:12], byteorder='little', signed=False)
    nested_filenames_size = int.from_bytes(
        hdr[12:16], byteorder='little', signed=False)

    # The four bytes after the header are used to tell EMI and Grim files
    # apart: a non-zero value is treated as an EMI filename table offset.
    offset_nested_fn_bytes = in_data.read(4)
    nested_fn_table_offset = int.from_bytes(
        offset_nested_fn_bytes, byteorder='little', signed=False)
    is_emi = nested_fn_table_offset > 0

    nested_fn_array, nested_fn_bytes = get_nested_filenames(
        in_data, num_entries, nested_fn_table_offset, nested_filenames_size,
        is_emi)
    if num_entries != len(nested_fn_array):
        print("[!] Filename table size does not match count of nested files.")
        sys.exit(1)

    # write header
    out_data = bytearray(hdr)
    lab_entries = decode_and_copy_labentry_info(
        in_data, out_data, nested_fn_array, offset_nested_fn_bytes, is_emi)

    # write nested filenames as-is
    if is_emi:
        # copy the (still encoded) blob back to the position it was read from
        copy_nestedfile(in_data, out_data, nested_fn_table_offset -
                        EMI_FN_TABLE_OFFSET, nested_filenames_size)
    else:
        out_data.extend(nested_fn_bytes)

    if list_content:
        print("[*] Size [bytes] File")
        print(" ------------ ------------")

    # write nested filedata
    size_tot = 0
    for lab_entry in lab_entries:
        if list_content:
            print(f" {lab_entry['size']:>12} {lab_entry['nested_fn']}")
            size_tot += lab_entry['size']
        if lab_entry['nested_fn'] in OFFENDING_NESTED_FILES:
            print(f"[*] Skipping offending file '{lab_entry['nested_fn']}'.")
            continue
        copy_nestedfile(in_data, out_data,
                        lab_entry['start'], lab_entry['size'])

    if list_content:
        s = "" if len(lab_entries) == 1 else "s"
        print(" ------------ ------------")
        print(f" {size_tot:>12} {len(lab_entries)} file{s}")
    return out_data
def get_nested_filenames(in_data, num_entries, nested_fn_table_offset,
                         nested_filenames_size, is_emi):
    """Read the archive's filename table and split it into filenames.

    The table holds the filenames as bytes, each terminated by 0x00. For EMI
    files every non-zero byte is XORed with 0x96 before decoding.

    Args:
        in_data: Seekable binary file object of the LAB file.
        num_entries: Number of nested files taken from the header.
        nested_fn_table_offset: Table offset read from the file; only used
            when ``is_emi`` is true.
        nested_filenames_size: Size of the filename table in bytes.
        is_emi: True for "Escape from Monkey Island" files, false for
            "Grim Fandango" files.

    Returns:
        A tuple ``(filenames, raw_table)``: the list of ASCII decoded
        filenames and the table bytes exactly as read from the file. An
        empty table yields an empty list.
    """
    # seek fwd to retrieve nested filenames
    if is_emi:
        # Escape from Monkey Island
        in_data.seek(nested_fn_table_offset - EMI_FN_TABLE_OFFSET)
    else:
        # Grim Fandango: the table follows the 16-byte header (+1) and the
        # 16-byte entries.
        in_data.seek((num_entries + 1) * 16, 0)
    nested_filenames_bytes = in_data.read(nested_filenames_size)

    if is_emi:
        # 0x00 terminators are stored as-is, everything else is XORed
        decoded = bytes(b ^ 0x96 if b else b for b in nested_filenames_bytes)
    else:
        decoded = nested_filenames_bytes

    if not decoded:
        # Splitting an empty table would produce one bogus empty filename.
        return [], nested_filenames_bytes

    # cut off surplus null byte, assume ASCII filenames
    return decoded[:-1].decode("ascii").split('\x00'), nested_filenames_bytes
def decode_and_copy_labentry_info(in_data, out_data, nested_fn_array,
                                  offset_nested_fn_bytes, is_emi):
    """Decode the entry table and append its raw bytes to ``out_data``.

    One 16-byte entry (four little-endian unsigned 32-bit integers) is read
    per name in ``nested_fn_array``. For EMI files the entries start at
    byte 20 and ``offset_nested_fn_bytes`` (bytes 16 to 19) is appended to
    ``out_data`` first; otherwise the entries start at byte 16.

    Returns:
        A list with one dict per entry, holding the keys ``fname_offset``,
        ``start``, ``size``, ``reserved`` and ``nested_fn``.
    """
    field_names = ('fname_offset', 'start', 'size', 'reserved')

    if is_emi:
        first_entry_pos = 20
        # write out bytes 16 to 19
        out_data.extend(offset_nested_fn_bytes)
    else:
        first_entry_pos = 16
    in_data.seek(first_entry_pos, 0)

    decoded = []
    for name in nested_fn_array:
        raw = in_data.read(16)
        out_data.extend(raw)
        info = dict(zip(field_names, struct.unpack("<IIII", raw)))
        info['nested_fn'] = name
        decoded.append(info)
    return decoded
def copy_nestedfile(in_data, out_data, offset, size):
    """Copy ``size`` bytes at ``offset`` of a LAB file to the same offset
    of the target byte array.

    Args:
        in_data: Seekable binary file object to read from.
        out_data: ``bytearray`` that is modified in place.
        offset: Position of the data in both ``in_data`` and ``out_data``.
        size: Number of bytes to copy.

    ``out_data`` is padded with 0x00 up to ``offset + size`` if it is
    shorter. If fewer than ``size`` bytes can be read, only the bytes read
    are written and the rest of the range keeps its current content.
    """
    in_data.seek(offset, 0)
    data = in_data.read(size)
    # don't assume offset is strictly ordered
    add_len = offset + size - len(out_data)
    if add_len > 0:
        out_data.extend(b'\x00' * add_len)
    # A same-length slice assignment replaces the bytes in one step instead
    # of looping over every byte in Python.
    out_data[offset:offset + len(data)] = data
def init_cli_parser():
    """Build the command line argument parser.

    Returns:
        An ``argparse.ArgumentParser`` accepting the positional ``in_file``
        and the options ``-o/--outfile``, ``-f/--overwrite``, ``-l/--list``,
        ``-s/--skipidentical`` and ``-b/--md5bytes``.
    """
    cli = argparse.ArgumentParser(
        description='Restores a size-forged LAB file to its pristine size.')
    add = cli.add_argument

    add("in_file", help="input LAB file")

    outfile_help = ("file to write to, if not given in_file plus suffix "
                    f"'{OUTFILE_SUFFIX}' will be used if overwrite is not set")
    add("-o", "--outfile", dest='out_file', type=str, help=outfile_help)

    add("-f", "--overwrite", action="store_true", default=False,
        help="overwrite inputfile")

    add("-l", "--list", action="store_true", default=False,
        help="list nested files identified in LAB file")

    add("-s", "--skipidentical", dest='skip_identical', action='store_true',
        default=False,
        help="skip writing of out_file if hash is identical to in_file")

    add("-b", "--md5bytes", dest='md5_bytes', type=int, default=0,
        help="number of bytes to calculate MD5 (default: full file)")

    return cli
if __name__ == "__main__":
    # Command line entry point: hash the input, rebuild the archive in
    # memory, then write it out unless --skipidentical applies.
    parser = init_cli_parser()
    args = parser.parse_args()
    if args.md5_bytes < 0:
        # A negative count would hash the whole input but a truncated
        # output, which makes the two MD5 sums incomparable.
        parser.error("argument -b/--md5bytes: must not be negative")

    in_file = args.in_file
    overwrite = args.overwrite
    if args.out_file and overwrite:
        print("[*] Parameter outfile will be ignored, because overwrite"
              " (=same file) is set.")
    out_file = args.out_file if not overwrite else None
    if not out_file:
        # Also used as intermediate file when overwriting the input.
        out_file = f"{in_file}{OUTFILE_SUFFIX}"

    # The input is closed again before it is possibly replaced below.
    with open(in_file, 'rb') as in_data:
        print(f"[+] Opened '{in_file}'")
        # read(-1) reads the whole file
        size = -1 if not args.md5_bytes else args.md5_bytes
        md5_infile = md5(in_data.read(size)).hexdigest()
        in_data.seek(0)
        print("[+] Copying ...")
        out_data = copy_lab(in_data, args.list)

    # Never report more hashed bytes than the output actually contains.
    md5_bytes = (min(args.md5_bytes, len(out_data)) if args.md5_bytes
                 else len(out_data))
    md5_outfile = md5(out_data[:md5_bytes]).hexdigest()
    mib = md5_bytes / 1024.0 / 1024

    if args.skip_identical and md5_outfile == md5_infile:
        print("[+] Skip identical: No outfile written, MD5 is identical to"
              " infile.")
    else:
        with open(out_file, 'wb') as of:
            of.write(out_data)
        if overwrite:
            shutil.move(out_file, in_file)
            out_file = in_file
        print(f"[+] ... written to '{out_file}'")
    print(f"[*] MD5 {md5_outfile} over {md5_bytes} bytes ({mib:.3f} MiB).")
    print("[*] Done.")
# run with pytest-3
def test_copy_lab():
    """copy_lab() on a three-file dummy archive yields the expected MD5."""
    from os import remove
    md5_expected = "776138f5eb890123c49bb3046d7dd4f5"
    f = _create_dummy_labfile(
        ["file_A.wav", "anotherfile_B.jpg", "yetanotherfile_C.txt"])
    try:
        with open(f.name, 'rb') as test_data:
            md5_real = md5(copy_lab(test_data))
    finally:
        # Remove the temporary file even if copy_lab() raises or exits.
        remove(f.name)
    assert md5_expected == md5_real.hexdigest()
def test_copy_lab_remove_nested_file():
    """copy_lab() skips the data of files in OFFENDING_NESTED_FILES."""
    from os import remove
    # breaks if OFFENDING_NESTED_FILES is modified
    md5_expected = "c46fcc87f65df671ac93e9319f779f88"
    f = _create_dummy_labfile(
        ["file_A.wav"] + OFFENDING_NESTED_FILES + ["yetanotherfile_C.txt"])
    try:
        with open(f.name, 'rb') as test_data:
            md5_real = md5(copy_lab(test_data))
    finally:
        # Remove the temporary file even if copy_lab() raises or exits.
        remove(f.name)
    assert md5_expected == md5_real.hexdigest()
def _create_dummy_labfile(filenames):
    """Write a small Grim style LAB file for the tests.

    Layout: 16-byte header, one 16-byte entry per filename, the 0x00
    terminated filenames, then the file data. File number i (from 0) has
    4 + 4 * i bytes, all set to the ASCII character with code 0x43 + i.

    NOTE: the filename offset stored in an entry is the summed length of the
    preceding filenames without their 0x00 terminators. The MD5 sums
    expected by the tests are based on exactly this output.

    Returns:
        The closed ``NamedTemporaryFile`` object; the file is kept on disk
        (``delete=False``) and is reachable through its ``name``.
    """
    from tempfile import NamedTemporaryFile

    def u32(value):
        # unsigned 32 bit integer, little endian
        return int.to_bytes(value, length=4, byteorder='little', signed=False)

    with NamedTemporaryFile(mode='wb', delete=False) as f:
        count = len(filenames)
        # filename table, located behind header and lab entries
        name_table = b''.join(
            bytes(bytearray(fn, "ascii")) + b'\x00' for fn in filenames)
        data_start = (count + 1) * 16 + len(name_table)

        entry_table = bytearray()
        payload = bytearray()
        for idx in range(count):
            sample_size = 4 + 4 * idx
            # offset to nested filename string (zero for the first file)
            entry_table += u32(len(''.join(filenames[:idx])))
            # data of this file follows the data of all previous files
            entry_table += u32(data_start + len(payload))
            entry_table += u32(sample_size)
            entry_table += b"BBBB"  # reserved
            payload += bytes(chr(0x43 + idx), 'ascii') * sample_size

        # file id, reserved, file count, length of the filename table
        header = b"LABN" + b"AAAA" + u32(count) + u32(len(name_table))
        f.write(header + bytes(entry_table) + name_table + bytes(payload))
    return f
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment