#! /usr/bin/env python3
# Modern implementation of labcopy in Python
# Rewrites size-forged LAB files to represent their actual size.
# E.g., for usage and proper game detection in SCUMMVM.
# Primary use is for "Grim Fandango" LAB files, but does handle
# "Escape from Monkey Island" files too.
# (LAB files are encoded in the LucasArts Binary archive format)
# Sample:
# Note: integers are LE encoded
# 00000000: 4c41 424e LABN ; 'LABN' file id
# 00000004: 4141 4141 AAAA ; reserved (copy as is)
# 00000008: 0300 0000 .... ; count of nested files
# 0000000c: 3200 0000 2... ; length of concatenated nested filename strings
# 00000010: 0000 0000 .... ; offset to file A filename (Grim only)
# 00000014: 7200 0000 r... ; offset to file A data
# 00000018: 0400 0000 .... ; bytesize of file A
# 0000001c: 4242 4242 BBBB ; reserved (usually 0x00, copy as is)
# 00000020: 0a00 0000 .... ; offset to file B filename
# 00000024: 7600 0000 v... ; offset to file B data
# 00000028: 0800 0000 .... ; bytesize of file B
# 0000002c: 4242 4242 BBBB ; reserved
# 00000030: 1b00 0000 .... ; offset to file C filename
# 00000034: 7e00 0000 ~... ; offset to file C data
# 00000038: 0c00 0000 .... ; bytesize of file C
# 0000003c: 4242 4242 BBBB ; reserved
# 00000040: 6669 6c65 file ; filenames concatenated. filename A from pos 0x40
# 00000044: 5f41 2e77 _A.w ; (0x40 = 16 + 3 files * 16)
# 00000048: 6176 0061 av.a ; 0x40+0x0a: B filename
# 0000004c: 6e6f 7468 noth
# 00000050: 6572 6669 erfi
# 00000054: 6c65 5f42 le_B
# 00000058: 2e6a 7067 .jpg
# 0000005c: 0079 6574 .yet ; 0x40+0x1b: C filename
# 00000060: 616e 6f74 anot
# 00000064: 6865 7266 herf
# 00000068: 696c 655f ile_
# 0000006c: 432e 7478 C.tx
# 00000070: 7400 4343 t.CC ; 0x72 start of file data of file A
# 00000074: 4343 4444 CCDD ; 0x76 start of file data of file B
# 00000078: 4444 4444 DDDD
# 0000007c: 4444 4545 DDEE ; 0x7e start of file data of file C
# 00000080: 4545 4545 EEEE
# 00000084: 4545 4545 EEEE
# 00000088: 4545 EE
# Inspired by discontinued labcopy C++ implementation:
# (C) 2021 Gemba
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse
import shutil
import struct
import sys
from collections import namedtuple
from hashlib import md5
# Suffix appended to the input file name to build the output file name when
# no explicit output file is used (also the intermediate name when the input
# file is overwritten).
# NOTE(review): EMI_FN_TABLE_OFFSET and OFFENDING_NESTED_FILES are referenced
# further down but are not defined anywhere in the visible file - confirm
# their definitions were not lost.
OUTFILE_SUFFIX = "_rewritten"
def copy_lab(in_data, list_content=False):
    """Read a LAB archive and rebuild it from its nested-file parameters.

    The header, the lab entries, the filename table and the data of every
    nested file are copied into a fresh byte array, so the result only
    contains what the archive's own metadata describes.

    Args:
        in_data: seekable binary file object of the LAB archive.
        list_content: if true, print a size/name listing of nested files.

    Returns:
        bytearray with the rewritten archive.

    Raises:
        SystemExit: if the file id is not 'LABN' or the filename table does
            not match the announced count of nested files.
    """
    # NOTE(review): the statements reading from in_data, the sys.exit()
    # calls, the non-EMI branch and the 'continue' were missing from the
    # damaged source and are reconstructed from the surrounding code.
    in_data.seek(0, 0)
    hdr = in_data.read(16)
    # compare raw bytes: decoding would raise on a non-ASCII header
    if hdr[:4] != b"LABN":
        print("[!] Not a LAB file")
        sys.exit(1)
    num_entries = int.from_bytes(hdr[8:12], byteorder='little', signed=False)
    nested_filenames_size = int.from_bytes(
        hdr[12:16], byteorder='little', signed=False)

    # read next four bytes to detect EMI or Grim LAB file
    offset_nested_fn_bytes = in_data.read(4)
    nested_fn_table_offset = int.from_bytes(
        offset_nested_fn_bytes, byteorder='little', signed=False)
    # Grim: these bytes are the first entry's filename offset, which is 0
    is_emi = nested_fn_table_offset > 0

    nested_fn_array, nested_fn_bytes = get_nested_filenames(
        in_data, num_entries, nested_fn_table_offset, nested_filenames_size,
        is_emi)
    if num_entries != len(nested_fn_array):
        print("[!] Filename table size does not match count of nested files.")
        sys.exit(1)

    # write header
    out_data = bytearray(hdr)
    lab_entries = decode_and_copy_labentry_info(
        in_data, out_data, nested_fn_array, offset_nested_fn_bytes, is_emi)

    # write nested filenames as-is
    if is_emi:
        # copy blob back with offset
        copy_nestedfile(in_data, out_data, nested_fn_table_offset -
                        EMI_FN_TABLE_OFFSET, nested_filenames_size)
    else:
        # Grim: the filename table directly follows the lab entries
        out_data.extend(nested_fn_bytes)

    if list_content:
        print(f"[*] Size [bytes] File")
        print(f" ------------ ------------")

    # write nested filedata
    size_tot = 0
    for lab_entry in lab_entries:
        if list_content:
            print(f" {lab_entry['size']:>12} {lab_entry['nested_fn']}")
        size_tot += lab_entry['size']
        if lab_entry['nested_fn'] in OFFENDING_NESTED_FILES:
            print(f"[*] Skipping offending file '{lab_entry['nested_fn']}'.")
            continue
        copy_nestedfile(in_data, out_data,
                        lab_entry['start'], lab_entry['size'])

    if list_content:
        s = "" if len(lab_entries) == 1 else "s"
        print(f" ------------ ------------")
        print(f" {size_tot:>12} {len(lab_entries)} file{s}")
    return out_data
def get_nested_filenames(in_data, num_entries, nested_fn_table_offset,
                         nested_filenames_size, is_emi):
    """Get an array of filenames from the archive's filename 'table'.

    The 'table' contains the filenames each null terminated as bytes.

    Args:
        in_data: seekable binary file object of the LAB archive.
        num_entries: count of nested files (locates the Grim table).
        nested_fn_table_offset: value of header bytes 16 to 19 (locates the
            EMI table after subtracting EMI_FN_TABLE_OFFSET).
        nested_filenames_size: byte length of the filename table.
        is_emi: true for "Escape from Monkey Island" archives.

    Returns:
        Tuple of (list of ASCII decoded filenames, raw table bytes as read
        from the archive).
    """
    # seek fwd to retrieve nested filenames
    if is_emi:
        # Escape from Monkey Island
        in_data.seek(nested_fn_table_offset - EMI_FN_TABLE_OFFSET, 0)
    else:
        # Grim Fandango
        # +1 for header
        in_data.seek((num_entries + 1) * 16, 0)
    nested_filenames_bytes = in_data.read(nested_filenames_size)

    if is_emi:
        # every non-null byte is XORed with 0x96, null terminators stay
        nested_fns_decoded = bytes(
            b ^ 0x96 if b else b for b in nested_filenames_bytes)
    else:
        nested_fns_decoded = nested_filenames_bytes

    # cut off surplus null byte, assume ASCII filenames
    return (nested_fns_decoded[:-1].decode("ascii").split('\x00'),
            nested_filenames_bytes)
def decode_and_copy_labentry_info(in_data, out_data, nested_fn_array,
                                  offset_nested_fn_bytes, is_emi):
    """Decodes each lab entry meta info and copies to out.

    Args:
        in_data: seekable binary file object of the LAB archive.
        out_data: bytearray the raw lab entries are appended to.
        nested_fn_array: filenames, one per lab entry, in entry order.
        offset_nested_fn_bytes: raw header bytes 16 to 19, appended to
            out_data first for EMI archives.
        is_emi: true for "Escape from Monkey Island" archives.

    Returns:
        List of dicts with keys 'fname_offset', 'start', 'size', 'reserved'
        and 'nested_fn', one per lab entry.
    """
    # NOTE(review): the seek/read statements and the writes to out_data and
    # lab_entries were missing from the damaged source; reconstructed from
    # the docstring and the 16 byte "<IIII" entry layout.
    # reset seek to first lab_entry
    in_data.seek(20 if is_emi else 16, 0)
    _lab_entry = namedtuple('lab_entry', 'fname_offset start size reserved')
    lab_entries = []
    if is_emi:
        # write out bytes 16 to 19
        out_data.extend(offset_nested_fn_bytes)
    for nested_fn in nested_fn_array:
        entry = in_data.read(16)
        data = struct.unpack("<IIII", entry)
        lab_entry = dict(_lab_entry._make(data)._asdict())
        lab_entry['nested_fn'] = nested_fn
        # entry meta info is copied unchanged
        out_data.extend(entry)
        lab_entries.append(lab_entry)
    return lab_entries
def copy_nestedfile(in_data, out_data, offset, size):
    """Copy a nested file of LAB file to target byte array.

    Reads up to `size` bytes at `offset` from `in_data` and stores them at
    the same offset in `out_data`, growing `out_data` with null bytes up to
    `offset + size` when it is too short.

    Args:
        in_data: seekable binary file object of the LAB archive.
        out_data: bytearray receiving the data (modified in place).
        offset: absolute position of the nested file in source and target.
        size: byte size of the nested file.
    """
    in_data.seek(offset, 0)
    data = in_data.read(size)
    # don't assume offset is strictly ordered
    add_len = offset + size - len(out_data)
    if add_len > 0:
        out_data.extend(bytes(add_len))
    # same-length slice assignment: overwrites in place, also on short reads
    out_data[offset:offset + len(data)] = data
def init_cli_parser():
    """Init command line argument parser.

    Returns:
        argparse.ArgumentParser whose parsed namespace provides in_file,
        out_file, overwrite, list, skip_identical and md5_bytes.
    """
    parser = argparse.ArgumentParser(
        description='Restores a size-forged LAB file to its pristine size.')
    parser.add_argument("in_file", help="input LAB file")
    parser.add_argument("-o", "--outfile", help=f"file to write to, if not "
                        f"given in_file plus suffix '{OUTFILE_SUFFIX}' will be"
                        " used if overwrite is not set",
                        type=str, dest='out_file')
    parser.add_argument("-f", "--overwrite", help="overwrite inputfile",
                        action="store_true", default=False)
    parser.add_argument("-l", "--list", help="list nested files identified in"
                        " LAB file", action="store_true", default=False)
    # explicit dest: the script reads args.skip_identical, while argparse
    # would derive 'skipidentical' from the option name
    parser.add_argument("-s", "--skipidentical", help="skip writing of "
                        "out_file if hash is identical to in_file",
                        action='store_true', default=False,
                        dest='skip_identical')
    parser.add_argument("-b", "--md5bytes", help="number of bytes to "
                        "calculate MD5 (default: full file)",
                        type=int, dest='md5_bytes', default=0)
    return parser
if __name__ == "__main__":
    # Script entry point: parse the command line, rewrite the LAB file in
    # memory and write the result unless it is identical and skipping was
    # requested.
    # NOTE(review): the MD5 calculation of the input, the rewind of in_data
    # and the write branch were truncated in the damaged source and are
    # reconstructed from the surrounding statements.
    parser = init_cli_parser()
    args = parser.parse_args()
    in_file = args.in_file
    overwrite = args.overwrite
    if args.out_file and overwrite:
        print("[*] Parameter outfile will be ignored, because overwrite"
              " (=same file) is set.")
    out_file = args.out_file if not overwrite else None
    if not out_file:
        out_file = f"{in_file}{OUTFILE_SUFFIX}"

    with open(in_file, 'rb') as in_data:
        print(f"[+] Opened '{in_file}'")
        # -1 reads the whole file
        size = -1 if not args.md5_bytes else args.md5_bytes
        md5_infile = md5(in_data.read(size)).hexdigest()
        # rewind: hashing moved the read position
        in_data.seek(0, 0)
        print(f"[+] Copying ...")
        out_data = copy_lab(in_data, args.list)

    md5_bytes = len(out_data) if not args.md5_bytes else args.md5_bytes
    md5_outfile = md5(out_data[:md5_bytes]).hexdigest()
    mib = md5_bytes / 1024.0 / 1024
    if args.skip_identical and md5_outfile == md5_infile:
        print("[+] Skip identical: No outfile written, MD5 is identical to"
              " infile.")
    else:
        with open(out_file, 'wb') as of:
            of.write(out_data)
        if overwrite:
            # written to the suffixed name first, then moved over the input
            shutil.move(out_file, in_file)
            out_file = in_file
        print(f"[+] ... written to '{out_file}'")
    print(f"[*] MD5 {md5_outfile} over {md5_bytes} bytes ({mib:.3f} MiB).")
    print("[*] Done.")
# run with pytest-3
def test_copy_lab():
    """Rewrite a three file dummy archive and compare the result's MD5."""
    from os import remove
    md5_expected = "776138f5eb890123c49bb3046d7dd4f5"
    f = _create_dummy_labfile(
        ["file_A.wav", "anotherfile_B.jpg", "yetanotherfile_C.txt"])
    try:
        with open(f.name, 'rb') as test_data:
            md5_real = md5(copy_lab(test_data))
    finally:
        # the dummy file is created with delete=False, clean up explicitly
        remove(f.name)
    assert md5_expected == md5_real.hexdigest()
def test_copy_lab_remove_nested_file():
    """Rewrite a dummy archive containing the offending nested files and
    compare the result's MD5."""
    from os import remove
    # breaks if OFFENDING_NESTED_FILES is modified
    md5_expected = "c46fcc87f65df671ac93e9319f779f88"
    f = _create_dummy_labfile(
        ["file_A.wav"] + OFFENDING_NESTED_FILES + ["yetanotherfile_C.txt"])
    try:
        with open(f.name, 'rb') as test_data:
            md5_real = md5(copy_lab(test_data))
    finally:
        # the dummy file is created with delete=False, clean up explicitly
        remove(f.name)
    assert md5_expected == md5_real.hexdigest()
def _create_dummy_labfile(filenames):
    """Create a temporary Grim style LAB file holding dummy nested files.

    Layout: 16 byte header ('LABN', 'AAAA', file count, filename table
    length), one 16 byte lab entry per file, the null terminated filenames,
    then the raw data. Nested file number i (from 0) has 4 + 4 * i bytes,
    all set to chr(0x43 + i).

    Args:
        filenames: list of ASCII filenames of the nested files.

    Returns:
        The closed NamedTemporaryFile object; its `name` is the path of the
        file, which is not deleted automatically.
    """
    from tempfile import NamedTemporaryFile

    def _u32(value):
        # unsigned 32 bit little endian, as used throughout the format
        return value.to_bytes(4, byteorder='little', signed=False)

    file_count = len(filenames)
    # concatenated list of filenames, 0x00 terminated
    fn_table = b"".join(bytes(fn, "ascii") + b'\x00' for fn in filenames)
    nested_fn_length = len(fn_table)
    # filename table lands behind header and lab entries
    nested_fn_list_start = (file_count + 1) * 16
    # start of binary blob of all nested file content
    raw_data_start = nested_fn_list_start + nested_fn_length

    header = b"LABN" + b"AAAA" + _u32(file_count) + _u32(nested_fn_length)

    # lab entries from bytes 16 onwards
    entries = bytearray()
    buf = bytearray()
    for le_meta in range(file_count):
        # offset to nested filename string (Grim Fandango only); counts the
        # preceding filename characters without their null terminators,
        # the expected MD5 values of the tests rely on exactly this
        fn_offset = len(''.join(filenames[0:le_meta]))
        fsize = 4 + 4 * le_meta  # sample size
        entries += _u32(fn_offset)
        entries += _u32(raw_data_start + len(buf))
        entries += _u32(fsize)
        entries += b"BBBB"  # reserved
        # per file raw data after filename list
        buf.extend(bytes(chr(0x43 + le_meta), 'ascii') * fsize)

    # close on leaving the block so the content is flushed for readers
    with NamedTemporaryFile(mode='wb', delete=False) as f:
        f.write(header + entries + fn_table + buf)
    return f
