Gemba/lab_copier.py

## lab_copier.py
#! /usr/bin/env python3

# lab_copier.py: Modern implementation of labcopy in Python
#
# Rewrites size-forged LAB files to represent their actual size.
# E.g., for usage and proper game detection in SCUMMVM.
#
# Primary use is for "Grim Fandango" LAB files, but does handle
# "Escape from Monkey Island" files too.
#
# (LAB files are encoded in the LucasArts Binary archive format)
#
# Sample:
# Note: integers are LE encoded
#
# 00000000: 4c41 424e  LABN ; 'LABN' file id
# 00000004: 4141 4141  AAAA ; reserved (copy as is)
# 00000008: 0300 0000  .... ; count of nested files
# 0000000c: 3200 0000  2... ; length of concatenated nested filename strings
# 00000010: 0000 0000  .... ; offset to file A filename (Grim only)
# 00000014: 7200 0000  r... ; offset to file A data
# 00000018: 0400 0000  .... ; bytesize of file A
# 0000001c: 4242 4242  BBBB ; reserved (usually 0x00, copy as is)
# 00000020: 0a00 0000  .... ; offset to file B filename
# 00000024: 7600 0000  v... ; offset to file B data
# 00000028: 0800 0000  .... ; bytesize of file B
# 0000002c: 4242 4242  BBBB ; reserved
# 00000030: 1b00 0000  .... ; offset to file C filename
# 00000034: 7e00 0000  ~... ; offset to file C data
# 00000038: 0c00 0000  .... ; bytesize of file C
# 0000003c: 4242 4242  BBBB ; reserved
# 00000040: 6669 6c65  file ; filenames concatenated. filename A from pos 0x40
# 00000044: 5f41 2e77  _A.w ; (0x40 = 16 + 3 files * 16)
# 00000048: 6176 0061  av.a ; 0x40+0x0a: B filename
# 0000004c: 6e6f 7468  noth
# 00000050: 6572 6669  erfi
# 00000054: 6c65 5f42  le_B
# 00000058: 2e6a 7067  .jpg
# 0000005c: 0079 6574  .yet ; 0x40+0x1b: C filename
# 00000060: 616e 6f74  anot
# 00000064: 6865 7266  herf
# 00000068: 696c 655f  ile_
# 0000006c: 432e 7478  C.tx
# 00000070: 7400 4343  t.CC ; 0x72 start of file A data
# 00000074: 4343 4444  CCDD ; 0x76 start of file B data
# 00000078: 4444 4444  DDDD
# 0000007c: 4444 4545  DDEE ; 0x7e start of file C data
# 00000080: 4545 4545  EEEE
# 00000084: 4545 4545  EEEE
# 00000088: 4545       EE
#
# Inspired by discontinued labcopy C++ implementation:
# https://github.com/klusark/residual-tools/blob/master/tools/labcopy.cpp
#
# (C) 2021 Gemba
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.

import argparse
import shutil
import struct
import sys

from collections import namedtuple
from hashlib import md5

# Suffix appended to the input path to build the output path when no
# explicit --outfile applies.
OUTFILE_SUFFIX = "_rewritten"
# Names of nested files whose data copy_lab() skips instead of copying.
OFFENDING_NESTED_FILES = ["cp_0_intha.bm"]  # Grim
# Value subtracted from the filename-table offset stored at bytes 16-19 to
# locate the filename table in archives detected as EMI.
EMI_FN_TABLE_OFFSET = 0x13d0f


def copy_lab(in_data, list_content=False):
    """Read a LAB archive and rebuild it in memory at its actual size.

    The 16-byte header, the entry table and the filename table are copied
    verbatim. Each nested file is then copied to the offset recorded in its
    entry. For files named in OFFENDING_NESTED_FILES the table entry is
    kept, but the file data is not copied.

    Args:
        in_data: Seekable binary file object holding the LAB archive,
            positioned at its start.
        list_content: If true, print a size/name listing of the nested
            files and a total line.

    Returns:
        bytearray with the rewritten archive.

    Exits the process with status 1 if the input does not start with a
    complete 'LABN' header, or if the number of filenames differs from the
    entry count in the header."""

    hdr = in_data.read(16)
    # Compare raw bytes instead of decoding: arbitrary binary input may hold
    # non-ASCII bytes, which would raise UnicodeDecodeError before the check.
    # A header shorter than 16 bytes cannot be a LAB file either.
    if len(hdr) < 16 or hdr[:4] != b"LABN":
        print("[!] Not a LAB file")
        sys.exit(1)

    num_entries = int.from_bytes(hdr[8:12], byteorder='little', signed=False)

    nested_filenames_size = int.from_bytes(
        hdr[12:16], byteorder='little', signed=False)

    # read next four bytes to detect EMI or Grim LAB file: a non-zero value
    # is treated as the filename table offset of an EMI archive
    offset_nested_fn_bytes = in_data.read(4)
    nested_fn_table_offset = int.from_bytes(
        offset_nested_fn_bytes, byteorder='little', signed=False)

    is_emi = nested_fn_table_offset > 0

    nested_fn_array, nested_fn_bytes = get_nested_filenames(
        in_data, num_entries, nested_fn_table_offset, nested_filenames_size,
        is_emi)

    if num_entries != len(nested_fn_array):
        print("[!] Filename table size does not match count of nested files.")
        sys.exit(1)

    # write header
    out_data = bytearray(hdr)

    lab_entries = decode_and_copy_labentry_info(
        in_data, out_data, nested_fn_array, offset_nested_fn_bytes, is_emi)

    # write nested filenames as-is
    if is_emi:
        # copy blob back to the same (de-biased) position it was read from
        copy_nestedfile(in_data, out_data, nested_fn_table_offset -
                        EMI_FN_TABLE_OFFSET, nested_filenames_size)
    else:
        out_data.extend(nested_fn_bytes)

    if list_content:
        print("[*] Size [bytes]  File")
        print("    ------------  ------------")
    # write nested filedata
    size_tot = 0
    for lab_entry in lab_entries:
        if list_content:
            print(f"    {lab_entry['size']:>12}  {lab_entry['nested_fn']}")
            size_tot += lab_entry['size']
        if lab_entry['nested_fn'] in OFFENDING_NESTED_FILES:
            print(f"[*] Skipping offending file '{lab_entry['nested_fn']}'.")
            continue
        copy_nestedfile(in_data, out_data,
                        lab_entry['start'], lab_entry['size'])
    if list_content:
        s = "" if len(lab_entries) == 1 else "s"
        print("    ------------  ------------")
        print(f"    {size_tot:>12}  {len(lab_entries)} file{s}")
    return out_data


def get_nested_filenames(in_data, num_entries, nested_fn_table_offset,
                         nested_filenames_size, is_emi):
    """Get an array of filenames from the archive's filename 'table'.

    The 'table' contains the filenames each null terminated as bytes.

    Args:
        in_data: Seekable binary file object holding the LAB archive.
        num_entries: Number of nested files according to the header.
        nested_fn_table_offset: Value of bytes 16-19; only used when is_emi
            is true, where EMI_FN_TABLE_OFFSET is subtracted from it to get
            the table position.
        nested_filenames_size: Byte length of the filename table.
        is_emi: True for the EMI layout, False for the Grim layout (table
            directly after the header and the 16-byte entries).

    Returns:
        Tuple (filenames, raw_table): the filenames as a list of ASCII
        decoded strings and the table bytes exactly as read from the file.
        An empty table yields an empty filename list."""
    # seek fwd to retrieve nested filenames
    if is_emi:
        # Escape from Monkey Island
        in_data.seek(nested_fn_table_offset - EMI_FN_TABLE_OFFSET)
    else:
        # Grim Fandango
        # +1 for header
        in_data.seek((num_entries + 1) * 16, 0)

    nested_filenames_bytes = in_data.read(nested_filenames_size)
    if not nested_filenames_bytes:
        # ''.split('\x00') would give [''], i.e. one bogus empty filename
        return [], nested_filenames_bytes

    if is_emi:
        # EMI names are XORed with 0x96; the null terminators are stored
        # unmodified and must stay null
        nested_fns_decoded = bytes(
            b ^ 0x96 if b else b for b in nested_filenames_bytes)
    else:
        nested_fns_decoded = nested_filenames_bytes

    # cut off surplus null byte, assume ASCII filenames
    return (nested_fns_decoded[:-1].decode("ascii").split('\x00'),
            nested_filenames_bytes)

def decode_and_copy_labentry_info(in_data, out_data, nested_fn_array,
                                  offset_nested_fn_bytes, is_emi):
    """Parse the 16-byte entry records and append them unchanged to out_data.

    One record is read per name in nested_fn_array, starting at byte 20 for
    EMI archives and at byte 16 otherwise. For EMI archives the four bytes
    in offset_nested_fn_bytes are appended to out_data before the records.

    Returns a list with one dict per record holding the keys 'fname_offset',
    'start', 'size', 'reserved' (four little-endian unsigned 32-bit values)
    and 'nested_fn' (the matching name from nested_fn_array)."""
    record_type = namedtuple('lab_entry', 'fname_offset start size reserved')

    # position the reader on the first entry record
    if is_emi:
        first_record_pos = 20
    else:
        first_record_pos = 16
    in_data.seek(first_record_pos, 0)

    if is_emi:
        # bytes 16 to 19 precede the records in the output as well
        out_data.extend(offset_nested_fn_bytes)

    parsed = []
    for name in nested_fn_array:
        raw = in_data.read(16)
        out_data.extend(raw)
        record = record_type(*struct.unpack("<IIII", raw))._asdict()
        record['nested_fn'] = name
        parsed.append(record)
    return parsed


def copy_nestedfile(in_data, out_data, offset, size):
    """Copy a nested file of LAB file to target byte array.

    Reads up to size bytes at offset from in_data and stores them at the
    same offset in out_data.

    Args:
        in_data: Seekable binary file object to read from.
        out_data: bytearray that receives the data; it is zero-padded up to
            offset + size when it is shorter than that.
        offset: Absolute position used for both reading and writing.
        size: Number of bytes to copy."""
    in_data.seek(offset, 0)
    data = in_data.read(size)
    # don't assume offset is strictly ordered
    add_len = offset + size - len(out_data)
    if add_len > 0:
        out_data.extend(b'\x00' * add_len)
    # One slice assignment instead of a per-byte loop. The slice length is
    # len(data), so a short read leaves the zero padding behind it untouched
    # and never changes the length of out_data.
    out_data[offset:offset + len(data)] = data


def init_cli_parser():
    """Create the argparse parser for the command line interface.

    Defines the positional in_file plus the options -o/--outfile,
    -f/--overwrite, -l/--list, -s/--skipidentical and -b/--md5bytes."""
    outfile_help = ("file to write to, if not given in_file plus suffix "
                    f"'{OUTFILE_SUFFIX}' will be used if overwrite is not set")
    list_help = "list nested files identified in LAB file"
    skip_help = "skip writing of out_file if hash is identical to in_file"
    md5_help = "number of bytes to calculate MD5 (default: full file)"

    cli = argparse.ArgumentParser(
        description='Restores a size-forged LAB file to its pristine size.')
    cli.add_argument("in_file", help="input LAB file")
    cli.add_argument("-o", "--outfile", dest='out_file', type=str,
                     help=outfile_help)
    cli.add_argument("-f", "--overwrite", action="store_true", default=False,
                     help="overwrite inputfile")
    cli.add_argument("-l", "--list", action="store_true", default=False,
                     help=list_help)
    cli.add_argument("-s", "--skipidentical", dest='skip_identical',
                     action='store_true', default=False, help=skip_help)
    # 0 means "hash the full file"
    cli.add_argument("-b", "--md5bytes", dest='md5_bytes', type=int,
                     default=0, help=md5_help)
    return cli


if __name__ == "__main__":
    # Script entry: parse the command line, rewrite the LAB file in memory,
    # then write the result unless it is identical and -s was given.

    parser = init_cli_parser()
    args = parser.parse_args()

    in_file, overwrite = args.in_file, args.overwrite
    if overwrite and args.out_file:
        print("[*] Parameter outfile will be ignored, because overwrite"
              " (=same file) is set.")

    # -f takes precedence over -o; without a usable -o the suffixed input
    # name is used (for -f this is only the intermediate file)
    out_file = None if overwrite else args.out_file
    out_file = out_file or f"{in_file}{OUTFILE_SUFFIX}"

    with open(in_file, 'rb') as in_data:
        print(f"[+] Opened '{in_file}'")
        # read(-1) reads the whole file
        size = args.md5_bytes if args.md5_bytes else -1
        md5_infile = md5(in_data.read(size)).hexdigest()
        in_data.seek(0)
        print("[+] Copying ...")
        out_data = copy_lab(in_data, args.list)

    md5_bytes = args.md5_bytes if args.md5_bytes else len(out_data)
    md5_outfile = md5(out_data[:md5_bytes]).hexdigest()
    mib = md5_bytes / 1024.0 / 1024

    unchanged = md5_outfile == md5_infile
    if not (args.skip_identical and unchanged):
        with open(out_file, 'wb') as of:
            of.write(out_data)
        if overwrite:
            # replace the input with the freshly written file
            shutil.move(out_file, in_file)
            out_file = in_file
        print(f"[+] ... written to '{out_file}'")
    else:
        print("[+] Skip identical: No outfile written, MD5 is identical to"
              " infile.")

    print(f"[*] MD5 {md5_outfile} over {md5_bytes} bytes ({mib:.3f} MiB).")
    print("[*] Done.")


# run with pytest-3
def test_copy_lab():
    """Check copy_lab() output for a three-file dummy archive by its MD5."""
    from os import remove

    md5_expected = "776138f5eb890123c49bb3046d7dd4f5"

    f = _create_dummy_labfile(
        ["file_A.wav", "anotherfile_B.jpg", "yetanotherfile_C.txt"])
    try:
        with open(f.name, 'rb') as test_data:
            md5_real = md5(copy_lab(test_data))
    finally:
        # remove the temp file even if copy_lab() raises or exits
        remove(f.name)

    assert md5_expected == md5_real.hexdigest()


def test_copy_lab_remove_nested_file():
    """Check copy_lab() output when the archive holds an offending file."""
    from os import remove

    # breaks if OFFENDING_NESTED_FILES is modified
    md5_expected = "c46fcc87f65df671ac93e9319f779f88"

    f = _create_dummy_labfile(
        ["file_A.wav"] + OFFENDING_NESTED_FILES + ["yetanotherfile_C.txt"])
    try:
        with open(f.name, 'rb') as test_data:
            md5_real = md5(copy_lab(test_data))
    finally:
        # remove the temp file even if copy_lab() raises or exits
        remove(f.name)

    assert md5_expected == md5_real.hexdigest()


def _create_dummy_labfile(filenames):
    from tempfile import NamedTemporaryFile

    f = NamedTemporaryFile(mode='wb', delete=False)
    f.write(b"LABN")
    f.write(b"AAAA")  # reserved
    file_count = len(filenames)
    f.write(int.to_bytes(file_count, length=4, byteorder='little',
                         signed=False))

    # land behind header and lab entries
    nested_fn_list_start = (file_count + 1) * 16
    f.seek(nested_fn_list_start, 0)

    nested_fn_length = 0
    # write concatenated list of filenames, 0x00 terminated
    for fn in filenames:
        f.write(bytearray(fn, "ascii"))
        f.write(b'\x00')
        nested_fn_length += len(fn) + 1

    # write length of concatenated nested filenames in header
    f.seek(12, 0)
    f.write(int.to_bytes(nested_fn_length, length=4,
                         byteorder='little', signed=False))

    sizes_sum = 0

    # start of binary blob of all nested file content
    raw_data_start = nested_fn_list_start + nested_fn_length

    # lab entries from bytes 16 onwards
    fsizes = []
    for le_meta in range(file_count):
        if le_meta > 0:
            # offset to nested filename string
            fn_offset = int.to_bytes(len(''.join(filenames[0:le_meta])),
                                     length=4, byteorder='little', signed=False)
            f.write(fn_offset)
        else:
            f.write(b'\x00' * 4)  # Grim Fandango only
        nested_file_data_start = int.to_bytes(raw_data_start + sizes_sum,
                                              length=4, byteorder='little',
                                              signed=False)
        f.write(nested_file_data_start)
        fsize = 4 + 4 * le_meta  # sample size
        sizes_sum = sizes_sum + fsize
        fsizes.append(fsize)
        size = int.to_bytes(fsize, length=4,
                            byteorder='little', signed=False)
        f.write(size)
        f.write(b"BBBB")  # reserved

    buf = bytearray()
    for idx, fsize in enumerate(fsizes):
        buf.extend(bytes(chr(0x43 + idx), 'ascii') * fsize)

    # per file raw data after filename list
    f.seek(raw_data_start, 0)
    f.write(buf)
    f.close()
    return f
	#! /usr/bin/env python3

	# lab_copier.py: Modern implementation of labcopy in Python
	#
	# Rewrites size-forged LAB files to represent their actual size.
	# E.g., for usage and proper game detection in SCUMMVM.
	#
	# Primary use is for "Grim Fandango" LAB files, but does handle
	# "Escape from Monkey Island" files too.
	#
	# (LAB files are encoded in the LucasArts Binary archive format)
	#
	# Sample:
	# Note: integers are LE encoded
	#
	# 00000000: 4c41 424e LABN ; 'LABN' file id
	# 00000004: 4141 4141 AAAA ; reserved (copy as is)
	# 00000008: 0300 0000 .... ; count of nested files
	# 0000000c: 3200 0000 2... ; length of concatenated nested filename strings
	# 00000010: 0000 0000 .... ; offset to file A filename (Grim only)
	# 00000014: 7200 0000 r... ; offset to file A data
	# 00000018: 0400 0000 .... ; bytesize of file A
	# 0000001c: 4242 4242 BBBB ; reserved (usually 0x00, copy as is)
	# 00000020: 0a00 0000 .... ; offset to file B filename
	# 00000024: 7600 0000 v... ; offset to file B data
	# 00000028: 0800 0000 .... ; bytesize of file B
	# 0000002c: 4242 4242 BBBB ; reserved
	# 00000030: 1b00 0000 .... ; offset to file C filename
	# 00000034: 7e00 0000 ~... ; offset to file C data
	# 00000038: 0c00 0000 .... ; bytesize of file C
	# 0000003c: 4242 4242 BBBB ; reserved
	# 00000040: 6669 6c65 file ; filenames concatenated. filename A from pos 0x40
	# 00000044: 5f41 2e77 _A.w ; (0x40 = 16 + 3 files * 16)
	# 00000048: 6176 0061 av.a ; 0x40+0x0a: B filename
	# 0000004c: 6e6f 7468 noth
	# 00000050: 6572 6669 erfi
	# 00000054: 6c65 5f42 le_B
	# 00000058: 2e6a 7067 .jpg
	# 0000005c: 0079 6574 .yet ; 0x40+0x1b: C filename
	# 00000060: 616e 6f74 anot
	# 00000064: 6865 7266 herf
	# 00000068: 696c 655f ile_
	# 0000006c: 432e 7478 C.tx
	# 00000070: 7400 4343 t.CC ; 0x72 start of file A data
	# 00000074: 4343 4444 CCDD ; 0x76 start of file B data
	# 00000078: 4444 4444 DDDD
	# 0000007c: 4444 4545 DDEE ; 0x7e start of file C data
	# 00000080: 4545 4545 EEEE
	# 00000084: 4545 4545 EEEE
	# 00000088: 4545 EE
	#
	# Inspired by discontinued labcopy C++ implementation:
	# https://github.com/klusark/residual-tools/blob/master/tools/labcopy.cpp
	#
	# (C) 2021 Gemba
	#
	# This program is free software: you can redistribute it and/or modify it
	# under the terms of the GNU General Public License as published by the
	# Free Software Foundation, either version 3 of the License, or (at your
	# option) any later version.
	# This program is distributed in the hope that it will be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	# for more details.
	# You should have received a copy of the GNU General Public License along
	# with this program. If not, see <https://www.gnu.org/licenses/>.

	import argparse
	import shutil
	import struct
	import sys

	from collections import namedtuple
	from hashlib import md5

	# Suffix appended to the input path to build the output path when no
	# explicit --outfile applies.
	OUTFILE_SUFFIX = "_rewritten"
	# Names of nested files whose data copy_lab() skips instead of copying.
	OFFENDING_NESTED_FILES = ["cp_0_intha.bm"] # Grim
	# Value subtracted from the filename-table offset stored at bytes 16-19 to
	# locate the filename table in archives detected as EMI.
	EMI_FN_TABLE_OFFSET = 0x13d0f


	def copy_lab(in_data, list_content=False):
	    """Read a LAB archive and rebuild it in memory at its actual size.

	    The 16-byte header, the entry table and the filename table are copied
	    verbatim. Each nested file is then copied to the offset recorded in
	    its entry. For files named in OFFENDING_NESTED_FILES the table entry
	    is kept, but the file data is not copied.

	    Args:
	        in_data: Seekable binary file object holding the LAB archive,
	            positioned at its start.
	        list_content: If true, print a size/name listing of the nested
	            files and a total line.

	    Returns:
	        bytearray with the rewritten archive.

	    Exits the process with status 1 if the input does not start with a
	    complete 'LABN' header, or if the number of filenames differs from
	    the entry count in the header."""

	    hdr = in_data.read(16)
	    # Compare raw bytes instead of decoding: arbitrary binary input may
	    # hold non-ASCII bytes, which would raise UnicodeDecodeError before
	    # the check. A header shorter than 16 bytes is no LAB file either.
	    if len(hdr) < 16 or hdr[:4] != b"LABN":
	        print("[!] Not a LAB file")
	        sys.exit(1)

	    num_entries = int.from_bytes(hdr[8:12], byteorder='little', signed=False)

	    nested_filenames_size = int.from_bytes(
	        hdr[12:16], byteorder='little', signed=False)

	    # read next four bytes to detect EMI or Grim LAB file: a non-zero
	    # value is treated as the filename table offset of an EMI archive
	    offset_nested_fn_bytes = in_data.read(4)
	    nested_fn_table_offset = int.from_bytes(
	        offset_nested_fn_bytes, byteorder='little', signed=False)

	    is_emi = nested_fn_table_offset > 0

	    nested_fn_array, nested_fn_bytes = get_nested_filenames(
	        in_data, num_entries, nested_fn_table_offset, nested_filenames_size,
	        is_emi)

	    if num_entries != len(nested_fn_array):
	        print("[!] Filename table size does not match count of nested files.")
	        sys.exit(1)

	    # write header
	    out_data = bytearray(hdr)

	    lab_entries = decode_and_copy_labentry_info(
	        in_data, out_data, nested_fn_array, offset_nested_fn_bytes, is_emi)

	    # write nested filenames as-is
	    if is_emi:
	        # copy blob back to the same (de-biased) position it was read from
	        copy_nestedfile(in_data, out_data, nested_fn_table_offset -
	                        EMI_FN_TABLE_OFFSET, nested_filenames_size)
	    else:
	        out_data.extend(nested_fn_bytes)

	    # The listing uses a 16 character wide size column (4 spaces of
	    # indent plus a 12 character right-aligned number), matching the
	    # width of the "[*] Size [bytes]" heading.
	    if list_content:
	        print("[*] Size [bytes]  File")
	        print("    ------------  ------------")
	    # write nested filedata
	    size_tot = 0
	    for lab_entry in lab_entries:
	        if list_content:
	            print(f"    {lab_entry['size']:>12}  {lab_entry['nested_fn']}")
	            size_tot += lab_entry['size']
	        if lab_entry['nested_fn'] in OFFENDING_NESTED_FILES:
	            print(f"[*] Skipping offending file '{lab_entry['nested_fn']}'.")
	            continue
	        copy_nestedfile(in_data, out_data,
	                        lab_entry['start'], lab_entry['size'])
	    if list_content:
	        s = "" if len(lab_entries) == 1 else "s"
	        print("    ------------  ------------")
	        print(f"    {size_tot:>12}  {len(lab_entries)} file{s}")
	    return out_data


	def get_nested_filenames(in_data, num_entries, nested_fn_table_offset,
	                         nested_filenames_size, is_emi):
	    """Get an array of filenames from the archive's filename 'table'.

	    The 'table' contains the filenames each null terminated as bytes.

	    Args:
	        in_data: Seekable binary file object holding the LAB archive.
	        num_entries: Number of nested files according to the header.
	        nested_fn_table_offset: Value of bytes 16-19; only used when
	            is_emi is true, where EMI_FN_TABLE_OFFSET is subtracted from
	            it to get the table position.
	        nested_filenames_size: Byte length of the filename table.
	        is_emi: True for the EMI layout, False for the Grim layout (table
	            directly after the header and the 16-byte entries).

	    Returns:
	        Tuple (filenames, raw_table): the filenames as a list of ASCII
	        decoded strings and the table bytes exactly as read from the
	        file. An empty table yields an empty filename list."""
	    # seek fwd to retrieve nested filenames
	    if is_emi:
	        # Escape from Monkey Island
	        in_data.seek(nested_fn_table_offset - EMI_FN_TABLE_OFFSET)
	    else:
	        # Grim Fandango
	        # +1 for header
	        in_data.seek((num_entries + 1) * 16, 0)

	    nested_filenames_bytes = in_data.read(nested_filenames_size)
	    if not nested_filenames_bytes:
	        # ''.split('\x00') would give [''], i.e. one bogus empty filename
	        return [], nested_filenames_bytes

	    if is_emi:
	        # EMI names are XORed with 0x96; the null terminators are stored
	        # unmodified and must stay null
	        nested_fns_decoded = bytes(
	            b ^ 0x96 if b else b for b in nested_filenames_bytes)
	    else:
	        nested_fns_decoded = nested_filenames_bytes

	    # cut off surplus null byte, assume ASCII filenames
	    return (nested_fns_decoded[:-1].decode("ascii").split('\x00'),
	            nested_filenames_bytes)


	def decode_and_copy_labentry_info(in_data, out_data, nested_fn_array,
	                                  offset_nested_fn_bytes, is_emi):
	    """Decodes each lab entry meta info and copies to out.

	    One 16-byte record is read per name in nested_fn_array, starting at
	    byte 20 for EMI archives and at byte 16 otherwise. For EMI archives
	    the four bytes in offset_nested_fn_bytes are appended to out_data
	    before the records.

	    Returns a list with one dict per record holding the keys
	    'fname_offset', 'start', 'size', 'reserved' (four little-endian
	    unsigned 32-bit values) and 'nested_fn' (the matching name)."""
	    # reset seek to first lab_entry
	    in_data.seek(20 if is_emi else 16, 0)

	    _lab_entry = namedtuple('lab_entry', 'fname_offset start size reserved')
	    lab_entries = []

	    if is_emi:
	        # write out bytes 16 to 19
	        out_data.extend(offset_nested_fn_bytes)

	    for nested_fn in nested_fn_array:
	        entry = in_data.read(16)
	        out_data.extend(entry)
	        lab_entry = _lab_entry(*struct.unpack("<IIII", entry))._asdict()
	        lab_entry['nested_fn'] = nested_fn
	        lab_entries.append(lab_entry)
	    return lab_entries


	def copy_nestedfile(in_data, out_data, offset, size):
	    """Copy a nested file of LAB file to target byte array.

	    Reads up to size bytes at offset from in_data and stores them at the
	    same offset in out_data.

	    Args:
	        in_data: Seekable binary file object to read from.
	        out_data: bytearray that receives the data; it is zero-padded up
	            to offset + size when it is shorter than that.
	        offset: Absolute position used for both reading and writing.
	        size: Number of bytes to copy."""
	    in_data.seek(offset, 0)
	    data = in_data.read(size)
	    # don't assume offset is strictly ordered
	    add_len = offset + size - len(out_data)
	    if add_len > 0:
	        out_data.extend(b'\x00' * add_len)
	    # One slice assignment instead of a per-byte loop. The slice length
	    # is len(data), so a short read leaves the zero padding behind it
	    # untouched and never changes the length of out_data.
	    out_data[offset:offset + len(data)] = data


	def init_cli_parser():
	    """Init command line argument parser.

	    Defines the positional in_file plus the options -o/--outfile,
	    -f/--overwrite, -l/--list, -s/--skipidentical and -b/--md5bytes.

	    Returns the configured argparse.ArgumentParser."""
	    parser = argparse.ArgumentParser(
	        description='Restores a size-forged LAB file to its pristine size.')
	    parser.add_argument("in_file", help="input LAB file")
	    parser.add_argument("-o", "--outfile", help="file to write to, if not "
	                        f"given in_file plus suffix '{OUTFILE_SUFFIX}' will be"
	                        " used if overwrite is not set",
	                        type=str, dest='out_file')
	    parser.add_argument("-f", "--overwrite", help="overwrite inputfile",
	                        action="store_true", default=False)
	    parser.add_argument("-l", "--list", help="list nested files identified in"
	                        " LAB file", action="store_true", default=False)
	    parser.add_argument("-s", "--skipidentical", help="skip writing of "
	                        "out_file if hash is identical to in_file",
	                        action='store_true', default=False,
	                        dest='skip_identical')
	    # 0 means "hash the full file"
	    parser.add_argument("-b", "--md5bytes", help="number of bytes to "
	                        "calculate MD5 (default: full file)",
	                        type=int, dest='md5_bytes', default=0)
	    return parser


	if __name__ == "__main__":
	    # Script entry: parse the command line, rewrite the LAB file in
	    # memory, then write the result unless it is identical and -s was
	    # given.

	    parser = init_cli_parser()
	    args = parser.parse_args()

	    in_file = args.in_file
	    overwrite = args.overwrite
	    if args.out_file and overwrite:
	        print("[*] Parameter outfile will be ignored, because overwrite"
	              " (=same file) is set.")

	    # -f takes precedence over -o; without a usable -o the suffixed input
	    # name is used (for -f this is only the intermediate file)
	    out_file = args.out_file if not overwrite else None
	    if not out_file:
	        out_file = f"{in_file}{OUTFILE_SUFFIX}"

	    with open(in_file, 'rb') as in_data:
	        print(f"[+] Opened '{in_file}'")
	        # read(-1) reads the whole file
	        size = -1 if not args.md5_bytes else args.md5_bytes
	        md5_infile = md5(in_data.read(size)).hexdigest()
	        in_data.seek(0)
	        print("[+] Copying ...")
	        out_data = copy_lab(in_data, args.list)

	    md5_bytes = len(out_data) if not args.md5_bytes else args.md5_bytes
	    md5_outfile = md5(out_data[:md5_bytes]).hexdigest()
	    mib = md5_bytes / 1024.0 / 1024

	    if args.skip_identical and md5_outfile == md5_infile:
	        print("[+] Skip identical: No outfile written, MD5 is identical to"
	              " infile.")
	    else:
	        with open(out_file, 'wb') as of:
	            of.write(out_data)
	        if overwrite:
	            # replace the input with the freshly written file
	            shutil.move(out_file, in_file)
	            out_file = in_file
	        print(f"[+] ... written to '{out_file}'")

	    print(f"[*] MD5 {md5_outfile} over {md5_bytes} bytes ({mib:.3f} MiB).")
	    print("[*] Done.")


	# run with pytest-3
	def test_copy_lab():
	    """Check copy_lab() output for a three-file dummy archive by its MD5."""
	    from os import remove

	    md5_expected = "776138f5eb890123c49bb3046d7dd4f5"

	    f = _create_dummy_labfile(
	        ["file_A.wav", "anotherfile_B.jpg", "yetanotherfile_C.txt"])
	    try:
	        with open(f.name, 'rb') as test_data:
	            md5_real = md5(copy_lab(test_data))
	    finally:
	        # remove the temp file even if copy_lab() raises or exits
	        remove(f.name)

	    assert md5_expected == md5_real.hexdigest()


	def test_copy_lab_remove_nested_file():
	    """Check copy_lab() output when the archive holds an offending file."""
	    from os import remove

	    # breaks if OFFENDING_NESTED_FILES is modified
	    md5_expected = "c46fcc87f65df671ac93e9319f779f88"

	    f = _create_dummy_labfile(
	        ["file_A.wav"] + OFFENDING_NESTED_FILES + ["yetanotherfile_C.txt"])
	    try:
	        with open(f.name, 'rb') as test_data:
	            md5_real = md5(copy_lab(test_data))
	    finally:
	        # remove the temp file even if copy_lab() raises or exits
	        remove(f.name)

	    assert md5_expected == md5_real.hexdigest()


	def _create_dummy_labfile(filenames):
	from tempfile import NamedTemporaryFile

	f = NamedTemporaryFile(mode='wb', delete=False)
	f.write(b"LABN")
	f.write(b"AAAA") # reserved
	file_count = len(filenames)
	f.write(int.to_bytes(file_count, length=4, byteorder='little',
	signed=False))

	# land behind header and lab entries
	nested_fn_list_start = (file_count + 1) * 16
	f.seek(nested_fn_list_start, 0)

	nested_fn_length = 0
	# write concatenated list of filenames, 0x00 terminated
	for fn in filenames:
	f.write(bytearray(fn, "ascii"))
	f.write(b'\x00')
	nested_fn_length += len(fn) + 1

	# write length of concatenated nested filenames in header
	f.seek(12, 0)
	f.write(int.to_bytes(nested_fn_length, length=4,
	byteorder='little', signed=False))

	sizes_sum = 0

	# start of binary blob of all nested file content
	raw_data_start = nested_fn_list_start + nested_fn_length

	# lab entries from bytes 16 onwards
	fsizes = []
	for le_meta in range(file_count):
	if le_meta > 0:
	# offset to nested filename string
	fn_offset = int.to_bytes(len(''.join(filenames[0:le_meta])),
	length=4, byteorder='little', signed=False)
	f.write(fn_offset)
	else:
	f.write(b'\x00' * 4) # Grim Fandango only
	nested_file_data_start = int.to_bytes(raw_data_start + sizes_sum,
	length=4, byteorder='little',
	signed=False)
	f.write(nested_file_data_start)
	fsize = 4 + 4 * le_meta # sample size
	sizes_sum = sizes_sum + fsize
	fsizes.append(fsize)
	size = int.to_bytes(fsize, length=4,
	byteorder='little', signed=False)
	f.write(size)
	f.write(b"BBBB") # reserved

	buf = bytearray()
	for idx, fsize in enumerate(fsizes):
	buf.extend(bytes(chr(0x43 + idx), 'ascii') * fsize)

	# per file raw data after filename list
	f.seek(raw_data_start, 0)
	f.write(buf)
	f.close()
	return f