# camconn/bigdecoder.py

## bigdecoder.py
#!/usr/bin/env python3
""" bigdecoder.py, by aderyn@gmail.com (2009-03-01) and cam {at-goes-here} camconn.cc (2018-05-03)

This is a Decoder for .BIG-format files utilized by many EA Games.
EA's .big is a trivial archival format. Quite frankly, this is
probably the simplest compound file format imaginable.

File Structure
==============
The file consists of a global header, an index of all the embedded
files, and the actual file data.

    global header {
        header, charstring, 4 bytes - BIG4, BIGF, or something similar
        total file size, unsigned integer, 4 bytes, little endian byte order
        number of embedded files, unsigned integer, 4 bytes, big endian byte order
        total size of index table in bytes, unsigned integer, 4 bytes, big endian byte order
    }, only occurs once

An index of files follows directly after the global header. Each embedded file has its own
entry index.

    index entry {
        position of embedded file within BIG-file, unsigned integer, 4 bytes, big endian byte order
        size of embedded data, unsigned integer, 4 bytes, big endian byte order
        file name, cstring ending with null byte
    }

After that, we have the actual data.

    file data: {
        raw data - at the positions specified in the index
    }

Usage
=====
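Run the script with the archive to unpack and the directory to unpack into:

    python bigdecoder.py [file] [target]

Files that already exist under the target directory are skipped.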

"""

import struct
import sys
import os
import os.path

# Plain record, in the spirit of a C struct, that holds the data for one
# entry of the archive's file index.
class entry:
    """One row of the BIG index: member name, byte offset and byte length.

    All constructor arguments are optional so the historical usage --
    ``e = entry()`` followed by attribute assignment -- keeps working.
    """

    def __init__(self, name="", position=0, size=0):
        self.name = name
        self.position = position
        self.size = size

    def __repr__(self):
        return "entry(name={!r}, position={}, size={})".format(
            self.name, self.position, self.size)


# Magic byte strings accepted at offset 0 of an archive.
BIG_MAGICS = (b"BIG4", b"BIGF")

# Member data is copied in slices of at most this many bytes.
COPY_CHUNK_SIZE = 64 * 1024


def read_exact(stream, count):
    """Read exactly ``count`` bytes from ``stream``.

    Raises:
        EOFError: the stream ended before ``count`` bytes were available.
    """
    data = stream.read(count)
    if len(data) != count:
        raise EOFError("unexpected end of file: wanted {} bytes, got {}".format(
            count, len(data)))
    return data


def read_header(stream):
    """Parse the 16-byte global header.

    Returns:
        A tuple ``(archive_size, entry_count, index_size)``.

    Raises:
        ValueError: the magic bytes are not one of ``BIG_MAGICS``.
        EOFError: the stream is shorter than the header.
    """
    magic = read_exact(stream, 4)
    if magic not in BIG_MAGICS:
        raise ValueError("unrecognised magic bytes: {!r}".format(magic))
    # The archive size is the only little-endian field. "<" pins the byte
    # order; a bare "I" would use whatever order the host CPU has.
    (archive_size,) = struct.unpack("<I", read_exact(stream, 4))
    entry_count, index_size = struct.unpack(">II", read_exact(stream, 8))
    return archive_size, entry_count, index_size


def read_cstring(stream):
    """Read a NUL-terminated name from ``stream`` and return it as ``str``.

    The bytes are decoded in one go (UTF-8, falling back to Latin-1) so a
    non-ASCII name no longer aborts the run.

    Raises:
        EOFError: the stream ended before the terminating NUL byte.
    """
    raw = bytearray()
    while True:
        char = stream.read(1)
        if not char:
            raise EOFError("unterminated file name in index")
        if char == b"\x00":
            break
        raw += char
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a code point, so this cannot fail.
        return raw.decode("latin-1")


def read_index(stream, entry_count):
    """Read ``entry_count`` index rows and return them as a list of ``entry``.

    Raises:
        EOFError: the index is shorter than the header announced.
    """
    entries = []
    for _ in range(entry_count):
        position, size = struct.unpack(">II", read_exact(stream, 8))
        entries.append(entry(read_cstring(stream), position, size))
    return entries


def resolve_target(target_dir, member_name):
    """Return the absolute output path for ``member_name`` below ``target_dir``.

    Raises:
        ValueError: the name is empty, absolute, or climbs out of
            ``target_dir`` (for example through ``..`` components).
    """
    root = os.path.abspath(target_dir)
    candidate = os.path.abspath(os.path.join(root, member_name))
    try:
        inside = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath refuses to compare paths on different drives.
        inside = False
    if not inside or candidate == root:
        raise ValueError("unsafe member name: {!r}".format(member_name))
    return candidate


def copy_bytes(source, sink, count):
    """Copy up to ``count`` bytes from ``source`` to ``sink`` in chunks.

    Returns:
        The number of bytes actually copied, which is smaller than
        ``count`` when ``source`` runs out of data early.
    """
    remaining = count
    while remaining > 0:
        chunk = source.read(min(COPY_CHUNK_SIZE, remaining))
        if not chunk:
            break
        sink.write(chunk)
        remaining -= len(chunk)
    return count - remaining


def extract_entry(big_file, item, target_path):
    """Write the data of index row ``item`` to ``target_path``.

    Missing parent directories are created; a path that already exists is
    left untouched.

    Returns:
        True when the member was written completely or skipped, False when
        the archive ended before ``item.size`` bytes could be copied.
    """
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

    # skip files that already exist.
    if os.path.exists(target_path):
        print("{} exists. Skipping.".format(target_path))
        return True

    print("Opening {} for writing".format(target_path))
    with open(target_path, "wb") as target_file:
        big_file.seek(item.position)
        print("Seeked to {}".format(item.position))

        print("Starting data transfer")
        written = copy_bytes(big_file, target_file, item.size)

    print("Wrote {} bytes".format(written))
    if written != item.size:
        print("Warning: expected {} bytes, archive is truncated.".format(item.size))
        return False

    print("Done, closing file.")
    return True


def main(argv=None):
    """Extract every member of a BIG archive into a target directory.

    Args:
        argv: ``[file, target]``; defaults to ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the archive is missing, invalid, or at least
        one member could not be extracted, 2 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        print("usage: python bigdecoder.py [file] [target]")
        return 2

    print("BIG-file decoder by aderyn@gmail.com and Cameron Conn")

    file_path = args[0]
    target_dir = os.path.abspath(args[1])
    print("filepath: {}; targetDir: {}".format(file_path, target_dir))

    if not os.path.isfile(file_path):
        print("Requested file doesn't exist.")
        return 1

    print("Processing {}".format(file_path))

    failures = 0
    # Binary mode is required: offsets in the index are byte positions.
    with open(file_path, "rb") as big_file:
        try:
            size, entry_count, index_size = read_header(big_file)
        except (ValueError, EOFError):
            print("Invalid file format.")
            return 1

        print("size: {}".format(size))
        print("entry count: {}".format(entry_count))
        print("index size: {}".format(index_size))
        print()

        try:
            entries = read_index(big_file, entry_count)
        except EOFError as err:
            print("Corrupt index: {}".format(err))
            return 1

        for number, item in enumerate(entries, start=1):
            print("opening {} (size: {}, position: {})".format(
                item.name, item.size, item.position))
            print("file {} of {}".format(number, entry_count))

            try:
                target_path = resolve_target(target_dir, item.name)
            except ValueError as err:
                # Names come from the archive and are not trusted.
                print("Skipping entry: {}".format(err))
                failures += 1
                print()
                continue

            print("TargetPath: {}".format(target_path))
            if not extract_entry(big_file, item, target_path):
                failures += 1
            print()

    return 1 if failures else 0


# Only run when executed as a script, so the module can be imported safely.
if __name__ == "__main__":
    sys.exit(main())
	#!/usr/bin/env python3
# NOTE(review): this section repeats the decoder script that appears earlier
# in this file. It was stored with flattened, tab-prefixed indentation and
# could not be parsed. It is restored here as valid Python, but one of the
# two copies should be deleted.
""" bigdecoder.py, by aderyn@gmail.com (2009-03-01) and cam {at-goes-here} camconn.cc (2018-05-03)

This is a Decoder for .BIG-format files utilized by many EA Games.
EA's .big is a trivial archival format. Quite frankly, this is
probably the simplest compound file format imaginable.

File Structure
==============
The file consists of a global header, an index of all the embedded
files, and the actual file data.

    global header {
        header, charstring, 4 bytes - BIG4, BIGF, or something similar
        total file size, unsigned integer, 4 bytes, little endian byte order
        number of embedded files, unsigned integer, 4 bytes, big endian byte order
        total size of index table in bytes, unsigned integer, 4 bytes, big endian byte order
    }, only occurs once

An index of files follows directly after the global header. Each embedded file has its own
entry index.

    index entry {
        position of embedded file within BIG-file, unsigned integer, 4 bytes, big endian byte order
        size of embedded data, unsigned integer, 4 bytes, big endian byte order
        file name, cstring ending with null byte
    }

After that, we have the actual data.

    file data: {
        raw data - at the positions specified in the index
    }

Usage
=====
    python bigdecoder.py [file] [target]

Files that already exist under the target directory are skipped.

"""

import struct
import sys
import os
import os.path


# Plain record, in the spirit of a C struct, that holds the data for one
# entry of the archive's file index.
class entry:
    """One row of the BIG index: member name, byte offset and byte length.

    All constructor arguments are optional so the historical usage --
    ``e = entry()`` followed by attribute assignment -- keeps working.
    """

    def __init__(self, name="", position=0, size=0):
        self.name = name
        self.position = position
        self.size = size

    def __repr__(self):
        return "entry(name={!r}, position={}, size={})".format(
            self.name, self.position, self.size)


# Magic byte strings accepted at offset 0 of an archive.
BIG_MAGICS = (b"BIG4", b"BIGF")

# Member data is copied in slices of at most this many bytes.
COPY_CHUNK_SIZE = 64 * 1024


def read_exact(stream, count):
    """Read exactly ``count`` bytes from ``stream``.

    Raises:
        EOFError: the stream ended before ``count`` bytes were available.
    """
    data = stream.read(count)
    if len(data) != count:
        raise EOFError("unexpected end of file: wanted {} bytes, got {}".format(
            count, len(data)))
    return data


def read_header(stream):
    """Parse the 16-byte global header.

    Returns:
        A tuple ``(archive_size, entry_count, index_size)``.

    Raises:
        ValueError: the magic bytes are not one of ``BIG_MAGICS``.
        EOFError: the stream is shorter than the header.
    """
    magic = read_exact(stream, 4)
    if magic not in BIG_MAGICS:
        raise ValueError("unrecognised magic bytes: {!r}".format(magic))
    # The archive size is the only little-endian field. "<" pins the byte
    # order; a bare "I" would use whatever order the host CPU has.
    (archive_size,) = struct.unpack("<I", read_exact(stream, 4))
    entry_count, index_size = struct.unpack(">II", read_exact(stream, 8))
    return archive_size, entry_count, index_size


def read_cstring(stream):
    """Read a NUL-terminated name from ``stream`` and return it as ``str``.

    The bytes are decoded in one go (UTF-8, falling back to Latin-1) so a
    non-ASCII name no longer aborts the run.

    Raises:
        EOFError: the stream ended before the terminating NUL byte.
    """
    raw = bytearray()
    while True:
        char = stream.read(1)
        if not char:
            raise EOFError("unterminated file name in index")
        if char == b"\x00":
            break
        raw += char
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a code point, so this cannot fail.
        return raw.decode("latin-1")


def read_index(stream, entry_count):
    """Read ``entry_count`` index rows and return them as a list of ``entry``.

    Raises:
        EOFError: the index is shorter than the header announced.
    """
    entries = []
    for _ in range(entry_count):
        position, size = struct.unpack(">II", read_exact(stream, 8))
        entries.append(entry(read_cstring(stream), position, size))
    return entries


def resolve_target(target_dir, member_name):
    """Return the absolute output path for ``member_name`` below ``target_dir``.

    Raises:
        ValueError: the name is empty, absolute, or climbs out of
            ``target_dir`` (for example through ``..`` components).
    """
    root = os.path.abspath(target_dir)
    candidate = os.path.abspath(os.path.join(root, member_name))
    try:
        inside = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath refuses to compare paths on different drives.
        inside = False
    if not inside or candidate == root:
        raise ValueError("unsafe member name: {!r}".format(member_name))
    return candidate


def copy_bytes(source, sink, count):
    """Copy up to ``count`` bytes from ``source`` to ``sink`` in chunks.

    Returns:
        The number of bytes actually copied, which is smaller than
        ``count`` when ``source`` runs out of data early.
    """
    remaining = count
    while remaining > 0:
        chunk = source.read(min(COPY_CHUNK_SIZE, remaining))
        if not chunk:
            break
        sink.write(chunk)
        remaining -= len(chunk)
    return count - remaining


def extract_entry(big_file, item, target_path):
    """Write the data of index row ``item`` to ``target_path``.

    Missing parent directories are created; a path that already exists is
    left untouched.

    Returns:
        True when the member was written completely or skipped, False when
        the archive ended before ``item.size`` bytes could be copied.
    """
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

    # skip files that already exist.
    if os.path.exists(target_path):
        print("{} exists. Skipping.".format(target_path))
        return True

    print("Opening {} for writing".format(target_path))
    with open(target_path, "wb") as target_file:
        big_file.seek(item.position)
        print("Seeked to {}".format(item.position))

        print("Starting data transfer")
        written = copy_bytes(big_file, target_file, item.size)

    print("Wrote {} bytes".format(written))
    if written != item.size:
        print("Warning: expected {} bytes, archive is truncated.".format(item.size))
        return False

    print("Done, closing file.")
    return True


def main(argv=None):
    """Extract every member of a BIG archive into a target directory.

    Args:
        argv: ``[file, target]``; defaults to ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the archive is missing, invalid, or at least
        one member could not be extracted, 2 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        print("usage: python bigdecoder.py [file] [target]")
        return 2

    print("BIG-file decoder by aderyn@gmail.com and Cameron Conn")

    file_path = args[0]
    target_dir = os.path.abspath(args[1])
    print("filepath: {}; targetDir: {}".format(file_path, target_dir))

    if not os.path.isfile(file_path):
        print("Requested file doesn't exist.")
        return 1

    print("Processing {}".format(file_path))

    failures = 0
    # Binary mode is required: offsets in the index are byte positions.
    with open(file_path, "rb") as big_file:
        try:
            size, entry_count, index_size = read_header(big_file)
        except (ValueError, EOFError):
            print("Invalid file format.")
            return 1

        print("size: {}".format(size))
        print("entry count: {}".format(entry_count))
        print("index size: {}".format(index_size))
        print()

        try:
            entries = read_index(big_file, entry_count)
        except EOFError as err:
            print("Corrupt index: {}".format(err))
            return 1

        for number, item in enumerate(entries, start=1):
            print("opening {} (size: {}, position: {})".format(
                item.name, item.size, item.position))
            print("file {} of {}".format(number, entry_count))

            try:
                target_path = resolve_target(target_dir, item.name)
            except ValueError as err:
                # Names come from the archive and are not trusted.
                print("Skipping entry: {}".format(err))
                failures += 1
                print()
                continue

            print("TargetPath: {}".format(target_path))
            if not extract_entry(big_file, item, target_path):
                failures += 1
            print()

    return 1 if failures else 0


# Only run when executed as a script, so the module can be imported safely.
if __name__ == "__main__":
    sys.exit(main())