# PookyFan/extract_graph.py Secret

## extract_graph.py
#Helper that decodes the 4-byte big-endian numbers found in GTF headers
def getIntegerBE(data, index):
    """Return the unsigned integer stored in data[index:index + 4], most significant byte first.

    data  -- sequence of single characters (each element is passed to ord())
    index -- offset of the first, most significant, of the four bytes
    """
    value = 0
    for offset in range(4):
        #Move what was decoded so far one byte up and append the next byte
        value = (value << 8) + ord(data[index + offset])
    return value

#Extract every texture stored in Graph.dat into a separate file in the current directory.
#The archive is expected to hold a table of NUL-separated file names (beginning with
#"Graph.dat" itself and closed by two NUL bytes), followed by the files themselves,
#stored back to back in the same order as their names.
#NOTE: data read from the archive is handled as a Python 2 byte string (str).
with open("Graph.dat", "rb") as file:
    #Find block of data that contains file names.
    #The window slides with 512 bytes of overlap, so the marker is found even when
    #it straddles the boundary between two reads.
    buf = file.read(1024)
    while "Graph.dat" not in buf:
        chunk = file.read(512)
        if not chunk:
            #Without this check an archive with no name table would loop forever
            raise Exception("No file name table found in Graph.dat")
        buf = buf[512:] + chunk
    buf = buf[buf.find("Graph.dat"):]

    #The name table is closed by two consecutive NUL bytes - buffer data until they show up
    table_end = buf.find("\0\0")
    while table_end < 0:
        chunk = file.read(1024)
        if not chunk:
            raise Exception("Unterminated file name table in Graph.dat")
        #Resume one byte early, the NUL pair may be split between two reads
        search_from = max(0, len(buf) - 1)
        buf += chunk
        table_end = buf.find("\0\0", search_from)

    #Get texture file names
    textures_names = buf[:table_end].split("\0")

    #Don't need Graph.dat filename
    textures_names = textures_names[1:]

    #Look for the beginning of first texture file (skip NUL padding, buffering more if needed)
    index = table_end
    while True:
        if index == len(buf):
            buf += file.read(1024)
        if index == len(buf) or buf[index] != "\0":
            break
        index += 1
    buf = buf[index:]

    #Extract files one by one.
    #At the top of each iteration buf starts exactly at the first byte of the current file.
    for filename in textures_names:
        if ".PNG" in filename:
            print("There's a plain PNG file - {0}".format(filename))
            #Walk the PNG chunk by chunk to find out where it ends
            index = 8 #Ignore header
            chunk_type = None
            while chunk_type != "IEND":
                #Every chunk starts with 4 bytes of data length and 4 bytes of type
                if len(buf) < index + 8:
                    buf += file.read(index + 8 - len(buf))
                if len(buf) < index + 8:
                    raise Exception("Unexpected end of data in file {0}!".format(filename))
                chunk_size = getIntegerBE(buf, index)
                chunk_type = buf[index + 4:index + 8]
                #Skip length and type (8 bytes), chunk data and 4 bytes of CRC
                index += 8 + chunk_size + 4
            filesize = index

        else:
            #Main part of GTF header is 16 bytes long, plus there are additional 4 important bytes, so we need at least 20 bytes buffered for now
            if len(buf) < 20:
                buf += file.read(20 - len(buf))

            #Check for GTF header (just to be extra sure)
            if len(buf) < 20 or buf[:4] != "\x02\x02\x00\xff":
                raise Exception("No GTF header found for file {0}!".format(filename))

            #Get length of texture data from header
            datasize = getIntegerBE(buf, 4)

            #Get number of textures (just for the sake of it)
            texnum = getIntegerBE(buf, 8)

            #Get header size
            headsize = getIntegerBE(buf, 16)

            #Count size of entire file
            filesize = headsize + datasize

            print("{0} has {1} textures, file size = {2}".format(filename, texnum, filesize))

        #Buffer the rest of the file
        if len(buf) < filesize:
            buf += file.read(filesize - len(buf))

        #Save exactly one file; anything buffered past its end belongs to the next one
        with open(filename, "wb") as out:
            out.write(buf[:filesize])
        buf = buf[filesize:]

print("Done extracting!")

## extract_sound.py
import sys

#Extract every sound file stored in Sound.dat or Voice.dat (named on the command line)
#into a separate file in the current directory.
#The archive is expected to hold a table of file names (beginning with the archive's own
#name), two NUL bytes, and then the sound files, each of which starts with "MSFC".
#NOTE: data read from the archive is handled as a Python 2 byte string (str).
if len(sys.argv) < 2:
    raise Exception("Need a filename")

data_file_name = sys.argv[1]
if data_file_name not in ("Sound.dat", "Voice.dat"):
    raise Exception("Data file not supported")

with open(data_file_name, "rb") as file:
    #Find block of data that contains file names.
    #The window slides with 512 bytes of overlap, so the marker is found even when
    #it straddles the boundary between two reads.
    buf = file.read(1024)
    while data_file_name not in buf:
        chunk = file.read(512)
        if not chunk:
            #Without this check an archive with no name table would loop forever
            raise Exception("No file name table found in {0}".format(data_file_name))
        buf = buf[512:] + chunk
    buf = buf[buf.find(data_file_name):]

    #Buffer data until the first sound file shows up - the name table ends 2 bytes before it
    first_file = buf.find("MSFC", 2)
    while first_file < 0:
        chunk = file.read(1024)
        if not chunk:
            raise Exception("Parse error: start of sound file expected")
        #Resume 3 bytes early, the marker may be split between two reads
        search_from = max(2, len(buf) - 3)
        buf += chunk
        first_file = buf.find("MSFC", search_from)
    table_end = first_file - 2

    #Get file names - a name ends on the last NUL of a run of NULs, padding is not part of it
    file_names = []
    start = 0
    for index in range(table_end):
        if buf[index] == '\0' and buf[index + 1] != '\0':
            file_names.append(buf[start:index].rstrip('\0'))
            start = index + 1

    #One last filename (it may be followed by NUL padding too)
    file_names.append(buf[start:table_end].rstrip('\0'))

    #Don't need main resource file name
    file_names = file_names[1:]
    #
    print(len(file_names))
    #
    #Look for the beginning of first sound file
    buf = buf[table_end:].lstrip('\0')

    #Just a sanity check
    if not buf.startswith("MSFC"):
        raise Exception("Parse error: start of sound file expected")

    #Extract files one by one
    for filename in file_names:
        #Sometimes there are "nasty" characters in file names - need to get rid of them before using the filename
        if '\\' in filename:
            nasty_part, clean_part = filename.split('\\', 1)
            #Turn the nasty characters into one big endian number
            val = 0
            for c in nasty_part:
                val = (val << 8) + ord(c)
            filename = "{0}_{1}".format(str(val), clean_part)

        if not ".msf" in filename:
            raise Exception("Unknown file format - {0}".format(filename))

        #Sound files start with MSFC characters, so we can tell one from another just by looking at these characters
        #But there is a problem with the last file, since nothing follows it... it will be handled as special case, then
        next_file_start = buf.find("MSFC", 4)
        while next_file_start < 0:
            chunk = file.read(4096)
            if not chunk: #Last file special case - only once there really is no more data
                next_file_start = len(buf)
                break
            #Only the new data (and a marker split between two reads) needs searching
            search_from = max(4, len(buf) - 3)
            buf += chunk
            next_file_start = buf.find("MSFC", search_from)

        print("Saving sound file {0}\n".format(filename))
        with open(filename, "wb") as out:
            out.write(buf[:next_file_start]) #Current file ends where next starts
        buf = buf[next_file_start:]

print("Done extracting!")
	#Helping function for retrieving numbers from GTF headers (in big endian)
def getIntegerBE(data, index):
    """Return the unsigned integer stored in data[index:index + 4], most significant byte first.

    data  -- sequence of single characters (each element is passed to ord())
    index -- offset of the first, most significant, of the four bytes
    """
    value = 0
    for offset in range(4):
        #Move what was decoded so far one byte up and append the next byte
        value = (value << 8) + ord(data[index + offset])
    return value

	#Find block of data that contains file names, then extract every texture
#Extract every texture stored in Graph.dat into a separate file in the current directory.
#The archive is expected to hold a table of NUL-separated file names (beginning with
#"Graph.dat" itself and closed by two NUL bytes), followed by the files themselves,
#stored back to back in the same order as their names.
#NOTE: data read from the archive is handled as a Python 2 byte string (str).
with open("Graph.dat", "rb") as file:
    #The window slides with 512 bytes of overlap, so the marker is found even when
    #it straddles the boundary between two reads.
    buf = file.read(1024)
    while "Graph.dat" not in buf:
        chunk = file.read(512)
        if not chunk:
            #Without this check an archive with no name table would loop forever
            raise Exception("No file name table found in Graph.dat")
        buf = buf[512:] + chunk
    buf = buf[buf.find("Graph.dat"):]

    #The name table is closed by two consecutive NUL bytes - buffer data until they show up
    table_end = buf.find("\0\0")
    while table_end < 0:
        chunk = file.read(1024)
        if not chunk:
            raise Exception("Unterminated file name table in Graph.dat")
        #Resume one byte early, the NUL pair may be split between two reads
        search_from = max(0, len(buf) - 1)
        buf += chunk
        table_end = buf.find("\0\0", search_from)

    #Get texture file names
    textures_names = buf[:table_end].split("\0")

    #Don't need Graph.dat filename
    textures_names = textures_names[1:]

    #Look for the beginning of first texture file (skip NUL padding, buffering more if needed)
    index = table_end
    while True:
        if index == len(buf):
            buf += file.read(1024)
        if index == len(buf) or buf[index] != "\0":
            break
        index += 1
    buf = buf[index:]

    #Extract files one by one.
    #At the top of each iteration buf starts exactly at the first byte of the current file.
    for filename in textures_names:
        if ".PNG" in filename:
            print("There's a plain PNG file - {0}".format(filename))
            #Walk the PNG chunk by chunk to find out where it ends
            index = 8 #Ignore header
            chunk_type = None
            while chunk_type != "IEND":
                #Every chunk starts with 4 bytes of data length and 4 bytes of type
                if len(buf) < index + 8:
                    buf += file.read(index + 8 - len(buf))
                if len(buf) < index + 8:
                    raise Exception("Unexpected end of data in file {0}!".format(filename))
                chunk_size = getIntegerBE(buf, index)
                chunk_type = buf[index + 4:index + 8]
                #Skip length and type (8 bytes), chunk data and 4 bytes of CRC
                index += 8 + chunk_size + 4
            filesize = index

        else:
            #Main part of GTF header is 16 bytes long, plus there are additional 4 important bytes, so we need at least 20 bytes buffered for now
            if len(buf) < 20:
                buf += file.read(20 - len(buf))

            #Check for GTF header (just to be extra sure)
            if len(buf) < 20 or buf[:4] != "\x02\x02\x00\xff":
                raise Exception("No GTF header found for file {0}!".format(filename))

            #Get length of texture data from header
            datasize = getIntegerBE(buf, 4)

            #Get number of textures (just for the sake of it)
            texnum = getIntegerBE(buf, 8)

            #Get header size
            headsize = getIntegerBE(buf, 16)

            #Count size of entire file
            filesize = headsize + datasize

            print("{0} has {1} textures, file size = {2}".format(filename, texnum, filesize))

        #Buffer the rest of the file
        if len(buf) < filesize:
            buf += file.read(filesize - len(buf))

        #Save exactly one file; anything buffered past its end belongs to the next one
        with open(filename, "wb") as out:
            out.write(buf[:filesize])
        buf = buf[filesize:]

print("Done extracting!")
	#Extract every sound file stored in Sound.dat or Voice.dat (named on the command line)
#into a separate file in the current directory.
#The archive is expected to hold a table of file names (beginning with the archive's own
#name), two NUL bytes, and then the sound files, each of which starts with "MSFC".
#NOTE: data read from the archive is handled as a Python 2 byte string (str).
import sys

if len(sys.argv) < 2:
    raise Exception("Need a filename")

data_file_name = sys.argv[1]
if data_file_name not in ("Sound.dat", "Voice.dat"):
    raise Exception("Data file not supported")

with open(data_file_name, "rb") as file:
    #Find block of data that contains file names.
    #The window slides with 512 bytes of overlap, so the marker is found even when
    #it straddles the boundary between two reads.
    buf = file.read(1024)
    while data_file_name not in buf:
        chunk = file.read(512)
        if not chunk:
            #Without this check an archive with no name table would loop forever
            raise Exception("No file name table found in {0}".format(data_file_name))
        buf = buf[512:] + chunk
    buf = buf[buf.find(data_file_name):]

    #Buffer data until the first sound file shows up - the name table ends 2 bytes before it
    first_file = buf.find("MSFC", 2)
    while first_file < 0:
        chunk = file.read(1024)
        if not chunk:
            raise Exception("Parse error: start of sound file expected")
        #Resume 3 bytes early, the marker may be split between two reads
        search_from = max(2, len(buf) - 3)
        buf += chunk
        first_file = buf.find("MSFC", search_from)
    table_end = first_file - 2

    #Get file names - a name ends on the last NUL of a run of NULs, padding is not part of it
    file_names = []
    start = 0
    for index in range(table_end):
        if buf[index] == '\0' and buf[index + 1] != '\0':
            file_names.append(buf[start:index].rstrip('\0'))
            start = index + 1

    #One last filename (it may be followed by NUL padding too)
    file_names.append(buf[start:table_end].rstrip('\0'))

    #Don't need main resource file name
    file_names = file_names[1:]
    #
    print(len(file_names))
    #
    #Look for the beginning of first sound file
    buf = buf[table_end:].lstrip('\0')

    #Just a sanity check
    if not buf.startswith("MSFC"):
        raise Exception("Parse error: start of sound file expected")

    #Extract files one by one
    for filename in file_names:
        #Sometimes there are "nasty" characters in file names - need to get rid of them before using the filename
        if '\\' in filename:
            nasty_part, clean_part = filename.split('\\', 1)
            #Turn the nasty characters into one big endian number
            val = 0
            for c in nasty_part:
                val = (val << 8) + ord(c)
            filename = "{0}_{1}".format(str(val), clean_part)

        if not ".msf" in filename:
            raise Exception("Unknown file format - {0}".format(filename))

        #Sound files start with MSFC characters, so we can tell one from another just by looking at these characters
        #But there is a problem with the last file, since nothing follows it... it will be handled as special case, then
        next_file_start = buf.find("MSFC", 4)
        while next_file_start < 0:
            chunk = file.read(4096)
            if not chunk: #Last file special case - only once there really is no more data
                next_file_start = len(buf)
                break
            #Only the new data (and a marker split between two reads) needs searching
            search_from = max(4, len(buf) - 3)
            buf += chunk
            next_file_start = buf.find("MSFC", search_from)

        print("Saving sound file {0}\n".format(filename))
        with open(filename, "wb") as out:
            out.write(buf[:next_file_start]) #Current file ends where next starts
        buf = buf[next_file_start:]

print("Done extracting!")