# cabalex/extract_ptd.py

## extract_ptd.py
"""
--- NOTE ---
This Python script is now obsolete: PTD extraction and repacking are built into my Astral Extractor (v0.10.0 and later).
This script only unpacks the values; Astral Extractor unpacks the keys and additional values as well. It also allows for complete
repacking, should you be interested in modding the game. Check it out!

https://cabalex.github.io/astral-extractor/

"""

#Forked from NWPlayer123/extract_ptd.py and Xzonn/extract_ptd.py
#thanks Simon for being a smartie
#unfinished, need to parse all the tables
#string_offset starts at +0x20 (header)

# hopefully patched to parse every string (maybe for future replacement/modding?)
from struct import unpack
import os

"""
https://zenhax.com/viewtopic.php?t=12437
File has three sections:
- table
- unknown data (encrypted/compressed filenames?)
- file data with some info before block.
These three sections appear twice in the file, one set after the other.
"""


# Dump every text blob of a PTD file to a UTF-8 text file.
#
# For each entry of the table named in the file header, the script follows the
# entry's offset to a text block, undoes the per-byte transform on the block's
# trailing blob, decodes it as UTF-16-LE and appends it to the output file,
# followed by a "--- TEXT END ---" marker line.
#
# NOTE(review): a second, tab-indented verbatim copy of this script used to
# follow it in the file and stopped the file from compiling; the two copies
# have been merged into this single one.

filetoopen = r"GameWord_USen.bin"
# Output goes next to the input as "<input name without extension>.txt".
outname = os.path.splitext(filetoopen)[0]

# bytes.translate() table mapping every byte b to (b - 0x26) % 256, the per-byte
# transform applied to a text blob before it is decoded as UTF-16-LE.
_UNSHIFT_TABLE = bytes((value - 0x26) % 256 for value in range(256))

success = 0
# One `with` owns both handles, so the output file is flushed and closed even
# when parsing raises part-way through the input.
with open(filetoopen, "rb") as f, open(f"{outname}.txt", "w", encoding="utf-8") as o:
    # File header: 4 magic bytes ("PTD " per the original note) followed by six
    # little-endian uint32 values, i.e. a 7-item tuple:
    # header1[0] - magic bytes
    # header1[1] to header1[4] - not used by this script (meaning unknown)
    # header1[5] - amount of text entries
    # header1[6] - table_offset
    header1 = unpack("<4s6I", f.read(0x1C))
    print(f"{header1[5]} strings found")

    for x in range(header1[5]):
        # Table entries are 0x14 bytes apart, so only 0x14 bytes (five uint32)
        # belong to this entry.
        f.seek(header1[6] + (0x14 * x))  # move to table
        print("--- New entry at %08X" % f.tell())
        base = f.tell()
        # header2[0] to header2[3] - unknown
        # header2[4] - offset of the text block, relative to this table entry
        header2 = unpack("<5I", f.read(0x14))  # read table

        f.seek(base + header2[4])
        print("%08X" % f.tell())
        base = f.tell()
        # header3[0] to header3[3] - unknown
        # header3[4] - number of 0x10-byte records following this header
        # header3[5] - length of the whole block measured from `base`, minus 0xC
        header3 = unpack("<6I", f.read(0x18))
        # The 0x10-byte records are not interpreted yet; step over them.
        f.seek(base + 0x18 + (0x10 * header3[4]))
        print("%08X" % f.tell())

        # The blob is whatever remains of the block after header and records.
        # A negative size (malformed header) is clamped to an empty blob, which
        # is what the former range()-based byte loop produced.
        blobsize = max((header3[5] + 0xC) - (0x18 + (0x10 * header3[4])), 0)
        blob = f.read(blobsize)
        if len(blob) != blobsize:
            raise EOFError(
                f"text blob truncated: expected {blobsize} bytes, got {len(blob)}"
            )
        raw_string = blob.translate(_UNSHIFT_TABLE).decode("utf-16-le")

        # Keep CRLF visible as a literal "\n" and put each NUL-terminated
        # string on its own output line.
        o.write(raw_string.replace("\r\n", "\\n").replace("\0", "\n"))
        print("Written successfully :D")
        success += 1
        o.write("--- TEXT END ---\n")

print(f"{success}/{header1[5]} strings dumped")