Last active
February 22, 2023 06:46
-
-
Save tpwrules/4d44708ee4829ef60181 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This Python 3 script extracts cards from TextWare cardfiles.
# It dumps every card, with its number and name, to the directory called "output"
# under the current directory.
# The first and only argument is the .TWC file to extract.
# License and format information are located at the bottom of the file.
#
# PLEASE NOTE!
# This is designed to only extract text. I have not found a cardfile with pictures,
# so I can't say how it performs. Also, many articles have typos and glitches.
# Check for them with the original viewer before concluding there is a bug.
import os
import sys
import traceback
from struct import Struct, unpack

# The cardfile path is the first and only command-line argument.
if len(sys.argv) < 2:
    print("Usage: {} <cardfile.TWC>".format(sys.argv[0]))
    sys.exit(1)

# Opened before changing directory so a relative path still resolves.
# The handle stays open for the rest of the script; process() and the main
# loop seek around in it.
fin = open(sys.argv[1], "rb")

# Cards are written into ./output; create it if it is not there yet.
os.makedirs("output", exist_ok=True)
os.chdir("output")

# One little-endian unsigned 32-bit integer.
si = Struct("<I")

# Signature check: the low 16 bits of the dword at 0x04 must be 0x0F0C.
# A file too short to contain that dword cannot be a cardfile either.
fin.seek(0x04)
magic = fin.read(si.size)
if len(magic) != si.size or si.unpack(magic)[0] & 0xffff != 0x0f0c:
    print("Not a .twc file")
    sys.exit(1)

# Cardfile name: 60 bytes, NUL padded, at 0x160.
fin.seek(0x160)
name, = unpack("60s", fin.read(60))
# errors="replace" keeps a non-ASCII name from aborting the extraction.
print("Name: " + name.rstrip(b'\x00').decode("ascii", errors="replace"))

fin.seek(0x3B0)
num_cards, = si.unpack(fin.read(4))
print("Number of cards:", num_cards)

# now get the first card pointer
# it's at a fixed location in the file (I think), but part of some double nested table thing
# but fortunately cards are in a linked list so we don't have to parse it
fin.seek(0xC00)
card_ptr, = si.unpack(fin.read(4))
card_pos = 1

# Card header: number, prev, next, u1, t1, chunk pointer, t2, length, name.
card_hdr = Struct("<IIIIIIHH68s")
card_hdr_len = card_hdr.size
# Extra header of an indirect card: block pointer, offset, block length.
card_ind_hdr = Struct("<IHH")
card_ind_hdr_len = card_ind_hdr.size
# Continuation chunk header: prev, next, type, length.
chunk_hdr = Struct("<IIIH")
chunk_hdr_len = chunk_hdr.size

# set up for making filenames
# 256-entry translation table: ASCII alphanumerics and space map to
# themselves, every other byte value maps to "_"
name_translate = bytes(
    b if (bytes([b]).isalnum() or b == 0x20) else 0x5F
    for b in range(256)
)
# zero-pad the card number to the width of num_cards to keep card files sorted
card_name_format = "{:0" + str(len(str(num_cards))) + "} - {}.txt"
def decompress_block(bytedata):
    """Decompress one TextWare compressed block.

    The input is a stream of big-endian 12-bit words, two words per three
    bytes, terminated by the word 0xFFF.  A word below 256 is a literal
    byte.  Any other word is a backreference to word index (word - 256),
    counted from the start of the same stream; the reference expands to
    the output of the two words found there, applied recursively.

    bytedata: bytes-like object holding the compressed block.
    Returns a bytearray with the decompressed data.
    Raises Exception("Decompression error.") when the stream has no
    0xFFF terminator, a reference points outside the stream, or the
    references recurse too deeply.
    """
    def iter_words(raw):
        # Split each 3-byte group into two 12-bit words.  A trailing
        # 2-byte group still holds one complete word.
        for start in range(0, len(raw), 3):
            group = raw[start:start + 3]
            if len(group) < 2:
                return
            yield (group[0] << 4) + (group[1] >> 4)
            if len(group) < 3:
                return
            yield ((group[1] & 0x0F) << 8) + group[2]

    # Collect words up to and including the 0xFFF terminator.
    block = []
    for word in iter_words(bytedata):
        block.append(word)
        if word == 0xFFF:
            break
    else:
        # Ran out of input without ever seeing the terminator.
        raise Exception("Decompression error.")

    def go(pos=0, count=None):
        # Expand words starting at index pos.  count=None means "until
        # the terminator" (top level); otherwise stop after count words.
        out = bytearray()
        in_words = 0
        while True:
            word = block[pos]
            if word == 0xFFF:
                break
            if word < 256:
                out.append(word)
            else:
                # A reference covers two words at the top level; inside
                # a reference it covers only the words still owed to the
                # caller.
                remaining = 2 if count is None else count - in_words
                out.extend(go(word - 256, remaining))
            in_words += 1
            if in_words == count:
                break
            pos += 1
        return out

    try:
        return go()
    except (IndexError, RuntimeError) as err:
        # IndexError: a reference past the end of the stream.
        # RuntimeError covers RecursionError from self-referencing data.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        raise Exception("Decompression error.") from err
# Cache of the most recently decompressed shared block, so consecutive
# indirect cards that live in the same block decompress it only once.
last_data_ptr = None
last_data = None


def process(expected_num, card_ptr):
    """Extract the card whose header is at file offset card_ptr.

    Reads the card header from the module-level file ``fin``, recovers the
    card text according to the card's type pair, and writes it to a file
    named from the card number and sanitized card name in the current
    directory.

    expected_num: card number the header at card_ptr must carry.
    card_ptr: file offset of the card header.
    Returns the file offset of the next card header in the linked list.
    Raises Exception when the card number does not match expected_num,
    when the card or a chunk has an unknown type, or when decompression
    fails.
    """
    global last_data_ptr, last_data

    fin.seek(card_ptr)
    (num, _prev, next_card_ptr, _u1, t1, chunk_ptr, t2, length,
     name) = card_hdr.unpack(fin.read(card_hdr_len))
    if expected_num != num:
        raise Exception("Error following linked list. Something is very wrong. 0x{:08X}".format(card_ptr))
    print(num)

    card_type = (t1, t2)
    if card_type == (1, 7):  # indirect compressed
        # The extra header follows the card header directly.
        data_ptr, offset, data_len = \
            card_ind_hdr.unpack(fin.read(card_ind_hdr_len))
        # optimization to avoid decompressing data multiple times
        if last_data_ptr != data_ptr:
            fin.seek(data_ptr)
            last_data = decompress_block(fin.read(data_len))
            last_data_ptr = data_ptr
        # length counts the terminating 00, which is not written out
        text = last_data[offset:(offset + length - 1)]
    elif card_type in ((5, 2), (1, 2)):  # direct
        # The first chunk follows the card header directly.
        data = fin.read(length)
        if card_type == (5, 2):  # compressed
            data = decompress_block(data)
        chunks = [data[:-1]]  # remove terminating 00
        # load any additional chunks if necessary
        while chunk_ptr != 0:
            fin.seek(chunk_ptr)
            _cprev, chunk_ptr, chunk_type, chunk_len = \
                chunk_hdr.unpack(fin.read(chunk_hdr_len))
            data = fin.read(chunk_len)
            if chunk_type == 4:  # compressed chunk
                data = decompress_block(data)
            elif chunk_type != 0:  # 0 is an uncompressed chunk
                raise Exception("Error at card #{}, 0x{:08x}".format(num, card_ptr))
            chunks.append(data[:-1])
        text = b''.join(chunks)
    else:
        raise Exception("Card of unknown type #{}, 0x{:08x}, TYPE: {}".format(num, card_ptr, card_type))

    # Every byte that is not an ASCII alphanumeric or space becomes "_",
    # which also turns the NUL padding into underscores that strip() removes.
    tname = name.translate(name_translate).decode("ascii").strip("_")
    # The context manager closes the file even if the write fails.
    with open(card_name_format.format(num, tname), "wb") as fout:
        fout.write(text)
    return next_card_ptr
# Walk the linked list of cards and extract each one, collecting per-card
# failures so that one bad card does not stop the whole run.
errors = []
# Message prefix process() uses when the linked list itself is broken;
# there is no point continuing after that.
_FATAL_PREFIX = "Error following linked list. Something is very wrong. "
while card_pos <= num_cards:
    try:
        card_ptr = process(card_pos, card_ptr)
    except Exception as e:
        # KeyboardInterrupt is not an Exception subclass, so Ctrl-C still
        # propagates without a dedicated handler.
        # The isinstance guard keeps an exception whose single argument is
        # not a string from raising AttributeError here.
        if (len(e.args) == 1 and isinstance(e.args[0], str)
                and e.args[0].startswith(_FATAL_PREFIX)):
            raise
        errors.append((card_pos, card_ptr, traceback.format_exc()))
        # get next pointer anyway
        # hopefully we aren't completely lost
        # (the next-card pointer is the third dword of the card header)
        fin.seek(card_ptr + 8)
        card_ptr, = si.unpack(fin.read(4))
    card_pos += 1

if not errors:
    print("All cards extracted successfully.")
    sys.exit(0)

print("There were errors extracting some cards.")
for failed_pos, failed_ptr, trace_text in errors:
    print("CARD: {} ADDRESS: {:08X}".format(failed_pos, failed_ptr))
    print(trace_text)
print("If the cards display correctly in the original viewer, tell me this stuff.")
# text is 00 terminated
# also note that text is stored with 0A line ending
# this decoder outputs that verbatim
# the viewer program converts this to dos 0D 0A
#5,2: direct compressed
# L: compressed length
#1,2: direct uncompressed
# L: uncompressed length
#it is possible for direct cards to be in multiple chunks
#pointer to next chunk is in U3
#L only applies to the first chunk
# chunk header
# P, N, U3, L
# 4, 4, 4, 2
# L: length of this chunk (compressed length if chunk is compressed)
# U3: 4 if compressed, 0 if uncompressed
# N: pointer to next chunk
# P: pointer to previous chunk (will not point to card header!)
#chunks are simply concatenated together after decompression (if applicable)
#(being mindful of terminating 00s)
#1,7: indirect compressed
# L: uncompressed length, A1: compressed block pointer, A2: uncompressed offset
# A3: compressed block length
# this type allows multiple cards to be stored in one compressed block.
# the card is stored at position A2 in the uncompressed block
# and has uncompressed length L
#BIZARRE TYPES
#340,2
# seems to be a direct compressed type, but with no name. the data begins directly
# after the length word. the program treats its title as the first characters of
# the card. confusingly, attempting to go to this card by number gives an error
# that the card has been deleted
#compression is very simple backreferencing scheme
#stream is encoded as 12 bit words
# $AA $AB $BB
# A and B are two 12 bit words, stored big endian
# word with high nibble 0 is literal, low byte is output directly
# otherwise a backreference! begin processing from the beginning of the stream
# at offset (word-256)
#
# there is no length! instead we have 2 easy rules
# given offset = word-256, process the words at offset and offset+1
# 1. if the word at offset is yet another offset,
# process two words at that offset and offset+1, recursively
# 2. if the word at offset+1 is another offset, process ONE word at offset, recursively
# both these rules apply to any recursions
# this means the backreference length depends entirely on what is being backreferenced
#
# word of FFF seems to indicate end of compressed block
# Copyright (c) 2016 Thomas Watson (@tpw_rules)
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment