Skip to content

Instantly share code, notes, and snippets.

@tpwrules
Last active February 22, 2023 06:46
Show Gist options
  • Save tpwrules/4d44708ee4829ef60181 to your computer and use it in GitHub Desktop.
Save tpwrules/4d44708ee4829ef60181 to your computer and use it in GitHub Desktop.
# This Python 3 script extracts cards from TextWare cardfiles.
# It dumps every card, named by its number and title, into the directory called
# "output" under the current directory.
# The first and only argument is the .TWC file to extract.
# License and format information are located at the bottom of the file.
#
# PLEASE NOTE!
# This is designed to only extract text. I have not found a cardfile with pictures
# so I can't say how it performs. Also, many articles have typos and glitches.
# Check for them with the original viewer before concluding there is a bug.
import os, sys
import traceback
from struct import Struct, unpack
# ---------------------------------------------------------------------------
# Script setup: open the cardfile, validate it, read the global header fields
# and build the Struct objects / filename helpers used by process().
# ---------------------------------------------------------------------------
if len(sys.argv) < 2:
    # fail with a readable message instead of an IndexError traceback
    print("Usage: {} CARDFILE.TWC".format(sys.argv[0]))
    sys.exit(2)

# the input is opened before chdir so a relative path still resolves
fin = open(sys.argv[1], "rb")

# every card is written into ./output; create it on first use rather than
# crashing in chdir when it does not exist yet
os.makedirs("output", exist_ok=True)
os.chdir("output")

si = Struct("<I")  # single little-endian 32-bit unsigned integer

# the low 16 bits of the dword at offset 4 identify a cardfile
fin.seek(0x04)
if si.unpack(fin.read(4))[0] & 0xffff != 0x0f0c:
    print("Not a .twc file")
    sys.exit(1)

# cardfile name: 60 bytes, 00 padded
fin.seek(0x160)
name, = unpack("60s", fin.read(60))
# the name is only displayed, so never let a stray high byte abort the run
print("Name: "+name.rstrip(b'\x00').decode("ascii", errors="replace"))

fin.seek(0x3B0)
num_cards, = si.unpack(fin.read(4))
print("Number of cards:", num_cards)

# now get the first card pointer
# it's at a fixed location in the file (I think), but part of some double nested table thing
# but fortunately cards are in a linked list so we don't have to parse it
fin.seek(0xC00)
card_ptr, = si.unpack(fin.read(4))
card_pos = 1  # number of the card we expect to find at card_ptr

# card header, unpacked by process() as:
# num, prev, next, u1, t1, chunk_ptr, t2, length, name
card_hdr = Struct("<IIIIIIHH68s")
card_hdr_len = card_hdr.size
# extra header of an indirect compressed card: data_ptr, offset, data_len
card_ind_hdr = Struct("<IHH")
card_ind_hdr_len = card_ind_hdr.size
# header of a continuation chunk: prev, next, type, length
chunk_hdr = Struct("<IIIH")
chunk_hdr_len = chunk_hdr.size

# set up for making filenames
# 256-entry bytes.translate() table that keeps ASCII alphanumerics and spaces
# and turns every other byte into _
name_translate = bytes(
    c if (bytes([c]).isalnum() or c == 0x20) else ord("_")
    for c in range(256)
)
# zero-pad the card number to the width of num_cards to keep card files sorted
card_name_format = "{:0"+str(len(str(num_cards)))+"} - {}.txt"
def decompress_block(bytedata):
    """Decompress one block of backreference-compressed card data.

    The input is a stream of 12-bit words packed big endian, two words per
    three bytes, and terminated by the word 0xFFF. Words below 256 are
    literal bytes; any other word is a backreference to word index
    (word - 256) counted from the start of the same block.

    bytedata: bytes-like object holding the compressed block.
    Returns a bytearray with the decompressed data.
    Raises Exception("Decompression error.") when the data is truncated or
    malformed; the underlying error is chained as __cause__.
    """
    # first we have to convert to 12 bit words
    def wordgen(bd):
        pos = 0
        while True:
            # generator is stopped by caller once it sees 0xFFF; running off
            # the end of the data raises IndexError, which is handled below
            x = bd[pos:pos+3]
            pos += 3
            yield (x[0] << 4) + (x[1] >> 4)
            yield ((x[1] & 0x0F) << 8) + x[2]

    block = []

    def go(pos=0, count=None):
        # Expand words starting at index pos. With count=None (top level) run
        # until the 0xFFF terminator; otherwise stop after count words.
        out = bytearray()
        in_words = 0
        while True:
            word = block[pos]
            if word == 0xFFF:
                break
            if word < 256:
                out.append(word)
            else:
                if count is None:
                    # a backreference in the main stream always covers two words
                    t = go(word-256, 2)
                else:
                    # a nested backreference only gets the words still owed
                    t = go(word-256, count-in_words)
                out.extend(t)
            in_words += 1
            if in_words == count:
                break
            pos += 1
        return out

    try:
        for word in wordgen(bytedata):
            block.append(word)
            if word == 0xFFF:
                break
        return go()
    except Exception as err:
        # Only catch Exception: a bare except would also swallow
        # KeyboardInterrupt/SystemExit and stop the user from aborting.
        raise Exception("Decompression error.") from err
# cache of the most recently decompressed indirect block, keyed by the file
# offset it was read from (see the (1, 7) branch of process())
last_data_ptr = None
last_data = None


def process(expected_num, card_ptr):
    """Extract the card whose header is at file offset card_ptr.

    Reads the card header from the global file object fin, loads the card
    text according to the card's type pair (t1, t2) and writes it to a file
    named with card_name_format in the current directory.

    expected_num: card number the linked list should yield at this position.
    card_ptr: file offset of the card header.
    Returns the file offset of the next card header.
    Raises Exception if the card number does not match expected_num, if the
    card or one of its chunks has an unknown type, or if the chunk list
    loops back on itself. Errors from reading, unpacking or decompressing
    the data propagate to the caller.
    """
    global last_data_ptr, last_data

    fin.seek(card_ptr)
    num, prev, next_card_ptr, u1, t1, chunk_ptr, t2, length, name = \
        card_hdr.unpack(fin.read(card_hdr_len))
    if expected_num != num:
        # the main loop recognizes this message by its prefix; keep it verbatim
        raise Exception("Error following linked list. Something is very wrong. 0x{:08X}".format(card_ptr))
    print(num)

    card_type = (t1, t2)
    if card_type == (1, 7):  # indirect compressed
        # the indirect header follows the card header directly
        data_ptr, offset, data_len = \
            card_ind_hdr.unpack(fin.read(card_ind_hdr_len))
        # optimization to avoid decompressing data multiple times
        if last_data_ptr == data_ptr:
            data = last_data
        else:
            fin.seek(data_ptr)
            data = decompress_block(fin.read(data_len))
            last_data_ptr = data_ptr
            last_data = data
        # length counts the terminating 00, which is not written out
        text = data[offset:(offset+length-1)]
    elif card_type in ((5, 2), (1, 2)):  # direct
        chunks = []
        data = fin.read(length)
        if card_type == (5, 2):  # compressed
            data = decompress_block(data)
        chunks.append(data[:-1])  # remove terminating 00
        # load any additional chunks if necessary
        seen_chunks = set()
        while chunk_ptr != 0:
            if chunk_ptr in seen_chunks:
                # a corrupt file could link a chunk back to an earlier one;
                # without this check the loop would never end
                raise Exception("Chunk list loops at card #{}, 0x{:08x}".format(num, card_ptr))
            seen_chunks.add(chunk_ptr)
            fin.seek(chunk_ptr)
            chunk_prev, chunk_ptr, chunk_type, chunk_len = \
                chunk_hdr.unpack(fin.read(chunk_hdr_len))
            data = fin.read(chunk_len)
            if chunk_type == 4:  # compressed chunk
                data = decompress_block(data)
            elif chunk_type != 0:  # 0 means stored uncompressed
                raise Exception("Error at card #{}, 0x{:08x}".format(num, card_ptr))
            chunks.append(data[:-1])
        text = b''.join(chunks)
    else:
        raise Exception("Card of unknown type #{}, 0x{:08x}, TYPE: {}".format(num, card_ptr, card_type))

    # every byte that is not alphanumeric or a space becomes _ (this includes
    # the 00 padding), then the leading/trailing _ are trimmed
    tname = name.translate(name_translate).decode("ascii").strip("_")
    # the context manager closes the file even when the write fails
    with open(card_name_format.format(num, tname), "wb") as fout:
        fout.write(text)
    return next_card_ptr
# ---------------------------------------------------------------------------
# Main loop: walk the linked list of cards, extracting each one. Failures on
# individual cards are collected and reported at the end.
# ---------------------------------------------------------------------------
errors = []  # (card number, card address, formatted traceback)
while card_pos <= num_cards:
    try:
        card_ptr = process(card_pos, card_ptr)
    except KeyboardInterrupt:
        raise
    except Exception as e:
        # A broken linked list cannot be recovered from, so let it abort the
        # run. Check the argument type first: not every exception carries a
        # string as its only argument.
        if (len(e.args) == 1 and isinstance(e.args[0], str) and
                e.args[0].startswith("Error following linked list. Something is very wrong. ")):
            raise
        errors.append((card_pos, card_ptr, traceback.format_exc()))
        # get next pointer anyway
        # hopefully we aren't completely lost
        # (the next-card pointer is the third dword of the card header)
        fin.seek(card_ptr+8)
        card_ptr, = si.unpack(fin.read(4))
    card_pos += 1

if not errors:
    print("All cards extracted successfully.")
    sys.exit(0)

print("There were errors extracting some cards.")
for error in errors:
    print("CARD: {} ADDRESS: {:08X}".format(error[0], error[1]))
    print(error[2])
print("If the cards display correctly in the original viewer, tell me this stuff.")
# text is 00 terminated
# also note that text is stored with 0A line ending
# this decoder outputs that verbatim
# the viewer program converts this to dos 0D 0A
#5,2: direct compressed
# L: compressed length
#1,2: direct uncompressed
# L: uncompressed length
#it is possible for direct cards to be in multiple chunks
#pointer to next chunk is in U3
#L only applies to the first chunk
# chunk header
# P, N, U3, L
# 4, 4, 4, 2
# L: length of this chunk (compressed length if chunk is compressed)
# U3: 4 if compressed, 0 if uncompressed
# N: pointer to next chunk
# P: pointer to previous chunk (will not point to card header!)
#chunks are simply concatenated together after decompression (if applicable)
#(being mindful of terminating 00s)
#1,7: indirect compressed
# L: uncompressed length, A1: compressed block pointer A2: uncompressed offset
# A3: compressed block length
# this type allows multiple cards to be stored in one compressed block.
# the card is stored at position A2 in the uncompressed block
# and has uncompressed length L
#BIZARRE TYPES
#340,2
# seems to be a direct compressed type, but with no name. the data begins directly
# after the length word. the program treats its title as the first characters of
# the card. confusingly, attempting to go to this card by number gives an error
# that the card has been deleted
#compression is a very simple backreferencing scheme
#stream is encoded as 12 bit words
# $AA $AB $BB
# A and B are two 12 bit words, stored big endian
# word with high nibble 0 is literal, low byte is output directly
# otherwise a backreference! begin processing from the beginning of the stream
# at offset (word-256)
#
# there is no length! instead we have 2 easy rules
# given offset = word-256, process the words at offset and offset+1
# 1. if the word at offset is yet another backreference,
# process two words starting at the offset it names, recursively
# 2. if the word at offset+1 is another backreference, process only ONE word
# at the offset it names, recursively
# both these rules apply to any recursions
# this means the backreference length depends entirely on what is being backreferenced
#
# word of FFF seems to indicate end of compressed block
# Copyright (c) 2016 Thomas Watson (@tpw_rules)
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment