# Compressor and decompressor for Terranigma (SNES)
# GitHub gist bbbradsmith/935c03fc31d81ad29b489a943bc79a5c by @bbbradsmith,
# created February 7, 2022.
# Terranigma data compressor and decompressor
# Brad Smith, 2022-02-07
# https://rainwarrior.ca
#
# Format reference:
# https://www.terranigma.be/index.php/Compression
import sys
def usage():
    """Print the command line help and abort.

    Always raises ``Exception`` after printing, so callers can treat a call
    to this function as the end of the program.
    """
    help_text = (
        "Usage:",
        " Decompress:",
        " d compressed.bin uncompressed.bin",
        " Compress:",
        " c uncompressed.bin compressed.bin",
        " Extract from ROM and decompress:",
        " e terranigma.sfc 380000 title.chr",
        " Compress and inject into ROM:",
        " i terranigma.sfc 380000 title.chr",
        " ROM addresses are in hexadecimal.",
    )
    for line in help_text:
        print(line)
    print()  # blank separator line before the error
    raise Exception("Invalid command line arguments.")
# Set to True to trace decompression; compress() also verifies its own
# output by decompressing it when this flag is set.
debug = False


def dbg(msg):
    """Print ``msg`` only while the module-level ``debug`` flag is set."""
    if not debug:
        return
    print(msg)
def decompress(rom, address):
    """Decompress one packet starting at ``address`` inside ``rom``.

    Args:
        rom: indexable sequence of byte values (``bytes`` / ``bytearray``)
            holding the compressed packet, e.g. a whole ROM image.
        address: index in ``rom`` of the first byte of the packet header.

    Returns:
        A ``bytearray`` with the decompressed data. If the input runs out
        before an end-of-packet marker is seen, the data decoded so far is
        returned and a diagnostic is printed.

    Progress and diagnostics are written with ``print``; per-token tracing
    goes through ``dbg``.
    """
    def read_byte():
        nonlocal address
        # Reads past the end of the input yield $FF padding. The address
        # always advances, so it can end up greater than len(rom).
        byte = rom[address] if address < len(rom) else 0xFF
        address += 1
        return byte

    print("Packet at %06X" % address)
    packet_start = address
    # header: 00, 16-bit length, 1st byte of output data
    h0 = read_byte()
    if h0 != 0:
        print("Packet header should start with 00. Found: %02X" % h0)
    # The header length is only reported; the end-of-packet marker (or the
    # end of the input) decides where decoding stops.
    plen = read_byte() + (read_byte() << 8)
    print("Length: $%04X" % plen)
    d = bytearray()
    d.append(read_byte())
    dbg("%06X: %02X" % (address-1, d[0]))

    # data:
    # 8-bit control bitstream, 1 byte appears whenever the bitstream is empty
    # 1 = copy next byte
    #   data: byte
    # 00xx = copy xx+2 bytes from offset (8-bit) - $100
    #   data: offset byte
    # 01 = copy bytes from long offset (13-bit) - $2000
    #   data: 2 bytes = 13-bit offset, 3-bit short-length (+2)
    #   data: 3 bytes = 13-bit offset, 3-bit = 0, 8-bit long-length (+1)
    #   long-length = 0 ends data (offset also 0)
    control_bits = 0
    control_shift = 0

    def read_control():
        nonlocal control_bits
        nonlocal control_shift
        if control_bits == 0:
            # bitstream empty: the next input byte is a new control byte
            control_bits = 8
            control_shift = read_byte()
            dbg("%06X: control $%02X" % (address-1, control_shift))
        control_bits -= 1
        bit = (control_shift >> 7) & 1  # return high bit
        control_shift = (control_shift << 1) & 0xFF
        return bit

    def clamp_offset(o):
        # A copy source before the start of the output is reported and
        # redirected to the first output byte.
        if o < 0:
            print("Error at %6X: copy offset (%X) past beginning of output data (%X)." % (address-1, len(d)-o, len(d)))
            o = 0
        return o

    def copy_run(o, clen):
        # Byte-by-byte so that the source may overlap the bytes being
        # appended (self-repeating runs).
        for i in range(clen):
            d.append(d[o+i])

    ended = False
    # Once the input is exhausted every read returns $FF, which can never
    # form the end marker, so stop instead of decoding padding forever.
    while address < len(rom):
        if read_control() == 1:  # control 1 = literal data
            d.append(read_byte())
            dbg("%06X: 1 %02X" % (address-1, d[-1]))
        else:
            if read_control() == 0:  # control 00xx = short copy
                clen = (read_control() << 1) + read_control() + 2  # 2-bit copy length in control stream
                o = clamp_offset((len(d) - 0x100) + read_byte())
                dbg("%06X: 00 $%X (-$%02X)" % (address-1, clen, len(d)-o))
                copy_run(o, clen)
            else:  # control 01 = long copy
                astart = address
                p = (read_byte() << 8) + read_byte()
                o = (len(d) - 0x2000) + (p >> 3)
                clen = p & 7  # 3-bit short length
                if clen > 0:  # short length
                    clen += 2
                else:  # long length if short length = 0
                    clen = read_byte() + 1
                    if clen == 1:
                        ended = True
                        dbg("%06X: 01 end -%04X" % (astart, len(d)-o))
                        break
                o = clamp_offset(o)
                dbg("%06X: 01 $%02X -$%04X" % (astart, clen, len(d)-o))
                copy_run(o, clen)

    print("End of packet: %06X" % address)
    print("Compressed size: $%04X" % (address-packet_start))
    if address > len(rom):
        print("Packet longer than ROM length? End at: %06X" % address)
    if not ended:
        print("No end-of-packet 01 $00 $00 $00 found.")
    return d
def compress(d):
    """Compress ``d`` into a single packet.

    Args:
        d: indexable sequence of byte values (``bytes`` / ``bytearray``).

    Returns:
        A ``bytearray`` holding the packet: a 00 byte, the 16-bit
        little-endian length of ``d``, the first byte of ``d`` verbatim,
        the control/data stream, and the end marker. An empty ``bytearray``
        is returned (with a warning) when ``d`` is empty. Inputs of 65536
        bytes or more only produce a warning; the stored length is
        truncated to 16 bits.

    When the module-level ``debug`` flag is set, the result is decompressed
    again and asserted to match ``d``.
    """
    print("Compressing $%04X bytes..." % len(d))
    c = bytearray()
    size = len(d)
    if size < 1:
        print("Warning: data too small!")
        return c
    if size >= 65536:
        print("Warning: data too large for 16-bit packet size.")
    c.append(0)
    c.append(size & 0xFF)
    c.append((size >> 8) & 0xFF)
    c.append(d[0])  # first byte is uncompressed

    control_bits = 0  # free bits left in the working control byte
    control_pos = 0   # index in c of the working control byte

    def control_bit(b):
        nonlocal control_bits
        nonlocal control_pos
        if control_bits < 1:
            control_pos = len(c)
            c.append(0)  # new control byte
            control_bits = 8
        # fill in current working control byte
        control_bits -= 1
        c[control_pos] = (c[control_pos] << 1) | b

    pos = 1
    while pos < size:
        # search for longest match in last $2000 bytes
        # (overlap, i.e. self-repetition allowed)
        window_start = max(0, pos - 0x2000)
        max_len = min(0x100, size - pos)  # longest match possible here
        best_len = 0
        best_p = 0
        # Nearest candidates first; only a strictly longer match replaces
        # the current best, so the nearest of the longest matches wins.
        for p in range(pos - 1, window_start - 1, -1):
            mlen = 0
            while mlen < max_len and d[p + mlen] == d[pos + mlen]:
                mlen += 1
            if mlen > best_len:
                best_p = p
                best_len = mlen
                if best_len == max_len:
                    break  # nothing farther away can beat this

        # choose best encoding for match
        if 2 <= best_len <= 5 and best_p >= (pos - 0x100):  # 8-bit short copy
            control_bit(0)
            control_bit(0)
            lb = best_len - 2
            control_bit((lb >> 1) & 1)
            control_bit((lb >> 0) & 1)
            c.append(best_p - (pos - 0x100))
            pos += best_len
            continue
        if best_len <= 2:  # literal
            control_bit(1)
            c.append(d[pos])
            pos += 1
            continue

        # best_len > 2
        op = best_p - (pos - 0x2000)  # 13-bit long offset
        control_bit(0)
        control_bit(1)
        c.append((op >> 5) & 0xFF)
        if best_len <= 9:  # short length
            c.append(((op << 3) & 0xFF) | (best_len - 2))
        else:  # long length
            c.append((op << 3) & 0xFF)
            c.append(best_len - 1)
        pos += best_len

    # terminal data entry
    control_bit(0)
    control_bit(1)
    c.append(0)
    c.append(0)
    c.append(0)
    # conclude final control byte: left-align the bits written so far
    c[control_pos] <<= control_bits
    print("Output size: $%04X" % len(c))
    if debug:
        print("Verify compression:")
        assert(decompress(c,0) == d)
    return c
if __name__ == "__main__":
    # Command line entry point: d / c work on standalone files, e / i work
    # on a packet at a hexadecimal address inside a ROM image.

    def _read_file(path):
        # Read a whole binary file and close it promptly.
        with open(path, "rb") as f:
            return f.read()

    def _write_file(path, data):
        # Write a whole binary file; it is opened only once the data is
        # ready, so a failed conversion cannot truncate an existing file.
        with open(path, "wb") as f:
            f.write(data)

    if len(sys.argv) < 4:
        usage()
    command = sys.argv[1].lower()
    if command == "d":
        _write_file(sys.argv[3], decompress(_read_file(sys.argv[2]), 0))
        sys.exit(0)
    if command == "c":
        _write_file(sys.argv[3], compress(_read_file(sys.argv[2])))
        sys.exit(0)
    if len(sys.argv) < 5:
        usage()
    if command == "e":
        _write_file(sys.argv[4], decompress(_read_file(sys.argv[2]), int(sys.argv[3], base=16)))
        sys.exit(0)
    if command == "i":
        r = bytearray(_read_file(sys.argv[2]))
        a = int(sys.argv[3], base=16)
        if a < 0 or a > len(r):
            # Slice assignment outside the image would silently place the
            # packet at a different offset than the one requested.
            raise Exception("ROM address %06X is outside the ROM (size %06X)." % (a, len(r)))
        d = compress(_read_file(sys.argv[4]))
        r[a:a+len(d)] = d
        _write_file(sys.argv[2], r)
        sys.exit(0)
    usage()
# (End of gist; the GitHub comment/sign-in footer is not part of the script.)