Last active
May 22, 2021 16:45
-
-
Save lab313ru/caf056c30ae1a866c4c7ced257f09d4d to your computer and use it in GitHub Desktop.
Casper Sega Saturn PX compression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import struct

# Container header: 2-byte tag ('PX' or 'PZ'), then width, height, block
# count, and width again -- all big-endian (see parse / pack_blocks).
TAG = '>2s HHHH'
# Per-block header: 16-bit size and 16-bit data offset (offset is in words;
# parse multiplies it by 2).
BLOCK_HDR = '>HH'
# Shortest run of identical words worth a repeat code instead of literals.
MIN_REPEAT = 3
# Longest run a single repeat code can describe (0x7D + 0x81 == 0xFE;
# 0xFF is reserved as the stream terminator).
MAX_BLOCK = 0x81
def get_word(data, off):
    """Read a big-endian 16-bit word from *data* at byte offset *off*."""
    (value,) = struct.unpack_from('>H', data, off)
    return value
def get_word_rev(data, off):
    """Read a little-endian 16-bit word from *data* at byte offset *off*."""
    (value,) = struct.unpack_from('<H', data, off)
    return value
def pack_byte(v):
    """Pack the low 8 bits of *v* into a single byte."""
    return (v & 0xFF).to_bytes(1, 'big')
def pack_word(v):
    """Pack the low 16 bits of *v* as a big-endian word."""
    return (v & 0xFFFF).to_bytes(2, 'big')
def pack_word_rev(v):
    """Pack the low 16 bits of *v* as a little-endian word."""
    return (v & 0xFFFF).to_bytes(2, 'little')
def pack_dword(v):
    """Pack the low 32 bits of *v* as a big-endian double word."""
    return (v & 0xFFFFFFFF).to_bytes(4, 'big')
def unpack_block(data, off, w, h, is_pz):
    """Decode one PX-compressed block into raw big-endian word data.

    data  -- block buffer: a word-dictionary table at offset 0 followed by
             the control stream (the layout pack_block produces)
    off   -- offset of the first control byte inside *data*
    w, h  -- block dimensions; only used on the PZ path
    is_pz -- True for uncompressed 'PZ' blocks: just slice w*h words

    Control stream (mirror of pack_block):
      0x00..0x7F  literal group: emit b+1 words, each selected by one
                  dictionary-index byte (index*2 is the table offset)
      0x80..0xFE  repeat: emit (b - 0x7D) copies of one dictionary word
      0xFF        terminator
    """
    dest = b''
    if is_pz:
        # PZ payload is stored raw as w*h 16-bit words.
        return data[off:off+w*h*2]
    b = data[off]      # current control byte
    off2 = off + 1     # read cursor in the control stream
    while True:
        b &= 0xFF
        # 0xFF ends the stream; the length check is a safety cap
        # (0x14A00 words) against a missing terminator.
        if b == 0xFF or len(dest) >= (0x14A00 * 2):
            return bytes(dest)
        if b < 0x80:
            # Literal group: b+1 dictionary-index bytes follow.
            off1 = off2
            for i in range(b + 1):
                off1 = off2
                # data[off1] is a dictionary index; the word itself lives
                # at index*2 in the table at the start of the block.
                val = get_word(data, data[off1] * 2)
                dest += pack_word(val)
                off2 = off1 + 1
            # Next control byte follows the last index byte.
            b = data[off1 + 1]
            off2 = off1 + 2
        else:
            x = b - 0x7D              # repeat length in words (3..0x81)
            val = get_word(data, data[off2] * 2)
            if x > 8:
                # Long runs are emitted as 4-byte stores of the doubled
                # word; (x >> 2) iterations of two dwords plus (x & 3)
                # single words totals exactly x words.
                dw = (val << 16) | val
                if len(dest) & 2:
                    # dest is only word-aligned: emit one word so dword
                    # stores land 4-byte aligned, and drop that word
                    # from the remaining count.
                    dest += pack_word(val)
                    x = b - 0x7E
                b = x >> 2   # number of 4-word chunks
                x &= 3       # leftover words
                for i in range(b):
                    dest += pack_dword(dw)
                    dest += pack_dword(dw)
            for i in range(x):
                dest += pack_word(val)
            off2 += 1        # skip the dictionary-index byte
            b = data[off2]   # next control byte
            off2 += 1
def add_dict_word(offs, w):
    """Look up word *w* in the offset dictionary, registering it if new.

    Returns (already_present, table_offset).  The table holds at most
    0x100 words; offsets advance in 2-byte steps.
    """
    existing = offs.get(w)
    if existing is not None:
        return True, existing
    if len(offs) == 0x100:
        raise Exception('Too many unique words!')
    new_off = len(offs) * 2
    assert new_off <= 0xFF * 2
    offs[w] = new_off
    return False, new_off
def find_repeats(data, off, w, offs_dict):
    """Measure the run of word *w* starting at *off*, registering it.

    Returns (count, dict_offset, passed): count is 1 (encode a single
    literal) when the run is shorter than MIN_REPEAT, otherwise the run
    length; dict_offset is w's offset in the dictionary table; passed
    is that offset repeated once per run element.
    """
    _, first_off = add_dict_word(offs_dict, w)
    count = 1
    # BUG FIX: the original condition used '<', which could never examine
    # the final word of the buffer, so a run ending at the buffer end was
    # always cut one word short.  '<=' is the exact readability bound for
    # get_word_rev(data, off + count * 2).
    while off + (count + 1) * 2 <= len(data) and count < MAX_BLOCK:
        if get_word_rev(data, off + count * 2) != w:
            break
        count += 1
    passed = [first_off] * count
    if count < MIN_REPEAT:
        # Too short to repeat-encode; caller emits one literal word.
        return 1, first_off, passed
    return count, first_off, passed
def pack_block(data):
    """Compress one block of 16-bit words.

    Returns (packed, dict_word_count): *packed* is the dictionary table
    (one big-endian word per unique entry) followed by the control
    stream; the second value is the number of dictionary entries.
    """
    offs_dict = dict()

    def flush_literals(offs):
        # Emit one literal group: count-1 byte followed by one
        # dictionary-index byte per word (table offsets halved).
        out = pack_byte(len(offs) - 1)
        for o in offs:
            out += pack_byte(o >> 1)
        return out

    off = 0
    block2 = b''
    pending = []  # dictionary offsets of words awaiting a literal flush
    while off < len(data):
        w = get_word_rev(data, off)
        count, dict_off, _ = find_repeats(data, off, w, offs_dict)
        if count < MIN_REPEAT:
            pending.append(dict_off)
            # A literal group encodes at most 0x80 words (count byte
            # 0x00..0x7F).  BUG FIX: the original flushed only at
            # MAX_BLOCK (0x81) words, emitting count byte 0x80, which
            # the decoder's `b < 0x80` test misreads as a repeat code.
            if len(pending) == MAX_BLOCK - 1:
                block2 += flush_literals(pending)
                pending = []
        else:
            if pending:
                block2 += flush_literals(pending)
                pending = []
            # Repeat code: high byte 0x7D+count (0x80..0xFE), low byte
            # the dictionary index.
            block2 += pack_word(((0x7D + count) << 8) | (dict_off >> 1))
        off += count * 2
    if pending:
        block2 += flush_literals(pending)
    block2 += pack_word(0xFFFF)  # stream terminator
    # Prepend the dictionary table, each word at its assigned offset.
    dest = bytearray(b'\x00\x00' * len(offs_dict))
    for k, v in offs_dict.items():
        struct.pack_into('>H', dest, v, k)
    return bytes(dest) + block2, len(offs_dict)
def pack_blocks(blocks, w, h):
    """Build a full PX container: fixed prefix, 'PX' header, then each
    block compressed with pack_block behind its 4-byte block header."""
    prefix = (
        b'\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x7F\xFF\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x00\x01\x3F\x3F\x00\x00'
    )
    parts = [prefix, b'PX', struct.pack('>HHHH', w, h, len(blocks), w)]
    for block in blocks:
        packed, dict_words = pack_block(block)
        # size field covers the packed payload plus the 4-byte header
        parts.append(pack_word(len(packed) + 2 + 2))
        parts.append(pack_word(dict_words))
        parts.append(packed)
    return b''.join(parts)
def parse(data, offset, is_pz):
    """Parse one PX/PZ container at *offset*; return the decoded blocks."""
    tag, w, h, count, w2 = struct.unpack_from(TAG, data, offset)
    print('width = %d' % w)
    print('height = %d' % h)
    print('count = %d' % count)
    off = offset + struct.calcsize(TAG)
    blocks = []
    for i in range(count):
        size, data_offset = struct.unpack_from(BLOCK_HDR, data, off)
        print('%03d: size = %d, data_offset = %d' % (i + 1, size, data_offset * 2))
        # PZ blocks are raw, so hand unpack_block everything that follows;
        # PX blocks are self-contained within their size field.
        raw = data[off + 4:] if is_pz else data[off + 4:off + size]
        blocks.append(unpack_block(raw, data_offset * 2, w, h, is_pz))
        off += size & 0xFFFE  # advance by the word-aligned block size
    return blocks
def swap_words(data):
    """Return *data* with every 16-bit word byte-swapped.

    A trailing odd byte, if any, is dropped (same as the original).
    Rewritten as one bulk struct repack instead of quadratic bytes
    concatenation in a loop.
    """
    n = len(data) // 2
    words = struct.unpack('>%dH' % n, data[:n * 2])
    return struct.pack('<%dH' % n, *words)
def main(path, width, height):
    """Compress the raw block in *path* and write it to '<path>.enc'.

    Uses context managers so the files are closed even if
    pack_blocks raises.
    """
    with open(path, 'rb') as f:
        data = f.read()
    full = pack_blocks([data], width, height)
    with open('%s.enc' % path, 'wb') as out:
        out.write(full)
if __name__ == '__main__':
    # Expect: input file, width, height.
    if len(sys.argv) != 4:
        print('usage: unpacked_file.bin width height')
    else:
        main(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import struct

# Container header: 2-byte tag ('PX' or 'PZ'), then width, height, block
# count, and width again -- all big-endian (see parse / pack_blocks).
TAG = '>2s HHHH'
# Per-block header: 16-bit size and 16-bit data offset (offset is in words;
# parse multiplies it by 2).
BLOCK_HDR = '>HH'
# Shortest run of identical words worth a repeat code instead of literals.
MIN_REPEAT = 3
# Longest run a single repeat code can describe (0x7D + 0x81 == 0xFE;
# 0xFF is reserved as the stream terminator).
MAX_BLOCK = 0x81
def get_word(data, off):
    """Read a big-endian 16-bit word from *data* at byte offset *off*."""
    (value,) = struct.unpack_from('>H', data, off)
    return value
def get_word_rev(data, off):
    """Read a little-endian 16-bit word from *data* at byte offset *off*."""
    (value,) = struct.unpack_from('<H', data, off)
    return value
def pack_byte(v):
    """Pack the low 8 bits of *v* into a single byte."""
    return (v & 0xFF).to_bytes(1, 'big')
def pack_word(v):
    """Pack the low 16 bits of *v* as a big-endian word."""
    return (v & 0xFFFF).to_bytes(2, 'big')
def pack_word_rev(v):
    """Pack the low 16 bits of *v* as a little-endian word."""
    return (v & 0xFFFF).to_bytes(2, 'little')
def pack_dword(v):
    """Pack the low 32 bits of *v* as a big-endian double word."""
    return (v & 0xFFFFFFFF).to_bytes(4, 'big')
def unpack_block(data, off, w, h, is_pz):
    """Decode one PX-compressed block into raw big-endian word data.

    data  -- block buffer: a word-dictionary table at offset 0 followed by
             the control stream (the layout pack_block produces)
    off   -- offset of the first control byte inside *data*
    w, h  -- block dimensions; only used on the PZ path
    is_pz -- True for uncompressed 'PZ' blocks: just slice w*h words

    Control stream (mirror of pack_block):
      0x00..0x7F  literal group: emit b+1 words, each selected by one
                  dictionary-index byte (index*2 is the table offset)
      0x80..0xFE  repeat: emit (b - 0x7D) copies of one dictionary word
      0xFF        terminator
    """
    dest = b''
    if is_pz:
        # PZ payload is stored raw as w*h 16-bit words.
        return data[off:off+w*h*2]
    b = data[off]      # current control byte
    off2 = off + 1     # read cursor in the control stream
    while True:
        b &= 0xFF
        # 0xFF ends the stream; the length check is a safety cap
        # (0x14A00 words) against a missing terminator.
        if b == 0xFF or len(dest) >= (0x14A00 * 2):
            return bytes(dest)
        if b < 0x80:
            # Literal group: b+1 dictionary-index bytes follow.
            off1 = off2
            for i in range(b + 1):
                off1 = off2
                # data[off1] is a dictionary index; the word itself lives
                # at index*2 in the table at the start of the block.
                val = get_word(data, data[off1] * 2)
                dest += pack_word(val)
                off2 = off1 + 1
            # Next control byte follows the last index byte.
            b = data[off1 + 1]
            off2 = off1 + 2
        else:
            x = b - 0x7D              # repeat length in words (3..0x81)
            val = get_word(data, data[off2] * 2)
            if x > 8:
                # Long runs are emitted as 4-byte stores of the doubled
                # word; (x >> 2) iterations of two dwords plus (x & 3)
                # single words totals exactly x words.
                dw = (val << 16) | val
                if len(dest) & 2:
                    # dest is only word-aligned: emit one word so dword
                    # stores land 4-byte aligned, and drop that word
                    # from the remaining count.
                    dest += pack_word(val)
                    x = b - 0x7E
                b = x >> 2   # number of 4-word chunks
                x &= 3       # leftover words
                for i in range(b):
                    dest += pack_dword(dw)
                    dest += pack_dword(dw)
            for i in range(x):
                dest += pack_word(val)
            off2 += 1        # skip the dictionary-index byte
            b = data[off2]   # next control byte
            off2 += 1
def add_dict_word(offs, w):
    """Look up word *w* in the offset dictionary, registering it if new.

    Returns (already_present, table_offset).  The table holds at most
    0x100 words; offsets advance in 2-byte steps.
    """
    existing = offs.get(w)
    if existing is not None:
        return True, existing
    if len(offs) == 0x100:
        raise Exception('Too many unique words!')
    new_off = len(offs) * 2
    assert new_off <= 0xFF * 2
    offs[w] = new_off
    return False, new_off
def find_repeats(data, off, w, offs_dict):
    """Measure the run of word *w* at *off*, or gather a literal group.

    Returns (count, dict_offset, passed): advance count words; when
    count < MIN_REPEAT the caller emits passed[0:count] as literals,
    otherwise a repeat code for *w*.
    NOTE(review): the scan bound uses '<', so the buffer's final word is
    never examined -- confirm whether that is intentional.
    """
    _, first_off = add_dict_word(offs_dict, w)
    count = 1
    # Count the run of words equal to w.
    while off + (count + 1) * 2 < len(data) and count < MAX_BLOCK:
        new_word = get_word_rev(data, off + count * 2)
        if w == new_word:
            count += 1
        else:
            break
    last_count = count
    passed = [first_off] * count
    # Run too short to repeat-encode: absorb following words into one
    # literal group, stopping after the first never-seen word.
    while off + (count + 1) * 2 < len(data) and (last_count < MIN_REPEAT) and (count < MAX_BLOCK):
        new_word = get_word_rev(data, off + count * 2)
        repeat, dict_off = add_dict_word(offs_dict, new_word)
        # BUG FIX: the original appended dict_off only when the word was
        # already in the dictionary, yet still incremented count for a
        # first-seen word -- len(passed) fell behind count and
        # pack_block's repeated[i] raised IndexError on any input
        # starting with two distinct words.  Append unconditionally so
        # every counted word has a dictionary offset in passed.
        passed.append(dict_off)
        count += 1
        if not repeat:
            break
    if count < MIN_REPEAT:
        return count, first_off, passed
    if count > last_count:
        # If one dictionary word runs for >= 3 entries right after the
        # first literal, emit only the first word so the run can be
        # repeat-encoded on the next call.
        better = 0
        while better + 1 < len(passed) and passed[1] == passed[1 + better]:
            better += 1
        if better >= 3:
            return 1, first_off, passed
        return MIN_REPEAT - 1, first_off, passed
    return last_count, first_off, passed
def pack_block(data):
    """Compress one block of 16-bit words.

    Returns (packed, dict_word_count): *packed* is the dictionary table
    (one big-endian word per unique entry) followed by the control
    stream; the second value is the number of dictionary entries.
    Relies on find_repeats returning len(repeated) >= count whenever
    count < MIN_REPEAT.
    """
    offs_dict = dict()
    off = 0
    block2 = b''  # control stream, built ahead of the dictionary table
    while off < len(data):
        w = get_word_rev(data, off)
        count, dict_off, repeated = find_repeats(data, off, w, offs_dict)
        if count < MIN_REPEAT:
            # Literal group: count-1 byte, then one dictionary-index
            # byte per word (table offsets halved to byte indexes).
            block2 += pack_byte(count - 1)
            for i in range(count):
                block2 += pack_byte(repeated[i] >> 1)
        else:
            # Repeat code: high byte 0x7D+count, low byte dict index.
            block2 += pack_word(((0x7D + count) << 8) | (dict_off >> 1))
        off += count * 2
    block2 += pack_word(0xFFFF)  # stream terminator
    # Prepend the dictionary table, each word at its assigned offset.
    dest = bytearray(b'\x00\x00' * len(offs_dict))
    for k, v in offs_dict.items():
        struct.pack_into('>H', dest, v, k)
    dest = bytes(dest)
    dest += block2
    return dest, len(offs_dict)
def pack_blocks(blocks, w, h):
    """Build a full PX container: fixed prefix, 'PX' header, then each
    block compressed with pack_block behind its 4-byte block header.

    NOTE(review): the per-block size field is written as the UNPACKED
    length, while parse() uses size as the distance to the next block
    header -- confirm which the on-disk format expects.
    """
    prefix = (
        b'\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x7F\xFF\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00\x00\x00\x00\x00\x01\x3F\x3F\x00\x00'
    )
    parts = [prefix, b'PX', struct.pack('>HHHH', w, h, len(blocks), w)]
    for block in blocks:
        parts.append(pack_word(len(block)))
        packed, dict_words = pack_block(block)
        parts.append(pack_word(dict_words))
        parts.append(packed)
    return b''.join(parts)
def parse(data, offset, is_pz):
    """Parse one PX/PZ container at *offset*; return the decoded blocks."""
    tag, w, h, count, w2 = struct.unpack_from(TAG, data, offset)
    print('width = %d' % w)
    print('height = %d' % h)
    print('count = %d' % count)
    off = offset + struct.calcsize(TAG)
    blocks = []
    for i in range(count):
        size, data_offset = struct.unpack_from(BLOCK_HDR, data, off)
        print('%03d: size = %d, data_offset = %d' % (i + 1, size, data_offset * 2))
        # PZ blocks are raw, so hand unpack_block everything that follows;
        # PX blocks are self-contained within their size field.
        raw = data[off + 4:] if is_pz else data[off + 4:off + size]
        blocks.append(unpack_block(raw, data_offset * 2, w, h, is_pz))
        off += size & 0xFFFE  # advance by the word-aligned block size
    return blocks
def swap_words(data):
    """Return *data* with every 16-bit word byte-swapped.

    A trailing odd byte, if any, is dropped (same as the original).
    Rewritten as one bulk struct repack instead of quadratic bytes
    concatenation in a loop.
    """
    n = len(data) // 2
    words = struct.unpack('>%dH' % n, data[:n * 2])
    return struct.pack('<%dH' % n, *words)
def main(path):
    """Scan *path* for PX/PZ containers and dump every decoded block.

    Output files are named '<path>_<TAG>_<offset>_<n>.dec'.  The two
    previously duplicated scan loops are folded into _extract, and all
    files are handled with context managers.
    """
    with open(path, 'rb') as f:
        data = f.read()
    _extract(data, path, 'PX', False)
    _extract(data, path, 'PZ', True)


def _extract(data, path, tag, is_pz):
    # Find every b'\x00\x00' + tag marker and decode the container there.
    marker = b'\x00\x00' + tag.encode('ascii')
    p = 0
    while True:
        p = data.find(marker, p)
        if p == -1:
            break
        p += 2  # point at the 2-byte tag itself
        blocks = parse(data, p, is_pz)
        for i, block in enumerate(blocks):
            name = '%s_%s_%04X_%d.dec' % (path, tag, p, i + 1)
            with open(name, 'wb') as out:
                out.write(swap_words(block))
if __name__ == '__main__':
    # Expect exactly one argument: the file to scan.
    if len(sys.argv) != 2:
        print('usage: file_with_packed_data.bin')
    else:
        main(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment