Skip to content

Instantly share code, notes, and snippets.

@egormkn
Created September 2, 2022 17:09
Show Gist options
  • Save egormkn/c44a7d51e55236fb56ba3fea4f8f4f8f to your computer and use it in GitHub Desktop.
Save egormkn/c44a7d51e55236fb56ba3fea4f8f4f8f to your computer and use it in GitHub Desktop.
Mathematica Decoder
#!/usr/bin/env python3
from argparse import ArgumentParser
huffman_table = [
"1111110101100100000010",
"1111110101100100000011",
"1111110101100100000100",
"1111110101100100000101",
"1111110101100100000110",
"1111110101100100000111",
"1111110101100100001000",
"1111110101100100001001",
"1111110101100100001010",
"00111",
"10101",
"1111110101100100001011",
"1111110101100100001100",
"1111110101100100001101",
"1111110101100100001110",
"1111110101100100001111",
"1111110101100100010000",
"1111110101100100010001",
"1111110101100100010010",
"1111110101100100010011",
"1111110101100100010100",
"1111110101100100010101",
"1111110101100100010110",
"1111110101100100010111",
"1111110101100100011000",
"1111110101100100011001",
"1111110101100100011010",
"1111110101100100011011",
"1111110101100100011100",
"1111110101100100011101",
"1111110101100100011110",
"1111110101100100011111",
"110",
"01100000101",
"0000011",
"10110110111",
"101101101100",
"1111110101101",
"1111110100",
"11111101011000",
"0100100",
"0100101",
"11101100",
"10111110",
"11100",
"1111011",
"1110111",
"1001111",
"101100",
"111100",
"010110",
"1011110",
"1011010",
"0101110",
"0000010",
"0110001",
"0101111",
"0100001",
"1110101",
"10111111",
"101101101101",
"010101",
"01111100",
"111111010100",
"0111111100100",
"010011101",
"111011010",
"0100110",
"101101111",
"011000010",
"100111010",
"011000000",
"1001110110",
"01111101",
"011000001001",
"111111010111",
"101101100",
"111111011",
"101101110",
"0111111101",
"11111100",
"0111111111",
"01111110",
"0100000",
"01001111",
"011111110011",
"01111111000",
"1001110111",
"11111101011001001",
"011000001000",
"1111110101100101",
"00101",
"111111010101",
"00110",
"10011100",
"1110100",
"0110000011",
"10001",
"0101000",
"011001",
"1111010",
"0001",
"1001101",
"1111101",
"000010",
"10100",
"010011100",
"111011011",
"101110",
"010001",
"10010",
"01101",
"011110",
"1011011010",
"01110",
"00100",
"10000",
"000011",
"0101001",
"011000011",
"1111100",
"1001100",
"0111111110",
"1111111",
"0111111100101",
"000000",
"111111010110011",
"111111010110010000000",
]
class HuffmanTree:
def __init__(self, value=None, left=None, right=None):
self.value = value
self.left = left
self.right = right
def get(self, key, offset=0):
if offset > len(key):
raise Exception("offset > len(key)")
elif offset == len(key):
return self.value, offset
elif self.value is not None:
return self.value, offset
elif key[offset] == "0" and self.left is not None:
return self.left.get(key, offset + 1)
elif key[offset] == "1" and self.right is not None:
return self.right.get(key, offset + 1)
else:
return None, offset
def set(self, key, value, offset=0):
if offset > len(key):
raise Exception("offset > len(key)")
elif offset == len(key):
self.value = value
elif self.value is not None:
raise Exception(f"Overlapping keys: {key[:offset]} and {key}")
elif key[offset] == "0":
if self.left is None:
self.left = HuffmanTree()
self.left.set(key, value, offset + 1)
elif key[offset] == "1":
if self.right is None:
self.right = HuffmanTree()
self.right.set(key, value, offset + 1)
else:
raise AssertionError()
@classmethod
def build(cls, table):
root = cls()
for i, bits in enumerate(table):
root.set(bits, chr(i))
return root
def __repr__(self):
result = "{"
if self.left is not None:
result += "0: " + repr(self.left)
if self.left is not None and self.right is not None:
result += ", "
elif self.value is not None:
result += repr(self.value)
if self.right is not None:
result += "1: " + repr(self.right)
result += "}"
return result
tree = HuffmanTree.build(huffman_table)
def split_string(s: str, n: int):
return [s[i:i + n] for i in range(0, len(s), n)]
def huffman_encode(body: str):
result = []
for i in range(0, len(body), 1):
result.append(huffman_table[ord(body[i])])
bits = "".join(result)
# add trailing zeros to match a multiple 13 bits
nzeros = 13 - (len(bits) % 13)
nzeros = 0 if nzeros == 13 else nzeros
for i in range(nzeros):
bits += "0"
return bits
def base95_encode(bits: str):
result = []
for b in split_string(bits, 13):
# convert string of 13 bits to be in reversed order
number = int(b[::-1], 2)
hi = number // 95
lo = number % 95
c1 = chr(hi + 32)
c2 = chr(lo + 32)
result.append(c1)
result.append(c2)
return "".join(result)
def huffman_decode(bits: str):
result = []
value, offset = tree.get(bits)
while value is not None:
result += value
value, offset = tree.get(bits, offset)
return "".join(result)
def base95_decode(code: str):
result = []
for c1, c2 in split_string(code, 2):
hi = ord(c1) - 32
lo = ord(c2) - 32
number = hi * 95 + lo
b = format(number, "013b")[::-1]
result.append(b)
return "".join(result)
def main():
parser = ArgumentParser(description="Encode/decode Mathematica files")
parser.add_argument("action", choices=["encode", "decode"])
parser.add_argument("input_file", help="Input file")
parser.add_argument("output_file", help="Output file")
args = parser.parse_args()
with open(args.input_file, "r") as input_file:
content = input_file.read()
header = "(*!1N!*)mcm"
if args.action == "encode":
bits = huffman_encode(content)
code = base95_encode(bits)
assert huffman_decode(bits) == content
assert base95_decode(code) == bits
lines = [header, *split_string(code, 70)]
result = "\n".join(lines)
else:
lines = content.splitlines(keepends=False)
if not lines or lines[0] != header:
raise ValueError("Expected header: " + header)
code = "".join(lines[1:])
bits = base95_decode(code)
result = huffman_decode(bits)
with open(args.output_file, "w") as output_file:
output_file.write(result)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment