Skip to content

Instantly share code, notes, and snippets.

@magical
Last active October 18, 2020 08:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save magical/c28c026cf542bc2f7ffba1a334e13f8b to your computer and use it in GitHub Desktop.
Save magical/c28c026cf542bc2f7ffba1a334e13f8b to your computer and use it in GitHub Desktop.
Notes on CC2's checksum scheme
Notes on CC2's checksum scheme
==============================
Background
----------
The "KEY " chunk in C2M files stores a 128-bit hash. This
was originally part of a scheme to prevent users from playing
levels not made with the official editor, but was ripped out
before the public release.[1]
The "LOCK" chunk was also part of the scheme.
Most of the official levels still have vestigial "KEY " and
"LOCK" chunks but the editor no longer adds them to new files,
and their contents are ignored.
The "LOCK" value is usually "Custom version for Chuck Sommerville\0",
and an obfuscated version of this string appears in the
executable. However, some levels in the official pack have a
different value for it: usually the name of a member of the
Chip's Challenge community. It seems likely that this was the
name of a person to whom a beta copy was given.
Details
-------
The contents of every chunk between the "LOCK" chunk and the
"KEY " chunk are concatenated and hashed with a modified
version of MD5. This includes the contents of the "LOCK" chunk
but (obviously) not the contents of the "KEY " chunk. The
chunk name and length are NOT included in the hashed data,
just the contents. There is no delimiter between chunks.
The "CC2M" and "VERS" chunks always appear before the "LOCK"
chunk, and their contents are not[*] included in the hash.
[*]: Actually, VERS was added after the checksum scheme was
removed so its behaviour is special. It is not included when
writing the level but it does seem to be when reading.
The "PRPL" (replay) chunk appears after the "KEY " chunk, so
its contents are not included in the hash.
### Modified MD5 ###
The MD5 state is initialized with a nonstandard set of
constants, and the number of bits already processed is
initialized to 0x200 (512 decimal).
A = 0xf8611bf2
B = 0xee782ca4
C = 0xf30869b2
D = 0xb048f18f
N = 0x200
These values are probably from an intermediate state reached
by processing one block of data. However, the input data which
led to this state is unknown, and likely infeasible to recover.
### Pseudocode ###
MD5_init(h, <precomputed values>)
for each chunk:
if chunk.name == "CC2M":
skip
else if chunk.name == "KEY "
break
else
MD5_Update(h, chunk.data)
MD5_Final(h)
References
----------
[1]: https://www.facebook.com/groups/175825302437909/permalink/950827674937664/
def parse_c2m(f, filename=""):
header = f.read(10)
assert header[:8] == b'CC2M\x02\x00\x00\x00'
#assert header == b'CC2M\x02\x00\x00\x007\x00'
hash_data = bytearray()
key = b''
while True:
name, data = read_chunk(f)
if name == b'END ':
break
elif name == b'KEY ':
print(filename, "KEY: ", data.hex())
key = data
break
else:
if name == b'LOCK':
print(filename, "LOCK:", ascii(data.decode('latin-1')))
hash_data += data
h = md5sum_cc2(hash_data)
print(filename, "HASH:", h)
if key and key.hex() != h:
print(filename, "FAIL")
def read_chunk(f):
name = f.read(4)
len_bytes = f.read(4)
n = int.from_bytes(len_bytes, byteorder='little')
data = f.read(n)
return name, data
def main():
import sys
for filename in sys.argv[1:]:
with open(filename, 'rb') as f:
parse_c2m(f, filename)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# MD5 implementation
# adapted from https://github.com/M-Taghizadeh/MD5_Algorithm/blob/master/md5_algorithm.py
SV = [0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf,
0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af,
0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e,
0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6,
0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122,
0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039,
0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97,
0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d,
0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391]
def leftCircularShift(k,bits):
bits = bits%32
k = k%(2**32)
upper = (k<<bits)%(2**32)
result = upper | (k>>(32-(bits)))
return(result)
def blockDivide(block, chunks):
result = []
size = len(block)//chunks
for i in range(0, chunks):
result.append( int.from_bytes( block[i*size:(i+1)*size],byteorder="little" ))
return(result)
def F(X,Y,Z):
return( (X&Y)|((~X)&Z) )
def G(X,Y,Z):
return( (X&Z) | (Y&(~Z)) )
def H(X,Y,Z):
return( X^Y^Z )
def I(X,Y,Z):
return( Y^(X|(~Z)) )
def FF(a,b,c,d,M,s,t):
result = b + leftCircularShift( (a+F(b,c,d)+M+t), s)
return(result)
def GG(a,b,c,d,M,s,t):
result = b + leftCircularShift( (a+G(b,c,d)+M+t), s)
return(result)
def HH(a,b,c,d,M,s,t):
result = b + leftCircularShift( (a+H(b,c,d)+M+t), s)
return(result)
def II(a,b,c,d,M,s,t):
result = b + leftCircularShift( (a+I(b,c,d)+M+t), s)
return(result)
def fmt8(num):
return num.to_bytes(4, byteorder='little').hex()
def bitlen( bitstring ):
return(len(bitstring)*8)
def md5sum(msg):
#chaining variables
A = 0x67452301
B = 0xefcdab89
C = 0x98badcfe
D = 0x10325476
return _md5sum(msg, 0, A, B, C, D)
def md5sum_cc2(msg):
#chaining variables
A = 0xf8611bf2
B = 0xee782ca4
C = 0xf30869b2
D = 0xb048f18f
return _md5sum(msg, 0x200, A, B, C, D)
def _md5sum(msg, extra_bits, A, B, C, D):
#First, we pad the message
msgLen = (bitlen(msg)+extra_bits)%(2**64)
msg = msg + b'\x80'
zeroPad = (448 - (msgLen+8)%512)%512
zeroPad //= 8
msg = msg + b'\x00'*zeroPad + msgLen.to_bytes(8,byteorder='little')
iterations = bitlen(msg)//512
#main loop
for i in range(0,iterations):
a = A
b = B
c = C
d = D
block = msg[i*64:(i+1)*64]
M = blockDivide(block,16)
#Rounds
a = FF( a,b,c,d, M[0], 7, SV[0] )
d = FF( d,a,b,c, M[1], 12, SV[1] )
c = FF( c,d,a,b, M[2], 17, SV[2] )
b = FF( b,c,d,a, M[3], 22, SV[3] )
a = FF( a,b,c,d, M[4], 7, SV[4] )
d = FF( d,a,b,c, M[5], 12, SV[5] )
c = FF( c,d,a,b, M[6], 17, SV[6] )
b = FF( b,c,d,a, M[7], 22, SV[7] )
a = FF( a,b,c,d, M[8], 7, SV[8] )
d = FF( d,a,b,c, M[9], 12, SV[9] )
c = FF( c,d,a,b, M[10], 17, SV[10] )
b = FF( b,c,d,a, M[11], 22, SV[11] )
a = FF( a,b,c,d, M[12], 7, SV[12] )
d = FF( d,a,b,c, M[13], 12, SV[13] )
c = FF( c,d,a,b, M[14], 17, SV[14] )
b = FF( b,c,d,a, M[15], 22, SV[15] )
a = GG( a,b,c,d, M[1], 5, SV[16] )
d = GG( d,a,b,c, M[6], 9, SV[17] )
c = GG( c,d,a,b, M[11], 14, SV[18] )
b = GG( b,c,d,a, M[0], 20, SV[19] )
a = GG( a,b,c,d, M[5], 5, SV[20] )
d = GG( d,a,b,c, M[10], 9, SV[21] )
c = GG( c,d,a,b, M[15], 14, SV[22] )
b = GG( b,c,d,a, M[4], 20, SV[23] )
a = GG( a,b,c,d, M[9], 5, SV[24] )
d = GG( d,a,b,c, M[14], 9, SV[25] )
c = GG( c,d,a,b, M[3], 14, SV[26] )
b = GG( b,c,d,a, M[8], 20, SV[27] )
a = GG( a,b,c,d, M[13], 5, SV[28] )
d = GG( d,a,b,c, M[2], 9, SV[29] )
c = GG( c,d,a,b, M[7], 14, SV[30] )
b = GG( b,c,d,a, M[12], 20, SV[31] )
a = HH( a,b,c,d, M[5], 4, SV[32] )
d = HH( d,a,b,c, M[8], 11, SV[33] )
c = HH( c,d,a,b, M[11], 16, SV[34] )
b = HH( b,c,d,a, M[14], 23, SV[35] )
a = HH( a,b,c,d, M[1], 4, SV[36] )
d = HH( d,a,b,c, M[4], 11, SV[37] )
c = HH( c,d,a,b, M[7], 16, SV[38] )
b = HH( b,c,d,a, M[10], 23, SV[39] )
a = HH( a,b,c,d, M[13], 4, SV[40] )
d = HH( d,a,b,c, M[0], 11, SV[41] )
c = HH( c,d,a,b, M[3], 16, SV[42] )
b = HH( b,c,d,a, M[6], 23, SV[43] )
a = HH( a,b,c,d, M[9], 4, SV[44] )
d = HH( d,a,b,c, M[12], 11, SV[45] )
c = HH( c,d,a,b, M[15], 16, SV[46] )
b = HH( b,c,d,a, M[2], 23, SV[47] )
a = II( a,b,c,d, M[0], 6, SV[48] )
d = II( d,a,b,c, M[7], 10, SV[49] )
c = II( c,d,a,b, M[14], 15, SV[50] )
b = II( b,c,d,a, M[5], 21, SV[51] )
a = II( a,b,c,d, M[12], 6, SV[52] )
d = II( d,a,b,c, M[3], 10, SV[53] )
c = II( c,d,a,b, M[10], 15, SV[54] )
b = II( b,c,d,a, M[1], 21, SV[55] )
a = II( a,b,c,d, M[8], 6, SV[56] )
d = II( d,a,b,c, M[15], 10, SV[57] )
c = II( c,d,a,b, M[6], 15, SV[58] )
b = II( b,c,d,a, M[13], 21, SV[59] )
a = II( a,b,c,d, M[4], 6, SV[60] )
d = II( d,a,b,c, M[11], 10, SV[61] )
c = II( c,d,a,b, M[2], 15, SV[62] )
b = II( b,c,d,a, M[9], 21, SV[63] )
A = (A + a)%(2**32)
B = (B + b)%(2**32)
C = (C + c)%(2**32)
D = (D + d)%(2**32)
result = fmt8(A)+fmt8(B)+fmt8(C)+fmt8(D)
return(result)
def _test():
import hashlib
#print( md5sum(b"abcd") , hashlib.md5(b"abcd").hexdigest() )
assert md5sum(b"abcd") == hashlib.md5(b"abcd").hexdigest()
if __name__ == '__main__':
_test()
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment