Last active
October 18, 2020 08:44
-
-
Save magical/c28c026cf542bc2f7ffba1a334e13f8b to your computer and use it in GitHub Desktop.
Notes on CC2's checksum scheme
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Notes on CC2's checksum scheme | |
============================== | |
Background | |
---------- | |
The "KEY " chunk in C2M files stores a 128-bit hash. This | |
was originally part of a scheme to prevent users from playing | |
levels not made with the official editor, but was ripped out | |
before the public release.[1] | |
The "LOCK" chunk was also part of the scheme. | |
Most of the official levels still have vestigial "KEY " and | |
"LOCK" chunks but the editor no longer adds them to new files, | |
and their contents are ignored. | |
The "LOCK" value is usually "Custom version for Chuck Sommerville\0", | |
and an obfuscated version of this string appears in the | |
executable. However, some levels in the official pack have a | |
different value for it: usually the name of a member of the | |
Chip's Challenge community. It seems likely that this was the | |
name of a person to whom a beta copy was given. | |
Details | |
------- | |
The contents of every chunk between the "LOCK" chunk and the | |
"KEY " chunk are concatenated and hashed with a modified | |
version of MD5. This includes the contents of the "LOCK" chunk | |
but (obviously) not the contents of the "KEY " chunk. The | |
chunk name and length are NOT included in the hashed data, | |
just the contents. There is no delimiter between chunks. | |
The "CC2M" and "VERS" chunks always appear before the "LOCK" | |
chunk, and their contents are not[*] included in the hash. | |
[*]: Actually, VERS was added after the checksum scheme was | |
removed so its behaviour is special. It is not included when | |
writing the level but it does seem to be when reading. | |
The "PRPL" (replay) chunk appears after the "KEY " chunk, so | |
its contents are not included in the hash. | |
### Modified MD5 ### | |
The MD5 state is initialized with a nonstandard set of | |
constants, and the number of bits already processed is | |
initialized to 0x200 (512 decimal). | |
A = 0xf8611bf2 | |
B = 0xee782ca4 | |
C = 0xf30869b2 | |
D = 0xb048f18f | |
N = 0x200 | |
These values are probably from an intermediate state reached | |
by processing one block of data. However, the input data which | |
led to this state is unknown, and likely infeasible to recover. | |
### Pseudocode ### | |
MD5_init(h, <precomputed values>) | |
for each chunk: | |
if chunk.name == "CC2M": | |
skip | |
else if chunk.name == "KEY " | |
break | |
else | |
MD5_Update(h, chunk.data) | |
MD5_Final(h) | |
References | |
---------- | |
[1]: https://www.facebook.com/groups/175825302437909/permalink/950827674937664/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse_c2m(f, filename=""): | |
header = f.read(10) | |
assert header[:8] == b'CC2M\x02\x00\x00\x00' | |
#assert header == b'CC2M\x02\x00\x00\x007\x00' | |
hash_data = bytearray() | |
key = b'' | |
while True: | |
name, data = read_chunk(f) | |
if name == b'END ': | |
break | |
elif name == b'KEY ': | |
print(filename, "KEY: ", data.hex()) | |
key = data | |
break | |
else: | |
if name == b'LOCK': | |
print(filename, "LOCK:", ascii(data.decode('latin-1'))) | |
hash_data += data | |
h = md5sum_cc2(hash_data) | |
print(filename, "HASH:", h) | |
if key and key.hex() != h: | |
print(filename, "FAIL") | |
def read_chunk(f): | |
name = f.read(4) | |
len_bytes = f.read(4) | |
n = int.from_bytes(len_bytes, byteorder='little') | |
data = f.read(n) | |
return name, data | |
def main(): | |
import sys | |
for filename in sys.argv[1:]: | |
with open(filename, 'rb') as f: | |
parse_c2m(f, filename) | |
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
# MD5 implementation | |
# adapted from https://github.com/M-Taghizadeh/MD5_Algorithm/blob/master/md5_algorithm.py | |
SV = [0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, | |
0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, | |
0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, | |
0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, | |
0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, | |
0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, | |
0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, | |
0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, | |
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039, | |
0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, | |
0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, | |
0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, | |
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391] | |
def leftCircularShift(k,bits): | |
bits = bits%32 | |
k = k%(2**32) | |
upper = (k<<bits)%(2**32) | |
result = upper | (k>>(32-(bits))) | |
return(result) | |
def blockDivide(block, chunks): | |
result = [] | |
size = len(block)//chunks | |
for i in range(0, chunks): | |
result.append( int.from_bytes( block[i*size:(i+1)*size],byteorder="little" )) | |
return(result) | |
def F(X,Y,Z): | |
return( (X&Y)|((~X)&Z) ) | |
def G(X,Y,Z): | |
return( (X&Z) | (Y&(~Z)) ) | |
def H(X,Y,Z): | |
return( X^Y^Z ) | |
def I(X,Y,Z): | |
return( Y^(X|(~Z)) ) | |
def FF(a,b,c,d,M,s,t): | |
result = b + leftCircularShift( (a+F(b,c,d)+M+t), s) | |
return(result) | |
def GG(a,b,c,d,M,s,t): | |
result = b + leftCircularShift( (a+G(b,c,d)+M+t), s) | |
return(result) | |
def HH(a,b,c,d,M,s,t): | |
result = b + leftCircularShift( (a+H(b,c,d)+M+t), s) | |
return(result) | |
def II(a,b,c,d,M,s,t): | |
result = b + leftCircularShift( (a+I(b,c,d)+M+t), s) | |
return(result) | |
def fmt8(num): | |
return num.to_bytes(4, byteorder='little').hex() | |
def bitlen( bitstring ): | |
return(len(bitstring)*8) | |
def md5sum(msg): | |
#chaining variables | |
A = 0x67452301 | |
B = 0xefcdab89 | |
C = 0x98badcfe | |
D = 0x10325476 | |
return _md5sum(msg, 0, A, B, C, D) | |
def md5sum_cc2(msg): | |
#chaining variables | |
A = 0xf8611bf2 | |
B = 0xee782ca4 | |
C = 0xf30869b2 | |
D = 0xb048f18f | |
return _md5sum(msg, 0x200, A, B, C, D) | |
def _md5sum(msg, extra_bits, A, B, C, D): | |
#First, we pad the message | |
msgLen = (bitlen(msg)+extra_bits)%(2**64) | |
msg = msg + b'\x80' | |
zeroPad = (448 - (msgLen+8)%512)%512 | |
zeroPad //= 8 | |
msg = msg + b'\x00'*zeroPad + msgLen.to_bytes(8,byteorder='little') | |
iterations = bitlen(msg)//512 | |
#main loop | |
for i in range(0,iterations): | |
a = A | |
b = B | |
c = C | |
d = D | |
block = msg[i*64:(i+1)*64] | |
M = blockDivide(block,16) | |
#Rounds | |
a = FF( a,b,c,d, M[0], 7, SV[0] ) | |
d = FF( d,a,b,c, M[1], 12, SV[1] ) | |
c = FF( c,d,a,b, M[2], 17, SV[2] ) | |
b = FF( b,c,d,a, M[3], 22, SV[3] ) | |
a = FF( a,b,c,d, M[4], 7, SV[4] ) | |
d = FF( d,a,b,c, M[5], 12, SV[5] ) | |
c = FF( c,d,a,b, M[6], 17, SV[6] ) | |
b = FF( b,c,d,a, M[7], 22, SV[7] ) | |
a = FF( a,b,c,d, M[8], 7, SV[8] ) | |
d = FF( d,a,b,c, M[9], 12, SV[9] ) | |
c = FF( c,d,a,b, M[10], 17, SV[10] ) | |
b = FF( b,c,d,a, M[11], 22, SV[11] ) | |
a = FF( a,b,c,d, M[12], 7, SV[12] ) | |
d = FF( d,a,b,c, M[13], 12, SV[13] ) | |
c = FF( c,d,a,b, M[14], 17, SV[14] ) | |
b = FF( b,c,d,a, M[15], 22, SV[15] ) | |
a = GG( a,b,c,d, M[1], 5, SV[16] ) | |
d = GG( d,a,b,c, M[6], 9, SV[17] ) | |
c = GG( c,d,a,b, M[11], 14, SV[18] ) | |
b = GG( b,c,d,a, M[0], 20, SV[19] ) | |
a = GG( a,b,c,d, M[5], 5, SV[20] ) | |
d = GG( d,a,b,c, M[10], 9, SV[21] ) | |
c = GG( c,d,a,b, M[15], 14, SV[22] ) | |
b = GG( b,c,d,a, M[4], 20, SV[23] ) | |
a = GG( a,b,c,d, M[9], 5, SV[24] ) | |
d = GG( d,a,b,c, M[14], 9, SV[25] ) | |
c = GG( c,d,a,b, M[3], 14, SV[26] ) | |
b = GG( b,c,d,a, M[8], 20, SV[27] ) | |
a = GG( a,b,c,d, M[13], 5, SV[28] ) | |
d = GG( d,a,b,c, M[2], 9, SV[29] ) | |
c = GG( c,d,a,b, M[7], 14, SV[30] ) | |
b = GG( b,c,d,a, M[12], 20, SV[31] ) | |
a = HH( a,b,c,d, M[5], 4, SV[32] ) | |
d = HH( d,a,b,c, M[8], 11, SV[33] ) | |
c = HH( c,d,a,b, M[11], 16, SV[34] ) | |
b = HH( b,c,d,a, M[14], 23, SV[35] ) | |
a = HH( a,b,c,d, M[1], 4, SV[36] ) | |
d = HH( d,a,b,c, M[4], 11, SV[37] ) | |
c = HH( c,d,a,b, M[7], 16, SV[38] ) | |
b = HH( b,c,d,a, M[10], 23, SV[39] ) | |
a = HH( a,b,c,d, M[13], 4, SV[40] ) | |
d = HH( d,a,b,c, M[0], 11, SV[41] ) | |
c = HH( c,d,a,b, M[3], 16, SV[42] ) | |
b = HH( b,c,d,a, M[6], 23, SV[43] ) | |
a = HH( a,b,c,d, M[9], 4, SV[44] ) | |
d = HH( d,a,b,c, M[12], 11, SV[45] ) | |
c = HH( c,d,a,b, M[15], 16, SV[46] ) | |
b = HH( b,c,d,a, M[2], 23, SV[47] ) | |
a = II( a,b,c,d, M[0], 6, SV[48] ) | |
d = II( d,a,b,c, M[7], 10, SV[49] ) | |
c = II( c,d,a,b, M[14], 15, SV[50] ) | |
b = II( b,c,d,a, M[5], 21, SV[51] ) | |
a = II( a,b,c,d, M[12], 6, SV[52] ) | |
d = II( d,a,b,c, M[3], 10, SV[53] ) | |
c = II( c,d,a,b, M[10], 15, SV[54] ) | |
b = II( b,c,d,a, M[1], 21, SV[55] ) | |
a = II( a,b,c,d, M[8], 6, SV[56] ) | |
d = II( d,a,b,c, M[15], 10, SV[57] ) | |
c = II( c,d,a,b, M[6], 15, SV[58] ) | |
b = II( b,c,d,a, M[13], 21, SV[59] ) | |
a = II( a,b,c,d, M[4], 6, SV[60] ) | |
d = II( d,a,b,c, M[11], 10, SV[61] ) | |
c = II( c,d,a,b, M[2], 15, SV[62] ) | |
b = II( b,c,d,a, M[9], 21, SV[63] ) | |
A = (A + a)%(2**32) | |
B = (B + b)%(2**32) | |
C = (C + c)%(2**32) | |
D = (D + d)%(2**32) | |
result = fmt8(A)+fmt8(B)+fmt8(C)+fmt8(D) | |
return(result) | |
def _test(): | |
import hashlib | |
#print( md5sum(b"abcd") , hashlib.md5(b"abcd").hexdigest() ) | |
assert md5sum(b"abcd") == hashlib.md5(b"abcd").hexdigest() | |
if __name__ == '__main__': | |
_test() | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment