magical/0_cc2key.txt

## 0_cc2key.txt
Notes on CC2's checksum scheme
==============================

Background
----------

The "KEY " chunk in C2M files stores a 128-bit hash. This
was originally part of a scheme to prevent users from playing
levels not made with the official editor, but was ripped out
before the public release.[1]

The "LOCK" chunk was also part of the scheme.

Most of the official levels still have vestigial "KEY " and
"LOCK" chunks but the editor no longer adds them to new files,
and their contents are ignored.

The "LOCK" value is usually "Custom version for Chuck Sommerville\0",
and an obfuscated version of this string appears in the
executable. However, some levels in the official pack have a
different value for it: usually the name of a member of the
Chip's Challenge community. It seems likely that this was the
name of a person to whom a beta copy was given.


Details
-------

The contents of every chunk between the "LOCK" chunk and the
"KEY " chunk are concatenated and hashed with a modified
version of MD5. This includes the contents of the "LOCK" chunk
but (obviously) not the contents of the "KEY " chunk. The
chunk name and length are NOT included in the hashed data,
just the contents. There is no delimiter between chunks.

The "CC2M" and "VERS" chunks always appear before the "LOCK"
chunk, and their contents are not[*] included in the hash.

[*]: Actually, VERS was added after the checksum scheme was
removed so its behaviour is special. It is not included when
writing the level but it does seem to be when reading.

The "PRPL" (replay) chunk appears after the "KEY " chunk, so
its contents are not included in the hash.


### Modified MD5 ###

The MD5 state is initialized with a nonstandard set of
constants, and the number of bits already processed is
initialized to 0x200 (512 decimal).

    A = 0xf8611bf2
    B = 0xee782ca4
    C = 0xf30869b2
    D = 0xb048f18f
    N = 0x200

These values are probably from an intermediate state reached
by processing one block of data. However, the input data which
led to this state is unknown, and likely infeasible to recover.

### Pseudocode ###

    MD5_init(h, <precomputed values>)
    for each chunk:
        if chunk.name == "CC2M":
            skip
        else if chunk.name == "KEY "
            break
        else
            MD5_Update(h, chunk.data)
    MD5_Final(h)

References
----------

[1]: https://www.facebook.com/groups/175825302437909/permalink/950827674937664/

## 1_cc2key.py

def parse_c2m(f, filename=""):
    header = f.read(10)
    assert header[:8] == b'CC2M\x02\x00\x00\x00'
    #assert header == b'CC2M\x02\x00\x00\x007\x00'

    hash_data = bytearray()
    key = b''
    while True:
        name, data = read_chunk(f)
        if name == b'END ':
            break
        elif name == b'KEY ':
            print(filename, "KEY: ", data.hex())
            key = data
            break
        else:
            if name == b'LOCK':
                print(filename, "LOCK:", ascii(data.decode('latin-1')))
            hash_data += data

    h = md5sum_cc2(hash_data)
    print(filename, "HASH:", h)
    if key and key.hex() != h:
        print(filename, "FAIL")

def read_chunk(f):
    name = f.read(4)
    len_bytes = f.read(4)
    n = int.from_bytes(len_bytes, byteorder='little')
    data = f.read(n)
    return name, data

def main():
    import sys
    for filename in sys.argv[1:]:
        with open(filename, 'rb') as f:
            parse_c2m(f, filename)


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# MD5 implementation
# adapted from https://github.com/M-Taghizadeh/MD5_Algorithm/blob/master/md5_algorithm.py

SV = [0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf,
        0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af,
        0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e,
        0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6,
        0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
        0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122,
        0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039,
        0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97,
        0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d,
        0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391]

def leftCircularShift(k,bits):
    bits = bits%32
    k = k%(2**32)
    upper = (k<<bits)%(2**32)
    result = upper | (k>>(32-(bits)))
    return(result)

def blockDivide(block, chunks):
    result = []
    size = len(block)//chunks
    for i in range(0, chunks):
        result.append( int.from_bytes( block[i*size:(i+1)*size],byteorder="little" ))
    return(result)

def F(X,Y,Z):
    return( (X&Y)|((~X)&Z) )

def G(X,Y,Z):
    return( (X&Z) | (Y&(~Z)) )

def H(X,Y,Z):
    return( X^Y^Z )

def I(X,Y,Z):
    return( Y^(X|(~Z)) )

def FF(a,b,c,d,M,s,t):
    result = b + leftCircularShift( (a+F(b,c,d)+M+t), s)
    return(result)

def GG(a,b,c,d,M,s,t):
    result = b + leftCircularShift( (a+G(b,c,d)+M+t), s)
    return(result)

def HH(a,b,c,d,M,s,t):
    result = b + leftCircularShift( (a+H(b,c,d)+M+t), s)
    return(result)

def II(a,b,c,d,M,s,t):
    result = b + leftCircularShift( (a+I(b,c,d)+M+t), s)
    return(result)

def fmt8(num):
    return num.to_bytes(4, byteorder='little').hex()

def bitlen( bitstring ):
    return(len(bitstring)*8)

def md5sum(msg):
    #chaining variables
    A = 0x67452301
    B = 0xefcdab89
    C = 0x98badcfe
    D = 0x10325476
    return _md5sum(msg, 0, A, B, C, D)

def md5sum_cc2(msg):
    #chaining variables
    A = 0xf8611bf2
    B = 0xee782ca4
    C = 0xf30869b2
    D = 0xb048f18f
    return _md5sum(msg, 0x200, A, B, C, D)

def _md5sum(msg, extra_bits, A, B, C, D):
    #First, we pad the message
    msgLen = (bitlen(msg)+extra_bits)%(2**64)
    msg = msg + b'\x80'
    zeroPad = (448 - (msgLen+8)%512)%512
    zeroPad //= 8
    msg = msg + b'\x00'*zeroPad + msgLen.to_bytes(8,byteorder='little')
    iterations = bitlen(msg)//512
    #main loop
    for i in range(0,iterations):
        a = A
        b = B
        c = C
        d = D
        block = msg[i*64:(i+1)*64]
        M = blockDivide(block,16)
        #Rounds
        a = FF( a,b,c,d, M[0], 7, SV[0] )
        d = FF( d,a,b,c, M[1], 12, SV[1] )
        c = FF( c,d,a,b, M[2], 17, SV[2] )
        b = FF( b,c,d,a, M[3], 22, SV[3] )
        a = FF( a,b,c,d, M[4], 7, SV[4] )
        d = FF( d,a,b,c, M[5], 12, SV[5] )
        c = FF( c,d,a,b, M[6], 17, SV[6] )
        b = FF( b,c,d,a, M[7], 22, SV[7] )
        a = FF( a,b,c,d, M[8], 7, SV[8] )
        d = FF( d,a,b,c, M[9], 12, SV[9] )
        c = FF( c,d,a,b, M[10], 17, SV[10] )
        b = FF( b,c,d,a, M[11], 22, SV[11] )
        a = FF( a,b,c,d, M[12], 7, SV[12] )
        d = FF( d,a,b,c, M[13], 12, SV[13] )
        c = FF( c,d,a,b, M[14], 17, SV[14] )
        b = FF( b,c,d,a, M[15], 22, SV[15] )
        a = GG( a,b,c,d, M[1], 5, SV[16] )
        d = GG( d,a,b,c, M[6], 9, SV[17] )
        c = GG( c,d,a,b, M[11], 14, SV[18] )
        b = GG( b,c,d,a, M[0], 20, SV[19] )
        a = GG( a,b,c,d, M[5], 5, SV[20] )
        d = GG( d,a,b,c, M[10], 9, SV[21] )
        c = GG( c,d,a,b, M[15], 14, SV[22] )
        b = GG( b,c,d,a, M[4], 20, SV[23] )
        a = GG( a,b,c,d, M[9], 5, SV[24] )
        d = GG( d,a,b,c, M[14], 9, SV[25] )
        c = GG( c,d,a,b, M[3], 14, SV[26] )
        b = GG( b,c,d,a, M[8], 20, SV[27] )
        a = GG( a,b,c,d, M[13], 5, SV[28] )
        d = GG( d,a,b,c, M[2], 9, SV[29] )
        c = GG( c,d,a,b, M[7], 14, SV[30] )
        b = GG( b,c,d,a, M[12], 20, SV[31] )
        a = HH( a,b,c,d, M[5], 4, SV[32] )
        d = HH( d,a,b,c, M[8], 11, SV[33] )
        c = HH( c,d,a,b, M[11], 16, SV[34] )
        b = HH( b,c,d,a, M[14], 23, SV[35] )
        a = HH( a,b,c,d, M[1], 4, SV[36] )
        d = HH( d,a,b,c, M[4], 11, SV[37] )
        c = HH( c,d,a,b, M[7], 16, SV[38] )
        b = HH( b,c,d,a, M[10], 23, SV[39] )
        a = HH( a,b,c,d, M[13], 4, SV[40] )
        d = HH( d,a,b,c, M[0], 11, SV[41] )
        c = HH( c,d,a,b, M[3], 16, SV[42] )
        b = HH( b,c,d,a, M[6], 23, SV[43] )
        a = HH( a,b,c,d, M[9], 4, SV[44] )
        d = HH( d,a,b,c, M[12], 11, SV[45] )
        c = HH( c,d,a,b, M[15], 16, SV[46] )
        b = HH( b,c,d,a, M[2], 23, SV[47] )
        a = II( a,b,c,d, M[0], 6, SV[48] )
        d = II( d,a,b,c, M[7], 10, SV[49] )
        c = II( c,d,a,b, M[14], 15, SV[50] )
        b = II( b,c,d,a, M[5], 21, SV[51] )
        a = II( a,b,c,d, M[12], 6, SV[52] )
        d = II( d,a,b,c, M[3], 10, SV[53] )
        c = II( c,d,a,b, M[10], 15, SV[54] )
        b = II( b,c,d,a, M[1], 21, SV[55] )
        a = II( a,b,c,d, M[8], 6, SV[56] )
        d = II( d,a,b,c, M[15], 10, SV[57] )
        c = II( c,d,a,b, M[6], 15, SV[58] )
        b = II( b,c,d,a, M[13], 21, SV[59] )
        a = II( a,b,c,d, M[4], 6, SV[60] )
        d = II( d,a,b,c, M[11], 10, SV[61] )
        c = II( c,d,a,b, M[2], 15, SV[62] )
        b = II( b,c,d,a, M[9], 21, SV[63] )
        A = (A + a)%(2**32)
        B = (B + b)%(2**32)
        C = (C + c)%(2**32)
        D = (D + d)%(2**32)
    result = fmt8(A)+fmt8(B)+fmt8(C)+fmt8(D)
    return(result)

def _test():
    import hashlib
    #print( md5sum(b"abcd") ,  hashlib.md5(b"abcd").hexdigest() )
    assert md5sum(b"abcd") == hashlib.md5(b"abcd").hexdigest()

if __name__ == '__main__':
    _test()
    main()
	Notes on CC2's checksum scheme
	==============================

	Background
	----------

	The "KEY " chunk in C2M files stores a 128-bit hash. This
	was originally part of a scheme to prevent users from playing
	levels not made with the official editor, but was ripped out
	before the public release.[1]

	The "LOCK" chunk was also part of the scheme.

	Most of the official levels still have vestigial "KEY " and
	"LOCK" chunks but the editor no longer adds them to new files,
	and their contents are ignored.

	The "LOCK" value is usually "Custom version for Chuck Sommerville\0",
	and an obfuscated version of this string appears in the
	executable. However, some levels in the official pack have a
	different value for it: usually the name of a member of the
	Chip's Challenge community. It seems likely that this was the
	name of a person to whom a beta copy was given.


	Details
	-------

	The contents of every chunk between the "LOCK" chunk and the
	"KEY " chunk are concatenated and hashed with a modified
	version of MD5. This includes the contents of the "LOCK" chunk
	but (obviously) not the contents of the "KEY " chunk. The
	chunk name and length are NOT included in the hashed data,
	just the contents. There is no delimiter between chunks.

	The "CC2M" and "VERS" chunks always appear before the "LOCK"
	chunk, and their contents are not[*] included in the hash.

	[*]: Actually, VERS was added after the checksum scheme was
	removed so its behaviour is special. It is not included when
	writing the level but it does seem to be when reading.

	The "PRPL" (replay) chunk appears after the "KEY " chunk, so
	its contents are not included in the hash.


	### Modified MD5 ###

	The MD5 state is initialized with a nonstandard set of
	constants, and the number of bits already processed is
	initialized to 0x200 (512 decimal).

	A = 0xf8611bf2
	B = 0xee782ca4
	C = 0xf30869b2
	D = 0xb048f18f
	N = 0x200

	These values are probably from an intermediate state reached
	by processing one block of data. However, the input data which
	led to this state is unknown, and likely infeasible to recover.

	### Pseudocode ###

	MD5_init(h, <precomputed values>)
	for each chunk:
	if chunk.name == "CC2M":
	skip
	else if chunk.name == "KEY "
	break
	else
	MD5_Update(h, chunk.data)
	MD5_Final(h)

	References
	----------

	[1]: https://www.facebook.com/groups/175825302437909/permalink/950827674937664/

	def parse_c2m(f, filename=""):
	header = f.read(10)
	assert header[:8] == b'CC2M\x02\x00\x00\x00'
	#assert header == b'CC2M\x02\x00\x00\x007\x00'

	hash_data = bytearray()
	key = b''
	while True:
	name, data = read_chunk(f)
	if name == b'END ':
	break
	elif name == b'KEY ':
	print(filename, "KEY: ", data.hex())
	key = data
	break
	else:
	if name == b'LOCK':
	print(filename, "LOCK:", ascii(data.decode('latin-1')))
	hash_data += data

	h = md5sum_cc2(hash_data)
	print(filename, "HASH:", h)
	if key and key.hex() != h:
	print(filename, "FAIL")

	def read_chunk(f):
	name = f.read(4)
	len_bytes = f.read(4)
	n = int.from_bytes(len_bytes, byteorder='little')
	data = f.read(n)
	return name, data

	def main():
	import sys
	for filename in sys.argv[1:]:
	with open(filename, 'rb') as f:
	parse_c2m(f, filename)


	#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
	# MD5 implementation
	# adapted from https://github.com/M-Taghizadeh/MD5_Algorithm/blob/master/md5_algorithm.py

	SV = [0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf,
	0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af,
	0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e,
	0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
	0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6,
	0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
	0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122,
	0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039,
	0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97,
	0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d,
	0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391]

	def leftCircularShift(k,bits):
	bits = bits%32
	k = k%(2**32)
	upper = (k<<bits)%(2**32)
	result = upper \| (k>>(32-(bits)))
	return(result)

	def blockDivide(block, chunks):
	result = []
	size = len(block)//chunks
	for i in range(0, chunks):
	result.append( int.from_bytes( block[isize:(i+1)size],byteorder="little" ))
	return(result)

	def F(X,Y,Z):
	return( (X&Y)\|((~X)&Z) )

	def G(X,Y,Z):
	return( (X&Z) \| (Y&(~Z)) )

	def H(X,Y,Z):
	return( X^Y^Z )

	def I(X,Y,Z):
	return( Y^(X\|(~Z)) )

	def FF(a,b,c,d,M,s,t):
	result = b + leftCircularShift( (a+F(b,c,d)+M+t), s)
	return(result)

	def GG(a,b,c,d,M,s,t):
	result = b + leftCircularShift( (a+G(b,c,d)+M+t), s)
	return(result)

	def HH(a,b,c,d,M,s,t):
	result = b + leftCircularShift( (a+H(b,c,d)+M+t), s)
	return(result)

	def II(a,b,c,d,M,s,t):
	result = b + leftCircularShift( (a+I(b,c,d)+M+t), s)
	return(result)

	def fmt8(num):
	return num.to_bytes(4, byteorder='little').hex()

	def bitlen( bitstring ):
	return(len(bitstring)*8)

	def md5sum(msg):
	#chaining variables
	A = 0x67452301
	B = 0xefcdab89
	C = 0x98badcfe
	D = 0x10325476
	return _md5sum(msg, 0, A, B, C, D)

	def md5sum_cc2(msg):
	#chaining variables
	A = 0xf8611bf2
	B = 0xee782ca4
	C = 0xf30869b2
	D = 0xb048f18f
	return _md5sum(msg, 0x200, A, B, C, D)

	def _md5sum(msg, extra_bits, A, B, C, D):
	#First, we pad the message
	msgLen = (bitlen(msg)+extra_bits)%(2**64)
	msg = msg + b'\x80'
	zeroPad = (448 - (msgLen+8)%512)%512
	zeroPad //= 8
	msg = msg + b'\x00'*zeroPad + msgLen.to_bytes(8,byteorder='little')
	iterations = bitlen(msg)//512
	#main loop
	for i in range(0,iterations):
	a = A
	b = B
	c = C
	d = D
	block = msg[i64:(i+1)64]
	M = blockDivide(block,16)
	#Rounds
	a = FF( a,b,c,d, M[0], 7, SV[0] )
	d = FF( d,a,b,c, M[1], 12, SV[1] )
	c = FF( c,d,a,b, M[2], 17, SV[2] )
	b = FF( b,c,d,a, M[3], 22, SV[3] )
	a = FF( a,b,c,d, M[4], 7, SV[4] )
	d = FF( d,a,b,c, M[5], 12, SV[5] )
	c = FF( c,d,a,b, M[6], 17, SV[6] )
	b = FF( b,c,d,a, M[7], 22, SV[7] )
	a = FF( a,b,c,d, M[8], 7, SV[8] )
	d = FF( d,a,b,c, M[9], 12, SV[9] )
	c = FF( c,d,a,b, M[10], 17, SV[10] )
	b = FF( b,c,d,a, M[11], 22, SV[11] )
	a = FF( a,b,c,d, M[12], 7, SV[12] )
	d = FF( d,a,b,c, M[13], 12, SV[13] )
	c = FF( c,d,a,b, M[14], 17, SV[14] )
	b = FF( b,c,d,a, M[15], 22, SV[15] )
	a = GG( a,b,c,d, M[1], 5, SV[16] )
	d = GG( d,a,b,c, M[6], 9, SV[17] )
	c = GG( c,d,a,b, M[11], 14, SV[18] )
	b = GG( b,c,d,a, M[0], 20, SV[19] )
	a = GG( a,b,c,d, M[5], 5, SV[20] )
	d = GG( d,a,b,c, M[10], 9, SV[21] )
	c = GG( c,d,a,b, M[15], 14, SV[22] )
	b = GG( b,c,d,a, M[4], 20, SV[23] )
	a = GG( a,b,c,d, M[9], 5, SV[24] )
	d = GG( d,a,b,c, M[14], 9, SV[25] )
	c = GG( c,d,a,b, M[3], 14, SV[26] )
	b = GG( b,c,d,a, M[8], 20, SV[27] )
	a = GG( a,b,c,d, M[13], 5, SV[28] )
	d = GG( d,a,b,c, M[2], 9, SV[29] )
	c = GG( c,d,a,b, M[7], 14, SV[30] )
	b = GG( b,c,d,a, M[12], 20, SV[31] )
	a = HH( a,b,c,d, M[5], 4, SV[32] )
	d = HH( d,a,b,c, M[8], 11, SV[33] )
	c = HH( c,d,a,b, M[11], 16, SV[34] )
	b = HH( b,c,d,a, M[14], 23, SV[35] )
	a = HH( a,b,c,d, M[1], 4, SV[36] )
	d = HH( d,a,b,c, M[4], 11, SV[37] )
	c = HH( c,d,a,b, M[7], 16, SV[38] )
	b = HH( b,c,d,a, M[10], 23, SV[39] )
	a = HH( a,b,c,d, M[13], 4, SV[40] )
	d = HH( d,a,b,c, M[0], 11, SV[41] )
	c = HH( c,d,a,b, M[3], 16, SV[42] )
	b = HH( b,c,d,a, M[6], 23, SV[43] )
	a = HH( a,b,c,d, M[9], 4, SV[44] )
	d = HH( d,a,b,c, M[12], 11, SV[45] )
	c = HH( c,d,a,b, M[15], 16, SV[46] )
	b = HH( b,c,d,a, M[2], 23, SV[47] )
	a = II( a,b,c,d, M[0], 6, SV[48] )
	d = II( d,a,b,c, M[7], 10, SV[49] )
	c = II( c,d,a,b, M[14], 15, SV[50] )
	b = II( b,c,d,a, M[5], 21, SV[51] )
	a = II( a,b,c,d, M[12], 6, SV[52] )
	d = II( d,a,b,c, M[3], 10, SV[53] )
	c = II( c,d,a,b, M[10], 15, SV[54] )
	b = II( b,c,d,a, M[1], 21, SV[55] )
	a = II( a,b,c,d, M[8], 6, SV[56] )
	d = II( d,a,b,c, M[15], 10, SV[57] )
	c = II( c,d,a,b, M[6], 15, SV[58] )
	b = II( b,c,d,a, M[13], 21, SV[59] )
	a = II( a,b,c,d, M[4], 6, SV[60] )
	d = II( d,a,b,c, M[11], 10, SV[61] )
	c = II( c,d,a,b, M[2], 15, SV[62] )
	b = II( b,c,d,a, M[9], 21, SV[63] )
	A = (A + a)%(2**32)
	B = (B + b)%(2**32)
	C = (C + c)%(2**32)
	D = (D + d)%(2**32)
	result = fmt8(A)+fmt8(B)+fmt8(C)+fmt8(D)
	return(result)

	def _test():
	import hashlib
	#print( md5sum(b"abcd") , hashlib.md5(b"abcd").hexdigest() )
	assert md5sum(b"abcd") == hashlib.md5(b"abcd").hexdigest()

	if __name__ == '__main__':
	_test()
	main()