rgov/ciphcrack.py

## ciphcrack.py
# Cracker for David Lougheed's Ciphode 0.06, by rgov.
# See www.davidlougheed.com/ciphode for details.

'''
printable is a set of all of the characters that can be easily typed. We can
use it to test whether a numkey decrypts the message into gibberish or not.
'''
from string import printable

def isprintable(string):
    for c in string:
        if c not in printable: return False
    return True

'''
Randomness isn't needed to crack the cipher, but this creates more interesting
looking passcodes.
'''
from random import shuffle

'''
A reduced mapping of precode values. It's reduced in the sense that some
characters map to the same precode. I generated this from ci_key.py.
'''
precodes = {
    27: 'A', 28: 'B', 29: 'C', 30: 'D', 31: 'E', 32: 'F', 33: 'G', 34: 'H',
    35: 'I', 36: 'J', 37: 'K', 38: 'L', 39: 'M', 40: 'N', 41: 'O', 42: 'P',
    43: 'Q', 44: 'R', 45: 'S', 46: 'T', 47: 'U', 48: 'V', 49: 'W', 50: 'X',
    51: 'Y', 52: 'Z', 53: 'z', 90: '0'
}

'''
This is my version of decode() that lets me directly pass the value of numkey
instead of having it derived from the keycode.
'''
def decode(numkey, msg):
    keydigits = [ int(c) for c in str(numkey) ]
    return ''.join([ chr((c >> keydigits[i  % len(keydigits)]) / numkey) \
                     for i, c in enumerate(msg) ])

'''
A simple, recursive subset sum solver. It finds a subset of precodes which sum
up to the input value. For our purposes, this means it converts a numkey back
into a possible keycode.

For example if numkey = 85, this function finds that 85 = 27 + 29 + 29 -> ACC
even if the original keycode was BBC -> 28 + 28 + 29.

For speed's sake, this just returns the first one it finds. But to make it
interesting, I added some randomization for better-looking passcodes.

For more info, see http://en.wikipedia.org/wiki/Subset_sum_problem.
'''
def subsetsolve(n, subset = [ ]):
    # When we reach 0, we know that we have a valid sum!
    if n == 0: return subset

    # Find all precodes that are less than or equal to n.
    goodcodes = [ p for p in precodes.iterkeys() if p <= n ]
    shuffle(goodcodes)

    # Recursively try to solve smaller problems.
    for precode in goodcodes:
        solution = subsetsolve(n - precode, subset + [ precode ])
        if solution is not None: return solution

    # We return None if we couldn't find a solution.
    return None

'''
The greatest common divisor algorithm finds the greatest number g such that
g divides both a and b without a remainder.

Further, it is true that any d which divides both a and b without a remainder
*must also* divide g without a reminder.

Since the numkey divides every part of the ciphertext, then numkey divides the
greatest common divisor of the parts of the ciphertext. And so numkey <= g.

http://en.wikipedia.org/wiki/Greatest_common_divisor
'''
def gcd(a, b):
    if a > b: a, b = b, a
    while a > 0: a, b = (b % a), a
    return b


if __name__ == '__main__':
    # Here's a sample ciphertext we'll try to crack.
    message = '''
    167904 72504 8547840 279840 65508 2605056 282384 69960 2605056 300192
    64236 9280512 292560 66780 9036288 279840 20352 3907584 117024 30528
    4477440 83952
    '''

    # We split the message into a list of numbers.
    msgparts = [ int(x) for x in message.split() ]

    # Compute bounds on numkey. At the very minimum it will be a single-
    # character keycode, 'A'. At most, it will be the gcd of all of the
    # message parts.
    minimum = min(precodes.iterkeys())
    maximum = reduce(gcd, msgparts)

    # Now try each numkey in this range.
    print '[*] Trying numkeys between %i and %i' % (minimum, maximum)
    for numkey in xrange(minimum, maximum + 1):
        # Optimization: We only need to try numkeys that divide the gcd, since
        # otherwise when we divide by numkey there will be a remainder.
        if maximum % numkey != 0: continue

        # Try to decode the message using numkey. Note that Python might error
        # if it's the wrong one, so we catch the error and skip this numkey.
        try:
            decoded = decode(numkey, msgparts)
        except ValueError:
            continue

        # Heuristic: Skip numkeys that don't decode to printable characters.
        if not isprintable(decoded): continue

        # Find a subset of precodes that sum up to numkey. Note that some
        # numkeys, like 24, cannot possibly be generated by a keycode. This
        # means that our numkey is invalid.
        subset = subsetsolve(numkey)
        if subset is None: continue

        # Now we can map the precodes in our subset back into the characters
        # of the keycode.
        keycode = ''.join([ precodes[x] for x in subset ])

        print '[+] Keycode "%s" (numkey %i) -> %s' % (keycode, numkey, decoded)
	# Cracker for David Lougheed's Ciphode 0.06, by rgov.
	# See www.davidlougheed.com/ciphode for details.

	'''
	printable is a set of all of the characters that can be easily typed. We can
	use it to test whether a numkey decrypts the message into gibberish or not.
	'''
	from string import printable

	def isprintable(string):
	for c in string:
	if c not in printable: return False
	return True

	'''
	Randomness isn't needed to crack the cipher, but this creates more interesting
	looking passcodes.
	'''
	from random import shuffle

	'''
	A reduced mapping of precode values. It's reduced in the sense that some
	characters map to the same precode. I generated this from ci_key.py.
	'''
	precodes = {
	27: 'A', 28: 'B', 29: 'C', 30: 'D', 31: 'E', 32: 'F', 33: 'G', 34: 'H',
	35: 'I', 36: 'J', 37: 'K', 38: 'L', 39: 'M', 40: 'N', 41: 'O', 42: 'P',
	43: 'Q', 44: 'R', 45: 'S', 46: 'T', 47: 'U', 48: 'V', 49: 'W', 50: 'X',
	51: 'Y', 52: 'Z', 53: 'z', 90: '0'
	}

	'''
	This is my version of decode() that lets me directly pass the value of numkey
	instead of having it derived from the keycode.
	'''
	def decode(numkey, msg):
	keydigits = [ int(c) for c in str(numkey) ]
	return ''.join([ chr((c >> keydigits[i % len(keydigits)]) / numkey) \
	for i, c in enumerate(msg) ])

	'''
	A simple, recursive subset sum solver. It finds a subset of precodes which sum
	up to the input value. For our purposes, this means it converts a numkey back
	into a possible keycode.

	For example if numkey = 85, this function finds that 85 = 27 + 29 + 29 -> ACC
	even if the original keycode was BBC -> 28 + 28 + 29.

	For speed's sake, this just returns the first one it finds. But to make it
	interesting, I added some randomization for better-looking passcodes.

	For more info, see http://en.wikipedia.org/wiki/Subset_sum_problem.
	'''
	def subsetsolve(n, subset = [ ]):
	# When we reach 0, we know that we have a valid sum!
	if n == 0: return subset

	# Find all precodes that are less than or equal to n.
	goodcodes = [ p for p in precodes.iterkeys() if p <= n ]
	shuffle(goodcodes)

	# Recursively try to solve smaller problems.
	for precode in goodcodes:
	solution = subsetsolve(n - precode, subset + [ precode ])
	if solution is not None: return solution

	# We return None if we couldn't find a solution.
	return None

	'''
	The greatest common divisor algorithm finds the greatest number g such that
	g divides both a and b without a remainder.

	Further, it is true that any d which divides both a and b without a remainder
	must also divide g without a reminder.

	Since the numkey divides every part of the ciphertext, then numkey divides the
	greatest common divisor of the parts of the ciphertext. And so numkey <= g.

	http://en.wikipedia.org/wiki/Greatest_common_divisor
	'''
	def gcd(a, b):
	if a > b: a, b = b, a
	while a > 0: a, b = (b % a), a
	return b


	if __name__ == '__main__':
	# Here's a sample ciphertext we'll try to crack.
	message = '''
	167904 72504 8547840 279840 65508 2605056 282384 69960 2605056 300192
	64236 9280512 292560 66780 9036288 279840 20352 3907584 117024 30528
	4477440 83952
	'''

	# We split the message into a list of numbers.
	msgparts = [ int(x) for x in message.split() ]

	# Compute bounds on numkey. At the very minimum it will be a single-
	# character keycode, 'A'. At most, it will be the gcd of all of the
	# message parts.
	minimum = min(precodes.iterkeys())
	maximum = reduce(gcd, msgparts)

	# Now try each numkey in this range.
	print '[*] Trying numkeys between %i and %i' % (minimum, maximum)
	for numkey in xrange(minimum, maximum + 1):
	# Optimization: We only need to try numkeys that divide the gcd, since
	# otherwise when we divide by numkey there will be a remainder.
	if maximum % numkey != 0: continue

	# Try to decode the message using numkey. Note that Python might error
	# if it's the wrong one, so we catch the error and skip this numkey.
	try:
	decoded = decode(numkey, msgparts)
	except ValueError:
	continue

	# Heuristic: Skip numkeys that don't decode to printable characters.
	if not isprintable(decoded): continue

	# Find a subset of precodes that sum up to numkey. Note that some
	# numkeys, like 24, cannot possibly be generated by a keycode. This
	# means that our numkey is invalid.
	subset = subsetsolve(numkey)
	if subset is None: continue

	# Now we can map the precodes in our subset back into the characters
	# of the keycode.
	keycode = ''.join([ precodes[x] for x in subset ])

	print '[+] Keycode "%s" (numkey %i) -> %s' % (keycode, numkey, decoded)