decidedlygray/crypto_challenge_20161101.py

## crypto_challenge_20161101.py
"""
	Break Simple Substitution Cipher (automated cryptanalysis)
	---
	Use a hill climbing algo to maximize fitness score accross iterations of keys (mutate, check, rinse/repeat).
	Fitness is determined by comparing quadgram statistics of decrypted text against the engilish quadgrams.
	---
	REQUIREMENTS
	Install pycipher for easily applying key to ciphertext: pip install pycipher
	Also need ngram_score module: http://practicalcryptography.com/media/cryptanalysis/files/ngram_score_1.py
	As well as english_quadgrams: http://practicalcryptography.com/media/cryptanalysis/files/english_quadgrams.txt.zip
	---
	Original Author: PracticalCryptography.com
	Original Resource: http://practicalcryptography.com/cryptanalysis/stochastic-searching/cryptanalysis-s
							imple-substitution-cipher/
	Methedology, readme for a good overall picture: http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/
	Additional info on subst ciphers: http://practicalcryptography.com/ciphers/simple-substitution-cipher/
	Additional info on quadgrams: http://practicalcryptography.com/cryptanalysis/text-characterisation/qu
							adgrams/#a-python-implementation
	Modified by @decidedlygray to include spacing of decrypt
"""

from pycipher import SimpleSubstitution as SimpleSub # cat pycipher>requirements.txt
import random
import re
from ngram_score import ngram_score
fitness = ngram_score('english_quadgrams.txt') # load our quadgram statistics

ctext='QRBQBTZ"VREUREMD"JOUOPPBPQNQRB.NKCNTPBMDHEURPELQENMOTFEM2005.' # NSA CRYPTO CHALLENGE 10/31/2016
ctext = re.sub('[^A-Z]','',ctext.upper())

# thanks stackoverflow :) this makes decrypts easier to mentally parse
def add_spacing(seqRNA, *breakPoint):
    seqRNAList = []
    noOfBreakPoints = len(breakPoint)
    for breakPt in range(noOfBreakPoints):
        for index in breakPoint:
            seqRNAList.append(seqRNA[:index])
            seqRNA = seqRNA[index:]
        break
    return seqRNAList

maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
maxscore = -99e9
parentscore,parentkey = maxscore,maxkey[:]
print "Substitution Cipher solver, you may have to wait several iterations"
print "for the correct result. Press ctrl+c to exit program."
# keep going until we are killed by the user
i = 0
while 1:
    i = i+1
    random.shuffle(parentkey)
    deciphered = SimpleSub(parentkey).decipher(ctext)
    parentscore = fitness.score(deciphered)
    count = 0
    while count < 1000:
        a = random.randint(0,25)
        b = random.randint(0,25)
        child = parentkey[:]
        # swap two characters in the child
        child[a],child[b] = child[b],child[a]
        deciphered = SimpleSub(child).decipher(ctext)
        score = fitness.score(deciphered)
        # if the child was better, replace the parent with it
        if score > parentscore:
            parentscore = score
            parentkey = child[:]
            count = 0
        count = count+1
    # keep track of best score seen so far
    if parentscore>maxscore:
        maxscore,maxkey = parentscore,parentkey[:]
        print '\nbest score so far:',maxscore,'on iteration',i
        ss = SimpleSub(maxkey)
        print '    best key: '+''.join(maxkey)
        print '    plaintext: '+ss.decipher(ctext)
        # add spacing, numbers are length of words to break up
	spaced = add_spacing(ss.decipher(ctext), 3, 4, 8, 3, 5, 2, 3, 6, 7, 10, 2)
        print ' '.join(spaced)

"""
The above found a very good best fit within a few seconds, but still wasn't totally correct. After
that it was easier to make the final modifications manually. Test code for separate module manual_test.py:


from pycipher import SimpleSubstitution as SimpleSub
import re

ctext='QRB QBTZ "VREUREMD" JOU OPPBP QN QRB NKCNTP BMDHEUR PELQENMOTF EM 2005.'
ctext = re.sub(r'[^A-Z]','',ctext.upper())

# key='OZLPBKDREYSHFMNCATUQGIJWVX' # CLOSE
key='OWLPBCDREYSHZMNVATUQGIJKFX' #SPOILER ALERT!
alp='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
print zip(key,alp) # for easy viewing while entering into website

def add_spacing(seqRNA, *breakPoint):
    seqRNAList = []
    noOfBreakPoints = len(breakPoint)
    for breakPt in range(noOfBreakPoints):
        for index in breakPoint:
            seqRNAList.append(seqRNA[:index])
            seqRNA = seqRNA[index:]
        break
    return seqRNAList

ss = SimpleSub(key)
print 'ciphertxt: '+ctext
print 'plaintext: '+ss.decipher(ctext)
spaced = add_spacing(ss.decipher(ctext), 3, 4, 8, 3, 5, 2, 3, 6, 7, 10, 2)
print ' '.join(spaced)

"""
	"""
	Break Simple Substitution Cipher (automated cryptanalysis)
	---
	Use a hill climbing algo to maximize fitness score accross iterations of keys (mutate, check, rinse/repeat).
	Fitness is determined by comparing quadgram statistics of decrypted text against the engilish quadgrams.
	---
	REQUIREMENTS
	Install pycipher for easily applying key to ciphertext: pip install pycipher
	Also need ngram_score module: http://practicalcryptography.com/media/cryptanalysis/files/ngram_score_1.py
	As well as english_quadgrams: http://practicalcryptography.com/media/cryptanalysis/files/english_quadgrams.txt.zip
	---
	Original Author: PracticalCryptography.com
	Original Resource: http://practicalcryptography.com/cryptanalysis/stochastic-searching/cryptanalysis-s
	imple-substitution-cipher/
	Methedology, readme for a good overall picture: http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/
	Additional info on subst ciphers: http://practicalcryptography.com/ciphers/simple-substitution-cipher/
	Additional info on quadgrams: http://practicalcryptography.com/cryptanalysis/text-characterisation/qu
	adgrams/#a-python-implementation
	Modified by @decidedlygray to include spacing of decrypt
	"""

	from pycipher import SimpleSubstitution as SimpleSub # cat pycipher>requirements.txt
	import random
	import re
	from ngram_score import ngram_score
	fitness = ngram_score('english_quadgrams.txt') # load our quadgram statistics

	ctext='QRBQBTZ"VREUREMD"JOUOPPBPQNQRB.NKCNTPBMDHEURPELQENMOTFEM2005.' # NSA CRYPTO CHALLENGE 10/31/2016
	ctext = re.sub('[^A-Z]','',ctext.upper())

	# thanks stackoverflow :) this makes decrypts easier to mentally parse
	def add_spacing(seqRNA, *breakPoint):
	seqRNAList = []
	noOfBreakPoints = len(breakPoint)
	for breakPt in range(noOfBreakPoints):
	for index in breakPoint:
	seqRNAList.append(seqRNA[:index])
	seqRNA = seqRNA[index:]
	break
	return seqRNAList

	maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
	maxscore = -99e9
	parentscore,parentkey = maxscore,maxkey[:]
	print "Substitution Cipher solver, you may have to wait several iterations"
	print "for the correct result. Press ctrl+c to exit program."
	# keep going until we are killed by the user
	i = 0
	while 1:
	i = i+1
	random.shuffle(parentkey)
	deciphered = SimpleSub(parentkey).decipher(ctext)
	parentscore = fitness.score(deciphered)
	count = 0
	while count < 1000:
	a = random.randint(0,25)
	b = random.randint(0,25)
	child = parentkey[:]
	# swap two characters in the child
	child[a],child[b] = child[b],child[a]
	deciphered = SimpleSub(child).decipher(ctext)
	score = fitness.score(deciphered)
	# if the child was better, replace the parent with it
	if score > parentscore:
	parentscore = score
	parentkey = child[:]
	count = 0
	count = count+1
	# keep track of best score seen so far
	if parentscore>maxscore:
	maxscore,maxkey = parentscore,parentkey[:]
	print '\nbest score so far:',maxscore,'on iteration',i
	ss = SimpleSub(maxkey)
	print ' best key: '+''.join(maxkey)
	print ' plaintext: '+ss.decipher(ctext)
	# add spacing, numbers are length of words to break up
	spaced = add_spacing(ss.decipher(ctext), 3, 4, 8, 3, 5, 2, 3, 6, 7, 10, 2)
	print ' '.join(spaced)

	"""
	The above found a very good best fit within a few seconds, but still wasn't totally correct. After
	that it was easier to make the final modifications manually. Test code for separate module manual_test.py:


	from pycipher import SimpleSubstitution as SimpleSub
	import re

	ctext='QRB QBTZ "VREUREMD" JOU OPPBP QN QRB NKCNTP BMDHEUR PELQENMOTF EM 2005.'
	ctext = re.sub(r'[^A-Z]','',ctext.upper())

	# key='OZLPBKDREYSHFMNCATUQGIJWVX' # CLOSE
	key='OWLPBCDREYSHZMNVATUQGIJKFX' #SPOILER ALERT!
	alp='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	print zip(key,alp) # for easy viewing while entering into website

	def add_spacing(seqRNA, *breakPoint):
	seqRNAList = []
	noOfBreakPoints = len(breakPoint)
	for breakPt in range(noOfBreakPoints):
	for index in breakPoint:
	seqRNAList.append(seqRNA[:index])
	seqRNA = seqRNA[index:]
	break
	return seqRNAList

	ss = SimpleSub(key)
	print 'ciphertxt: '+ctext
	print 'plaintext: '+ss.decipher(ctext)
	spaced = add_spacing(ss.decipher(ctext), 3, 4, 8, 3, 5, 2, 3, 6, 7, 10, 2)
	print ' '.join(spaced)

	"""