Last active
March 3, 2022 00:19
-
-
Save decidedlygray/e94f04d03b4fcd113eb63feffb9b87ef to your computer and use it in GitHub Desktop.
Short script to do automated cryptanalysis (really just finding best fit key) against substitution ciphers. Uses hill climbing algorithm to find best fit key.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Break Simple Substitution Cipher (automated cryptanalysis) | |
--- | |
Use a hill climbing algo to maximize fitness score accross iterations of keys (mutate, check, rinse/repeat). | |
Fitness is determined by comparing quadgram statistics of decrypted text against the engilish quadgrams. | |
--- | |
REQUIREMENTS | |
Install pycipher for easily applying key to ciphertext: pip install pycipher | |
Also need ngram_score module: http://practicalcryptography.com/media/cryptanalysis/files/ngram_score_1.py | |
As well as english_quadgrams: http://practicalcryptography.com/media/cryptanalysis/files/english_quadgrams.txt.zip | |
--- | |
Original Author: PracticalCryptography.com | |
Original Resource: http://practicalcryptography.com/cryptanalysis/stochastic-searching/cryptanalysis-s | |
imple-substitution-cipher/ | |
Methedology, readme for a good overall picture: http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/ | |
Additional info on subst ciphers: http://practicalcryptography.com/ciphers/simple-substitution-cipher/ | |
Additional info on quadgrams: http://practicalcryptography.com/cryptanalysis/text-characterisation/qu | |
adgrams/#a-python-implementation | |
Modified by @decidedlygray to include spacing of decrypt | |
""" | |
from pycipher import SimpleSubstitution as SimpleSub # cat pycipher>requirements.txt | |
import random | |
import re | |
from ngram_score import ngram_score | |
fitness = ngram_score('english_quadgrams.txt') # load our quadgram statistics | |
ctext='QRBQBTZ"VREUREMD"JOUOPPBPQNQRB.NKCNTPBMDHEURPELQENMOTFEM2005.' # NSA CRYPTO CHALLENGE 10/31/2016 | |
ctext = re.sub('[^A-Z]','',ctext.upper()) | |
# thanks stackoverflow :) this makes decrypts easier to mentally parse | |
def add_spacing(seqRNA, *breakPoint): | |
seqRNAList = [] | |
noOfBreakPoints = len(breakPoint) | |
for breakPt in range(noOfBreakPoints): | |
for index in breakPoint: | |
seqRNAList.append(seqRNA[:index]) | |
seqRNA = seqRNA[index:] | |
break | |
return seqRNAList | |
maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') | |
maxscore = -99e9 | |
parentscore,parentkey = maxscore,maxkey[:] | |
print "Substitution Cipher solver, you may have to wait several iterations" | |
print "for the correct result. Press ctrl+c to exit program." | |
# keep going until we are killed by the user | |
i = 0 | |
while 1: | |
i = i+1 | |
random.shuffle(parentkey) | |
deciphered = SimpleSub(parentkey).decipher(ctext) | |
parentscore = fitness.score(deciphered) | |
count = 0 | |
while count < 1000: | |
a = random.randint(0,25) | |
b = random.randint(0,25) | |
child = parentkey[:] | |
# swap two characters in the child | |
child[a],child[b] = child[b],child[a] | |
deciphered = SimpleSub(child).decipher(ctext) | |
score = fitness.score(deciphered) | |
# if the child was better, replace the parent with it | |
if score > parentscore: | |
parentscore = score | |
parentkey = child[:] | |
count = 0 | |
count = count+1 | |
# keep track of best score seen so far | |
if parentscore>maxscore: | |
maxscore,maxkey = parentscore,parentkey[:] | |
print '\nbest score so far:',maxscore,'on iteration',i | |
ss = SimpleSub(maxkey) | |
print ' best key: '+''.join(maxkey) | |
print ' plaintext: '+ss.decipher(ctext) | |
# add spacing, numbers are length of words to break up | |
spaced = add_spacing(ss.decipher(ctext), 3, 4, 8, 3, 5, 2, 3, 6, 7, 10, 2) | |
print ' '.join(spaced) | |
""" | |
The above found a very good best fit within a few seconds, but still wasn't totally correct. After | |
that it was easier to make the final modifications manually. Test code for separate module manual_test.py: | |
from pycipher import SimpleSubstitution as SimpleSub | |
import re | |
ctext='QRB QBTZ "VREUREMD" JOU OPPBP QN QRB NKCNTP BMDHEUR PELQENMOTF EM 2005.' | |
ctext = re.sub(r'[^A-Z]','',ctext.upper()) | |
# key='OZLPBKDREYSHFMNCATUQGIJWVX' # CLOSE | |
key='OWLPBCDREYSHZMNVATUQGIJKFX' #SPOILER ALERT! | |
alp='ABCDEFGHIJKLMNOPQRSTUVWXYZ' | |
print zip(key,alp) # for easy viewing while entering into website | |
def add_spacing(seqRNA, *breakPoint): | |
seqRNAList = [] | |
noOfBreakPoints = len(breakPoint) | |
for breakPt in range(noOfBreakPoints): | |
for index in breakPoint: | |
seqRNAList.append(seqRNA[:index]) | |
seqRNA = seqRNA[index:] | |
break | |
return seqRNAList | |
ss = SimpleSub(key) | |
print 'ciphertxt: '+ctext | |
print 'plaintext: '+ss.decipher(ctext) | |
spaced = add_spacing(ss.decipher(ctext), 3, 4, 8, 3, 5, 2, 3, 6, 7, 10, 2) | |
print ' '.join(spaced) | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment