Skip to content

Instantly share code, notes, and snippets.

@aaronyoo
Created September 5, 2018 00:30
Show Gist options
  • Save aaronyoo/52168ce1a27e0f3548ddc913139a971f to your computer and use it in GitHub Desktop.
Save aaronyoo/52168ce1a27e0f3548ddc913139a971f to your computer and use it in GitHub Desktop.
Breaking the Vigenere Cipher
import base64
from collections import Counter
# English language letter frequencies from:
# https://gist.githueb.com/pozhidaevak/0dca594d6f0de367f232909fe21cdb2f
letterFrequency = {'E' : 12.0, 'T' : 9.10, 'A' : 8.12, 'O' : 7.68,
'I' : 7.31, 'N' : 6.95, 'S' : 6.28, 'R' : 6.02,
'H' : 5.92, 'D' : 4.32, 'L' : 3.98, 'U' : 2.88,
'C' : 2.71, 'M' : 2.61, 'F' : 2.30, 'Y' : 2.11,
'W' : 2.09, 'G' : 2.03, 'P' : 1.82, 'B' : 1.49,
'V' : 1.11, 'K' : 0.69, 'X' : 0.17, 'Q' : 0.11,
'J' : 0.10, 'Z' : 0.07 , ' ' : 13.00}
def breakSingleByteRepeatingXOR(ct):
"""Breaks single byte repeating XOR by trying all possible bytes"""
l = []
for key in range(256):
# get the plaintext string
pt = bytearray()
for byte in ct:
pt.append(byte ^ key)
# try to convert the plaintext byte object into string form
# if there is a converstion error this isn't the right key so continue
try:
pt_str = str(pt, 'ascii').upper()
except UnicodeDecodeError:
continue
# use Counter to create a frequency dictionary from the
# uppercase version of the plaintext string
pt_freq = Counter(pt_str)
# score the string
score = 0
for k, v in pt_freq.items():
if k in letterFrequency:
score += letterFrequency[k]
# append result to l
l.append((key, pt, score))
# sort based on highest score
l.sort(key=lambda x: x[2], reverse=True)
# return the tuple associated with the highest score
if len(l) >= 1:
return l[0]
else:
return None
def applyMultiByteRepeatingXOR(t, k):
"""Applies multibyte repeating XOR to the text using the key"""
result = bytearray()
for i in range(len(t)):
result.append(t[i] ^ k[i % len(k)])
return result
def getKeySize(ct):
"""Returns the most likely key size"""
def hammingDistance(bs1, bs2):
"""Computes the Hamming distance between 2 byte-like objects"""
result = 0
for b1, b2 in zip(bs1, bs2):
result += sum(b == '1' for b in bin(b1 ^ b2))
return result
l = []
for key_size in range(2, 41):
# divide into chunks of key_size
chunks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)]
distances = []
for c1, c2 in zip(chunks[0::2], chunks[1::2]):
distances.append(hammingDistance(c1, c2) / key_size)
avg_distance = sum(distances) / len(distances)
l.append((key_size, avg_distance))
# sort by the smallest hamming distance
l.sort(key=lambda x: x[1])
return l[0][0]
def getTransposedBlocks(key_size, ct):
"""Produces transposed blocks from key and ciphertext"""
# divide into chuncks of key_size
ct_blocks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)]
# transpose blocks
ct_blocks_transposed = [bytearray() for i in range(key_size)]
for i in range(key_size):
for block in ct_blocks:
if (i < len(block)):
ct_blocks_transposed[i].append(block[i])
return ct_blocks_transposed
def main():
with open('6.txt', 'r') as infile:
ciphertext = base64.b64decode(infile.read())
key_size = getKeySize(ciphertext)
key = bytearray()
transposed_blocks = getTransposedBlocks(key_size, ciphertext)
for block in transposed_blocks:
# get the key from each single byte XOR
key.append(breakSingleByteRepeatingXOR(block)[0])
# decrypt the ciphertext using the key
plaintext = applyMultiByteRepeatingXOR(ciphertext, key)
print(str(key, 'ascii'))
print(str(plaintext, 'ascii'))
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment