Created
September 5, 2018 00:30
-
-
Save aaronyoo/52168ce1a27e0f3548ddc913139a971f to your computer and use it in GitHub Desktop.
Breaking the Vigenere Cipher
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64 | |
from collections import Counter | |
# English language letter frequencies from: | |
# https://gist.githueb.com/pozhidaevak/0dca594d6f0de367f232909fe21cdb2f | |
letterFrequency = {'E' : 12.0, 'T' : 9.10, 'A' : 8.12, 'O' : 7.68, | |
'I' : 7.31, 'N' : 6.95, 'S' : 6.28, 'R' : 6.02, | |
'H' : 5.92, 'D' : 4.32, 'L' : 3.98, 'U' : 2.88, | |
'C' : 2.71, 'M' : 2.61, 'F' : 2.30, 'Y' : 2.11, | |
'W' : 2.09, 'G' : 2.03, 'P' : 1.82, 'B' : 1.49, | |
'V' : 1.11, 'K' : 0.69, 'X' : 0.17, 'Q' : 0.11, | |
'J' : 0.10, 'Z' : 0.07 , ' ' : 13.00} | |
def breakSingleByteRepeatingXOR(ct): | |
"""Breaks single byte repeating XOR by trying all possible bytes""" | |
l = [] | |
for key in range(256): | |
# get the plaintext string | |
pt = bytearray() | |
for byte in ct: | |
pt.append(byte ^ key) | |
# try to convert the plaintext byte object into string form | |
# if there is a converstion error this isn't the right key so continue | |
try: | |
pt_str = str(pt, 'ascii').upper() | |
except UnicodeDecodeError: | |
continue | |
# use Counter to create a frequency dictionary from the | |
# uppercase version of the plaintext string | |
pt_freq = Counter(pt_str) | |
# score the string | |
score = 0 | |
for k, v in pt_freq.items(): | |
if k in letterFrequency: | |
score += letterFrequency[k] | |
# append result to l | |
l.append((key, pt, score)) | |
# sort based on highest score | |
l.sort(key=lambda x: x[2], reverse=True) | |
# return the tuple associated with the highest score | |
if len(l) >= 1: | |
return l[0] | |
else: | |
return None | |
def applyMultiByteRepeatingXOR(t, k): | |
"""Applies multibyte repeating XOR to the text using the key""" | |
result = bytearray() | |
for i in range(len(t)): | |
result.append(t[i] ^ k[i % len(k)]) | |
return result | |
def getKeySize(ct): | |
"""Returns the most likely key size""" | |
def hammingDistance(bs1, bs2): | |
"""Computes the Hamming distance between 2 byte-like objects""" | |
result = 0 | |
for b1, b2 in zip(bs1, bs2): | |
result += sum(b == '1' for b in bin(b1 ^ b2)) | |
return result | |
l = [] | |
for key_size in range(2, 41): | |
# divide into chunks of key_size | |
chunks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)] | |
distances = [] | |
for c1, c2 in zip(chunks[0::2], chunks[1::2]): | |
distances.append(hammingDistance(c1, c2) / key_size) | |
avg_distance = sum(distances) / len(distances) | |
l.append((key_size, avg_distance)) | |
# sort by the smallest hamming distance | |
l.sort(key=lambda x: x[1]) | |
return l[0][0] | |
def getTransposedBlocks(key_size, ct): | |
"""Produces transposed blocks from key and ciphertext""" | |
# divide into chuncks of key_size | |
ct_blocks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)] | |
# transpose blocks | |
ct_blocks_transposed = [bytearray() for i in range(key_size)] | |
for i in range(key_size): | |
for block in ct_blocks: | |
if (i < len(block)): | |
ct_blocks_transposed[i].append(block[i]) | |
return ct_blocks_transposed | |
def main(): | |
with open('6.txt', 'r') as infile: | |
ciphertext = base64.b64decode(infile.read()) | |
key_size = getKeySize(ciphertext) | |
key = bytearray() | |
transposed_blocks = getTransposedBlocks(key_size, ciphertext) | |
for block in transposed_blocks: | |
# get the key from each single byte XOR | |
key.append(breakSingleByteRepeatingXOR(block)[0]) | |
# decrypt the ciphertext using the key | |
plaintext = applyMultiByteRepeatingXOR(ciphertext, key) | |
print(str(key, 'ascii')) | |
print(str(plaintext, 'ascii')) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment