Created
January 29, 2013 04:20
-
-
Save silveira/4661770 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Tries to decrypt a Caesar cipher without the key. | |
# Uses brute force and compare the results against the English letter frequency. | |
# Letters in the English alphabet | |
LETTERS = 26 | |
# Frequency per letter in the English alphabet | |
english = { | |
'a': 0.082, 'b': 0.015, 'c': 0.028, 'd': 0.043, 'e': 0.127, 'f': 0.022, | |
'g': 0.020, 'h': 0.061, 'i': 0.070, 'j': 0.002, 'k': 0.008, 'l': 0.040, | |
'm': 0.024, 'n': 0.067, 'o': 0.075, 'p': 0.019, 'q': 0.001, 'r': 0.060, | |
's': 0.063, 't': 0.091, 'u': 0.028, 'v': 0.010, 'w': 0.023, 'x': 0.001, | |
'y': 0.020, 'z': 0.001 | |
} | |
# Calculate a alphabet frequency for a string | |
def frequency(cipher): | |
freq = {} | |
letter_counter = 0 | |
for letter in cipher: | |
if letter >= 'a' and letter <= 'z': | |
letter_counter += 1 | |
if freq.has_key(letter): | |
freq[letter] += 1.0 | |
else: | |
freq[letter] = 1.0 | |
for letter in freq: | |
freq[letter] = freq[letter]/letter_counter | |
return freq | |
# Quantificate the distance between two alphabet frequencies | |
def distance(frequency_a, frequency_b): | |
sum = 0 | |
for letter in frequency_a: | |
if frequency_b.has_key(letter): | |
sum += abs(frequency_a[letter]-frequency_b[letter]) | |
return sum | |
# Decrypts cipher using key | |
def caesar_decrypt(cipher, key): | |
message = "" | |
for letter in cipher: | |
original = letter | |
if letter >= 'a' and letter <= 'z': | |
original = chr(ord('a') + ((ord(letter)+key)%LETTERS)) | |
message += original | |
return message | |
# Find the key that decryptes the cipher to a message with minimal distance | |
# to the English frequency | |
def break_caesar(cipher): | |
best_distance = float("inf") | |
best_key = 0 | |
best_message = "" | |
for key in xrange(LETTERS): | |
message = caesar_decrypt(cipher, key) | |
freq = frequency(message) | |
current_distance = distance(freq, english) | |
if current_distance < best_distance: | |
best_distance = current_distance | |
best_key = key | |
best_message = message | |
return best_message, best_key, best_distance | |
print break_caesar('qefp fp qeb cfopq qbuq') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment