Created
January 15, 2016 07:03
-
-
Save luqmana/0288a9641c7f39b2d734 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Implemented based on the techniques described in | |
// the notes by Shai Simonson: 'Public Key Crpytography' | |
// http://web.stonehill.edu/compsci/Shai_papers/RSA.pdf | |
// (Page 9-10) | |
use std::f32; | |
// Index of Coincidence for English text | |
// http://www.cs.mtu.edu/~shene/NSF-4/Tutorial/VIG/Vig-IOC.html | |
const IOC: f32 = 0.066332; | |
// Relative frequencies of English letter | |
// H. Beker and F. Piper, Cipher Systems, Wiley-Interscience, 1982. | |
const FREQS: [f32; 26] = [8.167, 1.492, 2.782, 4.253, 12.702, 2.228, 2.015, 6.094, 6.996, 0.153, | |
0.772, 4.025, 2.406, 6.749, 7.507, 1.929, 0.095, 5.987, 6.327, 9.056, | |
2.758, 0.978, 2.360, 0.150, 1.974, 0.074]; | |
const MIN_SHIFT: usize = 3; | |
const MAX_SHIFT: usize = 10; | |
fn m(x: i8, y: i8) -> i8 { | |
((x % y) + y) % y | |
} | |
fn find_codeword_len(cipher_text: &[u8]) -> Option<usize> { | |
// Try all the lens from MIN to MAX | |
(MIN_SHIFT..MAX_SHIFT) | |
.map(|shift| ( | |
(shift, | |
// For each shift length, iterate over the cipher text | |
cipher_text.iter() | |
// And at the same time iterate with the same | |
// cipher text but shifted by `shift` | |
.zip(cipher_text.iter().skip(shift).cycle()) | |
// Keep only the bytes which match in the cipher text | |
// and the shifted cipher text | |
.filter(|&(&x, &y)| x == y) | |
// Now count how many bytes matched | |
.count() as f32) | |
)) | |
// Now, go over each shift and the corresponding number of matches | |
// to calculate the Index of Coincidence | |
.map(|(n, k)| (n, k as f32 / cipher_text.len() as f32)) | |
// Next, calculate the distance from our reference value for English text | |
.map(|(n, k)| (n, (k - IOC).abs())) | |
// Finally, take the closest value | |
.fold((None, f32::INFINITY), |(n, k), (m, b)| { | |
if k < b { | |
(n, k) | |
} else { | |
(Some(m), b) | |
} | |
}) | |
.0 | |
} | |
fn find_codeword(cipher_text: &[u8], len: usize) -> String { | |
let mut codeword = String::with_capacity(len); | |
// We group the cipher text into columns that share a key | |
let columns = (0..len).map(|skip| { | |
// Iterate over the cipher text | |
cipher_text.iter() | |
// Keeping track of the index | |
.enumerate() | |
// Now keep the relevant ones | |
.filter(|&(i, _)| i % len == skip) | |
// We don't need the index anymore | |
.map(|(_, &x)| x) | |
// And let's put that all in a vector | |
.collect::<Vec<_>>() | |
}); | |
// Now let's process each column in turn | |
for column in columns { | |
let mut best_shift = 0; | |
let mut best_norm = f32::INFINITY; | |
// Each column is essentially encoded as a caeser chipher so we try | |
// all 26 possibilities and compare the frequency distributions | |
for shift in 0..26 { | |
// Now let's find the # of occurrences of each letter | |
let mut freqs = [0.0f32; 26]; | |
let len = column.iter() | |
// Take the ascii value, turn it 0-based, shift it and mod by 26 | |
.map(|&x| m(x as i8 - b'A' as i8 - shift as i8, 26)) | |
// Update our counts | |
.inspect(|x| freqs[*x as usize] += 1.) | |
// and finally just return the total which we use to normalize with | |
.count(); | |
// Use the square difference to determine a measure of similarity | |
// to typical english text. | |
let norm: f32 = freqs.iter() | |
// first we normalize each count by the total | |
// and multiply by 100 so we match our reference data | |
.map(|&x| x / len as f32 * 100.) | |
// also iterate over the reference values for english text | |
.zip(FREQS.iter()) | |
// the square of the difference | |
.map(|(x, &y)| (x - y).powi(2)) | |
// and finally sum it all up | |
.fold(0., |x, y| x + y); | |
// Update our values if we find anything better | |
if norm < best_norm { | |
best_norm = norm; | |
best_shift = shift; | |
} | |
} | |
// Update our codeword with our best guess | |
codeword.push((best_shift + b'A') as char); | |
} | |
codeword | |
} | |
fn main() { | |
let cipher_text = include_str!("cipher1.txt").trim(); | |
println!("Cipher Text:\n{}\n", cipher_text); | |
if let Some(len) = find_codeword_len(cipher_text.as_ref()) { | |
println!("Probable Codeword Len: {}\n", len); | |
let codeword = find_codeword(cipher_text.as_ref(), len); | |
println!("Code word is probably: {}\n", codeword); | |
let decoded = cipher_text.bytes() | |
.zip(codeword.bytes().cycle()) | |
.map(|(x, y)| m(x as i8 - y as i8, 26)) | |
.map(|x| (x as u8 + b'A') as char) | |
.collect::<String>(); | |
println!("Decoded Text:\n{}", decoded); | |
} else { | |
panic!("Unable to determine codeword len."); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment