luqmana/vigenere.rs

## vigenere.rs
// Implemented based on the techniques described in
// the notes by Shai Simonson: 'Public Key Cryptography'
// http://web.stonehill.edu/compsci/Shai_papers/RSA.pdf
// (Page 9-10)


use std::f32;

// Index of Coincidence for English text
// http://www.cs.mtu.edu/~shene/NSF-4/Tutorial/VIG/Vig-IOC.html
// find_codeword_len keeps the candidate length whose match ratio is closest to this value.
const IOC: f32 = 0.066332;

// Relative frequencies of English letters, in percent, indexed 0 = 'A' through 25 = 'Z'
// H. Beker and F. Piper, Cipher Systems, Wiley-Interscience, 1982.
const FREQS: [f32; 26] = [8.167, 1.492, 2.782, 4.253, 12.702, 2.228, 2.015, 6.094, 6.996, 0.153,
                          0.772, 4.025, 2.406, 6.749, 7.507, 1.929, 0.095, 5.987, 6.327, 9.056,
                          2.758, 0.978, 2.360, 0.150, 1.974, 0.074];

// Bounds of the codeword lengths tried by find_codeword_len. It iterates
// MIN_SHIFT..MAX_SHIFT, so MAX_SHIFT itself is excluded.
const MIN_SHIFT: usize = 3;
const MAX_SHIFT: usize = 10;

/// Reduces `x` modulo `y` so that the result takes the sign of `y`.
///
/// For a positive `y` the result lies in `0..y`, unlike the `%` operator, which
/// returns a negative remainder for a negative `x`.
///
/// # Panics
///
/// Panics if `y` is zero.
fn m(x: i8, y: i8) -> i8 {
    // Work in i16: the intermediate `(x % y) + y` can reach almost 2 * |y|, which
    // does not fit in an i8 once |y| exceeds 64.
    let (x, y) = (i16::from(x), i16::from(y));
    // The final value lies strictly between -|y| and |y|, so it always fits in an i8.
    (((x % y) + y) % y) as i8
}

/// Estimates the length of the codeword that was used to produce `cipher_text`.
///
/// For every candidate length in `MIN_SHIFT..MAX_SHIFT` (upper bound excluded) the
/// text is compared byte by byte with a copy of itself displaced by that many
/// positions. The fraction of matching positions is compared against `IOC`, and the
/// candidate whose fraction is closest wins; on an exact tie the larger candidate is kept.
///
/// Returns `None` when `cipher_text` is empty or when the candidate range is empty.
fn find_codeword_len(cipher_text: &[u8]) -> Option<usize> {
    // With no bytes every ratio would be 0/0 = NaN, and since all comparisons with
    // NaN are false the fold below would silently pick the last candidate.
    if cipher_text.is_empty() {
        return None;
    }

    let total = cipher_text.len() as f32;

    (MIN_SHIFT..MAX_SHIFT)
        .map(|shift| {
            // Pair each byte with the one `shift` positions later. Once the displaced
            // copy runs out it restarts from offset `shift`, so every position of the
            // text has a partner (unless the text is no longer than `shift`, in which
            // case there are no pairs and therefore no matches).
            let matches = cipher_text.iter()
                                     .zip(cipher_text.iter().skip(shift).cycle())
                                     .filter(|&(&x, &y)| x == y)
                                     .count();

            // Distance between the observed match ratio and the English reference.
            (shift, (matches as f32 / total - IOC).abs())
        })
        // Keep the candidate with the smallest distance; ties go to the later one.
        .fold((None, f32::INFINITY), |(best, best_dist), (shift, dist)| {
            if best_dist < dist {
                (best, best_dist)
            } else {
                (Some(shift), dist)
            }
        })
        .0
}

fn find_codeword(cipher_text: &[u8], len: usize) -> String {
    let mut codeword = String::with_capacity(len);

    // We group the cipher text into columns that share a key
    let columns = (0..len).map(|skip| {
        // Iterate over the cipher text
        cipher_text.iter()
                   // Keeping track of the index
                   .enumerate()
                   // Now keep the relevant ones
                   .filter(|&(i, _)| i % len == skip)
                   // We don't need the index anymore
                   .map(|(_, &x)| x)
                   // And let's put that all in a vector
                   .collect::<Vec<_>>()
    });

    // Now let's process each column in turn
    for column in columns {
        let mut best_shift = 0;
        let mut best_norm = f32::INFINITY;

        // Each column is essentially encoded as a caeser chipher so we try
        // all 26 possibilities and compare the frequency distributions
        for shift in 0..26 {
            // Now let's find the # of occurrences of each letter
            let mut freqs = [0.0f32; 26];
            let len = column.iter()
                            // Take the ascii value, turn it 0-based, shift it and mod by 26
                            .map(|&x| m(x as i8 - b'A' as i8 - shift as i8, 26))
                            // Update our counts
                            .inspect(|x| freqs[*x as usize] += 1.)
                            // and finally just return the total which we use to normalize with
                            .count();

            // Use the square difference to determine a measure of similarity
            // to typical english text.
            let norm: f32 = freqs.iter()
                                 // first we normalize each count by the total
                                 // and multiply by 100 so we match our reference data
                                 .map(|&x| x / len as f32 * 100.)
                                 // also iterate over the reference values for english text
                                 .zip(FREQS.iter())
                                 // the square of the difference
                                 .map(|(x, &y)| (x - y).powi(2))
                                 // and finally sum it all up
                                 .fold(0., |x, y| x + y);

            // Update our values if we find anything better
            if norm < best_norm {
                best_norm = norm;
                best_shift = shift;
            }
        }

        // Update our codeword with our best guess
        codeword.push((best_shift + b'A') as char);
    }

    codeword
}

/// Reads the embedded cipher text, guesses the codeword length and the codeword,
/// then prints each intermediate result followed by the decoded text.
///
/// Panics when no codeword length can be determined.
fn main() {
    let cipher_text = include_str!("cipher1.txt").trim();
    let cipher_bytes = cipher_text.as_bytes();

    println!("Cipher Text:\n{}\n", cipher_text);

    let len = match find_codeword_len(cipher_bytes) {
        Some(len) => len,
        None => panic!("Unable to determine codeword len."),
    };
    println!("Probable Codeword Len: {}\n", len);

    let codeword = find_codeword(cipher_bytes, len);
    println!("Code word is probably: {}\n", codeword);

    // Subtract the repeating codeword from the cipher text, letter by letter.
    let mut decoded = String::with_capacity(cipher_text.len());
    for (cipher_byte, key_byte) in cipher_text.bytes().zip(codeword.bytes().cycle()) {
        let plain = m(cipher_byte as i8 - key_byte as i8, 26);
        decoded.push((plain as u8 + b'A') as char);
    }

    println!("Decoded Text:\n{}", decoded);
}
	// Implemented based on the techniques described in
	// the notes by Shai Simonson: 'Public Key Cryptography'
	// http://web.stonehill.edu/compsci/Shai_papers/RSA.pdf
	// (Page 9-10)


	use std::f32;

	// Index of Coincidence for English text
	// http://www.cs.mtu.edu/~shene/NSF-4/Tutorial/VIG/Vig-IOC.html
	// find_codeword_len keeps the candidate length whose match ratio is closest to this value.
	const IOC: f32 = 0.066332;

	// Relative frequencies of English letters, in percent, indexed 0 = 'A' through 25 = 'Z'
	// H. Beker and F. Piper, Cipher Systems, Wiley-Interscience, 1982.
	const FREQS: [f32; 26] = [8.167, 1.492, 2.782, 4.253, 12.702, 2.228, 2.015, 6.094, 6.996, 0.153,
	0.772, 4.025, 2.406, 6.749, 7.507, 1.929, 0.095, 5.987, 6.327, 9.056,
	2.758, 0.978, 2.360, 0.150, 1.974, 0.074];

	// Bounds of the codeword lengths tried by find_codeword_len. It iterates
	// MIN_SHIFT..MAX_SHIFT, so MAX_SHIFT itself is excluded.
	const MIN_SHIFT: usize = 3;
	const MAX_SHIFT: usize = 10;

	/// Reduces `x` modulo `y` so that the result takes the sign of `y`.
	///
	/// For a positive `y` the result lies in `0..y`, unlike the `%` operator, which
	/// returns a negative remainder for a negative `x`.
	///
	/// # Panics
	///
	/// Panics if `y` is zero.
	fn m(x: i8, y: i8) -> i8 {
	    // Work in i16: the intermediate `(x % y) + y` can reach almost 2 * |y|, which
	    // does not fit in an i8 once |y| exceeds 64.
	    let (x, y) = (i16::from(x), i16::from(y));
	    // The final value lies strictly between -|y| and |y|, so it always fits in an i8.
	    (((x % y) + y) % y) as i8
	}

	/// Estimates the length of the codeword that was used to produce `cipher_text`.
	///
	/// For every candidate length in `MIN_SHIFT..MAX_SHIFT` (upper bound excluded) the
	/// text is compared byte by byte with a copy of itself displaced by that many
	/// positions. The fraction of matching positions is compared against `IOC`, and the
	/// candidate whose fraction is closest wins; on an exact tie the larger candidate is kept.
	///
	/// Returns `None` when `cipher_text` is empty or when the candidate range is empty.
	fn find_codeword_len(cipher_text: &[u8]) -> Option<usize> {
	    // With no bytes every ratio would be 0/0 = NaN, and since all comparisons with
	    // NaN are false the fold below would silently pick the last candidate.
	    if cipher_text.is_empty() {
	        return None;
	    }

	    let total = cipher_text.len() as f32;

	    (MIN_SHIFT..MAX_SHIFT)
	        .map(|shift| {
	            // Pair each byte with the one `shift` positions later. Once the displaced
	            // copy runs out it restarts from offset `shift`, so every position of the
	            // text has a partner (unless the text is no longer than `shift`, in which
	            // case there are no pairs and therefore no matches).
	            let matches = cipher_text.iter()
	                                     .zip(cipher_text.iter().skip(shift).cycle())
	                                     .filter(|&(&x, &y)| x == y)
	                                     .count();

	            // Distance between the observed match ratio and the English reference.
	            (shift, (matches as f32 / total - IOC).abs())
	        })
	        // Keep the candidate with the smallest distance; ties go to the later one.
	        .fold((None, f32::INFINITY), |(best, best_dist), (shift, dist)| {
	            if best_dist < dist {
	                (best, best_dist)
	            } else {
	                (Some(shift), dist)
	            }
	        })
	        .0
	}

	fn find_codeword(cipher_text: &[u8], len: usize) -> String {
	let mut codeword = String::with_capacity(len);

	// We group the cipher text into columns that share a key
	let columns = (0..len).map(\|skip\| {
	// Iterate over the cipher text
	cipher_text.iter()
	// Keeping track of the index
	.enumerate()
	// Now keep the relevant ones
	.filter(\|&(i, _)\| i % len == skip)
	// We don't need the index anymore
	.map(\|(_, &x)\| x)
	// And let's put that all in a vector
	.collect::<Vec<_>>()
	});

	// Now let's process each column in turn
	for column in columns {
	let mut best_shift = 0;
	let mut best_norm = f32::INFINITY;

	// Each column is essentially encoded as a caeser chipher so we try
	// all 26 possibilities and compare the frequency distributions
	for shift in 0..26 {
	// Now let's find the # of occurrences of each letter
	let mut freqs = [0.0f32; 26];
	let len = column.iter()
	// Take the ascii value, turn it 0-based, shift it and mod by 26
	.map(\|&x\| m(x as i8 - b'A' as i8 - shift as i8, 26))
	// Update our counts
	.inspect(\|x\| freqs[*x as usize] += 1.)
	// and finally just return the total which we use to normalize with
	.count();

	// Use the square difference to determine a measure of similarity
	// to typical english text.
	let norm: f32 = freqs.iter()
	// first we normalize each count by the total
	// and multiply by 100 so we match our reference data
	.map(\|&x\| x / len as f32 * 100.)
	// also iterate over the reference values for english text
	.zip(FREQS.iter())
	// the square of the difference
	.map(\|(x, &y)\| (x - y).powi(2))
	// and finally sum it all up
	.fold(0., \|x, y\| x + y);

	// Update our values if we find anything better
	if norm < best_norm {
	best_norm = norm;
	best_shift = shift;
	}
	}

	// Update our codeword with our best guess
	codeword.push((best_shift + b'A') as char);
	}

	codeword
	}

	/// Reads the embedded cipher text, guesses the codeword length and the codeword,
	/// then prints each intermediate result followed by the decoded text.
	///
	/// Panics when no codeword length can be determined.
	fn main() {
	    let cipher_text = include_str!("cipher1.txt").trim();
	    let cipher_bytes = cipher_text.as_bytes();

	    println!("Cipher Text:\n{}\n", cipher_text);

	    let len = match find_codeword_len(cipher_bytes) {
	        Some(len) => len,
	        None => panic!("Unable to determine codeword len."),
	    };
	    println!("Probable Codeword Len: {}\n", len);

	    let codeword = find_codeword(cipher_bytes, len);
	    println!("Code word is probably: {}\n", codeword);

	    // Subtract the repeating codeword from the cipher text, letter by letter.
	    let mut decoded = String::with_capacity(cipher_text.len());
	    for (cipher_byte, key_byte) in cipher_text.bytes().zip(codeword.bytes().cycle()) {
	        let plain = m(cipher_byte as i8 - key_byte as i8, 26);
	        decoded.push((plain as u8 + b'A') as char);
	    }

	    println!("Decoded Text:\n{}", decoded);
	}