Last active
August 29, 2015 14:15
-
-
Save omaskery/d3a07322850401b248f4 to your computer and use it in GitHub Desktop.
Playing with the beale ciphers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::old_io::File; | |
use std::rand; | |
use std::rand::Rng; | |
use std::str::{StrExt, FromStr}; | |
use std::ascii::OwnedAsciiExt; | |
use std::collections::HashMap; | |
use std::iter::AdditiveIterator; | |
use std::vec::Vec; | |
const DATA_PATH: &'static str = "data/1.txt"; | |
const SCORE_WORDS_PATH: &'static str = "data/topwords.txt"; | |
const POP_SIZE: u32 = 50; | |
const GENERATIONS: u32 = 10000; | |
type CipherUid = u32; | |
type FreqCount = u32; | |
type RawCipherNumber = u32; | |
type DistributionTable = HashMap<RawCipherNumber, (CipherUid, FreqCount)>; | |
type Solution = Vec<u8>; | |
type Score = u32; | |
type Output = String; | |
#[derive(Copy, Clone)] | |
struct CipherNumber { | |
number: RawCipherNumber, | |
uid: CipherUid, | |
} | |
#[derive(Clone)] | |
struct ScoredSolution { | |
solution: Solution, | |
score: Score, | |
output: Output, | |
} | |
fn main() { | |
let cipher_path = Path::new(DATA_PATH); | |
let score_word_path = Path::new(SCORE_WORDS_PATH); | |
let numbers = load_cipher(&cipher_path); | |
let score_words = load_score_words(&score_word_path, 2); | |
let frequency = calculate_freq(numbers.as_slice()); | |
let numbers = map_numbers(numbers.as_slice(), &frequency); | |
println!("{} numbers in cipher, {} are different.", | |
numbers.len(), frequency.len() | |
); | |
let mut population = initial_population(numbers.len(), POP_SIZE); | |
let mut fittest: Option<ScoredSolution> = None; | |
for generation in range(0, GENERATIONS) { | |
let mut local_fittest: Option<ScoredSolution> = None; | |
for solution in population.iter() { | |
let (output, score) = score_solution(numbers.as_slice(), solution, score_words.as_slice()); | |
let mut better = false; | |
match local_fittest { | |
Some(ref scored) => { | |
if scored.score < score { | |
better = true; | |
} | |
}, | |
None => better = true, | |
} | |
if better { | |
local_fittest = Some(ScoredSolution { | |
solution: (*solution).clone(), | |
score: score, | |
output: output | |
}); | |
} | |
} | |
let mut better = false; | |
match (&fittest, &local_fittest) { | |
(&Some(ref scored), &Some(ref local_scored)) => { | |
if scored.score < local_scored.score { | |
better = true; | |
} | |
}, | |
(&None, _) => better = true, | |
_ => {}, | |
} | |
if better { | |
fittest = local_fittest.clone(); | |
match &fittest { | |
&Some(ref scored) => { | |
println!("[{}] new fittest [{}]: '{}'", | |
generation, scored.score, scored.output | |
); | |
}, | |
_ => {}, | |
} | |
} | |
let local_fittest = local_fittest.unwrap(); | |
for (index, mut solution) in population.iter_mut().enumerate() { | |
mutate_solution(local_fittest.solution.as_slice(), solution.as_mut_slice(), index as u32); | |
} | |
} | |
let fittest = fittest.unwrap(); | |
} | |
fn mutate_solution(fittest: &[u8], new_solution: &mut [u8], severity: u32) { | |
for (index, value) in fittest.iter().enumerate() { | |
new_solution[index] = *value; | |
} | |
for index in range(0, severity) { | |
let size = match rand::thread_rng().gen_range::<usize>(0, severity as usize) { | |
x if x < 0 => 0, | |
x if x >= new_solution.len() => new_solution.len() - 1, | |
x => x, | |
}; | |
let start = match rand::thread_rng().gen_range::<isize>(0, (new_solution.len() as isize) - (size as isize)) { | |
x if x < 0 => 0, | |
x => x as usize, | |
}; | |
for index in range(start, start+size) { | |
if index >= new_solution.len() { | |
println!("oops! index: {}, start: {}, size: {}, solution len: {}", | |
index, start, size, new_solution.len() | |
); | |
} | |
new_solution[index] = rand::thread_rng().gen_range::<u8>('a' as u8, ('z' as u8) + 1); | |
} | |
} | |
} | |
fn score_solution(numbers: &[CipherNumber], solution: &Solution, score_words: &[String]) -> (Output, Score) { | |
let output = apply_solution(numbers, solution); | |
let score = score_words.iter() | |
.map(|x| -> Score { | |
if output.find_str(x).is_none() == false { | |
x.len() as u32 | |
} else { | |
0 | |
} | |
}) | |
.sum(); | |
(output, score) | |
} | |
fn apply_solution(numbers: &[CipherNumber], solution: &Solution) -> String { | |
String::from_utf8(numbers.iter() | |
.map(|x| -> u8 { | |
solution[x.uid as usize] | |
}) | |
.collect::<Vec<u8>>()) | |
.unwrap() | |
} | |
fn initial_population(numbers: usize, pop_size: u32) -> Vec<Solution> { | |
fn chr(n: u8) -> u8 { | |
n + ('a' as u8) | |
} | |
range(0, pop_size) | |
.map(|_| { | |
let mut result = Vec::new(); | |
for letter in range(0, numbers).map(|x| rand::thread_rng().gen_range::<u8>(0, 25)).map(chr) { | |
result.push(letter); | |
} | |
result | |
}) | |
.collect() | |
} | |
fn map_numbers(numbers: &[RawCipherNumber], freq: &DistributionTable) -> Vec<CipherNumber> { | |
numbers.iter() | |
.map(|x| -> CipherNumber { | |
CipherNumber { | |
number: *x, | |
uid: freq.get(x).unwrap().0 | |
} | |
}) | |
.collect() | |
} | |
fn calculate_freq(numbers: &[RawCipherNumber]) -> DistributionTable { | |
let mut result: DistributionTable = HashMap::new(); | |
let mut next_uid: CipherUid = 0; | |
for number in numbers { | |
if result.contains_key(number) { | |
match result.get_mut(number) { | |
Some(count) => (*count).1 += 1u32, | |
None => {}, | |
} | |
} else { | |
result.insert(*number, (next_uid, 1u32)); | |
next_uid += 1; | |
} | |
} | |
result | |
} | |
fn load_file(path: &Path) -> String { | |
match File::open(path).read_to_string() { | |
Ok(s) => s, | |
Err(e) => panic!("failed to load file {}", path.display()), | |
} | |
} | |
fn load_cipher(path: &Path) -> Vec<RawCipherNumber> { | |
let text = load_file(path); | |
text.as_slice().split_str(", ") | |
.map(|x| -> RawCipherNumber { | |
FromStr::from_str(x).unwrap() | |
}) | |
.collect() | |
} | |
fn load_score_words(path: &Path, minimum_len: usize) -> Vec<String> { | |
let text = load_file(path); | |
text.as_slice().lines_any() | |
.filter(|x| x.len() > minimum_len) | |
.map(|x| String::from_str(x)) | |
.map(|x| x.into_ascii_lowercase()) | |
.collect() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment