Gordin508/main.rs

## main.rs
#![allow(unused)]
#![allow(dead_code)]


/*
 * An unoptimized implementation could simply use HashSets to solve the task,
 * and the runtime of that is actually fine. However, HashSets are somewhat overkill
 * for these inputs (only a handful of words per line).
 * Therefore my idea is to implement alternative data structures which behave
 * identically, but perform better for small input sets.
 *
 * My original idea was using probabilistic data structures with mock hash functions,
 * which should be highly efficient for our small input size.
 * However, I ended up scrapping a lot of implementation details, as the speedup only became noticable
 * for significantly longer input lines.
 */

// drop-in replacement for HashSets
trait BloomFilter<'a> {
    fn new() -> Self;
    fn insert(&mut self, word: &'a str) -> bool;
    fn clear(&mut self);
}

// DS to store words as references to strings
struct WordBloomFilter<'a> {
    bitmap: u32,
    words: Vec<&'a str>
}

// DS to store words as unordered vec of chars
// (uses u8 internally as all inputs are 'a'..='z')
struct SimpleUnorderedBloomFilter {
    bitmap: u64,
    words: Vec<[u8;26]>
}

impl<'a> BloomFilter<'a> for WordBloomFilter<'a> {
    fn new() -> WordBloomFilter<'a> {
        WordBloomFilter{bitmap: 0, words: Vec::new()}
    }

    fn insert(&mut self, word: &'a str) -> bool {
        // RESOLUTION is how many characters we consider for our filter
        // instead of using RESOLUTION hash functions, we simply consider the first RESOLUTION chars
        // so hash_1(word) == word[0], hash_2(word) == word[1] ...
        // The value 2 is experimentally chosen
        const RESOLUTION: usize = 2;
        let mut nums = [0u32; RESOLUTION];
        for (i, n) in word.bytes().take(RESOLUTION).enumerate() {
            nums[i] = 1 << (n - 0x61); // 0x61 == 'a'
        }
        if nums.iter().all(|n| self.bitmap & *n != 0 || *n == 0) {
            if self.words.iter().any(|stored| *stored == word) {
                return false
            }
        }
        for n in nums {
            self.bitmap |= n;
        }

        self.words.push(word);
        true
    }

    fn clear(&mut self) {
        self.bitmap = 0;
        self.words.clear();
    }
}

impl<'a> BloomFilter<'a> for SimpleUnorderedBloomFilter {
    fn new() -> SimpleUnorderedBloomFilter {
        SimpleUnorderedBloomFilter{bitmap: 0, words: Vec::new()}
    }

    fn insert(&mut self, word: &str) -> bool {
        // well, this once was like a bloom filter, but all my 'optimizations'
        // made the code slower
        // My guess is that my original ideas would have been
        // faster if the passphrases were much longer

        // convert word into an array of character counts
        let mut charcounts = [0u8; 26];

        for b in word.bytes().map(|c| c - 0x61) {
            charcounts[b as usize] += 1;
        }
        // we trust that no word is longer than 63 characters
        // if we had such long words, we could simply omit this check
        // as it saves almost no time anyway
        let wordlen_bit = 1 << (word.len() - 1);
        if wordlen_bit & self.bitmap > 0 && self.words.iter().any(|stored| *stored == charcounts) {
            return false
        }
        self.bitmap |= wordlen_bit;
        self.words.push(charcounts);
        true
    }

    fn clear(&mut self) {
        self.bitmap = 0;
        self.words.clear();
    }
}

// passphrase policy of part 1
fn password_valid_no_repeats(passphrase: &str) -> bool {
    let mut seen = WordBloomFilter::new();
    password_valid(&mut seen, passphrase)
}

// passphrase policy of part 2
fn password_valid_no_anagrams(passphrase: &str) -> bool {
    let mut seen = SimpleUnorderedBloomFilter::new();
    password_valid(&mut seen, passphrase)
}

// passphrase policy is based on first argument
// This function allows to reuse the 'seen' argument across invocations.
// This increases speed, as the filter is internally backed by a vector,
// which is costly if rapidly reinitialized.
fn password_valid<'a, T: BloomFilter<'a>>(seen: &mut T, passphrase: &'a str) -> bool {
    let result = passphrase.split(' ').all(|word| seen.insert(word));
    seen.clear();
    result
}

fn part1(lines: &Vec<&str>) -> Option<i64> {
    let mut seen = WordBloomFilter::new();
    Some(lines
         .iter()
         .map(|line| password_valid(&mut seen, line))
         .filter(|b| *b)
         .count()
         as i64)
}

fn part2(lines: &Vec<&str>) -> Option<i64> {
    let mut seen = SimpleUnorderedBloomFilter::new();
    Some(lines
         .iter()
         .map(|line| password_valid(&mut seen, line))
         .filter(|b| *b)
         .count()
         as i64)
}

fn main() {
    use std::fs;
    use std::env;
    use std::time::Instant;
    let args: Vec<String> =  env::args().collect();
    let infile = args.get(1).unwrap_or_else(|| {
        println!("Usage: {} <puzzle input>", args[0]);
        std::process::exit(1);
    });

    let contents = fs::read_to_string(infile)
        .expect("Could not read in file");

    let lines: Vec<&str> = contents.lines().collect();

    // execute part 1 and part 2, print their results if they exist
    // later parts may follow, so we loop over the part functions
    let parts = [part1, part2];
    for (index, part) in parts.iter().enumerate() {
        let partstart = Instant::now();
        let result = part(&lines);
        match result {
            Some(result) => println!("Part {}: {}\t({:?})", index+1, result, partstart.elapsed()),
            None => println!("Part {}: No result", index+1),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn valid_valid_simple() {
        assert!(password_valid_no_repeats("aa bb cc dd ee"));
    }

    #[test]
    fn valid_invalid_simple() {
        assert!(!password_valid_no_repeats("aa bb cc dd aa"));
    }

    #[test]
    fn valid_sharedprefix() {
        assert!(password_valid_no_repeats("aa bb cc dd aaa"));
    }

    #[test]
    fn valid_no_anagrams() {
        assert!(password_valid_no_anagrams("abcde fghij"));
        assert!(password_valid_no_anagrams("iiii oiii ooii oooi oooo"));
    }

    #[test]
    fn invalid_anagrams() {
        assert!(!password_valid_no_anagrams("abcde xyz ecdab"));
        assert!(!password_valid_no_anagrams("oiii ioii iioi iiio"));
    }

    #[test]
    fn valid_partial_anagrams() {
        assert!(password_valid_no_anagrams("a ab abc abd abf abj"));
    }
}
	#![allow(unused)]
	#![allow(dead_code)]


	/*
	* An unoptimized implementation could simply use HashSets to solve the task,
	* and the runtime of that is actually fine. However, HashSets are somewhat overkill
	* for these inputs (only a handful of words per line).
	* Therefore my idea is to implement alternative data structures which behave
	* identically, but perform better for small input sets.
	*
	* My original idea was using probabilistic data structures with mock hash functions,
	* which should be highly efficient for our small input size.
	* However, I ended up scrapping a lot of implementation details, as the speedup only became noticable
	* for significantly longer input lines.
	*/

	// drop-in replacement for HashSets
	trait BloomFilter<'a> {
	fn new() -> Self;
	fn insert(&mut self, word: &'a str) -> bool;
	fn clear(&mut self);
	}

	// DS to store words as references to strings
	struct WordBloomFilter<'a> {
	bitmap: u32,
	words: Vec<&'a str>
	}

	// DS to store words as unordered vec of chars
	// (uses u8 internally as all inputs are 'a'..='z')
	struct SimpleUnorderedBloomFilter {
	bitmap: u64,
	words: Vec<[u8;26]>
	}

	impl<'a> BloomFilter<'a> for WordBloomFilter<'a> {
	fn new() -> WordBloomFilter<'a> {
	WordBloomFilter{bitmap: 0, words: Vec::new()}
	}

	fn insert(&mut self, word: &'a str) -> bool {
	// RESOLUTION is how many characters we consider for our filter
	// instead of using RESOLUTION hash functions, we simply consider the first RESOLUTION chars
	// so hash_1(word) == word[0], hash_2(word) == word[1] ...
	// The value 2 is experimentally chosen
	const RESOLUTION: usize = 2;
	let mut nums = [0u32; RESOLUTION];
	for (i, n) in word.bytes().take(RESOLUTION).enumerate() {
	nums[i] = 1 << (n - 0x61); // 0x61 == 'a'
	}
	if nums.iter().all(\|n\| self.bitmap & n != 0 \|\| n == 0) {
	if self.words.iter().any(\|stored\| *stored == word) {
	return false
	}
	}
	for n in nums {
	self.bitmap \|= n;
	}

	self.words.push(word);
	true
	}

	fn clear(&mut self) {
	self.bitmap = 0;
	self.words.clear();
	}
	}

	impl<'a> BloomFilter<'a> for SimpleUnorderedBloomFilter {
	fn new() -> SimpleUnorderedBloomFilter {
	SimpleUnorderedBloomFilter{bitmap: 0, words: Vec::new()}
	}

	fn insert(&mut self, word: &str) -> bool {
	// well, this once was like a bloom filter, but all my 'optimizations'
	// made the code slower
	// My guess is that my original ideas would have been
	// faster if the passphrases were much longer

	// convert word into an array of character counts
	let mut charcounts = [0u8; 26];

	for b in word.bytes().map(\|c\| c - 0x61) {
	charcounts[b as usize] += 1;
	}
	// we trust that no word is longer than 63 characters
	// if we had such long words, we could simply omit this check
	// as it saves almost no time anyway
	let wordlen_bit = 1 << (word.len() - 1);
	if wordlen_bit & self.bitmap > 0 && self.words.iter().any(\|stored\| *stored == charcounts) {
	return false
	}
	self.bitmap \|= wordlen_bit;
	self.words.push(charcounts);
	true
	}

	fn clear(&mut self) {
	self.bitmap = 0;
	self.words.clear();
	}
	}

	// passphrase policy of part 1
	fn password_valid_no_repeats(passphrase: &str) -> bool {
	let mut seen = WordBloomFilter::new();
	password_valid(&mut seen, passphrase)
	}

	// passphrase policy of part 2
	fn password_valid_no_anagrams(passphrase: &str) -> bool {
	let mut seen = SimpleUnorderedBloomFilter::new();
	password_valid(&mut seen, passphrase)
	}

	// passphrase policy is based on first argument
	// This function allows to reuse the 'seen' argument across invocations.
	// This increases speed, as the filter is internally backed by a vector,
	// which is costly if rapidly reinitialized.
	fn password_valid<'a, T: BloomFilter<'a>>(seen: &mut T, passphrase: &'a str) -> bool {
	let result = passphrase.split(' ').all(\|word\| seen.insert(word));
	seen.clear();
	result
	}

	fn part1(lines: &Vec<&str>) -> Option<i64> {
	let mut seen = WordBloomFilter::new();
	Some(lines
	.iter()
	.map(\|line\| password_valid(&mut seen, line))
	.filter(\|b\| *b)
	.count()
	as i64)
	}

	fn part2(lines: &Vec<&str>) -> Option<i64> {
	let mut seen = SimpleUnorderedBloomFilter::new();
	Some(lines
	.iter()
	.map(\|line\| password_valid(&mut seen, line))
	.filter(\|b\| *b)
	.count()
	as i64)
	}

	fn main() {
	use std::fs;
	use std::env;
	use std::time::Instant;
	let args: Vec<String> = env::args().collect();
	let infile = args.get(1).unwrap_or_else(\|\| {
	println!("Usage: {} <puzzle input>", args[0]);
	std::process::exit(1);
	});

	let contents = fs::read_to_string(infile)
	.expect("Could not read in file");

	let lines: Vec<&str> = contents.lines().collect();

	// execute part 1 and part 2, print their results if they exist
	// later parts may follow, so we loop over the part functions
	let parts = [part1, part2];
	for (index, part) in parts.iter().enumerate() {
	let partstart = Instant::now();
	let result = part(&lines);
	match result {
	Some(result) => println!("Part {}: {}\t({:?})", index+1, result, partstart.elapsed()),
	None => println!("Part {}: No result", index+1),
	}
	}
	}

	#[cfg(test)]
	mod tests {
	use super::*;

	#[test]
	fn valid_valid_simple() {
	assert!(password_valid_no_repeats("aa bb cc dd ee"));
	}

	#[test]
	fn valid_invalid_simple() {
	assert!(!password_valid_no_repeats("aa bb cc dd aa"));
	}

	#[test]
	fn valid_sharedprefix() {
	assert!(password_valid_no_repeats("aa bb cc dd aaa"));
	}

	#[test]
	fn valid_no_anagrams() {
	assert!(password_valid_no_anagrams("abcde fghij"));
	assert!(password_valid_no_anagrams("iiii oiii ooii oooi oooo"));
	}

	#[test]
	fn invalid_anagrams() {
	assert!(!password_valid_no_anagrams("abcde xyz ecdab"));
	assert!(!password_valid_no_anagrams("oiii ioii iioi iiio"));
	}

	#[test]
	fn valid_partial_anagrams() {
	assert!(password_valid_no_anagrams("a ab abc abd abf abj"));
	}
	}