Skip to content

Instantly share code, notes, and snippets.

@Gordin508
Last active January 9, 2024 08:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gordin508/fba1aa154f92b8bfd136c742c695a683 to your computer and use it in GitHub Desktop.
Save Gordin508/fba1aa154f92b8bfd136c742c695a683 to your computer and use it in GitHub Desktop.
Advent of Code 2017 Day04 solution
#![allow(unused)]
#![allow(dead_code)]
/*
* An unoptimized implementation could simply use HashSets to solve the task,
* and the runtime of that is actually fine. However, HashSets are somewhat overkill
* for these inputs (only a handful of words per line).
* Therefore my idea is to implement alternative data structures which behave
* identically, but perform better for small input sets.
*
* My original idea was using probabilistic data structures with mock hash functions,
* which should be highly efficient for our small input size.
* However, I ended up scrapping a lot of implementation details, as the speedup only became noticable
* for significantly longer input lines.
*/
// drop-in replacement for HashSets
trait BloomFilter<'a> {
fn new() -> Self;
fn insert(&mut self, word: &'a str) -> bool;
fn clear(&mut self);
}
// DS to store words as references to strings
struct WordBloomFilter<'a> {
bitmap: u32,
words: Vec<&'a str>
}
// DS to store words as unordered vec of chars
// (uses u8 internally as all inputs are 'a'..='z')
struct SimpleUnorderedBloomFilter {
bitmap: u64,
words: Vec<[u8;26]>
}
impl<'a> BloomFilter<'a> for WordBloomFilter<'a> {
fn new() -> WordBloomFilter<'a> {
WordBloomFilter{bitmap: 0, words: Vec::new()}
}
fn insert(&mut self, word: &'a str) -> bool {
// RESOLUTION is how many characters we consider for our filter
// instead of using RESOLUTION hash functions, we simply consider the first RESOLUTION chars
// so hash_1(word) == word[0], hash_2(word) == word[1] ...
// The value 2 is experimentally chosen
const RESOLUTION: usize = 2;
let mut nums = [0u32; RESOLUTION];
for (i, n) in word.bytes().take(RESOLUTION).enumerate() {
nums[i] = 1 << (n - 0x61); // 0x61 == 'a'
}
if nums.iter().all(|n| self.bitmap & *n != 0 || *n == 0) {
if self.words.iter().any(|stored| *stored == word) {
return false
}
}
for n in nums {
self.bitmap |= n;
}
self.words.push(word);
true
}
fn clear(&mut self) {
self.bitmap = 0;
self.words.clear();
}
}
impl<'a> BloomFilter<'a> for SimpleUnorderedBloomFilter {
fn new() -> SimpleUnorderedBloomFilter {
SimpleUnorderedBloomFilter{bitmap: 0, words: Vec::new()}
}
fn insert(&mut self, word: &str) -> bool {
// well, this once was like a bloom filter, but all my 'optimizations'
// made the code slower
// My guess is that my original ideas would have been
// faster if the passphrases were much longer
// convert word into an array of character counts
let mut charcounts = [0u8; 26];
for b in word.bytes().map(|c| c - 0x61) {
charcounts[b as usize] += 1;
}
// we trust that no word is longer than 63 characters
// if we had such long words, we could simply omit this check
// as it saves almost no time anyway
let wordlen_bit = 1 << (word.len() - 1);
if wordlen_bit & self.bitmap > 0 && self.words.iter().any(|stored| *stored == charcounts) {
return false
}
self.bitmap |= wordlen_bit;
self.words.push(charcounts);
true
}
fn clear(&mut self) {
self.bitmap = 0;
self.words.clear();
}
}
// passphrase policy of part 1
fn password_valid_no_repeats(passphrase: &str) -> bool {
let mut seen = WordBloomFilter::new();
password_valid(&mut seen, passphrase)
}
// passphrase policy of part 2
fn password_valid_no_anagrams(passphrase: &str) -> bool {
let mut seen = SimpleUnorderedBloomFilter::new();
password_valid(&mut seen, passphrase)
}
// passphrase policy is based on first argument
// This function allows to reuse the 'seen' argument across invocations.
// This increases speed, as the filter is internally backed by a vector,
// which is costly if rapidly reinitialized.
fn password_valid<'a, T: BloomFilter<'a>>(seen: &mut T, passphrase: &'a str) -> bool {
let result = passphrase.split(' ').all(|word| seen.insert(word));
seen.clear();
result
}
fn part1(lines: &Vec<&str>) -> Option<i64> {
let mut seen = WordBloomFilter::new();
Some(lines
.iter()
.map(|line| password_valid(&mut seen, line))
.filter(|b| *b)
.count()
as i64)
}
fn part2(lines: &Vec<&str>) -> Option<i64> {
let mut seen = SimpleUnorderedBloomFilter::new();
Some(lines
.iter()
.map(|line| password_valid(&mut seen, line))
.filter(|b| *b)
.count()
as i64)
}
fn main() {
use std::fs;
use std::env;
use std::time::Instant;
let args: Vec<String> = env::args().collect();
let infile = args.get(1).unwrap_or_else(|| {
println!("Usage: {} <puzzle input>", args[0]);
std::process::exit(1);
});
let contents = fs::read_to_string(infile)
.expect("Could not read in file");
let lines: Vec<&str> = contents.lines().collect();
// execute part 1 and part 2, print their results if they exist
// later parts may follow, so we loop over the part functions
let parts = [part1, part2];
for (index, part) in parts.iter().enumerate() {
let partstart = Instant::now();
let result = part(&lines);
match result {
Some(result) => println!("Part {}: {}\t({:?})", index+1, result, partstart.elapsed()),
None => println!("Part {}: No result", index+1),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn valid_valid_simple() {
assert!(password_valid_no_repeats("aa bb cc dd ee"));
}
#[test]
fn valid_invalid_simple() {
assert!(!password_valid_no_repeats("aa bb cc dd aa"));
}
#[test]
fn valid_sharedprefix() {
assert!(password_valid_no_repeats("aa bb cc dd aaa"));
}
#[test]
fn valid_no_anagrams() {
assert!(password_valid_no_anagrams("abcde fghij"));
assert!(password_valid_no_anagrams("iiii oiii ooii oooi oooo"));
}
#[test]
fn invalid_anagrams() {
assert!(!password_valid_no_anagrams("abcde xyz ecdab"));
assert!(!password_valid_no_anagrams("oiii ioii iioi iiio"));
}
#[test]
fn valid_partial_anagrams() {
assert!(password_valid_no_anagrams("a ab abc abd abf abj"));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment