Last active
December 18, 2018 19:13
-
-
Save CryZe/ba8ca34bb6233f00518b1e767070ee46 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use hashbrown::hash_map::{Entry, HashMap}; | |
use packed_simd::{m8x64, shuffle, u8x64}; | |
use std::{ | |
cmp::{Eq, PartialEq}, | |
hash::{Hash, Hasher}, | |
mem, | |
rc::Rc, | |
}; | |
fn shuffle_right(v: u8x64) -> u8x64 { | |
shuffle!( | |
v, | |
[ | |
63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, | |
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | |
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62 | |
] | |
) | |
} | |
fn shuffle_left(v: u8x64) -> u8x64 { | |
shuffle!( | |
v, | |
[ | |
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, | |
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, | |
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0 | |
] | |
) | |
} | |
pub fn step(src: &[u8x64; 52], dst: &mut [u8x64; 52]) { | |
let mask = m8x64::new( | |
false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, | |
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, | |
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, | |
true, true, true, true, true, true, false, false, false, false, false, false, false, false, | |
false, false, false, false, false, | |
); | |
let open = u8x64::splat(0); | |
let tree = u8x64::splat(1); | |
let lumber = u8x64::splat(2); | |
for (dst, src) in dst[1..51].iter_mut().zip(src.windows(3)) { | |
let t = src[0]; | |
let c = src[1]; | |
let b = src[2]; | |
let tl = shuffle_left(t); | |
let tr = shuffle_right(t); | |
let l = shuffle_left(c); | |
let r = shuffle_right(c); | |
let bl = shuffle_left(b); | |
let br = shuffle_right(b); | |
let tree_tl = tl & tree; | |
let tree_tr = tr & tree; | |
let tree_l = l & tree; | |
let tree_r = r & tree; | |
let tree_bl = bl & tree; | |
let tree_br = br & tree; | |
let tree_t = t & tree; | |
let tree_b = b & tree; | |
let tree_counts = tree_tl + tree_tr + tree_l + tree_r + tree_bl + tree_br + tree_t + tree_b; | |
let new_open = tree_counts.ge(u8x64::splat(3)).select(tree, open); | |
let lumber_tl = tl & lumber; | |
let lumber_tr = tr & lumber; | |
let lumber_l = l & lumber; | |
let lumber_r = r & lumber; | |
let lumber_bl = bl & lumber; | |
let lumber_br = br & lumber; | |
let lumber_t = t & lumber; | |
let lumber_b = b & lumber; | |
let lumber_counts_times_two = lumber_tl | |
+ lumber_tr | |
+ lumber_l | |
+ lumber_r | |
+ lumber_bl | |
+ lumber_br | |
+ lumber_t | |
+ lumber_b; | |
let new_tree = lumber_counts_times_two | |
.ge(u8x64::splat(6)) | |
.select(lumber, tree); | |
let new_lumber = (tree_counts.ge(u8x64::splat(1)) | |
& lumber_counts_times_two.ge(u8x64::splat(2))) | |
.select(lumber, open); | |
let is_lumber = c.eq(lumber); | |
let is_tree = c.eq(tree); | |
let new_cells = is_tree.select(new_tree, is_lumber.select(new_lumber, new_open)); | |
*dst = mask.select(new_cells, u8x64::splat(0)); | |
} | |
} | |
fn parse(input: &str) -> [u8x64; 52] { | |
let mut field = [u8x64::splat(0); 52]; | |
for (line, cells) in input.lines().zip(&mut field[1..51]) { | |
for (i, b) in line.bytes().enumerate() { | |
let val = match b { | |
b'|' => 1, | |
b'#' => 2, | |
_ => 0, | |
}; | |
*cells = cells.replace(i + 1, val); | |
} | |
} | |
field | |
} | |
fn area(field: &[u8x64; 52]) -> usize { | |
let tree_count = field[1..51] | |
.iter() | |
.map(|&cells| { | |
cells | |
.eq(u8x64::splat(1)) | |
.select(u8x64::splat(1), u8x64::splat(0)) | |
.wrapping_sum() as usize | |
}) | |
.sum::<usize>(); | |
let lumber_count = field[1..51] | |
.iter() | |
.map(|&cells| { | |
cells | |
.eq(u8x64::splat(2)) | |
.select(u8x64::splat(1), u8x64::splat(0)) | |
.wrapping_sum() as usize | |
}) | |
.sum::<usize>(); | |
tree_count * lumber_count | |
} | |
pub fn part1(input: &str) -> usize { | |
let mut input = parse(input); | |
let mut other = [u8x64::splat(0); 52]; | |
let (src, dst) = (&mut input, &mut other); | |
for _ in 0..10 { | |
step(src, dst); | |
mem::swap(src, dst); | |
} | |
area(src) | |
} | |
struct Field([u8x64; 52]); | |
impl Hash for Field { | |
fn hash<H>(&self, state: &mut H) | |
where | |
H: Hasher, | |
{ | |
Hash::hash_slice(&self.0, state); | |
} | |
} | |
impl PartialEq for Field { | |
fn eq(&self, other: &Field) -> bool { | |
self.0.iter().zip(other.0.iter()).all(|(a, b)| a == b) | |
} | |
} | |
impl Eq for Field {} | |
pub fn part2(input: &str) -> usize { | |
let mut input = parse(input); | |
let mut other = [u8x64::splat(0); 52]; | |
let (src, dst) = (&mut input, &mut other); | |
let mut seen = HashMap::new(); | |
let mut states = Vec::new(); | |
let total_minutes = 1000000000; | |
for minute in 0..total_minutes { | |
let snapshot = Rc::new(Field(*src)); | |
match seen.entry(snapshot.clone()) { | |
Entry::Vacant(e) => { | |
e.insert(minute); | |
states.push(snapshot); | |
} | |
Entry::Occupied(e) => { | |
let cycle_start = *e.get(); | |
let cycle_length = minute - cycle_start; | |
let cycle_index = (total_minutes - cycle_start) % cycle_length + cycle_start; | |
return area(&states[cycle_index as usize].0); | |
} | |
} | |
step(src, dst); | |
mem::swap(src, dst); | |
} | |
area(src) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
day_18::step: | |
sub rsp, 24 | |
vmovdqa xmmword, ptr, [rsp], xmm6 | |
xor eax, eax | |
vmovdqa64 zmm0, zmmword, ptr, [rip, +, .LCPI0_0] | |
vmovdqa64 zmm1, zmmword, ptr, [rip, +, .LCPI0_1] | |
vmovdqa64 zmm2, zmmword, ptr, [rip, +, .LCPI0_2] | |
vmovdqa64 zmm3, zmmword, ptr, [rip, +, .LCPI0_3] | |
vmovdqa64 zmm4, zmmword, ptr, [rip, +, .LCPI0_4] | |
vmovdqa64 zmm5, zmmword, ptr, [rip, +, .LCPI0_5] | |
vmovdqa64 zmm16, zmmword, ptr, [rip, +, .LCPI0_6] | |
vmovdqa64 zmm17, zmmword, ptr, [rip, +, .LCPI0_7] | |
.LBB0_1: | |
vmovdqa64 zmm18, zmmword, ptr, [rcx, +, rax] | |
vmovdqa64 zmm19, zmmword, ptr, [rcx, +, rax, +, 64] | |
vmovdqa64 zmm20, zmmword, ptr, [rcx, +, rax, +, 128] | |
vpermb zmm21, zmm0, zmm18 | |
vpermb zmm22, zmm1, zmm18 | |
vpermb zmm23, zmm0, zmm19 | |
vpermb zmm24, zmm1, zmm19 | |
vpermb zmm25, zmm0, zmm20 | |
vpermb zmm26, zmm1, zmm20 | |
vpandq zmm27, zmm21, zmm2 | |
vpandq zmm28, zmm22, zmm2 | |
vpandq zmm29, zmm23, zmm2 | |
vpaddb zmm28, zmm28, zmm29 | |
vpandq zmm29, zmm24, zmm2 | |
vpandq zmm30, zmm25, zmm2 | |
vpandq zmm31, zmm26, zmm2 | |
vpandq zmm6, zmm18, zmm2 | |
vpaddb zmm27, zmm27, zmm6 | |
vpaddb zmm27, zmm27, zmm28 | |
vpandq zmm28, zmm20, zmm2 | |
vpaddb zmm28, zmm29, zmm28 | |
vpaddb zmm28, zmm28, zmm30 | |
vpaddb zmm27, zmm27, zmm28 | |
vpaddb zmm27, zmm27, zmm31 | |
vpcmpnleub k1, zmm27, zmm3 | |
vmovdqu8 zmm28, {k1}, {z}, zmm2 | |
vpandq zmm21, zmm21, zmm3 | |
vpandq zmm22, zmm22, zmm3 | |
vpandq zmm23, zmm23, zmm3 | |
vpaddb zmm22, zmm22, zmm23 | |
vpandq zmm23, zmm24, zmm3 | |
vpandq zmm24, zmm25, zmm3 | |
vpandq zmm25, zmm26, zmm3 | |
vpandq zmm18, zmm18, zmm3 | |
vpaddb zmm18, zmm21, zmm18 | |
vpaddb zmm18, zmm18, zmm22 | |
vpandq zmm20, zmm20, zmm3 | |
vpaddb zmm20, zmm23, zmm20 | |
vpaddb zmm20, zmm20, zmm24 | |
vpaddb zmm18, zmm18, zmm20 | |
vpaddb zmm18, zmm18, zmm25 | |
vpcmpnleub k1, zmm18, zmm4 | |
vpblendmb zmm20, {k1}, zmm16, zmm5 | |
vpcmpnleub k1, zmm18, zmm2 | |
vptestmb k1, {k1}, zmm27, zmm27 | |
vmovdqu8 zmm18, {k1}, {z}, zmm5 | |
vpcmpeqb k1, zmm19, zmm3 | |
vpcmpeqb k2, zmm19, zmm2 | |
vmovdqu8 zmm28, {k1}, zmm18 | |
vmovdqu8 zmm28, {k2}, zmm20 | |
vpshufb zmm18, zmm28, zmm17 | |
vmovdqa64 zmmword, ptr, [rdx, +, rax, +, 64], zmm18 | |
add rax, 64 | |
cmp rax, 3200 | |
jne .LBB0_1 | |
vmovaps xmm6, xmmword, ptr, [rsp] | |
add rsp, 24 | |
vzeroupper | |
ret |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment