Skip to content

Instantly share code, notes, and snippets.

@CryZe
Last active December 18, 2018 19:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CryZe/ba8ca34bb6233f00518b1e767070ee46 to your computer and use it in GitHub Desktop.
Save CryZe/ba8ca34bb6233f00518b1e767070ee46 to your computer and use it in GitHub Desktop.
use hashbrown::hash_map::{Entry, HashMap};
use packed_simd::{m8x64, shuffle, u8x64};
use std::{
cmp::{Eq, PartialEq},
hash::{Hash, Hasher},
mem,
rc::Rc,
};
fn shuffle_right(v: u8x64) -> u8x64 {
shuffle!(
v,
[
63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62
]
)
}
fn shuffle_left(v: u8x64) -> u8x64 {
shuffle!(
v,
[
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0
]
)
}
pub fn step(src: &[u8x64; 52], dst: &mut [u8x64; 52]) {
let mask = m8x64::new(
false, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
true, true, true, true, true, true, false, false, false, false, false, false, false, false,
false, false, false, false, false,
);
let open = u8x64::splat(0);
let tree = u8x64::splat(1);
let lumber = u8x64::splat(2);
for (dst, src) in dst[1..51].iter_mut().zip(src.windows(3)) {
let t = src[0];
let c = src[1];
let b = src[2];
let tl = shuffle_left(t);
let tr = shuffle_right(t);
let l = shuffle_left(c);
let r = shuffle_right(c);
let bl = shuffle_left(b);
let br = shuffle_right(b);
let tree_tl = tl & tree;
let tree_tr = tr & tree;
let tree_l = l & tree;
let tree_r = r & tree;
let tree_bl = bl & tree;
let tree_br = br & tree;
let tree_t = t & tree;
let tree_b = b & tree;
let tree_counts = tree_tl + tree_tr + tree_l + tree_r + tree_bl + tree_br + tree_t + tree_b;
let new_open = tree_counts.ge(u8x64::splat(3)).select(tree, open);
let lumber_tl = tl & lumber;
let lumber_tr = tr & lumber;
let lumber_l = l & lumber;
let lumber_r = r & lumber;
let lumber_bl = bl & lumber;
let lumber_br = br & lumber;
let lumber_t = t & lumber;
let lumber_b = b & lumber;
let lumber_counts_times_two = lumber_tl
+ lumber_tr
+ lumber_l
+ lumber_r
+ lumber_bl
+ lumber_br
+ lumber_t
+ lumber_b;
let new_tree = lumber_counts_times_two
.ge(u8x64::splat(6))
.select(lumber, tree);
let new_lumber = (tree_counts.ge(u8x64::splat(1))
& lumber_counts_times_two.ge(u8x64::splat(2)))
.select(lumber, open);
let is_lumber = c.eq(lumber);
let is_tree = c.eq(tree);
let new_cells = is_tree.select(new_tree, is_lumber.select(new_lumber, new_open));
*dst = mask.select(new_cells, u8x64::splat(0));
}
}
fn parse(input: &str) -> [u8x64; 52] {
let mut field = [u8x64::splat(0); 52];
for (line, cells) in input.lines().zip(&mut field[1..51]) {
for (i, b) in line.bytes().enumerate() {
let val = match b {
b'|' => 1,
b'#' => 2,
_ => 0,
};
*cells = cells.replace(i + 1, val);
}
}
field
}
fn area(field: &[u8x64; 52]) -> usize {
let tree_count = field[1..51]
.iter()
.map(|&cells| {
cells
.eq(u8x64::splat(1))
.select(u8x64::splat(1), u8x64::splat(0))
.wrapping_sum() as usize
})
.sum::<usize>();
let lumber_count = field[1..51]
.iter()
.map(|&cells| {
cells
.eq(u8x64::splat(2))
.select(u8x64::splat(1), u8x64::splat(0))
.wrapping_sum() as usize
})
.sum::<usize>();
tree_count * lumber_count
}
pub fn part1(input: &str) -> usize {
let mut input = parse(input);
let mut other = [u8x64::splat(0); 52];
let (src, dst) = (&mut input, &mut other);
for _ in 0..10 {
step(src, dst);
mem::swap(src, dst);
}
area(src)
}
struct Field([u8x64; 52]);
impl Hash for Field {
fn hash<H>(&self, state: &mut H)
where
H: Hasher,
{
Hash::hash_slice(&self.0, state);
}
}
impl PartialEq for Field {
fn eq(&self, other: &Field) -> bool {
self.0.iter().zip(other.0.iter()).all(|(a, b)| a == b)
}
}
impl Eq for Field {}
pub fn part2(input: &str) -> usize {
let mut input = parse(input);
let mut other = [u8x64::splat(0); 52];
let (src, dst) = (&mut input, &mut other);
let mut seen = HashMap::new();
let mut states = Vec::new();
let total_minutes = 1000000000;
for minute in 0..total_minutes {
let snapshot = Rc::new(Field(*src));
match seen.entry(snapshot.clone()) {
Entry::Vacant(e) => {
e.insert(minute);
states.push(snapshot);
}
Entry::Occupied(e) => {
let cycle_start = *e.get();
let cycle_length = minute - cycle_start;
let cycle_index = (total_minutes - cycle_start) % cycle_length + cycle_start;
return area(&states[cycle_index as usize].0);
}
}
step(src, dst);
mem::swap(src, dst);
}
area(src)
}
day_18::step:
sub rsp, 24
vmovdqa xmmword, ptr, [rsp], xmm6
xor eax, eax
vmovdqa64 zmm0, zmmword, ptr, [rip, +, .LCPI0_0]
vmovdqa64 zmm1, zmmword, ptr, [rip, +, .LCPI0_1]
vmovdqa64 zmm2, zmmword, ptr, [rip, +, .LCPI0_2]
vmovdqa64 zmm3, zmmword, ptr, [rip, +, .LCPI0_3]
vmovdqa64 zmm4, zmmword, ptr, [rip, +, .LCPI0_4]
vmovdqa64 zmm5, zmmword, ptr, [rip, +, .LCPI0_5]
vmovdqa64 zmm16, zmmword, ptr, [rip, +, .LCPI0_6]
vmovdqa64 zmm17, zmmword, ptr, [rip, +, .LCPI0_7]
.LBB0_1:
vmovdqa64 zmm18, zmmword, ptr, [rcx, +, rax]
vmovdqa64 zmm19, zmmword, ptr, [rcx, +, rax, +, 64]
vmovdqa64 zmm20, zmmword, ptr, [rcx, +, rax, +, 128]
vpermb zmm21, zmm0, zmm18
vpermb zmm22, zmm1, zmm18
vpermb zmm23, zmm0, zmm19
vpermb zmm24, zmm1, zmm19
vpermb zmm25, zmm0, zmm20
vpermb zmm26, zmm1, zmm20
vpandq zmm27, zmm21, zmm2
vpandq zmm28, zmm22, zmm2
vpandq zmm29, zmm23, zmm2
vpaddb zmm28, zmm28, zmm29
vpandq zmm29, zmm24, zmm2
vpandq zmm30, zmm25, zmm2
vpandq zmm31, zmm26, zmm2
vpandq zmm6, zmm18, zmm2
vpaddb zmm27, zmm27, zmm6
vpaddb zmm27, zmm27, zmm28
vpandq zmm28, zmm20, zmm2
vpaddb zmm28, zmm29, zmm28
vpaddb zmm28, zmm28, zmm30
vpaddb zmm27, zmm27, zmm28
vpaddb zmm27, zmm27, zmm31
vpcmpnleub k1, zmm27, zmm3
vmovdqu8 zmm28, {k1}, {z}, zmm2
vpandq zmm21, zmm21, zmm3
vpandq zmm22, zmm22, zmm3
vpandq zmm23, zmm23, zmm3
vpaddb zmm22, zmm22, zmm23
vpandq zmm23, zmm24, zmm3
vpandq zmm24, zmm25, zmm3
vpandq zmm25, zmm26, zmm3
vpandq zmm18, zmm18, zmm3
vpaddb zmm18, zmm21, zmm18
vpaddb zmm18, zmm18, zmm22
vpandq zmm20, zmm20, zmm3
vpaddb zmm20, zmm23, zmm20
vpaddb zmm20, zmm20, zmm24
vpaddb zmm18, zmm18, zmm20
vpaddb zmm18, zmm18, zmm25
vpcmpnleub k1, zmm18, zmm4
vpblendmb zmm20, {k1}, zmm16, zmm5
vpcmpnleub k1, zmm18, zmm2
vptestmb k1, {k1}, zmm27, zmm27
vmovdqu8 zmm18, {k1}, {z}, zmm5
vpcmpeqb k1, zmm19, zmm3
vpcmpeqb k2, zmm19, zmm2
vmovdqu8 zmm28, {k1}, zmm18
vmovdqu8 zmm28, {k2}, zmm20
vpshufb zmm18, zmm28, zmm17
vmovdqa64 zmmword, ptr, [rdx, +, rax, +, 64], zmm18
add rax, 64
cmp rax, 3200
jne .LBB0_1
vmovaps xmm6, xmmword, ptr, [rsp]
add rsp, 24
vzeroupper
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment