Skip to content

Instantly share code, notes, and snippets.

@int-e
Last active December 15, 2021 08:16
Embed
What would you like to do?
Bad Dumbo Octopodes
/*
Cf. https://adventofcode.com/2021/day/11
This program looks for configurations that take long before synchronizing.
Times reflecting speedup from using SSE3:
without sse3 (g++):
real 0m54.995s
with sse3 (g++):
real 0m11.220s
flash in even/odd order (g++):
real 0m9.051s
use clang++:
real 0m8.345s
Top results:
- 61015 steps to synchronize:
0238201656
0719426291
5112132117
4965173335
6669930847
7499711987
3366207277
3756024376
0790852422
8003411407
- loop of length 2192400 (reached after 66 steps):
5013125450
6697075902
6070937177
5506112146
7446828172
0235270390
7255575520
2114352283
7748772191
9712046648
*/
#include <functional>
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <cstring>
#include <random>
#include <cstdint>
#ifdef __SSE3__
#include <smmintrin.h>
#endif
#define M128(h,l) (__m128i)((unsigned __int128)UINT64_C(h) << 64 | UINT64_C(l))
struct grid {
char o[10][16];
int step();
private:
void inc(int x, int y);
};
int grid::step()
{
#ifdef __SSE3__
const __m128i N1 = M128(0x0000000000000101, 0x0101010101010101);
const __m128i N9 = M128(0x0000000000000909, 0x0909090909090909);
const __m128i L = M128(0x8080808080800807, 0x0605040302010080);
const __m128i M = M128(0x2020202020202020, 0x2020202020202020);
__m128i t[10];
for (int i = 0; i < 10; i++) {
t[i] = _mm_add_epi8(*(__m128i*)o[i], N1);
}
__m128i c = M128(0,0);
for(;;) {
__m128i a = M128(0,0);
for (int j = 0; j < 10; j++) {
int i = j*2 % 10 + j/5;
__m128i m = _mm_cmpgt_epi8(t[i], N9);
__m128i l = _mm_shuffle_epi8(m, L);
__m128i r = _mm_srli_si128(m, 1);
a = _mm_add_epi8(a, m);
__m128i o = _mm_add_epi8(m, _mm_add_epi8(l, r));
if (i > 0)
t[i-1] = _mm_sub_epi8(t[i-1], o);
t[i] = _mm_sub_epi8(_mm_sub_epi8(t[i], _mm_and_si128(m, M)), o);
if (i < 9)
t[i+1] = _mm_sub_epi8(t[i+1], o);
}
c = _mm_sub_epi8(c, a);
if (_mm_movemask_epi8(a) == 0)
break;
}
for (int i = 0; i < 10; i++) {
*(__m128i*)o[i] = _mm_max_epi8(t[i], M128(0,0));
}
c = _mm_add_epi8(c, _mm_srli_si128(c, 8));
return (uint64_t)(__int128)c % 255;
#else
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 10; y++) {
inc(x, y);
}
}
int r = 0;
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 10; y++) {
if (o[x][y] > 9) {
o[x][y] = 0;
r++;
}
}
}
return r;
#endif
}
void grid::inc(int x, int y)
{
if (++o[x][y] != 10)
return;
for (int nx = std::max(0, x-1); nx < std::min(10, x+2); nx++)
for (int ny = std::max(0, y-1); ny < std::min(10, y+2); ny++)
if (nx != x || ny != y)
inc(nx, ny);
}
int64_t val(grid t)
{
int64_t m = 0;
grid s = t;
for (int64_t i = 1; ; i++) {
if (i == (i & -i)) {
m = i;
s = t;
}
if (t.step() == 100) {
return i;
}
if (memcmp(t.o, s.o, sizeof(t.o)) == 0)
return -i-1 + m;
}
return -1;
}
int main(int argc, char **argv)
{
std::mt19937 generator;
if (argc > 1)
generator.seed(atoi(argv[1]));
std::uniform_int_distribution<int> distribution(0, 9);
auto rng = std::bind(distribution, generator);
int64_t best = 0, bestl = 0;
int64_t loops = 0, total = 0, totall = 0;
for (uint64_t i = 1; ; i++) {
grid t;
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 10; y++) {
t.o[x][y] = rng();
}
}
int64_t x = val(t);
if (x < 0) {
loops++;
totall += -x;
} else {
total += x;
}
if (x > best || x < bestl) {
if (x > best)
best = x;
else
bestl = x;
std::cerr << "\e[J" << std::flush;
std::cout << i << ": " << x << "\n";
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 10; y++) {
std::cout << (int)t.o[x][y];
}
std::cout << "\n";
}
std::cout << std::endl;
x = 0;
}
if ((i & 0xFFFF) == 0 || x == 0) {
std::cerr
<< i << ": "
<< std::setprecision(5) << (1.0*total/(i-loops)) << " (" << best << ") tts; "
<< std::setprecision(5) << (100.0*loops/i) << "% loops; "
<< std::setprecision(5) << (1.0*totall/loops) << " (" << -bestl << ") loop length"
<< "\e[J\r" << std::flush;
}
#if 0
if (best > 10000)
break;
#endif
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment