Bad Dumbo Octopodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Cf. https://adventofcode.com/2021/day/11 | |
This program looks for configurations that take long before synchronizing. | |
Times reflecting speedup from using SSE3: | |
without sse3 (g++): | |
real 0m54.995s | |
with sse3 (g++): | |
real 0m11.220s | |
flash in even/odd order (g++): | |
real 0m9.051s | |
use clang++: | |
real 0m8.345s | |
Top results: | |
- 61015 steps to synchronize: | |
0238201656 | |
0719426291 | |
5112132117 | |
4965173335 | |
6669930847 | |
7499711987 | |
3366207277 | |
3756024376 | |
0790852422 | |
8003411407 | |
- loop of length 2192400 (reached after 66 steps): | |
5013125450 | |
6697075902 | |
6070937177 | |
5506112146 | |
7446828172 | |
0235270390 | |
7255575520 | |
2114352283 | |
7748772191 | |
9712046648 | |
*/ | |
#include <functional> | |
#include <algorithm> | |
#include <iostream> | |
#include <iomanip> | |
#include <cstring> | |
#include <random> | |
#include <cstdint> | |
#ifdef __SSE3__ | |
#include <smmintrin.h> | |
#endif | |
#define M128(h,l) (__m128i)((unsigned __int128)UINT64_C(h) << 64 | UINT64_C(l)) | |
struct grid { | |
char o[10][16]; | |
int step(); | |
private: | |
void inc(int x, int y); | |
}; | |
int grid::step() | |
{ | |
#ifdef __SSE3__ | |
const __m128i N1 = M128(0x0000000000000101, 0x0101010101010101); | |
const __m128i N9 = M128(0x0000000000000909, 0x0909090909090909); | |
const __m128i L = M128(0x8080808080800807, 0x0605040302010080); | |
const __m128i M = M128(0x2020202020202020, 0x2020202020202020); | |
__m128i t[10]; | |
for (int i = 0; i < 10; i++) { | |
t[i] = _mm_add_epi8(*(__m128i*)o[i], N1); | |
} | |
__m128i c = M128(0,0); | |
for(;;) { | |
__m128i a = M128(0,0); | |
for (int j = 0; j < 10; j++) { | |
int i = j*2 % 10 + j/5; | |
__m128i m = _mm_cmpgt_epi8(t[i], N9); | |
__m128i l = _mm_shuffle_epi8(m, L); | |
__m128i r = _mm_srli_si128(m, 1); | |
a = _mm_add_epi8(a, m); | |
__m128i o = _mm_add_epi8(m, _mm_add_epi8(l, r)); | |
if (i > 0) | |
t[i-1] = _mm_sub_epi8(t[i-1], o); | |
t[i] = _mm_sub_epi8(_mm_sub_epi8(t[i], _mm_and_si128(m, M)), o); | |
if (i < 9) | |
t[i+1] = _mm_sub_epi8(t[i+1], o); | |
} | |
c = _mm_sub_epi8(c, a); | |
if (_mm_movemask_epi8(a) == 0) | |
break; | |
} | |
for (int i = 0; i < 10; i++) { | |
*(__m128i*)o[i] = _mm_max_epi8(t[i], M128(0,0)); | |
} | |
c = _mm_add_epi8(c, _mm_srli_si128(c, 8)); | |
return (uint64_t)(__int128)c % 255; | |
#else | |
for (int x = 0; x < 10; x++) { | |
for (int y = 0; y < 10; y++) { | |
inc(x, y); | |
} | |
} | |
int r = 0; | |
for (int x = 0; x < 10; x++) { | |
for (int y = 0; y < 10; y++) { | |
if (o[x][y] > 9) { | |
o[x][y] = 0; | |
r++; | |
} | |
} | |
} | |
return r; | |
#endif | |
} | |
void grid::inc(int x, int y) | |
{ | |
if (++o[x][y] != 10) | |
return; | |
for (int nx = std::max(0, x-1); nx < std::min(10, x+2); nx++) | |
for (int ny = std::max(0, y-1); ny < std::min(10, y+2); ny++) | |
if (nx != x || ny != y) | |
inc(nx, ny); | |
} | |
int64_t val(grid t) | |
{ | |
int64_t m = 0; | |
grid s = t; | |
for (int64_t i = 1; ; i++) { | |
if (i == (i & -i)) { | |
m = i; | |
s = t; | |
} | |
if (t.step() == 100) { | |
return i; | |
} | |
if (memcmp(t.o, s.o, sizeof(t.o)) == 0) | |
return -i-1 + m; | |
} | |
return -1; | |
} | |
int main(int argc, char **argv) | |
{ | |
std::mt19937 generator; | |
if (argc > 1) | |
generator.seed(atoi(argv[1])); | |
std::uniform_int_distribution<int> distribution(0, 9); | |
auto rng = std::bind(distribution, generator); | |
int64_t best = 0, bestl = 0; | |
int64_t loops = 0, total = 0, totall = 0; | |
for (uint64_t i = 1; ; i++) { | |
grid t; | |
for (int x = 0; x < 10; x++) { | |
for (int y = 0; y < 10; y++) { | |
t.o[x][y] = rng(); | |
} | |
} | |
int64_t x = val(t); | |
if (x < 0) { | |
loops++; | |
totall += -x; | |
} else { | |
total += x; | |
} | |
if (x > best || x < bestl) { | |
if (x > best) | |
best = x; | |
else | |
bestl = x; | |
std::cerr << "\e[J" << std::flush; | |
std::cout << i << ": " << x << "\n"; | |
for (int x = 0; x < 10; x++) { | |
for (int y = 0; y < 10; y++) { | |
std::cout << (int)t.o[x][y]; | |
} | |
std::cout << "\n"; | |
} | |
std::cout << std::endl; | |
x = 0; | |
} | |
if ((i & 0xFFFF) == 0 || x == 0) { | |
std::cerr | |
<< i << ": " | |
<< std::setprecision(5) << (1.0*total/(i-loops)) << " (" << best << ") tts; " | |
<< std::setprecision(5) << (100.0*loops/i) << "% loops; " | |
<< std::setprecision(5) << (1.0*totall/loops) << " (" << -bestl << ") loop length" | |
<< "\e[J\r" << std::flush; | |
} | |
#if 0 | |
if (best > 10000) | |
break; | |
#endif | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment