Skip to content

Instantly share code, notes, and snippets.

@kazuho
Created September 21, 2023 12:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kazuho/c58932c2e66a9ec69ad8acfce22af837 to your computer and use it in GitHub Desktop.
Save kazuho/c58932c2e66a9ec69ad8acfce22af837 to your computer and use it in GitHub Desktop.
fizzbuzz using ymm + non-overlapping writes of uint32
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <unistd.h>
/*
 * From here on, every use of the `inline` keyword in this file expands to
 * the GCC/Clang always_inline attribute. The macro is defined after the
 * #include lines, so the system headers above are not affected.
 */
#define inline __attribute__((always_inline))
/*
 * Decimal counter for the "tens" part of the current number, kept as ASCII
 * digits. main() fills tens[0..15] with '0'; plus10() increments the counter
 * with the least significant digit at tens[15]. Bytes 16..47 are never
 * written and stay zero (static storage), which keeps the 32-byte loads
 * starting at tens + 16 - tenslen inside the array.
 */
static char tens[48] __attribute__((aligned(64)));
/* Number of significant digits in `tens`; stays 0 until plus10() first runs. */
static size_t tenslen = 0;
/*
 * n:     the last number accounted for in the output (starts at 0).
 * regen: while non-zero, emit30() rewrites each line with a full 32-byte
 *        store instead of patching only the last four tens digits.
 *        plus10() sets it to 2; main() counts it back down after flushes.
 */
static int n, regen = 1;
/* Cached 32-bit copy of tens[12..15], maintained by plus10(). */
static unsigned tens32;
/* Output staging buffer and the current write position inside it. */
static char outbuf[8000], *outp = outbuf;
/*
 * Write everything between outbuf and outp to standard output (fd 1), then
 * reset outp to the start of the buffer.
 *
 * write(2) may transfer fewer bytes than requested, so keep writing until
 * the whole pending range has been sent. An interrupted call (EINTR) is
 * retried; any other failure, or a call that makes no progress, is reported
 * on stderr and terminates the process with status 1, because continuing
 * would silently drop output.
 *
 * The contents of outbuf are left untouched: emit30()'s fast path patches
 * text that is already in the buffer.
 */
static void flush(void)
{
    const char *cur = outbuf;

    while (cur < outp) {
        ssize_t written = write(1, cur, (size_t)(outp - cur));

        if (written > 0) {
            cur += written;
            continue;
        }
        if (written < 0 && errno == EINTR)
            continue; /* interrupted before any byte was written: retry */
        if (written < 0)
            perror("write");
        else
            fputs("write: no progress\n", stderr);
        exit(1);
    }
    outp = outbuf;
}
/*
 * Drain the pending output via flush() and terminate the process with a
 * successful exit status. Does not return.
 */
static void flush_and_exit(void)
{
    flush();            /* push out whatever is still buffered */
    exit(EXIT_SUCCESS); /* same status as exit(0) */
}
/*
 * Advance the ASCII decimal counter in tens[0..15] by one, i.e. move on to
 * the next group of ten numbers.
 *
 * Side effects:
 *   - tenslen becomes 1 on the first call and grows whenever the carry
 *     overflows the current most significant digit;
 *   - regen is set to 2 when a digit at index <= 12 wraps, i.e. the carry
 *     reaches a digit outside tens[12..15], which the 4-byte tens32 patch
 *     in emit30() cannot update;
 *   - tens32 is kept equal to the four bytes tens[12..15] once it has been
 *     reloaded at least once.
 */
static void plus10(void)
{
    if (tenslen == 0)
        tenslen = 1;

    size_t i;
    for (i = 15;; i--) {
        /* Common case: the digit does not wrap and no carry is needed. */
        if (__builtin_expect(++tens[i] <= '9', 1))
            break;
        tens[i] = '0';
        if (16 - i == tenslen)
            ++tenslen; /* carry runs past the current leading digit */
        if (i <= 12)
            regen = 2; /* a digit outside the tens32 window will change */
    }

    if (i == 15) {
        /*
         * Only tens[15] changed. Bump the top byte of the cached word
         * (tens[15] on little-endian x86) instead of reloading four bytes
         * right after a one-byte store ("avoid partial forward").
         */
        tens32 += 0x1000000;
    } else {
        /*
         * Reload the cached word. memcpy is used instead of casting the
         * char array to unsigned *, which would violate the strict-aliasing
         * rules; compilers turn this into a single 32-bit load.
         */
        memcpy(&tens32, tens + 12, sizeof(tens32));
    }
}
/*
 * Append the FizzBuzz lines for the next 30 consecutive numbers to outbuf,
 * advancing n and outp, and calling plus10() after each group of ten.
 *
 * maxval: as soon as incrementing n would exceed maxval, the function calls
 *         flush_and_exit() and never returns.
 *
 * On a normal return nothing has been flushed; the caller is responsible
 * for calling flush(). The caller must also leave enough room in outbuf:
 * each store below writes 32 bytes at outp regardless of the line length.
 */
static inline void emit30(int maxval)
{
/*
 * EMIT(s1, s2, s3) emits one numeric line followed by up to two literal
 * lines. All three arguments are string literals:
 *   s1 - last digit of the number plus '\n' (always two characters here);
 *   s2 - literal line for the next number, or "" if there is none;
 *   s3 - literal line for the number after that, or "" (only used when s2
 *        is non-empty).
 *
 * Slow path (regen != 0): `s` holds s1 s2 s3 with NUL padding on both
 * sides. Loading 32 bytes from s + 31 - tenslen yields tenslen NUL bytes
 * followed by the text; OR-ing that with tens256 (tens digits in the first
 * tenslen bytes, zero elsewhere) produces the full line(s), which are
 * written with a single 32-byte store.
 *
 * Fast path (regen == 0): only the last four tens digits are overwritten,
 * via a 4-byte store of tens32 ending at outp + tenslen. The rest of the
 * text is presumably still in outbuf from an earlier pass over the same
 * buffer position (NOTE(review): relies on every buffer fill having the
 * same layout - confirm against main()).
 *
 * n is checked before the numeric line is written. For s2 and s3 the text
 * may already be in the buffer, but outp is only advanced past it if n is
 * still within maxval, so text beyond maxval is never flushed.
 */
#define EMIT(s1, s2, s3) \
do { \
if (++n > maxval) flush_and_exit(); \
if (__builtin_expect(regen, 0)) { \
static const char s[] __attribute__((aligned(64))) = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" s1 s2 s3 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; \
_mm256_storeu_si256((__m256i *)outp, \
_mm256_or_si256(tens256, \
_mm256_loadu_si256((__m256i *)(s + 31 - tenslen)))); \
} else { \
*(unsigned *)(outp + tenslen - 4) = tens32; \
} \
outp += tenslen + 2; \
if (sizeof(s2) != 1) { \
if (++n > maxval) flush_and_exit(); \
outp += sizeof(s2) - 1; \
if (sizeof(s3) != 1) { \
if (++n > maxval) flush_and_exit(); \
outp += sizeof(s3) - 1; \
} \
} \
} while (0)
/*
 * The significant tens digits, loaded so that they occupy the first tenslen
 * bytes. Only loaded, and only read by EMIT, while regen is non-zero.
 */
__m256i tens256;
if (__builtin_expect(regen, 0))
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen));
/* x1 .. x0 of the first decade: 1 2 fizz 4 buzz fizz 7 8 fizz buzz */
EMIT("1\n", "", "");
EMIT("2\n", "fizz\n", "");
EMIT("4\n", "buzz\n", "fizz\n");
EMIT("7\n", "", "");
EMIT("8\n", "fizz\n", "buzz\n");
plus10();
/* plus10() may have changed tens/tenslen/regen, so refresh the vector. */
if (__builtin_expect(regen, 0))
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen));
/* second decade: 11 fizz 13 14 fizzbuzz 16 17 fizz 19 buzz fizz */
EMIT("1\n", "fizz\n", "");
EMIT("3\n", "", "");
EMIT("4\n", "fizzbuzz\n", "");
EMIT("6\n", "", "");
EMIT("7\n", "fizz\n", "");
EMIT("9\n", "buzz\n", "fizz\n");
plus10();
if (__builtin_expect(regen, 0))
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen));
/* third decade: 22 23 fizz buzz 26 fizz 28 29 fizzbuzz */
EMIT("2\n", "", "");
EMIT("3\n", "fizz\n", "buzz\n");
EMIT("6\n", "fizz\n", "");
EMIT("8\n", "", "");
EMIT("9\n", "fizzbuzz\n", "");
plus10();
}
/*
 * Print FizzBuzz for 1..<max-value> on standard output.
 *
 * argv[1] is parsed as a decimal int. Full blocks of 30 numbers are emitted
 * with no limit check (emit30(INT_MAX)) while more than 30 numbers remain;
 * the last block is emitted with the real limit.
 *
 * Returns 0 on success; exits with status 1 on a usage error.
 */
int main(int argc, char **argv)
{
    int maxval;
    if (argc < 2 || sscanf(argv[1], "%d", &maxval) != 1) {
        fprintf(stderr, "usage: %s <max-value>\n", argv[0]);
        exit(1);
    }

    memset(tens, '0', 16);

    /* First number that belongs to the buffer fill currently in progress. */
    int prevgen = 1;
    while (n + 30 < maxval) {
        emit30(INT_MAX);
        /* Flush once fewer than 500 bytes of headroom remain. */
        if (__builtin_expect((size_t)(outp - outbuf) >= sizeof(outbuf) - 500, 0)) {
            flush();
            /*
             * Step regen towards 0 only when the fill just flushed started
             * at >= 10000, i.e. every number in it had at least four tens
             * digits, so the 4-byte patch in emit30() stays inside the
             * line. Presumably a regen of 2 needs two complete fills
             * before the buffer can be trusted again.
             */
            if (regen != 0 && n >= 10000 && prevgen >= 10000)
                --regen;
            prevgen = n + 1;
        }
    }

    emit30(maxval);
    /*
     * emit30() only flushes on the path that exceeds maxval. When maxval is
     * a positive multiple of 30, the last block ends exactly on maxval and
     * emit30() returns normally, so the buffered tail must be written here.
     */
    flush();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment