Skip to content

Instantly share code, notes, and snippets.

@kazuho
Created September 21, 2023 12:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kazuho/5185d03052c66fb7c40cfa4f0eef8fc8 to your computer and use it in GitHub Desktop.
Save kazuho/5185d03052c66fb7c40cfa4f0eef8fc8 to your computer and use it in GitHub Desktop.
optimized fizzbuzz in C using ymm registers
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <unistd.h>
/*
 * From here on, every use of the `inline` keyword expands to the GCC/Clang
 * always_inline attribute instead.
 */
#define inline __attribute__((always_inline))
/*
 * ASCII digits of the current number with its last digit removed (the "tens
 * prefix"), right-aligned so that the least significant digit is tens[15].
 * main() fills tens[0..15] with '0' and plus10() only touches indices <= 15;
 * tens[16..47] are never written in this file and therefore stay zero. That
 * zero tail is what lets emit30() load 32 bytes starting at
 * tens + 16 - tenslen and get "digits followed by zero bytes".
 */
static char tens[48] __attribute__((aligned(64)));
/*
 * Number of significant digits currently held at the end of tens[0..15].
 * It is 0 until plus10() runs for the first time, i.e. while the numbers
 * being printed have a single digit.
 */
static size_t tenslen = 0;
/* Count of lines accounted for so far; incremented once per output line. */
static int n;
/*
 * Pending output and the write cursor into it. Bytes in [outbuf, outp) have
 * been produced but not yet handed to write().
 */
static char outbuf[8000], *outp = outbuf;
/*
 * Write everything buffered in [outbuf, outp) to standard output (fd 1) and
 * reset outp to the start of the buffer.
 *
 * write() is allowed to transfer fewer bytes than requested (for example when
 * stdout is a pipe) and to fail with EINTR, so the call is repeated until the
 * whole span has been written. Any other error terminates the process with
 * status 1, because the output can no longer be delivered.
 */
static void flush(void)
{
    const char *cursor = outbuf;

    while (cursor < outp) {
        ssize_t written = write(1, cursor, (size_t)(outp - cursor));
        if (written < 0) {
            if (errno == EINTR)
                continue; /* interrupted before any byte was written: retry */
            exit(1);
        }
        cursor += written;
    }
    outp = outbuf;
}
/*
 * Drain the output buffer via flush() and terminate the process with a
 * successful exit status. Does not return to the caller.
 */
static void flush_and_exit(void)
{
    flush();
    exit(EXIT_SUCCESS);
}
/*
 * Add one to the decimal number stored as ASCII digits in tens[], whose least
 * significant digit is tens[15]; since tens[] holds the number without its
 * last digit, this advances the printed value by ten.
 *
 * Carries propagate towards lower indices. tenslen grows by one whenever the
 * carry leaves the current most significant digit.
 */
static void plus10(void)
{
    size_t pos = 16;

    /* The very first increment turns the empty prefix into a one-digit one. */
    if (tenslen == 0)
        tenslen = 1;

    for (;;) {
        --pos;
        char *digit = &tens[pos];

        /* Common case: no carry out of this digit. */
        if (__builtin_expect(++*digit <= '9', 1))
            return;

        /* Wrapped past '9': reset and carry into the next digit to the left. */
        *digit = '0';
        if (16 - pos == tenslen)
            ++tenslen;
    }
}
/*
 * Append up to 30 consecutive FizzBuzz lines (three decades) to the output
 * buffer at outp, using the digit prefix in tens[] / tenslen for the numbers.
 *
 * maxval: last line number to produce. As soon as the line counter n would
 * exceed it, flush_and_exit() is called and the function does not return.
 * If all 30 lines fit, the function returns normally WITHOUT flushing.
 *
 * Every EMIT performs an unconditional 32-byte store at outp, so the caller
 * must keep at least that much slack beyond the bytes actually committed.
 */
static inline void emit30(int maxval)
{
/*
 * EMIT(s1, s2, s3): s1 is "<last digit>\n" of a number line; s2 and s3 are
 * the word lines that directly follow it ("" when there is none).
 *
 * - s[] is 31 zero bytes, then s1 s2 s3, then 31 zero bytes. Reading 32 bytes
 *   from s + 31 - tenslen yields tenslen zero bytes followed by the text.
 *   (Adjacent literals are concatenated after escape processing, so the "\0"
 *   before s1 does not merge with s1's leading digit into an octal escape.)
 * - tens256 holds tenslen digit bytes followed by zero bytes, so OR-ing the
 *   two vectors produces "<prefix digits><s1><s2><s3>" in one 32-byte store.
 * - outp is first advanced past the number line only (tenslen + 2 bytes).
 *   The bytes of s2 / s3 are already in the buffer from that same store; they
 *   are committed by advancing outp only after the counter check for each
 *   of those lines passes.
 * - sizeof(s2) != 1 and sizeof(s3) != 1 are compile-time tests for a
 *   non-empty literal.
 *
 * NOTE(review): ++n is evaluated before the comparison, so if n ever equals
 * INT_MAX here the increment is a signed overflow. That looks reachable only
 * when maxval itself is INT_MAX -- confirm whether that input must be
 * supported.
 */
#define EMIT(s1, s2, s3) \
do { \
if (++n > maxval) \
flush_and_exit(); \
static const char s[] __attribute__((aligned(64))) = \
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" s1 s2 s3 \
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; \
_mm256_storeu_si256((__m256i *)outp, _mm256_or_si256(tens256, _mm256_loadu_si256((__m256i *)(s + 31 - tenslen)))); \
outp += tenslen + 2; \
if (sizeof(s2) != 1) { \
if (++n > maxval) \
flush_and_exit(); \
outp += sizeof(s2) - 1; \
if (sizeof(s3) != 1) { \
if (++n > maxval) \
flush_and_exit(); \
outp += sizeof(s3) - 1; \
} \
} \
} while (0)
/* Current prefix digits, left-aligned in the vector, zero bytes after them. */
__m256i tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen));
/* First decade: x1, x2 fizz, x4 buzz fizz, x7, x8 fizz buzz. */
EMIT("1\n", "", "");
EMIT("2\n", "fizz\n", "");
EMIT("4\n", "buzz\n", "fizz\n");
EMIT("7\n", "", "");
EMIT("8\n", "fizz\n", "buzz\n");
/* Advance the prefix and reload it for the second decade. */
plus10();
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen));
/*
 * Second decade: x1 fizz, x3, x4 fizzbuzz, x6, x7 fizz, x9 buzz fizz.
 * The trailing "fizz" is the first line of the following decade; it contains
 * no digits, so it can be emitted before the prefix is advanced.
 */
EMIT("1\n", "fizz\n", "");
EMIT("3\n", "", "");
EMIT("4\n", "fizzbuzz\n", "");
EMIT("6\n", "", "");
EMIT("7\n", "fizz\n", "");
EMIT("9\n", "buzz\n", "fizz\n");
/* Advance the prefix and reload it for the third decade. */
plus10();
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen));
/* Third decade: x2, x3 fizz buzz, x6 fizz, x8, x9 fizzbuzz. */
EMIT("2\n", "", "");
EMIT("3\n", "fizz\n", "buzz\n");
EMIT("6\n", "fizz\n", "");
EMIT("8\n", "", "");
EMIT("9\n", "fizzbuzz\n", "");
/* Leave the prefix ready for the next call's first decade. */
plus10();
}
/*
 * Print FizzBuzz lines for 1..<max-value> to standard output.
 *
 * argv[1] is parsed as a decimal int. With a missing or unparsable argument a
 * usage message goes to stderr and the process exits with status 1.
 *
 * While more than 30 lines remain, batches are produced with
 * emit30(INT_MAX): the int counter can never exceed INT_MAX, so the per-line
 * limit checks never fire there. The last batch is produced with the real
 * limit, which makes emit30() flush and exit as soon as the limit is passed.
 */
int main(int argc, char **argv)
{
    int maxval;

    if (argc < 2 || sscanf(argv[1], "%d", &maxval) != 1) {
        fprintf(stderr, "usage: %s <max-value>\n", argv[0]);
        exit(1);
    }

    /* Start the digit prefix as all '0'; tenslen decides how many are shown. */
    memset(tens, '0', 16);

    /*
     * Written as a subtraction so the comparison cannot overflow when maxval
     * is close to INT_MAX (n + 30 could).
     */
    while (maxval - n > 30) {
        emit30(INT_MAX);
        /* Flush early enough to keep headroom for the next batch. */
        if ((size_t)(outp - outbuf) >= sizeof(outbuf) - 500)
            flush();
    }

    emit30(maxval);

    /*
     * emit30() only returns here when exactly 30 lines remained, in which
     * case nothing has flushed them yet. Without this call the buffered
     * output would be lost whenever maxval is a positive multiple of 30.
     */
    flush();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment