Created
September 21, 2023 12:25
-
-
Save kazuho/5185d03052c66fb7c40cfa4f0eef8fc8 to your computer and use it in GitHub Desktop.
optimized fizzbuzz in C using ymm registers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <immintrin.h>
#include <unistd.h>
/*
 * Force inlining of functions declared `inline`.  The expansion keeps a real
 * inline specifier (`__inline__`) next to the attribute; GCC warns that an
 * always_inline function "might not be inlinable" when the specifier is
 * missing.
 */
#define inline __inline__ __attribute__((always_inline))

/*
 * ASCII digits of the current "tens" counter, right-aligned so that the last
 * digit sits at tens[15].  Bytes 16..47 are never written and stay zero, which
 * lets a 32-byte load starting anywhere in tens[0..16] stay inside the array.
 */
static char tens[48] __attribute__((aligned(64)));
/* Number of significant digits currently held in tens[] (0 = no tens digit). */
static size_t tenslen = 0;
/* Count of FizzBuzz lines produced so far. */
static int n;
/* Output staging buffer and the current write position inside it. */
static char outbuf[8000], *outp = outbuf;
static void flush(void) | |
{ | |
write(1, outbuf, outp - outbuf); | |
outp = outbuf; | |
} | |
/* Drain the pending output via flush(), then end the process with status 0. */
static void flush_and_exit(void)
{
    flush();
    exit(EXIT_SUCCESS);
}
static void plus10(void) | |
{ | |
if (tenslen == 0) | |
tenslen = 1; | |
for (size_t i = 15;; i--) { | |
if (__builtin_expect(++tens[i] <= '9', 1)) | |
return; | |
tens[i] = '0'; | |
if (16 - i == tenslen) | |
++tenslen; | |
} | |
} | |
static inline void emit30(int maxval) | |
{ | |
#define EMIT(s1, s2, s3) \ | |
do { \ | |
if (++n > maxval) \ | |
flush_and_exit(); \ | |
static const char s[] __attribute__((aligned(64))) = \ | |
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" s1 s2 s3 \ | |
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; \ | |
_mm256_storeu_si256((__m256i *)outp, _mm256_or_si256(tens256, _mm256_loadu_si256((__m256i *)(s + 31 - tenslen)))); \ | |
outp += tenslen + 2; \ | |
if (sizeof(s2) != 1) { \ | |
if (++n > maxval) \ | |
flush_and_exit(); \ | |
outp += sizeof(s2) - 1; \ | |
if (sizeof(s3) != 1) { \ | |
if (++n > maxval) \ | |
flush_and_exit(); \ | |
outp += sizeof(s3) - 1; \ | |
} \ | |
} \ | |
} while (0) | |
__m256i tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen)); | |
EMIT("1\n", "", ""); | |
EMIT("2\n", "fizz\n", ""); | |
EMIT("4\n", "buzz\n", "fizz\n"); | |
EMIT("7\n", "", ""); | |
EMIT("8\n", "fizz\n", "buzz\n"); | |
plus10(); | |
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen)); | |
EMIT("1\n", "fizz\n", ""); | |
EMIT("3\n", "", ""); | |
EMIT("4\n", "fizzbuzz\n", ""); | |
EMIT("6\n", "", ""); | |
EMIT("7\n", "fizz\n", ""); | |
EMIT("9\n", "buzz\n", "fizz\n"); | |
plus10(); | |
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen)); | |
EMIT("2\n", "", ""); | |
EMIT("3\n", "fizz\n", "buzz\n"); | |
EMIT("6\n", "fizz\n", ""); | |
EMIT("8\n", "", ""); | |
EMIT("9\n", "fizzbuzz\n", ""); | |
plus10(); | |
} | |
int main(int argc, char **argv) | |
{ | |
int maxval; | |
if (argc < 2 || sscanf(argv[1], "%d", &maxval) != 1) { | |
fprintf(stderr, "usage: %s <max-value>\n", argv[0]); | |
exit(1); | |
} | |
memset(tens, '0', 16); | |
while (n + 30 < maxval) { | |
emit30(INT_MAX); | |
if (outp - outbuf >= sizeof(outbuf) - 500) | |
flush(); | |
} | |
emit30(maxval); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment