Created
September 21, 2023 12:27
-
-
Save kazuho/c58932c2e66a9ec69ad8acfce22af837 to your computer and use it in GitHub Desktop.
fizzbuzz using ymm + non-overlapping writes of uint32
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h>
#include <immintrin.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Force inlining of functions declared `inline`.  The macro keeps the real
 * `inline` specifier (a macro name is not re-expanded inside its own
 * replacement list), which is what GCC expects alongside always_inline.
 */
#define inline inline __attribute__((always_inline))

/*
 * ASCII digits of the current value divided by ten, right-aligned so that the
 * last digit sits in tens[15].  main() fills bytes 0..15 with '0'; bytes
 * 16..47 stay zero and act as padding for the 32-byte loads in emit30().
 */
static char tens[48] __attribute__((aligned(64)));
/* Number of significant digits in `tens` (0 while printing 1..9). */
static size_t tenslen = 0;
/*
 * n:     last value that has been accounted for in the output.
 * regen: non-zero while emit30() must write complete lines with 32-byte
 *        stores; zero once only the low four digits need patching.
 */
static int n, regen = 1;
/* Cached copy of tens[12..15] as one 32-bit word (see plus10()). */
static unsigned tens32;
/* Output staging buffer and the current write position inside it. */
static char outbuf[8000], *outp = outbuf;
static void flush(void) | |
{ | |
write(1, outbuf, outp - outbuf); | |
outp = outbuf; | |
} | |
/*
 * Drain the output buffer, then end the process with a success status.
 * Does not return.
 */
static void flush_and_exit(void)
{
    flush();
    exit(EXIT_SUCCESS);
}
static void plus10(void) | |
{ | |
if (tenslen == 0) | |
tenslen = 1; | |
size_t i = 15; | |
for (i = 15;; i--) { | |
if (__builtin_expect(++tens[i] <= '9', 1)) | |
break; | |
tens[i] = '0'; | |
if (16 - i == tenslen) | |
++tenslen; | |
if (i <= 12) | |
regen = 2; | |
} | |
if (i == 15) { | |
tens32 += 0x1000000; /* avoid partial forward */ | |
} else { | |
tens32 = *(unsigned *)(tens + 12); | |
} | |
} | |
static inline void emit30(int maxval) | |
{ | |
#define EMIT(s1, s2, s3) \ | |
do { \ | |
if (++n > maxval) flush_and_exit(); \ | |
if (__builtin_expect(regen, 0)) { \ | |
static const char s[] __attribute__((aligned(64))) = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" s1 s2 s3 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; \ | |
_mm256_storeu_si256((__m256i *)outp, \ | |
_mm256_or_si256(tens256, \ | |
_mm256_loadu_si256((__m256i *)(s + 31 - tenslen)))); \ | |
} else { \ | |
*(unsigned *)(outp + tenslen - 4) = tens32; \ | |
} \ | |
outp += tenslen + 2; \ | |
if (sizeof(s2) != 1) { \ | |
if (++n > maxval) flush_and_exit(); \ | |
outp += sizeof(s2) - 1; \ | |
if (sizeof(s3) != 1) { \ | |
if (++n > maxval) flush_and_exit(); \ | |
outp += sizeof(s3) - 1; \ | |
} \ | |
} \ | |
} while (0) | |
__m256i tens256; | |
if (__builtin_expect(regen, 0)) | |
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen)); | |
EMIT("1\n", "", ""); | |
EMIT("2\n", "fizz\n", ""); | |
EMIT("4\n", "buzz\n", "fizz\n"); | |
EMIT("7\n", "", ""); | |
EMIT("8\n", "fizz\n", "buzz\n"); | |
plus10(); | |
if (__builtin_expect(regen, 0)) | |
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen)); | |
EMIT("1\n", "fizz\n", ""); | |
EMIT("3\n", "", ""); | |
EMIT("4\n", "fizzbuzz\n", ""); | |
EMIT("6\n", "", ""); | |
EMIT("7\n", "fizz\n", ""); | |
EMIT("9\n", "buzz\n", "fizz\n"); | |
plus10(); | |
if (__builtin_expect(regen, 0)) | |
tens256 = _mm256_loadu_si256((__m256i *)(tens + 16 - tenslen)); | |
EMIT("2\n", "", ""); | |
EMIT("3\n", "fizz\n", "buzz\n"); | |
EMIT("6\n", "fizz\n", ""); | |
EMIT("8\n", "", ""); | |
EMIT("9\n", "fizzbuzz\n", ""); | |
plus10(); | |
} | |
int main(int argc, char **argv) | |
{ | |
int maxval; | |
if (argc < 2 || sscanf(argv[1], "%d", &maxval) != 1) { | |
fprintf(stderr, "usage: %s <max-value>\n", argv[0]); | |
exit(1); | |
} | |
memset(tens, '0', 16); | |
int prevgen = 1; | |
while (n + 30 < maxval) { | |
emit30(INT_MAX); | |
if (__builtin_expect(outp - outbuf >= sizeof(outbuf) - 500, 0)) { | |
flush(); | |
if (regen != 0 && n >= 10000 && prevgen >= 10000) | |
--regen; | |
prevgen = n + 1; | |
} | |
} | |
emit30(maxval); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment