Skip to content

Instantly share code, notes, and snippets.

@iscgar
Last active November 9, 2020 22:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iscgar/0d559c17d65b78de624eb124beabf0bf to your computer and use it in GitHub Desktop.
Save iscgar/0d559c17d65b78de624eb124beabf0bf to your computer and use it in GitHub Desktop.
Hand-rolled, compiler copy FIzzBuzz (thanks, Gal!)
#include <string.h>
#include <unistd.h>
#define CACHELINE 64
#define ALIGNED_BUF 65536
#define FIZZ "Fizz"
#define BUZZ "Buzz"
#define DELIM " "
typedef struct {
unsigned char offset;
char data[CACHELINE - sizeof(unsigned char)];
} counters_t;
#define CPYEL(i) do { \
const char *end = counters[i].data + counters[i].offset; \
const unsigned int n = sizeof(counters[i].data) - counters[i].offset - 1; \
memcpy(off, end, n); \
off += n; \
} while (0)
enum
{
CARRY = 0,
VALUE = 1,
OFFSET = 2,
};
int main(void)
{
__attribute__((aligned(CACHELINE))) static counters_t counters[] = {
{ sizeof(counters[0].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000001" DELIM },
{ sizeof(counters[1].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000002" DELIM },
{ sizeof(counters[2].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000004" DELIM },
{ sizeof(counters[3].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000007" DELIM },
{ sizeof(counters[4].data) - sizeof(DELIM) - 1, "0000000000000000000000000000000000000000000000000000000000008" DELIM },
{ sizeof(counters[5].data) - sizeof(DELIM) - 2, "0000000000000000000000000000000000000000000000000000000000011" DELIM },
{ sizeof(counters[6].data) - sizeof(DELIM) - 2, "0000000000000000000000000000000000000000000000000000000000013" DELIM },
{ sizeof(counters[7].data) - sizeof(DELIM) - 2, "0000000000000000000000000000000000000000000000000000000000014" DELIM },
};
__attribute__((aligned(4096))) static char buf[ALIGNED_BUF + (sizeof(counters[0].data) * 15)] = { 0 };
char *off = buf;
for (;;)
{
while (off - buf < ALIGNED_BUF)
{
if (__builtin_expect(counters[0].data[0] != '0', 0))
{
goto end;
}
CPYEL(0);
CPYEL(1);
memcpy(off, FIZZ DELIM, sizeof(FIZZ DELIM) - 1);
off += sizeof(FIZZ DELIM) - 1;
CPYEL(2);
memcpy(off, BUZZ DELIM FIZZ DELIM, sizeof(BUZZ DELIM FIZZ DELIM) - 1);
off += sizeof(BUZZ DELIM FIZZ DELIM) - 1;
CPYEL(3);
CPYEL(4);
memcpy(off, FIZZ DELIM BUZZ DELIM, sizeof(FIZZ DELIM BUZZ DELIM) - 1);
off += sizeof(FIZZ DELIM BUZZ DELIM) - 1;
CPYEL(5);
memcpy(off, FIZZ DELIM, sizeof(FIZZ DELIM) - 1);
off += sizeof(FIZZ DELIM) - 1;
CPYEL(6);
CPYEL(7);
memcpy(off, FIZZ BUZZ DELIM, sizeof(FIZZ BUZZ DELIM) - 1);
off += sizeof(FIZZ BUZZ DELIM) - 1;
// Increment the counters
#define ADVANCE(i) \
if (counters[i].data[digit_offset] > '9') { \
counters[i].data[digit_offset] -= 10; \
parallel_inc[OFFSET][i] = digit_offset; \
++parallel_inc[CARRY][i]; \
b = 1; \
}
#define FIXOFF(i) \
if (parallel_inc[OFFSET][i] <= counters[i].offset) { \
counters[i].offset = parallel_inc[OFFSET][i] - 1; \
} \
// carry, value, offset in SoA layout
char parallel_inc[3][8];
memset(parallel_inc[CARRY], 1, sizeof(parallel_inc[CARRY]));
memset(parallel_inc[VALUE], 5, sizeof(parallel_inc[VALUE]));
memset(parallel_inc[OFFSET], sizeof(counters[0].data) - sizeof(DELIM) - 1, sizeof(parallel_inc[OFFSET]));
for (unsigned digit_offset = sizeof(counters[0].data) - sizeof(DELIM) - 1;; --digit_offset)
{
int b = 0;
counters[0].data[digit_offset] += parallel_inc[VALUE][0];
counters[1].data[digit_offset] += parallel_inc[VALUE][1];
counters[2].data[digit_offset] += parallel_inc[VALUE][2];
counters[3].data[digit_offset] += parallel_inc[VALUE][3];
counters[4].data[digit_offset] += parallel_inc[VALUE][4];
counters[5].data[digit_offset] += parallel_inc[VALUE][5];
counters[6].data[digit_offset] += parallel_inc[VALUE][6];
counters[7].data[digit_offset] += parallel_inc[VALUE][7];
ADVANCE(0);
ADVANCE(1);
ADVANCE(2);
ADVANCE(3);
ADVANCE(4);
ADVANCE(5);
ADVANCE(6);
ADVANCE(7);
memcpy(parallel_inc[VALUE], parallel_inc[CARRY], sizeof(parallel_inc[CARRY]));
memset(parallel_inc[CARRY], 0, sizeof(parallel_inc[CARRY]));
if (!b)
{
FIXOFF(0);
FIXOFF(1);
FIXOFF(2);
FIXOFF(3);
FIXOFF(4);
FIXOFF(5);
FIXOFF(6);
FIXOFF(7);
break;
}
}
}
write(1, buf, ALIGNED_BUF);
memcpy(buf, buf + ALIGNED_BUF, (off - buf) % ALIGNED_BUF);
off -= ALIGNED_BUF;
}
end:
*off++ = '\n';
write(1, buf, off - buf);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment