Skip to content

Instantly share code, notes, and snippets.

@maxdeliso
Last active September 29, 2015 22:48
Show Gist options
  • Save maxdeliso/1680623 to your computer and use it in GitHub Desktop.
Save maxdeliso/1680623 to your computer and use it in GitHub Desktop.
Sieve of Atkin
/*
* Max DeLiso <maxdeliso@gmail.com>
*
* Purpose: compute prime numbers using a Sieve of Atkin
*
* Runtime efficiency: O(N/log log N)
*
* Memory efficiency: N/8 bytes
*
* Input: Limiting value n, [1,n-1] will be computed and outputted.
* n is constrained by ULLONG_MAX, which has the value
* 18446744073709551615, or ~ 18.4 * 10^18, or eighteen quintillion.
*
* NOTE: Currently, the program attempts to allocate n/8 bytes of RAM
* off the heap, and if it fails the program aborts
*
* TODO: Figure out how to make the program work for values of n which
* exceed available RAM. (pipelining)
*
* TODO: Add optional concurrency via pthreads
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <limits.h>
#include <inttypes.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <pthread.h>
/* Functions for bit manipulation */
#define BITS_PER_BYTE 8
#define BYTE(i) ((i)/BITS_PER_BYTE)
#define BIT(i) ((i)%BITS_PER_BYTE)
typedef uint64_t bigUint;
static const unsigned char BYTE_MASK[] = { 128, 64, 32, 16, 8, 4, 2, 1 };
/* Prototypes */
static bool isDebug() {
#ifdef NDEBUG
return false;
#else
return true;
#endif
}
static bigUint processArgs(int argc, char **argv);
static bool getByteBit(unsigned char c, bigUint n);
static void flipByteBit(unsigned char *cptr, unsigned short n);
static void setByteBit(unsigned char *cptr, unsigned short n);
static void clearByteBit(unsigned char *cptr, unsigned short n);
struct atkinSegmentParams {
bigUint x;
bigUint y;
bigUint limit;
unsigned char * bufferPtr;
pthread_mutex_t * bufferMutexPtr;
};
void *atkinSegment(void* arg);
static clock_t atkin(bigUint limit, bigUint bufferSize, unsigned char *buffer);
static void printResults(bigUint limit, bigUint bufferSize,
const unsigned char *buffer, double time);
/* Entry point */
int main(int argc, char **argv)
{
bigUint limit, bufferSize;
unsigned char *buffer;
double time;
limit = processArgs(argc, argv);
if(isDebug()) {
printf("info: read in limit as %" PRIu64 "\n", limit);
}
bufferSize = limit / BITS_PER_BYTE;
if (limit % BITS_PER_BYTE != 0) {
++bufferSize;
}
buffer = malloc(bufferSize);
if (buffer == NULL) {
fprintf(stderr,
"Failed to allocate memory for a limit of %" PRIu64 ", (%"
PRIu64 " bytes)\n", limit, bufferSize);
exit(1);
}
memset(buffer, 0, bufferSize);
time = atkin(limit, bufferSize, buffer);
printResults(limit, bufferSize, buffer, time / CLOCKS_PER_SEC);
if(isDebug()) {
printf("info: freeing heap allocated buffer @ %p\n", buffer);
}
free(buffer);
return 0;
}
/* Function definitions */
bigUint processArgs(int argc, char **argv)
{
bigUint limit;
if (argc != 2) {
printf("usage: %s limit\n", argv[0]);
exit(1);
} else {
limit = strtoull(argv[1], NULL, 0);
if (limit <= 5 || (limit == ULLONG_MAX && errno == ERANGE)) {
fprintf(stderr, "fatal: limit out of range\n");
exit(1);
}
return limit;
}
}
inline bool getByteBit(unsigned char c, bigUint n)
{
assert(n < 8);
if(isDebug()) {
printf("getting bit %" PRIu64 " from byte %x\n", n, c);
}
return c & BYTE_MASK[n];
}
inline void flipByteBit(unsigned char *cptr, unsigned short n)
{
assert(n < 8);
if(isDebug()) {
printf("flipping bit %u on block @ %p\n", n, cptr);
}
*cptr ^= BYTE_MASK[n];
}
inline void setByteBit(unsigned char *cptr, unsigned short n)
{
assert(n < 8);
if(isDebug()) {
printf("setting bit %u on block @ %p\n", n, cptr);
}
*cptr |= BYTE_MASK[n];
}
inline void clearByteBit(unsigned char *cptr, unsigned short n)
{
assert(n < 8);
if(isDebug()) {
printf("clearing bit %u on block @ %p\n", n, cptr);
}
*cptr &= ~BYTE_MASK[n];
}
void* atkinSegment(void* arg) {
struct atkinSegmentParams * params = (struct atkinSegmentParams*) arg;
bigUint n;
bigUint x = params -> x;
bigUint y = params -> y;
bigUint limit = params -> limit;
unsigned char * buffer = params -> bufferPtr;
pthread_mutex_t * bufferMutexPtr = params -> bufferMutexPtr;
/* NOTE: this threading strategy is naive, the entire buffer is locked on each access.
* the contention actually makes this implementation substantially slower than an equivalent one
* that does not use threads */
n = 4 * x * x + y * y;
if (n <= limit && ((n % 12 == 1) || (n % 12 == 5))) {
pthread_mutex_lock(bufferMutexPtr);
flipByteBit(buffer + BYTE(n), BIT(n));
pthread_mutex_unlock(bufferMutexPtr);
}
n = 3 * x * x + y * y;
if (n <= limit && (n % 12 == 7)) {
pthread_mutex_lock(bufferMutexPtr);
flipByteBit(buffer + BYTE(n), BIT(n));
pthread_mutex_unlock(bufferMutexPtr);
}
n = 3 * x * x - y * y;
if (x > y && (n <= limit) && (n % 12 == 11)) {
pthread_mutex_lock(bufferMutexPtr);
flipByteBit(buffer + BYTE(n), BIT(n));
pthread_mutex_unlock(bufferMutexPtr);
}
return NULL;
}
clock_t atkin(bigUint limit, bigUint bufferSize, unsigned char *buffer)
{
bigUint x, y, n, limitSqrt;
clock_t startClock, endClock;
double limitSqrtD;
pthread_mutex_t bufferMutex;
if(pthread_mutex_init(&bufferMutex, NULL) != 0) {
fprintf(stderr, "couldn't initialize a mutex. errno = %08x\n", errno);
exit(1);
}
(void) bufferSize; /* to quiet warnings during release build */
startClock = clock();
limitSqrtD = floor(sqrt(limit));
assert(limitSqrtD <= ULLONG_MAX);
limitSqrt = (bigUint) limitSqrtD;
setByteBit(buffer + BYTE(2), BIT(2));
setByteBit(buffer + BYTE(3), BIT(3));
for (x = 1; x <= limitSqrt; ++x) {
/* allocate param structs on stack and parallel array of worker thread handles */
struct atkinSegmentParams workerParams [limitSqrt];
pthread_t workerThreads[limitSqrt];
struct atkinSegmentParams thParamTemplate = {
.x = x,
.y = 0, /* set for reach y in loop below */
.limit = limit,
.bufferPtr = buffer,
.bufferMutexPtr = &bufferMutex
};
/* spawn the workers */
for (y = 1; y <= limitSqrt; ++y) {
thParamTemplate.y = y;
workerParams[y-1] = thParamTemplate;
pthread_create(&workerThreads[y-1], NULL, atkinSegment, &workerParams[y-1]); /* TODO: ret */
}
/* join the workers */
for (y = 1; y <= limitSqrt; ++y) {
pthread_join(workerThreads[y-1], NULL);
}
}
for (n = 5; n <= limitSqrt; ++n) {
assert(BYTE(n) < bufferSize);
if (getByteBit(buffer[BYTE(n)], BIT(n))) {
y = 1;
x = n * n;
/* TODO: parallelize these operations by splitting the primes array
* into four equal parts, and then iterating down those parts with
* one thread each */
while (y * x < limit) {
assert(BYTE(y * x) < bufferSize);
clearByteBit(buffer + BYTE(y * x), BIT(y * x));
++y;
}
}
}
endClock = clock();
return endClock - startClock;
}
void printResults(bigUint limit, bigUint bufferSize,
const unsigned char *buffer, double time)
{
bigUint i, j, lastByte, lastBit;
bool more;
lastByte = BYTE(limit);
lastBit = BIT(limit);
fprintf(stderr, "%lf\n", time);
more = true;
for (i = 0; more && i < bufferSize; ++i) {
for (j = 0; more && j < BITS_PER_BYTE; ++j) {
if (i == lastByte && (j + 1) == lastBit) {
more = false;
} else if (getByteBit(buffer[i], j)) {
printf("%" PRIu64 "\n", i * BITS_PER_BYTE + j);
}
}
}
printf("\n");
}
# Max DeLiso <maxdeliso@gmail.com>
# TODO: add various targets for differing levels of sse/avx support
CC=gcc
STRIP=strip
CFLAGS=-Wall -Wextra -Werror -pedantic -std=c99
OPTFLAGS=-O3 -mtune=native
DBGFLAGS=-O0 -g
LDFLAGS=-lm -lpthread
OUT=atkin
.PHONY: clean help
atkin-release: atkin.c
gcc $(CFLAGS) $(LDFLAGS) $(OPTFLAGS) -DNDEBUG -o $@ atkin.c
$(STRIP) $@
atkin-static.o: atkin.c
gcc $(CFLAGS) $(OPTFLAGS) -DNDEBUG atkin.c -c -o $@
atkin-static: atkin-static.o
gcc atkin-static.o -static -o $@ $(LDFLAGS)
$(STRIP) $@
atkin-gen-pgo: atkin.c
gcc $(CFLAGS) $(LDFLAGS) $(OPTFLAGS) -DNDEBUG -fprofile-generate -o $@ atkin.c
atkin-use-pgo: atkin.c
gcc $(CFLAGS) $(LDFLAGS) $(OPTFLAGS) -DNDEBUG -fprofile-use -o $@ atkin.c
atkin-gen-gcov: atkin.c
gcc $(CFLAGS) $(LDFLAGS) -DNDEBUG -fprofile-arcs -ftest-coverage -o $@ atkin.c
atkin-debug: atkin.c
gcc $(CFLAGS) $(LDFLAGS) $(DBGFLAGS) atkin.c -o $@
check: atkin-gen-gcov
@echo test
clean:
rm -f atkin atkin.asm atkin-* *.{gcno,gcda,gcov} *~
help:
@echo atkin-release atkin-static atkin-gen-pgo atkin-use-pgo atkin-gen-gcov atkin-debug check
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment