-
-
Save skeeto/212715 to your computer and use it in GitHub Desktop.
log | |
pi | |
state | |
state~ |
CFLAGS = -W -Wall -O3 | |
LDFLAGS = -lgsl -lgslcblas -pthread | |
pi : pi.c | |
.PHONY : run clean | |
run : pi | |
./pi | tee -a log | |
clean : | |
$(RM) pi log state state~ |
#include <stdio.h> | |
#include <fcntl.h> | |
#include <time.h> | |
#include <unistd.h> | |
#include <pthread.h> | |
#include <string.h> | |
#include <signal.h> | |
#include <stdint.h> | |
#include <gsl/gsl_rng.h> | |
uint16_t threads; /* Number of threads to use. */ | |
uint64_t *states; /* Calculation state */ | |
#define STATE "state" /* Filename for storing the state. */ | |
#define STATE_T "state~" /* Temporary state filename. */ | |
#define SAMPLE_RATE 1 /* Seconds per printout. */ | |
#define SAVE_RATE 10 /* Printouts per save. */ | |
#define STATE_SIZE ((ssize_t) (threads * 2 * sizeof(uint64_t))) | |
/* Initialize a brand new RNG state, with unique seed. */ | |
gsl_rng *make_rng() | |
{ | |
gsl_rng_default_seed = time(NULL) + rand(); | |
const gsl_rng_type *T = gsl_rng_ranlux389; | |
gsl_rng *r = gsl_rng_alloc(T); | |
return r; | |
} | |
/* Single calculating instance. */ | |
void *calc(void *arg) | |
{ | |
uint64_t *state = (uint64_t *) arg; | |
gsl_rng *r = make_rng(); | |
while (1) { | |
double x = (gsl_rng_uniform(r) * 2.0) - 1.0; | |
double y = (gsl_rng_uniform(r) * 2.0) - 1.0; | |
if (x * x + y * y < 1.0) | |
state[1]++; | |
state[0]++; | |
} | |
return NULL; | |
} | |
/* Save the current state. */ | |
void save() | |
{ | |
int out = open(STATE_T, O_CREAT | O_TRUNC | O_WRONLY, 0644); | |
if (out == -1) { | |
perror("warning: failed to save state"); | |
return; | |
} | |
size_t bytes = write(out, &threads, sizeof(uint16_t)); | |
bytes += write(out, states, STATE_SIZE); | |
if (bytes != sizeof(uint16_t) + STATE_SIZE) { | |
perror("warning: failed to save state"); | |
return; | |
} | |
fsync(out); | |
close(out); | |
int r = rename(STATE_T, STATE); | |
if (r == -1) { | |
perror("warning: failed to save state"); | |
return; | |
} | |
} | |
/* Load a previous state. */ | |
void load() | |
{ | |
int in = open(STATE, O_RDONLY); | |
if (in == -1) { | |
perror("error: failed to load state"); | |
exit(EXIT_FAILURE); | |
} | |
if (read(in, &threads, sizeof(uint16_t)) != 2) { | |
fprintf(stderr, "error: state file is corrupt\n"); | |
exit(EXIT_FAILURE); | |
} | |
states = malloc(STATE_SIZE); | |
if (read(in, states, STATE_SIZE) != STATE_SIZE) { | |
fprintf(stderr, "error: state file is corrupt\n"); | |
exit(EXIT_FAILURE); | |
} | |
close(in); | |
} | |
/* Handle SIGINT. */ | |
void handler(int arg) | |
{ | |
(void) arg; | |
save(); | |
fprintf(stderr, "exiting: interrupted (saved state)\n"); | |
exit(EXIT_SUCCESS); | |
} | |
int main() | |
{ | |
/* Init */ | |
gsl_rng_env_setup(); | |
if (access(STATE, R_OK) == 0) { | |
load(); | |
fprintf(stderr, "Using previous state (%d threads).\n", threads); | |
} else { | |
threads = sysconf(_SC_NPROCESSORS_ONLN); | |
states = malloc(2 * threads * sizeof(uint64_t)); | |
fprintf(stderr, "Using %d threads.\n", threads); | |
} | |
/* Set up signal handler. */ | |
struct sigaction sa; | |
sa.sa_handler = handler; | |
sigaction(SIGINT, &sa, NULL); | |
sigaction(SIGTERM, &sa, NULL); | |
/* Create threads. */ | |
int i; | |
for (i = 0; i < threads; i++) { | |
pthread_t thread; | |
pthread_create(&thread, NULL, &calc, states + i * 2); | |
} | |
/* Sample Monte Carlo state at regular intervals. */ | |
int counter = 0; | |
while (1) { | |
sleep(SAMPLE_RATE); | |
uint64_t total = 0, in = 0; | |
int i; | |
for (i = 0; i < threads; i++) { | |
total += states[i * 2]; | |
in += states[i * 2 + 1]; | |
} | |
double pi = in * 4.0 / total; | |
printf("%.15lg %ld\n", pi, total / 1000000); | |
fflush(stdout); | |
counter++; | |
if (counter >= SAVE_RATE) { | |
save(); | |
counter = 0; | |
} | |
} | |
return 0; | |
} |
Yup, it works out to be quite easy.
Last night I hit 1 trillion samples (about 30 hours in), and here’s where it stood at the 1 trillion mark.
3.14159487055621
Here's a plot of the error (initial billion skipped) vs. samples.
Here we are at 30 trillion samples today: 3.14159187116437.
Now at 3.14159221038522 at just over 60 trillion samples. It looks like I can do about 30 trillion per month with this computer.
Just hit 100 trillion samples last night: 3.14159252931055
It's definitely still converging.
These comments have become a log of this calculation's progress. Almost 7 months in, I'm within a day of 160 trillion samples, and pi = 3.14159265422063. Ten decimal places. We're still converging, baby!
201 trillion samples now. We're at 3.14159262669982. We lost two places of precision! I think this is as good as this method gets, since I'm using a high quality PRNG.
280 trillion samples, with 3.14159264341612. Still not improving.
It's convenient that you don't have to actually care about atomicity in this problem. Who cares is if your random counts are jitter +- 1 when you've got 100 millions of test points.