roblanf/repellent_primes.R

## repellent_primes.R
library(primes)
library(coda)
library(parallel)
library(ggplot2)

# set this to the number of processors on your machine
processors = 4

remainder <- function(number, divisor){ return(number%%divisor) }

remainders <- function(divisor, numbers, mc.cores = processors){
    # get the remainders of a vector of numbers after dividing by divisor.
    ld = as.numeric(unlist(mclapply(numbers, remainder, divisor = divisor, mc.cores = mc.cores)))
    return(ld)
}

# get ~4m primes
p = generate_primes(min = 0, max = exp(18))

# if you want to check that the code is sensible, use randomised primes first...
# this helps to train intuition
#p = sample(p)

# use a range of divisors
divisors = 5:20

# make a list of remainders of the primes with different divisors
p.r = lapply(divisors, remainders, numbers = p)

# make them into mcmc objects (a bit like a time series)
p.r.mcmc = mclapply(p.r, as.mcmc, mc.cores = processors)

# calculate autocorrelations at lags from 1:50K
p.r.ac = mclapply(p.r.mcmc, autocorr, lags = 1:50000, mc.cores = processors)

# now let's look at the cumulative sum of the autocorrelations
# the reason being that this should be roughly a random walk if
# nothing interesting is happening
p.r.ac.cumsum = lapply(p.r.ac, cumsum)

# now we just build all the data into a big dataframe for plotting
div = rep(divisors, each = length(p.r.ac[[1]]))
lags = rep(1:length(p.r.ac[[1]]), length(divisors))
dat = data.frame('divisor' = div, 'lag' = lags, 'autocorrelation' = unlist(p.r.ac), 'cumulative.ac' = unlist(p.r.ac.cumsum))

# and here are our plots: the same data two ways
ggplot(dat, aes(x = lag, y = cumulative.ac)) + geom_line() + facet_wrap(~divisor)
ggplot(dat, aes(x = lag, y = cumulative.ac, color = factor(divisor))) + geom_line(alpha = 0.5)
	library(primes)
	library(coda)
	library(parallel)
	library(ggplot2)

	# set this to the number of processors on your machine
	processors = 4

	remainder <- function(number, divisor){ return(number%%divisor) }

	remainders <- function(divisor, numbers, mc.cores = processors){
	# get the remainders of a vector of numbers after dividing by divisor.
	ld = as.numeric(unlist(mclapply(numbers, remainder, divisor = divisor, mc.cores = mc.cores)))
	return(ld)
	}

	# get ~4m primes
	p = generate_primes(min = 0, max = exp(18))

	# if you want to check that the code is sensible, use randomised primes first...
	# this helps to train intuition
	#p = sample(p)

	# use a range of divisors
	divisors = 5:20

	# make a list of remainders of the primes with different divisors
	p.r = lapply(divisors, remainders, numbers = p)

	# make them into mcmc objects (a bit like a time series)
	p.r.mcmc = mclapply(p.r, as.mcmc, mc.cores = processors)

	# calculate autocorrelations at lags from 1:50K
	p.r.ac = mclapply(p.r.mcmc, autocorr, lags = 1:50000, mc.cores = processors)

	# now let's look at the cumulative sum of the autocorrelations
	# the reason being that this should be roughly a random walk if
	# nothing interesting is happening
	p.r.ac.cumsum = lapply(p.r.ac, cumsum)

	# now we just build all the data into a big dataframe for plotting
	div = rep(divisors, each = length(p.r.ac[[1]]))
	lags = rep(1:length(p.r.ac[[1]]), length(divisors))
	dat = data.frame('divisor' = div, 'lag' = lags, 'autocorrelation' = unlist(p.r.ac), 'cumulative.ac' = unlist(p.r.ac.cumsum))

	# and here are our plots: the same data two ways
	ggplot(dat, aes(x = lag, y = cumulative.ac)) + geom_line() + facet_wrap(~divisor)
	ggplot(dat, aes(x = lag, y = cumulative.ac, color = factor(divisor))) + geom_line(alpha = 0.5)