# roblanf/repellent_primes.R

## repellent_primes.R
library(primes)
library(coda)
library(parallel)
library(ggplot2)


# Number of worker processes handed to mclapply() as mc.cores.
# Set the non-Windows value to the number of processors on your machine.
# mclapply() is fork-based, and on Windows any mc.cores > 1 is an error,
# so fall back to a single (serial) worker there.
processors <- if (.Platform$OS.type == "windows") 1 else 4

# Element-wise remainder of `numbers` after division by `divisor`.
#   divisor: the number to divide by.
#   numbers: numeric vector whose remainders are wanted.
# Returns a vector the same length as `numbers`.
# `divisor` is the first argument, so the function can be applied over a
# vector of divisors while `numbers` is supplied by name.
remainders <- function(divisor, numbers) {
  numbers %% divisor
}

# get ~4m primes (every prime up to exp(18), i.e. about 6.57e7)
p <- generate_primes(min = 0, max = exp(18))

# if you want to check that the code is sensible, use randomised primes first...
# p <- sample(p)

divisors <- 3:10

# make a list of remainders, one element per divisor
p.r <- mclapply(divisors, remainders, numbers = p, mc.cores = processors)

# make them into mcmc objects (a bit like a time series)
p.r.mcmc <- mclapply(p.r, as.mcmc, mc.cores = processors)

# calculate autocorrelations at lags from 1:50K
p.r.ac <- mclapply(p.r.mcmc, autocorr, lags = 1:50000, mc.cores = processors)

# now let's look at the cumulative sum of the autocorrelations
# the reason being that this should be roughly a random walk if
# nothing interesting is happening
p.r.ac.cumsum <- lapply(p.r.ac, cumsum)

# now we just build all the data into a big dataframe for plotting:
# one row per (divisor, lag) pair.
n.lags <- length(p.r.ac[[1]])

# every divisor must have produced the same number of autocorrelations;
# a failed mclapply() worker returns a try-error element instead, which
# would otherwise be silently unlist()ed into the data frame.
stopifnot(all(lengths(p.r.ac) == n.lags))

div <- rep(divisors, each = n.lags)
# seq_len() instead of 1:n so an empty result cannot produce c(1, 0)
lags <- rep(seq_len(n.lags), times = length(divisors))
dat <- data.frame(
  divisor = div,
  lag = lags,
  autocorrelation = unlist(p.r.ac),
  cumulative.ac = unlist(p.r.ac.cumsum)
)

# and here are our plots: the same data two ways
# (print() explicitly so the plots are also drawn when the file is source()d,
# where top-level values are not auto-printed)
print(
  ggplot(dat, aes(x = lag, y = cumulative.ac)) +
    geom_line() +
    facet_wrap(~divisor)
)
print(
  ggplot(dat, aes(x = lag, y = cumulative.ac, color = factor(divisor))) +
    geom_line(alpha = 0.5)
)
	library(primes)
	library(coda)
	library(parallel)
	library(ggplot2)


	# Number of worker processes handed to mclapply() as mc.cores.
	# Set the non-Windows value to the number of processors on your machine.
	# mclapply() is fork-based, and on Windows any mc.cores > 1 is an error,
	# so fall back to a single (serial) worker there.
	processors <- if (.Platform$OS.type == "windows") 1 else 4

	# Element-wise remainder of `numbers` after division by `divisor`.
	#   divisor: the number to divide by.
	#   numbers: numeric vector whose remainders are wanted.
	# Returns a vector the same length as `numbers`.
	# `divisor` is the first argument, so the function can be applied over a
	# vector of divisors while `numbers` is supplied by name.
	remainders <- function(divisor, numbers) {
		numbers %% divisor
	}

	# get ~4m primes (every prime up to exp(18), i.e. about 6.57e7)
	p <- generate_primes(min = 0, max = exp(18))

	# if you want to check that the code is sensible, use randomised primes first...
	# p <- sample(p)

	divisors <- 3:10

	# make a list of remainders, one element per divisor
	p.r <- mclapply(divisors, remainders, numbers = p, mc.cores = processors)

	# make them into mcmc objects (a bit like a time series)
	p.r.mcmc <- mclapply(p.r, as.mcmc, mc.cores = processors)

	# calculate autocorrelations at lags from 1:50K
	p.r.ac <- mclapply(p.r.mcmc, autocorr, lags = 1:50000, mc.cores = processors)

	# now let's look at the cumulative sum of the autocorrelations
	# the reason being that this should be roughly a random walk if
	# nothing interesting is happening
	p.r.ac.cumsum <- lapply(p.r.ac, cumsum)

	# now we just build all the data into a big dataframe for plotting:
	# one row per (divisor, lag) pair.
	n.lags <- length(p.r.ac[[1]])

	# every divisor must have produced the same number of autocorrelations;
	# a failed mclapply() worker returns a try-error element instead, which
	# would otherwise be silently unlist()ed into the data frame.
	stopifnot(all(lengths(p.r.ac) == n.lags))

	div <- rep(divisors, each = n.lags)
	# seq_len() instead of 1:n so an empty result cannot produce c(1, 0)
	lags <- rep(seq_len(n.lags), times = length(divisors))
	dat <- data.frame(
		divisor = div,
		lag = lags,
		autocorrelation = unlist(p.r.ac),
		cumulative.ac = unlist(p.r.ac.cumsum)
	)

	# and here are our plots: the same data two ways
	# (print() explicitly so the plots are also drawn when the file is source()d,
	# where top-level values are not auto-printed)
	print(
		ggplot(dat, aes(x = lag, y = cumulative.ac)) +
			geom_line() +
			facet_wrap(~divisor)
	)
	print(
		ggplot(dat, aes(x = lag, y = cumulative.ac, color = factor(divisor))) +
			geom_line(alpha = 0.5)
	)