richarddmorey/dice_rolling.R

## dice_rolling.R
library(dplyr)
library(memoise)

# Rolls
k = 3
# Sides
n = 5

sumDist = function(n,k,maxn = n){
  do.call(
    expand.grid,replicate(n = k,1:n,simplify = FALSE)
    ) %>%
    mutate(
      S = rowSums(.),
      Pr = (1/n)^k
      ) %>%
    group_by(S) %>%
    summarise(
      Pr = sum(Pr)
      ) %>%
    mutate(
      cPr = cumsum(Pr),
      sides = n
    ) -> x
  if(n<maxn){
    x = bind_rows(
      x,
      tibble(S = (3*n+1):(3*maxn),Pr = 0,cPr = 1)
    )
  }
  return(x %>% relocate(sides, .before = everything()))
}

upval = Vectorize(function(obsS, sides){
  x = sumDist(sides,3,sides) %>% filter(S == obsS)
  (1-x$cPr) + x$Pr
},'sides')

lpval = Vectorize(function(obsS, sides){
  x = sumDist(sides,3,sides) %>% filter(S == obsS)
  x$cPr
},'sides')

pval2 = memoise(function(obsS, sides){
  2*pmin(upval(obsS, sides),lpval(obsS, sides))
})

ubound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
  sides = 2
  p = 1
  while(TRUE){
    oldp = p
    p = sumDist(sides,k,20) %>% filter(S==obsS) %>% pull(cPr)
    if(p<alpha) return(tibble(sides=sides-1,p=oldp))
    if(sides == max_sides){
      warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}'))
      return(tibble(sides=NA_integer_,p=NA_real_))
    }
    sides = sides + 1
  }
})

lbound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
  sides = 2
  oldp = 1
  while(TRUE){
    p = sumDist(sides,k,20) %>% filter(S==obsS) %>%
      mutate(cPr = 1 - cPr + Pr) %>% pull(cPr)
    if(p>alpha) return(tibble(sides=sides,p=p))
    if(sides == max_sides){
      warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}'))
      return(tibble(sides=NA_integer_,p=NA_real_))
    }
    sides = sides + 1
  }
})

ci = function(obsS, conf = .95,...){
  c(lbound(obsS, (1-conf)/2, ...)$sides, ubound(obsS, (1-conf)/2, ...)$sides)
}

ci.raw = function(x, conf = .95, ...){
  z = ci(sum(x), conf, ...)
  z = z[1]:z[2]
  excl = sapply(z, function(v) any(x>v))
  range(z[!excl])
}

rnomial = Vectorize(function(r,n,k){
  i = 0:floor(k/r)
  sum(
    (-1)^i*choose(n, i)*choose(n+k-1-r*i, n-1)
    )
},'k')

# rnomial(k+1,n-1,5)


sides = c(4,6,8,10,12,20)

plot(0,0,xlim = c(0,60),ylim=c(0,1), typ='n',las=1,
     ylab = 'Cumulative Prob.', xlab = 'Sum')


sides %>%
  purrr::map(~sumDist(.,3,20)) -> x

split(x, sides) %>%
  purrr::walk(
    ~ points(.[[1]]$S,.[[1]]$cPr, pch = 19, col = .[[1]]$sides[1])
    )

ci.raw(c(2,3,5), conf = .95)
ci.raw(c(1,2,3), conf = .95)

# Adjust alpha for joint significance (square rejection)
alp0 = 1-sqrt(1-.05)
ci.raw(c(2,3,5), conf = 1-alp0)
# {6,8,10,12,20}
ci.raw(c(1,2,3), conf = 1-alp0)
# {4,6,8,10}

p1 = pval2(10, sides)
p1[1] = 0 # set the first one (sides = 4) to 0 because it is falsified
p2 = pval2(6, sides)

# Fisher's method of combining the p values
x2vals = -2 * outer(log(p1), log(p2), '+')
crit = qchisq(.95,4)
dimnames(x2vals) = list(p1 = sides, p2 = sides)
sig = x2vals < crit # Which ones are significant?
sig

# Compute combined p values
t(pchisq(x2vals,4,lower.tail = FALSE)) %>% round(3)

# Print in nice way
n = outer(paste0('(',sides), paste0(sides,')'), paste, sep = ', ')
paste(n[sig],collapse=', ')

# Simulation to check alpha, just in case
# Doesn't actually need a simulation - can compute
# analytically - but sometimes a simulation is good to check
# This will take a long time at first, but memoise will save
# the CIs for the sum, so it speeds up after a few minutes
# because it doesn't need to recalculate them.
s0 = 20 # Sides to simulate
M = 10000 # Simulations
pb <- progress::progress_bar$new(total = M)
purrr::map_df(1:M, function(m){
  x = sample.int(s0,k,replace=TRUE)
  ci0 = ci.raw(x)
  pb$tick()
  bind_rows(x1 = x[1],
            x2 = x[2],
            x3 = x[3],
            pval2 = pval2(sum(x), s0),
            sum = sum(x),
            m = m, s = s0, k = k, lo = ci0[1], up = ci0[2])
}) %>%
  mutate(too_high = s<lo,
         too_low  = s0>up,
         outside = too_low | too_high) -> z

# Check coverage (should be just above .95)
binom.test(sum(!z$outside), M)

# check distribution of p values
# (will be conservative, so almost uniform but favoring
# high values. Also discreteness makes the distribution a bit
# strange)
hist(z$pval2, breaks = seq(0,1,by = .05))
mean(z$pval2<.05) # alpha - should be 1-coverage
	library(dplyr)
	library(memoise)

	# Rolls
	k = 3
	# Sides
	n = 5

	sumDist = function(n,k,maxn = n){
	do.call(
	expand.grid,replicate(n = k,1:n,simplify = FALSE)
	) %>%
	mutate(
	S = rowSums(.),
	Pr = (1/n)^k
	) %>%
	group_by(S) %>%
	summarise(
	Pr = sum(Pr)
	) %>%
	mutate(
	cPr = cumsum(Pr),
	sides = n
	) -> x
	if(n<maxn){
	x = bind_rows(
	x,
	tibble(S = (3n+1):(3maxn),Pr = 0,cPr = 1)
	)
	}
	return(x %>% relocate(sides, .before = everything()))
	}

	upval = Vectorize(function(obsS, sides){
	x = sumDist(sides,3,sides) %>% filter(S == obsS)
	(1-x$cPr) + x$Pr
	},'sides')

	lpval = Vectorize(function(obsS, sides){
	x = sumDist(sides,3,sides) %>% filter(S == obsS)
	x$cPr
	},'sides')

	pval2 = memoise(function(obsS, sides){
	2*pmin(upval(obsS, sides),lpval(obsS, sides))
	})

	ubound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
	sides = 2
	p = 1
	while(TRUE){
	oldp = p
	p = sumDist(sides,k,20) %>% filter(S==obsS) %>% pull(cPr)
	if(p<alpha) return(tibble(sides=sides-1,p=oldp))
	if(sides == max_sides){
	warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}'))
	return(tibble(sides=NA_integer_,p=NA_real_))
	}
	sides = sides + 1
	}
	})

	lbound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
	sides = 2
	oldp = 1
	while(TRUE){
	p = sumDist(sides,k,20) %>% filter(S==obsS) %>%
	mutate(cPr = 1 - cPr + Pr) %>% pull(cPr)
	if(p>alpha) return(tibble(sides=sides,p=p))
	if(sides == max_sides){
	warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}'))
	return(tibble(sides=NA_integer_,p=NA_real_))
	}
	sides = sides + 1
	}
	})

	ci = function(obsS, conf = .95,...){
	c(lbound(obsS, (1-conf)/2, ...)$sides, ubound(obsS, (1-conf)/2, ...)$sides)
	}

	ci.raw = function(x, conf = .95, ...){
	z = ci(sum(x), conf, ...)
	z = z[1]:z[2]
	excl = sapply(z, function(v) any(x>v))
	range(z[!excl])
	}

	rnomial = Vectorize(function(r,n,k){
	i = 0:floor(k/r)
	sum(
	(-1)^ichoose(n, i)choose(n+k-1-r*i, n-1)
	)
	},'k')

	# rnomial(k+1,n-1,5)


	sides = c(4,6,8,10,12,20)

	plot(0,0,xlim = c(0,60),ylim=c(0,1), typ='n',las=1,
	ylab = 'Cumulative Prob.', xlab = 'Sum')


	sides %>%
	purrr::map(~sumDist(.,3,20)) -> x

	split(x, sides) %>%
	purrr::walk(
	~ points(.[[1]]$S,.[[1]]$cPr, pch = 19, col = .[[1]]$sides[1])
	)

	ci.raw(c(2,3,5), conf = .95)
	ci.raw(c(1,2,3), conf = .95)

	# Adjust alpha for joint significance (square rejection)
	alp0 = 1-sqrt(1-.05)
	ci.raw(c(2,3,5), conf = 1-alp0)
	# {6,8,10,12,20}
	ci.raw(c(1,2,3), conf = 1-alp0)
	# {4,6,8,10}

	p1 = pval2(10, sides)
	p1[1] = 0 # set the first one (sides = 4) to 0 because it is falsified
	p2 = pval2(6, sides)

	# Fisher's method of combining the p values
	x2vals = -2 * outer(log(p1), log(p2), '+')
	crit = qchisq(.95,4)
	dimnames(x2vals) = list(p1 = sides, p2 = sides)
	sig = x2vals < crit # Which ones are significant?
	sig

	# Compute combined p values
	t(pchisq(x2vals,4,lower.tail = FALSE)) %>% round(3)

	# Print in nice way
	n = outer(paste0('(',sides), paste0(sides,')'), paste, sep = ', ')
	paste(n[sig],collapse=', ')

	# Simulation to check alpha, just in case
	# Doesn't actually need a simulation - can compute
	# analytically - but sometimes a simulation is good to check
	# This will take a long time at first, but memoise will save
	# the CIs for the sum, so it speeds up after a few minutes
	# because it doesn't need to recalculate them.
	s0 = 20 # Sides to simulate
	M = 10000 # Simulations
	pb <- progress::progress_bar$new(total = M)
	purrr::map_df(1:M, function(m){
	x = sample.int(s0,k,replace=TRUE)
	ci0 = ci.raw(x)
	pb$tick()
	bind_rows(x1 = x[1],
	x2 = x[2],
	x3 = x[3],
	pval2 = pval2(sum(x), s0),
	sum = sum(x),
	m = m, s = s0, k = k, lo = ci0[1], up = ci0[2])
	}) %>%
	mutate(too_high = s<lo,
	too_low = s0>up,
	outside = too_low \| too_high) -> z

	# Check coverage (should be just above .95)
	binom.test(sum(!z$outside), M)

	# check distribution of p values
	# (will be conservative, so almost uniform but favoring
	# high values. Also discreteness makes the distribution a bit
	# strange)
	hist(z$pval2, breaks = seq(0,1,by = .05))
	mean(z$pval2<.05) # alpha - should be 1-coverage