Skip to content

Instantly share code, notes, and snippets.

@richarddmorey
Created July 14, 2022 11:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save richarddmorey/4ae51ac16e38c058573ec485c1942342 to your computer and use it in GitHub Desktop.
Save richarddmorey/4ae51ac16e38c058573ec485c1942342 to your computer and use it in GitHub Desktop.
Dice rolling significance test
library(dplyr)
library(memoise)
# Rolls
k = 3
# Sides
n = 5
sumDist = function(n,k,maxn = n){
do.call(
expand.grid,replicate(n = k,1:n,simplify = FALSE)
) %>%
mutate(
S = rowSums(.),
Pr = (1/n)^k
) %>%
group_by(S) %>%
summarise(
Pr = sum(Pr)
) %>%
mutate(
cPr = cumsum(Pr),
sides = n
) -> x
if(n<maxn){
x = bind_rows(
x,
tibble(S = (3*n+1):(3*maxn),Pr = 0,cPr = 1)
)
}
return(x %>% relocate(sides, .before = everything()))
}
upval = Vectorize(function(obsS, sides){
x = sumDist(sides,3,sides) %>% filter(S == obsS)
(1-x$cPr) + x$Pr
},'sides')
lpval = Vectorize(function(obsS, sides){
x = sumDist(sides,3,sides) %>% filter(S == obsS)
x$cPr
},'sides')
pval2 = memoise(function(obsS, sides){
2*pmin(upval(obsS, sides),lpval(obsS, sides))
})
ubound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
sides = 2
p = 1
while(TRUE){
oldp = p
p = sumDist(sides,k,20) %>% filter(S==obsS) %>% pull(cPr)
if(p<alpha) return(tibble(sides=sides-1,p=oldp))
if(sides == max_sides){
warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}'))
return(tibble(sides=NA_integer_,p=NA_real_))
}
sides = sides + 1
}
})
lbound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
sides = 2
oldp = 1
while(TRUE){
p = sumDist(sides,k,20) %>% filter(S==obsS) %>%
mutate(cPr = 1 - cPr + Pr) %>% pull(cPr)
if(p>alpha) return(tibble(sides=sides,p=p))
if(sides == max_sides){
warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}'))
return(tibble(sides=NA_integer_,p=NA_real_))
}
sides = sides + 1
}
})
ci = function(obsS, conf = .95,...){
c(lbound(obsS, (1-conf)/2, ...)$sides, ubound(obsS, (1-conf)/2, ...)$sides)
}
ci.raw = function(x, conf = .95, ...){
z = ci(sum(x), conf, ...)
z = z[1]:z[2]
excl = sapply(z, function(v) any(x>v))
range(z[!excl])
}
rnomial = Vectorize(function(r,n,k){
i = 0:floor(k/r)
sum(
(-1)^i*choose(n, i)*choose(n+k-1-r*i, n-1)
)
},'k')
# rnomial(k+1,n-1,5)
sides = c(4,6,8,10,12,20)
plot(0,0,xlim = c(0,60),ylim=c(0,1), typ='n',las=1,
ylab = 'Cumulative Prob.', xlab = 'Sum')
sides %>%
purrr::map(~sumDist(.,3,20)) -> x
split(x, sides) %>%
purrr::walk(
~ points(.[[1]]$S,.[[1]]$cPr, pch = 19, col = .[[1]]$sides[1])
)
ci.raw(c(2,3,5), conf = .95)
ci.raw(c(1,2,3), conf = .95)
# Adjust alpha for joint significance (square rejection)
alp0 = 1-sqrt(1-.05)
ci.raw(c(2,3,5), conf = 1-alp0)
# {6,8,10,12,20}
ci.raw(c(1,2,3), conf = 1-alp0)
# {4,6,8,10}
p1 = pval2(10, sides)
p1[1] = 0 # set the first one (sides = 4) to 0 because it is falsified
p2 = pval2(6, sides)
# Fisher's method of combining the p values
x2vals = -2 * outer(log(p1), log(p2), '+')
crit = qchisq(.95,4)
dimnames(x2vals) = list(p1 = sides, p2 = sides)
sig = x2vals < crit # Which ones are significant?
sig
# Compute combined p values
t(pchisq(x2vals,4,lower.tail = FALSE)) %>% round(3)
# Print in nice way
n = outer(paste0('(',sides), paste0(sides,')'), paste, sep = ', ')
paste(n[sig],collapse=', ')
# Simulation to check alpha, just in case
# Doesn't actually need a simulation - can compute
# analytically - but sometimes a simulation is good to check
# This will take a long time at first, but memoise will save
# the CIs for the sum, so it speeds up after a few minutes
# because it doesn't need to recalculate them.
s0 = 20 # Sides to simulate
M = 10000 # Simulations
pb <- progress::progress_bar$new(total = M)
purrr::map_df(1:M, function(m){
x = sample.int(s0,k,replace=TRUE)
ci0 = ci.raw(x)
pb$tick()
bind_rows(x1 = x[1],
x2 = x[2],
x3 = x[3],
pval2 = pval2(sum(x), s0),
sum = sum(x),
m = m, s = s0, k = k, lo = ci0[1], up = ci0[2])
}) %>%
mutate(too_high = s<lo,
too_low = s0>up,
outside = too_low | too_high) -> z
# Check coverage (should be just above .95)
binom.test(sum(!z$outside), M)
# check distribution of p values
# (will be conservative, so almost uniform but favoring
# high values. Also discreteness makes the distribution a bit
# strange)
hist(z$pval2, breaks = seq(0,1,by = .05))
mean(z$pval2<.05) # alpha - should be 1-coverage
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment