# A comparison of some hypothesis tests for a specific two sample problem (for a forum).
# Author: @Sleepingwell, created March 3, 2013 (gist 5076345).
pre <- c(51.11, 64.44, 73.33, 77.78, 66.67, 84.44, 68.89, 57.78, 64.44,
         60.00, 64.44, 88.89, 75.56, 66.67, 86.67, 77.78, 75.56, 91.11)
post <- c(82.22, 84.44, 53.33, 77.78, 77.78, 91.11, 68.89, 62.22, 64.44,
          42.22, 60.00, 80.00, 77.78, 68.89, 80.00, 80.00, 91.11, 95.56)
# function that calculates the bootstrap, Mann-Whitney-Wilcoxon, permutation and t-test p-values
# args:
#   pre:  The pre-test scores
#   post: The post-test scores
#   R:    The number of replicates for the bootstrap and permutation tests
#
# Note that there are many MANY possible refinements of the bootstrap we could use which
# may be more powerful than the ones used here. I have just used a couple to demonstrate
# the ideas.
#
# The other approach to using the bootstrap for hypothesis testing is to calculate a
# confidence interval for the test statistic and see whether it contains zero, which I have
# not pursued here as I don't find it as appealing (and because of the duality between
# confidence intervals and hypothesis tests, the results are usually very similar - if not
# identical - for some methods of constructing the interval). A sketch of that approach is
# given after demo.all.methods below.
demo.all.methods <- function(pre, post, R) {
    # bootstrap resamples
    stat <- mean(post) - mean(pre)
    post.under.null <- post - stat
    boots <- sapply(1:R, function(...) mean(sample(post.under.null, replace=TRUE)) - mean(sample(pre, replace=TRUE)))

    # permutations
    all.res <- c(pre, post)
    perm.sampler <- function(..., replace) {
        perm <- sample(all.res, replace=replace)
        mean(rev(perm)[1:length(post)]) - mean(perm[1:length(pre)])
    }
    perms <- sapply(1:R, perm.sampler, replace=FALSE)
    perm.boots <- sapply(1:R, perm.sampler, replace=TRUE)

    # data for the Wilcoxon and t-tests (a data frame, as required by the formula interface)
    dat <- data.frame(res=c(pre, post), group=rep(0:1, c(length(pre), length(post))))

    c(
        bootstrap=sum(boots-stat > 0) / R,                # bootstrap assuming different pre and post distributions, sampling under the null
        bootstrap2=sum((boots+stat) <= 0) / R,            # bootstrap assuming different pre and post distributions, sampling directly from each
        permutation=sum(perms > stat) / R,                # a permutation test
        bootstrap.permutation=sum(perm.boots > stat) / R, # a permutation test using the bootstrap - implies the same pre and post distributions under the null
        wilcox=wilcox.test(res~group, data=dat, alternative="less")$p.value,          # Mann-Whitney-Wilcoxon
        t.test.not.equal.var=t.test(res~group, data=dat, alternative="less")$p.value, # t-test (unequal variances)
        t.test.equal.var=t.test(res~group, data=dat, alternative="less", var.equal=TRUE)$p.value # t-test (equal variances)
    )
}
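# A minimal sketch of the CI-based approach mentioned above (not used in the analysis below):
# bootstrap the difference in means directly and check whether a one-sided percentile lower
# bound excludes zero. The function name and the choice of a simple percentile bound are
# illustrative only.
bootstrap.ci.test <- function(pre, post, R, alpha) {
    boot.difs <- sapply(1:R, function(...) mean(sample(post, replace=TRUE)) - mean(sample(pre, replace=TRUE)))
    lower <- unname(quantile(boot.difs, alpha)) # one-sided 100*(1-alpha)% lower confidence bound
    c(lower.bound=lower, reject=lower > 0)      # reject "no improvement" if the bound excludes zero
}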
# function for comparing the power of the various methods for the given data. Works by setting the mean
# of the post-test scores equal to the mean of the pre-test scores and using the bootstrap (resampling
# the data with replacement) to determine the proportion of times that a difference is detected, then
# progressively increasing the mean of the post-test scores and repeating.
# args:
#   pre:     The pre-test scores.
#   post:    The post-test scores.
#   difs:    Vector of proportions of the mean of the pre-test scores to add to the post-test scores.
#   alpha:   The one-sided significance level of the tests.
#   R:       The number of bootstrap replications to use for the bootstrap and permutation tests.
#   R.power: The number of simulation replications to use in the power calculations.
power.test <- function(pre, post, difs, alpha, R, R.power) {
    mean.pre <- mean(pre)
    mean.dif <- mean(post) - mean.pre
    post <- post - mean.dif # set the means to be equal
    tmp <- as.data.frame(sapply(difs, function(dif) {
        post <- post + dif*mean.pre
        reps <- t(sapply(1:R.power, function(...) demo.all.methods(sample(pre, replace=TRUE), sample(post, replace=TRUE), R)))
        apply(reps < alpha, 2, sum) / R.power
    }))
    names(tmp) <- paste('p', difs, sep='')
    tmp
}
# number of simulation replicates to use in the power calculations
R.power <- 1000
# number of replicates to use in the bootstrap and permutation methods.
R <- 1000
# significance level to use in significance tests in power calculations
alpha <- 0.05
# proportions of pre-test means to add to post-test data in power calculations
difs <- c(seq(0, 0.1, 0.01), seq(0.15, 0.25, 0.025))
# hopefully we'll all get the same results.
set.seed(42)
# wilcox.test complains because we have tied values (no exact p-value) - tell it to shut up
options(warn=-1)
res <- demo.all.methods(pre, post, R)
powers <- power.test(pre, post, difs, alpha, R, R.power)
cat("p-values:\n")
print(res)
cat("bootstrap power calculations\n")
print(powers)
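# A quick way one could visualise the power curves in `powers` (a sketch only; the colours,
# line types and legend placement are arbitrary choices).
matplot(difs, t(as.matrix(powers)), type='l', col=1:nrow(powers), lty=1:nrow(powers),
        xlab='proportion of pre-test mean added to post-test scores', ylab='power')
legend('bottomright', legend=rownames(powers), col=1:nrow(powers), lty=1:nrow(powers))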
#-----------------------------------------------------------------------------------------------------------------------
# Results
#-----------------------------------------------------------------------------------------------------------------------
#p-values:
#bootstrap 0.3160000
#bootstrap2 0.2680000
#permutation 0.2770000
#bootstrap.permutation 0.2700000
#wilcox 0.1871320
#t.test.not.equal.var 0.2907173
#t.test.equal.var 0.2906348
#Power calculations (using bootstrap):
#post-test mean as % of pre-test mean 100 101 102 103 104 105 106 107 108 109 110 115 117.5 120 122.5 125
#bootstrap 0.062 0.091 0.122 0.158 0.223 0.268 0.292 0.383 0.448 0.493 0.597 0.836 0.922 0.974 0.991 0.992
#bootstrap2 0.059 0.089 0.114 0.150 0.214 0.253 0.280 0.375 0.421 0.485 0.588 0.833 0.916 0.976 0.990 0.993
#permutation 0.051 0.077 0.100 0.139 0.203 0.237 0.259 0.353 0.406 0.460 0.553 0.807 0.902 0.966 0.985 0.990
#bootstrap.permutation 0.054 0.087 0.111 0.143 0.205 0.238 0.269 0.365 0.415 0.466 0.565 0.825 0.905 0.966 0.984 0.991
#wilcox 0.053 0.158 0.157 0.163 0.331 0.329 0.284 0.391 0.395 0.378 0.582 0.773 0.894 0.970 0.988 0.997
#t.test.not.equal.var 0.052 0.080 0.106 0.137 0.204 0.235 0.258 0.355 0.410 0.457 0.554 0.810 0.907 0.965 0.986 0.991
#t.test.equal.var 0.052 0.080 0.106 0.138 0.204 0.235 0.258 0.355 0.412 0.459 0.556 0.811 0.907 0.966 0.986 0.991