Created
March 3, 2013 14:38
-
-
Save Sleepingwell/5076345 to your computer and use it in GitHub Desktop.
A comparison of some hypothesis tests for a specific two sample problem (for a forum).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pre <- scan() | |
51.11 | |
64.44 | |
73.33 | |
77.78 | |
66.67 | |
84.44 | |
68.89 | |
57.78 | |
64.44 | |
60.00 | |
64.44 | |
88.89 | |
75.56 | |
66.67 | |
86.67 | |
77.78 | |
75.56 | |
91.11 | |
post <- scan() | |
82.22 | |
84.44 | |
53.33 | |
77.78 | |
77.78 | |
91.11 | |
68.89 | |
62.22 | |
64.44 | |
42.22 | |
60.00 | |
80.00 | |
77.78 | |
68.89 | |
80.00 | |
80.00 | |
91.11 | |
95.56 | |
# function that calculates the bootstrap, the Mann-Whitney-Wilcoxon, permutation and T tests | |
# args: | |
# pre: The pre-test scores | |
# post: The post-test scores | |
# R: The number of replicates for the bootstrap and permutation tests | |
# | |
# Note that there are many MANY possible refinements of the bootstrap we could use which | |
# may be more powerful that the ones used here. I have just used a couple to demonstrate | |
# the ideas. | |
# | |
# The other approach to using the bootstrap for hypothesis testing is to calculate a | |
# confidence interval for the test statistic and see whether it contains zero, which I have | |
# not pursued here as I don't find it as appealing (and because of the duality between | |
# CIs and confidence intervals, the results are usually very similar - if not identical - for | |
# some method of constructing an interval). | |
demo.all.methods <- function(pre, post, R) { | |
# bootstrap resamples | |
stat <- mean(post) - mean(pre) | |
post.under.null <- post - stat | |
boots <- sapply(1:R, function(...) mean(sample(post.under.null, replace=T)) - mean(sample(pre, replace=T))) | |
# permutations | |
all.res <- c(pre, post) | |
perm.sampler <- function(..., replace) { | |
perm <- sample(all.res, replace=replace) | |
mean(rev(perm)[1:length(post)]) - mean(perm[1:length(pre)]) | |
} | |
perms <- sapply(1:R, perm.sampler, replace=F) | |
perm.boots <- sapply(1:R, perm.sampler, replace=T) | |
# data for the Wilcox and T tests. | |
dat <- cbind(res=c(pre, post), group=rep(0:1, c(length(pre), length(post)))) | |
c( | |
bootstrap=sum(boots-stat > 0) / R, # bootstrap assuming different pre and post distributions, sampling under the null | |
bootstrap2=sum((boots+stat) <= 0) / R, # bootstrap assuming different pre and post distributions, sampling directly from each | |
permutation=sum(perms > stat) / R, # a permutation test | |
bootstrap.permutation=sum(perm.boots > stat) / R, # a permutation test using the bootstrap - imples the same pre and post distributions under the null | |
wilcox=wilcox.test(res~group, data=dat, alternative="less")$p.value, # Mann–Whitney–Wilcoxon | |
t.test.not.equal.var=t.test(res~group, data=dat, alternative="less")$p.value, # T-test (unequal variances) | |
t.test.equal.var=t.test(res~group, data=dat, alternative="less", var.equal=T)$p.value # T-test (equal variances) | |
) | |
} | |
# function for comparing the power of the various methods for the given data. Works by setting the mean | |
# of the post-test scores equal to the mean of the pre-test scores and using the bootstrap to determine | |
# the proportion of times that a difference is detected, then progressively increasing the mean of the | |
# post-test scores and repeating. | |
# args: | |
# pre: The pre-test scores. | |
# post: The post-test scores. | |
# difs: Vector of proportions of the mean of the pre-test score to add to the post-test scores. | |
# alpha: The one-sided significance level of the test. | |
# R: The nubmer of boostrap replications to use in for the bootstrap and permutation tests. | |
# R.tests: The number of boostrap replications to use in for the power the calculations. | |
power.test <- function(pre, post, difs, alpha, R, R.power) { | |
mean.pre <- mean(pre) | |
mean.dif <- mean(post)-mean.pre | |
post <- post-mean.dif # set the means to be equal | |
tmp <- as.data.frame(sapply(difs, function(dif) { | |
post <- post + dif*mean.pre | |
reps <- t(sapply(1:R.power, function(...) demo.all.methods(sample(pre, replace=T), sample(post, replace=T), R))) | |
apply(reps<alpha, 2, sum) / R.power | |
})) | |
names(tmp) <- paste('p', difs, sep='') | |
tmp | |
} | |
# number of replicates to use in evaluation tests | |
R.power <- 1000 | |
# number of replicates to use in the bootstrap and permutation methods. | |
R <- 1000 | |
# significance level to use in significance tests in power calculations | |
alpha <- 0.05 | |
# proportions of pre-test means to add to post-test data in power calculations | |
difs <- c(seq(0, 0.1, 0.01), seq(0.15, 0.25, 0.025)) | |
# hopefully we'll all get the same results. | |
set.seed(42) | |
# wilcox is noisy because we have replicates - tell it to shut up | |
options(warn=-1) | |
res <- demo.all.methods(pre, post, R) | |
powers <- power.test(pre, post, difs, alpha, R, R.power) | |
cat("p-values:\n") | |
print(res) | |
cat("bootstrap power calculations\n") | |
print(powers) | |
#----------------------------------------------------------------------------------------------------------------------- | |
# Results | |
#----------------------------------------------------------------------------------------------------------------------- | |
#p-values: | |
#bootstrap 0.3160000 | |
#bootstrap2 0.2680000 | |
#permutation 0.2770000 | |
#bootstrap.permutation 0.2700000 | |
#wilcox 0.1871320 | |
#t.test.not.equal.var 0.2907173 | |
#t.test.equal.var 0.2906348 | |
#Power calculations (using bootstrap): | |
#post-mean % of pre-mean 100 101 102 103 104 105 106 107 108 109 110 115 117.5 120 122.5 125 | |
#bootstrap 0.062 0.091 0.122 0.158 0.223 0.268 0.292 0.383 0.448 0.493 0.597 0.836 0.922 0.974 0.991 0.992 | |
#bootstrap2 0.059 0.089 0.114 0.150 0.214 0.253 0.280 0.375 0.421 0.485 0.588 0.833 0.916 0.976 0.990 0.993 | |
#permutation 0.051 0.077 0.100 0.139 0.203 0.237 0.259 0.353 0.406 0.460 0.553 0.807 0.902 0.966 0.985 0.990 | |
#bootstrap.permutation 0.054 0.087 0.111 0.143 0.205 0.238 0.269 0.365 0.415 0.466 0.565 0.825 0.905 0.966 0.984 0.991 | |
#wilcox 0.053 0.158 0.157 0.163 0.331 0.329 0.284 0.391 0.395 0.378 0.582 0.773 0.894 0.970 0.988 0.997 | |
#t.test.not.equal.var 0.052 0.080 0.106 0.137 0.204 0.235 0.258 0.355 0.410 0.457 0.554 0.810 0.907 0.965 0.986 0.991 | |
#t.test.equal.var 0.052 0.080 0.106 0.138 0.204 0.235 0.258 0.355 0.412 0.459 0.556 0.811 0.907 0.966 0.986 0.991 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment