Created
November 13, 2014 22:29
-
-
Save adelavega/6dc50d0a7189abc7c8e5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################## | |
##### Bootstrap of group difference ##### | |
############################################################## | |
### First, we generate two samples (N = 25) with a difference of 0.5 | |
### Second, test using parametric t.test | |
### Then we define a function to test the difference between the two samples | |
### using indices that boot will provide | |
## Then we run boot. Importantly, define the strata of your group labels | |
## so boot w/ resample appropriately (only within group w/ replacement) | |
## Then we run it w/ only 2 resamples to investigate (turn print indices on) | |
## Then we do it for real w/ 10000 resamples, investigate the bias of our staticis | |
## get a 95% confidence interval, and plot the density of our bootstrapped statistic | |
## If our bootstrapped difference does not include zero, then its significant | |
library(boot) | |
# make up data | |
dat <- stack(list(a = rnorm(25, mean=10), b = rnorm(25, mean=10.5))) | |
t.test(values ~ ind, data=dat, var.equal=T) | |
# define bootstrap statistic | |
# comment out print statements before bootstrapping | |
bootTwoSampleDiff <- function(data, indices){ | |
newdata <- data[indices,] | |
# print(indices) | |
return(diff = mean(subset(dat, ind==levels(dat$ind)[1])$value) - mean(subset(dat, ind==levels(dat$ind)[2])$value)) | |
} | |
# examine indices | |
boot(data=dat, statistic=bootTwoSampleDiff, R=2, strata=dat$ind) | |
# bootstrap! | |
resamples <- 10000 | |
results1 <- boot(data=dat, statistic=bootTwoSampleDiff, R=resamples, strata=strata=dat$ind) | |
results1 | |
boot.ci(results1) | |
plot(density(results1$t)) | |
############################################################## | |
##### Permutation test ##### | |
############################################################## | |
### Here instead of bootstrapping the statistics we do a permutation test | |
### to generate the distribution of the null hypothesis | |
### Similar to above but instead of will compare our real estimate | |
### to the null distribution we generated to see what the probability | |
### is that our estimate came from the null (two-tailed p-value) | |
str(dat) | |
# define permutation test statistic | |
# comment out print statements before permutating | |
permDiff <- function(data, indices){ | |
# print(indices) | |
# print(length(unique(indices))) | |
indices <- factor(indices <= 25, labels=c("b","a")) | |
newdata <- cbind.data.frame(indices, values=data$values) | |
return(diff = mean(newdata$values[newdata$indices=="b"]) - mean(newdata$values[newdata$indices=="a"])) | |
} | |
# examine indices | |
boot(data=dat, statistic=permDiff, R=2, sim="permutation") | |
# permutate! | |
resamples <- 10000 | |
results2 <- boot(data=dat, statistic=permDiff, R=resamples, sim="permutation") | |
plot(density(results2$t)) | |
# calculate real estimate | |
est <- mean(dat$values[dat$ind=="b"]) - mean(dat$values[dat$ind=="a"]) | |
# what is the probability our estimate came from the null distribution? | |
# two-tailed p-value | |
p.value <- mean(results2$t > est) + mean(results2$t < -1*est) | |
p.value |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment