Created
August 20, 2017 00:36
-
-
Save anonymous/659a4d43db10db1b46771d7ec7936d27 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to perform permutation tests in parallel with Statistical Thinking Series installment | |
# To substitute your own metric in these permutation tests, change the lines of code that are commented with "Customize this" | |
##### SETUP ##### | |
# Clear workspace | |
rm(list=ls()) | |
# Permutation tests are inherently random, resulting in a slightly different p-value every time, even with the same samples | |
# Setting a random seed gives the results from the article | |
# set.seed(42) | |
##### SIMULATE SAMPLING DATA ##### | |
# Simulate two populations | |
pop1 <- rnorm(1000, mean = 0, sd = 1) | |
pop2 <- rnorm(1000, mean = 1, sd = 2) | |
# Sample from the two populations | |
sampleSize1 <- 20 | |
sampleSize2 <- 15 | |
sample1 <- sample(pop1, size = sampleSize1) | |
sample2 <- sample(pop2, size = sampleSize2) | |
allData <- c(sample1, sample2) | |
# Plot histograms of sample data | |
hist(sample1, xlim = c(-6,6), breaks = -6:6, | |
col = rgb(red = 1, green = 0, blue = 0, alpha = 0.4), | |
xlab = 'Value', main = 'Sample 1 (red) vs. Sample 2 (blue)') | |
hist(sample2, xlim = c(-6,6), breaks = -6:6, add = T, | |
col = rgb(red = 0, green = 0, blue = 1, alpha = 0.4)) | |
##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN MEANS ##### | |
# Calculate observed difference in means for two samples | |
diffMean <- mean(sample1) - mean(sample2) # Customize this | |
# Construct null distribution via permutations | |
nPermutations <- 1000 | |
nullDiffMeans <- rep(NA, nPermutations) | |
for (i in 1:nPermutations) | |
{ | |
newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2))) | |
permutedSamp1 <- allData[newLabels == 1] | |
permutedSamp2 <- allData[newLabels == 2] | |
# Calculate from permutation | |
nullDiffMeans[i] <- mean(permutedSamp1) - mean(permutedSamp2) # Customize this; should be same form as calculation above | |
} | |
# Is our actual difference extreme relative to the null distribution derived from permutations? | |
permPVal <- sum(abs(nullDiffMeans) > abs(diffMean)) / nPermutations | |
cat('Permutation test p-value for difference in means: ', permPVal, '\n') | |
# Plot null distribution and sampled difference in means | |
hist(nullDiffMeans, xlab = 'Difference', main = 'Permutations of difference in means') | |
abline(v = c(diffMean, -diffMean), col = 'red') | |
mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1) | |
# t-test for comparison | |
tTestExample <- t.test(sample1, sample2) | |
tTestPVal <- tTestExample$p.value | |
cat('t-test p-value for difference in means: ', tTestPVal, '\n') | |
##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN VARIANCES ##### | |
# Calculate observed difference in variances | |
diffVar <- var(sample1) - var(sample2) # Customize this | |
nPermutations <- 1000 | |
nullDiffVars <- rep(NA, nPermutations) | |
for (i in 1:nPermutations) | |
{ | |
newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2))) | |
permutedSamp1 <- allData[newLabels == 1] | |
permutedSamp2 <- allData[newLabels == 2] | |
# Calculate difference from permutation | |
nullDiffVars[i] <- var(permutedSamp1) - var(permutedSamp2) # Customize this; should be same form as calculation above | |
} | |
# Is our actual difference extreme relative to the null distribution derived from permutations? | |
permPVal <- sum(abs(nullDiffVars) > abs(diffVar)) / nPermutations | |
cat('Permutation p-value for difference in variances: ', permPVal, '\n') | |
# Plot null distribution and sampled difference in variances | |
hist(nullDiffVars, xlab = 'Difference', main = 'Permutations of difference in variances') | |
abline(v = c(diffVar, -diffVar), col = 'red') | |
mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment