Skip to content

Instantly share code, notes, and snippets.

Created August 20, 2017 00:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/659a4d43db10db1b46771d7ec7936d27 to your computer and use it in GitHub Desktop.
Save anonymous/659a4d43db10db1b46771d7ec7936d27 to your computer and use it in GitHub Desktop.
# Script to perform permutation tests in parallel with Statistical Thinking Series installment
# To substitute your own metric in these permutation tests, change the lines of code that are commented with "Customize this"
##### SETUP #####
# Clear workspace
rm(list=ls())
# Permutation tests are inherently random, resulting in a slightly different p-value every time, even with the same samples
# Setting a random seed gives the results from the article
# set.seed(42)
##### SIMULATE SAMPLING DATA #####
# Simulate two populations
pop1 <- rnorm(1000, mean = 0, sd = 1)
pop2 <- rnorm(1000, mean = 1, sd = 2)
# Sample from the two populations
sampleSize1 <- 20
sampleSize2 <- 15
sample1 <- sample(pop1, size = sampleSize1)
sample2 <- sample(pop2, size = sampleSize2)
allData <- c(sample1, sample2)
# Plot histograms of sample data
hist(sample1, xlim = c(-6,6), breaks = -6:6,
col = rgb(red = 1, green = 0, blue = 0, alpha = 0.4),
xlab = 'Value', main = 'Sample 1 (red) vs. Sample 2 (blue)')
hist(sample2, xlim = c(-6,6), breaks = -6:6, add = T,
col = rgb(red = 0, green = 0, blue = 1, alpha = 0.4))
##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN MEANS #####
# Calculate observed difference in means for two samples
diffMean <- mean(sample1) - mean(sample2) # Customize this
# Construct null distribution via permutations
nPermutations <- 1000
nullDiffMeans <- rep(NA, nPermutations)
for (i in 1:nPermutations)
{
newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2)))
permutedSamp1 <- allData[newLabels == 1]
permutedSamp2 <- allData[newLabels == 2]
# Calculate from permutation
nullDiffMeans[i] <- mean(permutedSamp1) - mean(permutedSamp2) # Customize this; should be same form as calculation above
}
# Is our actual difference extreme relative to the null distribution derived from permutations?
permPVal <- sum(abs(nullDiffMeans) > abs(diffMean)) / nPermutations
cat('Permutation test p-value for difference in means: ', permPVal, '\n')
# Plot null distribution and sampled difference in means
hist(nullDiffMeans, xlab = 'Difference', main = 'Permutations of difference in means')
abline(v = c(diffMean, -diffMean), col = 'red')
mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1)
# t-test for comparison
tTestExample <- t.test(sample1, sample2)
tTestPVal <- tTestExample$p.value
cat('t-test p-value for difference in means: ', tTestPVal, '\n')
##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN VARIANCES #####
# Calculate observed difference in variances
diffVar <- var(sample1) - var(sample2) # Customize this
nPermutations <- 1000
nullDiffVars <- rep(NA, nPermutations)
for (i in 1:nPermutations)
{
newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2)))
permutedSamp1 <- allData[newLabels == 1]
permutedSamp2 <- allData[newLabels == 2]
# Calculate difference from permutation
nullDiffVars[i] <- var(permutedSamp1) - var(permutedSamp2) # Customize this; should be same form as calculation above
}
# Is our actual difference extreme relative to the null distribution derived from permutations?
permPVal <- sum(abs(nullDiffVars) > abs(diffVar)) / nPermutations
cat('Permutation p-value for difference in variances: ', permPVal, '\n')
# Plot null distribution and sampled difference in variances
hist(nullDiffVars, xlab = 'Difference', main = 'Permutations of difference in variances')
abline(v = c(diffVar, -diffVar), col = 'red')
mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment