/PermutationTest_SampleCode.r

## PermutationTest_SampleCode.r
# Script to perform permutation tests in parallel with Statistical Thinking Series installment
# To substitute your own metric in these permutation tests, change the lines of code that are commented with "Customize this"

##### SETUP #####
# Clear workspace
rm(list=ls())

# Permutation tests are inherently random, resulting in a slightly different p-value every time, even with the same samples
# Setting a random seed gives the results from the article
# set.seed(42)

##### SIMULATE SAMPLING DATA #####
# Simulate two populations
pop1 <- rnorm(1000, mean = 0, sd = 1)
pop2 <- rnorm(1000, mean = 1, sd = 2)

# Sample from the two populations
sampleSize1 <- 20
sampleSize2 <- 15

sample1 <- sample(pop1, size = sampleSize1)
sample2 <- sample(pop2, size = sampleSize2)
allData <- c(sample1, sample2)

# Plot histograms of sample data
hist(sample1, xlim = c(-6,6), breaks = -6:6,
     col = rgb(red = 1, green = 0, blue = 0, alpha = 0.4),
     xlab = 'Value', main = 'Sample 1 (red) vs. Sample 2 (blue)')
hist(sample2, xlim = c(-6,6), breaks = -6:6, add = T,
     col = rgb(red = 0, green = 0, blue = 1, alpha = 0.4))

##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN MEANS #####
# Calculate observed difference in means for two samples
diffMean <- mean(sample1) - mean(sample2) # Customize this

# Construct null distribution via permutations
nPermutations <- 1000
nullDiffMeans <- rep(NA, nPermutations)

for (i in 1:nPermutations)
{
  newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2)))

  permutedSamp1 <- allData[newLabels == 1]
  permutedSamp2 <- allData[newLabels == 2]

  # Calculate from permutation
  nullDiffMeans[i] <- mean(permutedSamp1) - mean(permutedSamp2) # Customize this; should be same form as calculation above
}

# Is our actual difference extreme relative to the null distribution derived from permutations?
permPVal <- sum(abs(nullDiffMeans) > abs(diffMean)) / nPermutations
cat('Permutation test p-value for difference in means: ', permPVal, '\n')

# Plot null distribution and sampled difference in means
hist(nullDiffMeans, xlab = 'Difference', main = 'Permutations of difference in means')
abline(v = c(diffMean, -diffMean), col = 'red')
mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1)

# t-test for comparison
tTestExample <- t.test(sample1, sample2)
tTestPVal <- tTestExample$p.value
cat('t-test p-value for difference in means: ', tTestPVal, '\n')

##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN VARIANCES #####
# Calculate observed difference in variances
diffVar <- var(sample1) - var(sample2) # Customize this

nPermutations <- 1000
nullDiffVars <- rep(NA, nPermutations)

for (i in 1:nPermutations)
{
  newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2)))

  permutedSamp1 <- allData[newLabels == 1]
  permutedSamp2 <- allData[newLabels == 2]

  # Calculate difference from permutation
  nullDiffVars[i] <- var(permutedSamp1) - var(permutedSamp2) # Customize this; should be same form as calculation above
}

# Is our actual difference extreme relative to the null distribution derived from permutations?
permPVal <- sum(abs(nullDiffVars) > abs(diffVar)) / nPermutations
cat('Permutation p-value for difference in variances: ', permPVal, '\n')

# Plot null distribution and sampled difference in variances
hist(nullDiffVars, xlab = 'Difference', main = 'Permutations of difference in variances')
abline(v = c(diffVar, -diffVar), col = 'red')
mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1)
	# Script to perform permutation tests in parallel with Statistical Thinking Series installment
	# To substitute your own metric in these permutation tests, change the lines of code that are commented with "Customize this"

	##### SETUP #####
	# Clear workspace
	rm(list=ls())

	# Permutation tests are inherently random, resulting in a slightly different p-value every time, even with the same samples
	# Setting a random seed gives the results from the article
	# set.seed(42)

	##### SIMULATE SAMPLING DATA #####
	# Simulate two populations
	pop1 <- rnorm(1000, mean = 0, sd = 1)
	pop2 <- rnorm(1000, mean = 1, sd = 2)

	# Sample from the two populations
	sampleSize1 <- 20
	sampleSize2 <- 15

	sample1 <- sample(pop1, size = sampleSize1)
	sample2 <- sample(pop2, size = sampleSize2)
	allData <- c(sample1, sample2)

	# Plot histograms of sample data
	hist(sample1, xlim = c(-6,6), breaks = -6:6,
	col = rgb(red = 1, green = 0, blue = 0, alpha = 0.4),
	xlab = 'Value', main = 'Sample 1 (red) vs. Sample 2 (blue)')
	hist(sample2, xlim = c(-6,6), breaks = -6:6, add = T,
	col = rgb(red = 0, green = 0, blue = 1, alpha = 0.4))

	##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN MEANS #####
	# Calculate observed difference in means for two samples
	diffMean <- mean(sample1) - mean(sample2) # Customize this

	# Construct null distribution via permutations
	nPermutations <- 1000
	nullDiffMeans <- rep(NA, nPermutations)

	for (i in 1:nPermutations)
	{
	newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2)))

	permutedSamp1 <- allData[newLabels == 1]
	permutedSamp2 <- allData[newLabels == 2]

	# Calculate from permutation
	nullDiffMeans[i] <- mean(permutedSamp1) - mean(permutedSamp2) # Customize this; should be same form as calculation above
	}

	# Is our actual difference extreme relative to the null distribution derived from permutations?
	permPVal <- sum(abs(nullDiffMeans) > abs(diffMean)) / nPermutations
	cat('Permutation test p-value for difference in means: ', permPVal, '\n')

	# Plot null distribution and sampled difference in means
	hist(nullDiffMeans, xlab = 'Difference', main = 'Permutations of difference in means')
	abline(v = c(diffMean, -diffMean), col = 'red')
	mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1)

	# t-test for comparison
	tTestExample <- t.test(sample1, sample2)
	tTestPVal <- tTestExample$p.value
	cat('t-test p-value for difference in means: ', tTestPVal, '\n')

	##### CONDUCT PERMUTATION TEST FOR DIFFERENCE IN VARIANCES #####
	# Calculate observed difference in variances
	diffVar <- var(sample1) - var(sample2) # Customize this

	nPermutations <- 1000
	nullDiffVars <- rep(NA, nPermutations)

	for (i in 1:nPermutations)
	{
	newLabels <- sample(c(rep(1, sampleSize1), rep(2, sampleSize2)))

	permutedSamp1 <- allData[newLabels == 1]
	permutedSamp2 <- allData[newLabels == 2]

	# Calculate difference from permutation
	nullDiffVars[i] <- var(permutedSamp1) - var(permutedSamp2) # Customize this; should be same form as calculation above
	}

	# Is our actual difference extreme relative to the null distribution derived from permutations?
	permPVal <- sum(abs(nullDiffVars) > abs(diffVar)) / nPermutations
	cat('Permutation p-value for difference in variances: ', permPVal, '\n')

	# Plot null distribution and sampled difference in variances
	hist(nullDiffVars, xlab = 'Difference', main = 'Permutations of difference in variances')
	abline(v = c(diffVar, -diffVar), col = 'red')
	mtext(text = paste0('p = ', permPVal), side = 3, line = 0, adj = 1)