Skip to content

Instantly share code, notes, and snippets.

@jalapic
Created March 2, 2015 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalapic/ce2ce00a4e014c2259f3 to your computer and use it in GitHub Desktop.
Save jalapic/ce2ce00a4e014c2259f3 to your computer and use it in GitHub Desktop.
### Permutation Test Example... #based on: http://spark.rstudio.com/ahmed/permutation/
# Group A
a = c(86, 88, 89, 89, 92, 93, 94, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 101, 106, 107, 110, 113, 116, 118)
# Group B
b = c(89, 90, 92, 93, 93, 96, 99, 99, 99, 102, 103, 104, 105, 106, 106, 107, 108, 108, 110, 110, 112, 114, 116, 116)
df <-
data.frame(
values = c(a,b),
group = c(rep("A", length(a)), rep("B", length(b)))
)
df
### Graph it...
library(ggplot2)
ggplot(df, aes(group, values, color=group)) +
geom_boxplot(aes(color=group, fill=group), alpha=.1) +
geom_point(aes(color=group), size=3) +
theme_bw()
#or,
ggplot(df, aes(x=values, fill=group)) + geom_density(alpha=.3) + theme_bw()
## what you'd probably do....
t.test(a, b) #ok then - normality ? - equal variances ?
shapiro.test(a)
shapiro.test(b) #ok, now what ?
x <- list(a, b)
x
bartlett.test(x)
t.test(a, b, var.equal=T) #ok, now what ?
### Randomization Tests !
# Combine the two datasets into a single dataset
# i.e., under the null hypothesis, there is no difference between the two groups
combined = c(a,b)
# Observed difference
diff.observed = mean(b) - mean(a)
### Permute one time.
### Shuffle
shuffled <- sample (combined, length(combined))
# Reallocate groups
a.random <- shuffled[1 : length(a)]
b.random <- shuffled[(length(a) + 1) : length(combined)]
a.random
b.random
diff.random[i] = mean(b.random) - mean(a.random)
### Do lots of times ...
nperms = 1000
diff.random = NULL
for (i in 1 : nperms) {
# Sample from the combined dataset without replacement
shuffled <- sample(combined)
a.random <- shuffled[1 : length(a)]
b.random <- shuffled[(length(a) + 1) : length(combined)]
# Null (permuated) difference
diff.random[i] = mean(b.random) - mean(a.random)
}
# P-value is the fraction of how many times the permuted difference is equal or more extreme than the observed difference
pvalue = sum(abs(diff.random) >= abs(diff.observed)) / nperms
pvalue
### graph results
ggplot(df, aes(x=diff.random)) +
geom_histogram(binwidth=.5, colour="black", fill="white") +
theme_bw() +
geom_vline(xintercept=diff.observed, color="red", lty=2, lwd=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment