jalapic/permtest

## permtest
### Permutation Test Example... #based on:  http://spark.rstudio.com/ahmed/permutation/

# Group A
a = c(86, 88, 89, 89, 92, 93, 94, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 101, 106, 107, 110, 113, 116, 118)

# Group B
b = c(89, 90, 92, 93, 93, 96, 99, 99, 99, 102, 103, 104, 105, 106, 106, 107, 108, 108, 110, 110, 112, 114, 116, 116)


df <-
  data.frame(
  values = c(a,b),
  group = c(rep("A", length(a)), rep("B", length(b)))
  )

df

### Graph it...

library(ggplot2)

ggplot(df, aes(group, values, color=group)) +
  geom_boxplot(aes(color=group, fill=group), alpha=.1) +
  geom_point(aes(color=group), size=3) +
  theme_bw()

#or,
ggplot(df, aes(x=values, fill=group)) + geom_density(alpha=.3) + theme_bw()


## what you'd probably do....

t.test(a, b)  #ok then  - normality ?  - equal variances ?

shapiro.test(a)
shapiro.test(b)  #ok, now what ?

x <- list(a, b)
x

bartlett.test(x)

t.test(a, b, var.equal=T) #ok, now what ?


### Randomization Tests !


# Combine the two datasets into a single dataset
# i.e., under the null hypothesis, there is no difference between the two groups
combined = c(a,b)

# Observed difference
diff.observed = mean(b) - mean(a)


###  Permute one time.

### Shuffle
shuffled <- sample (combined, length(combined))

# Reallocate groups
a.random <- shuffled[1 : length(a)]
b.random <- shuffled[(length(a) + 1) : length(combined)]

a.random
b.random

diff.random[i] = mean(b.random) - mean(a.random)


### Do lots of times ...

nperms = 1000

diff.random = NULL
for (i in 1 : nperms) {

  # Sample from the combined dataset without replacement
  shuffled <- sample(combined)

  a.random <- shuffled[1 : length(a)]
  b.random <- shuffled[(length(a) + 1) : length(combined)]

  # Null (permuated) difference
  diff.random[i] = mean(b.random) - mean(a.random)
}

# P-value is the fraction of how many times the permuted difference is equal or more extreme than the observed difference

pvalue = sum(abs(diff.random) >= abs(diff.observed)) / nperms
pvalue


### graph results

ggplot(df, aes(x=diff.random)) +
  geom_histogram(binwidth=.5, colour="black", fill="white") +
  theme_bw() +
  geom_vline(xintercept=diff.observed, color="red", lty=2, lwd=1)
	### Permutation Test Example... #based on: http://spark.rstudio.com/ahmed/permutation/

	# Group A
	a = c(86, 88, 89, 89, 92, 93, 94, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 99, 99, 101, 106, 107, 110, 113, 116, 118)

	# Group B
	b = c(89, 90, 92, 93, 93, 96, 99, 99, 99, 102, 103, 104, 105, 106, 106, 107, 108, 108, 110, 110, 112, 114, 116, 116)


	df <-
	data.frame(
	values = c(a,b),
	group = c(rep("A", length(a)), rep("B", length(b)))
	)

	df

	### Graph it...

	library(ggplot2)

	ggplot(df, aes(group, values, color=group)) +
	geom_boxplot(aes(color=group, fill=group), alpha=.1) +
	geom_point(aes(color=group), size=3) +
	theme_bw()

	#or,
	ggplot(df, aes(x=values, fill=group)) + geom_density(alpha=.3) + theme_bw()


	## what you'd probably do....

	t.test(a, b) #ok then - normality ? - equal variances ?

	shapiro.test(a)
	shapiro.test(b) #ok, now what ?

	x <- list(a, b)
	x

	bartlett.test(x)

	t.test(a, b, var.equal=T) #ok, now what ?



	### Randomization Tests !


	# Combine the two datasets into a single dataset
	# i.e., under the null hypothesis, there is no difference between the two groups
	combined = c(a,b)

	# Observed difference
	diff.observed = mean(b) - mean(a)


	### Permute one time.

	### Shuffle
	shuffled <- sample (combined, length(combined))

	# Reallocate groups
	a.random <- shuffled[1 : length(a)]
	b.random <- shuffled[(length(a) + 1) : length(combined)]

	a.random
	b.random

	diff.random[i] = mean(b.random) - mean(a.random)




	### Do lots of times ...

	nperms = 1000

	diff.random = NULL
	for (i in 1 : nperms) {

	# Sample from the combined dataset without replacement
	shuffled <- sample(combined)

	a.random <- shuffled[1 : length(a)]
	b.random <- shuffled[(length(a) + 1) : length(combined)]

	# Null (permuated) difference
	diff.random[i] = mean(b.random) - mean(a.random)
	}

	# P-value is the fraction of how many times the permuted difference is equal or more extreme than the observed difference

	pvalue = sum(abs(diff.random) >= abs(diff.observed)) / nperms
	pvalue


	### graph results

	ggplot(df, aes(x=diff.random)) +
	geom_histogram(binwidth=.5, colour="black", fill="white") +
	theme_bw() +
	geom_vline(xintercept=diff.observed, color="red", lty=2, lwd=1)