# Single test ---- | |
# Scenarion 1: correlation between two randomly generated variables (null hypothesis is true) ---- | |
x.r <- rnorm (100) | |
y.r <- rnorm (100) | |
cor.test (x, y)$p.value | |
# Scenario 2: correlation between two dependent variables (null hypothesis is false), single test ---- | |
x.d <- rnorm (100) | |
y.d <- 0.2 * x.d + rnorm (100) | |
cor.test (x.d, y.d)$p.value | |
# Multiple tests (100 relationships, 10 traits and 10 environmental variables) ---- | |
# Scenario 1: all null hypotheses are true ---- | |
png (file = 'multiple_null_true_1.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (321) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- rnorm (100) | |
plot (y ~ x, pch = 16, col = 'grey', cex = .4, axes = F, main = i) | |
box () | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# repeat | |
png (file = 'multiple_null_true_2.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (3210) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- rnorm (100) | |
plot (y ~ x, pch = 16, col = 'grey', cex = .4, axes = F, main = i) | |
box () | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# Scenario 2: all null hypotheses are false ---- | |
png (file = 'multiple_null_false_1.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (1234) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- 0.2 * x + rnorm (100) # y is dependent on x + added noise | |
plot (y ~ x, axes = F, main = i, type = 'n') | |
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680") | |
points (y ~ x, pch = 16, col = 'grey', cex = .4) | |
box (col = 'blue') | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# repeat | |
png (file = 'multiple_null_false_2.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (12345) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- 0.2 * x + rnorm (100) # y is dependent on x + added noise | |
plot (y ~ x, axes = F, main = i, type = 'n') | |
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680") | |
points (y ~ x, pch = 16, col = 'grey', cex = .4) | |
box (col = 'blue') | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# Scenario 3: first 10 combnations with null false, other 90 with null true ---- | |
png (file = 'multiple_null_false_true_1.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (345) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
b <- c(rep (0.2, 10), rep (0, 90)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- b[i] * x + rnorm (100) # y is dependent on x (but only for b > 0) + added noise | |
plot (y ~ x, axes = F, main = i, type = 'n') | |
if (b[i] > 0) | |
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680") | |
points (y ~ x, pch = 16, col = 'grey', cex = .4) | |
box (col = if (b[i] > 0) 'blue' else 'black') | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
png (file = 'multiple_null_false_true_2.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (3456) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
b <- c(rep (0.2, 10), rep (0, 90)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- b[i] * x + rnorm (100) # y is dependent on x (but only for b > 0) + added noise | |
plot (y ~ x, axes = F, main = i, type = 'n') | |
if (b[i] > 0) | |
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680") | |
points (y ~ x, pch = 16, col = 'grey', cex = .4) | |
box (col = if (b[i] > 0) 'blue' else 'black') | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# Apply Bonferroni ---- | |
png (file = 'multiple_null_false_true_bonf.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (3456) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
b <- c(rep (0.2, 10), rep (0, 90)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (100) | |
y <- b[i] * x + rnorm (100) # y is dependent on x (but only for b > 0) + added noise | |
plot (y ~ x, axes = F, main = i, type = 'n') | |
if (b[i] > 0) | |
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680") | |
points (y ~ x, pch = 16, col = 'grey', cex = .4) | |
box (col = if (b[i] > 0) 'blue' else 'black') | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
p.adj <- p.adjust (p.value, n = 100, method = 'bonferroni') | |
if (p.adj < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# Icrease sample size (power) and apply Bonferroni ---- | |
png (file = 'multiple_null_false_true_bonf_increased-samplesize.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12) | |
set.seed (345678) | |
par (mfrow = c(10, 10)) | |
par (mar = c(1,1,1,1)) | |
b <- c(rep (0.2, 10), rep (0, 90)) | |
for (i in seq (1, 100)) | |
{ | |
x <- rnorm (500) | |
y <- b[i] * x + rnorm (500) # y is dependent on x (but only for b > 0) + added noise | |
plot (y ~ x, axes = F, main = i, type = 'n') | |
if (b[i] > 0) | |
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680") | |
points (y ~ x, pch = 16, col = 'grey', cex = .4) | |
box (col = if (b[i] > 0) 'blue' else 'black') | |
lm.res <- lm (y ~ x) | |
p.value <- anova (lm.res)$'Pr(>F)'[1] | |
p.adj <- p.adjust (p.value, n = 100, method = 'bonferroni') | |
if (p.adj < 0.05) abline (lm.res, col = 'red', lwd = 3) | |
} | |
dev.off () | |
# Estimate the power of the test | |
# for regression with b = 0.2 and 100 samples | |
P.val.100 <- replicate (10000, { | |
x <- rnorm (100) | |
y <- 0.2 * x + rnorm (100) | |
anova (lm (y ~ x))$'Pr(>F)'[1] | |
}) | |
sum (P.val.100 < 0.05)/10000 # 0.504 | |
# for regression with b = 0.2 and 100 samples | |
P.val.500 <- replicate (10000, { | |
x <- rnorm (500) | |
y <- 0.2 * x + rnorm (500) | |
anova (lm (y ~ x))$'Pr(>F)'[1] | |
}) | |
sum (P.val.500 < 0.05)/10000 # 0.993 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment