Skip to content

Instantly share code, notes, and snippets.

@zdealveindy
Created March 15, 2019 14:33
Show Gist options
  • Save zdealveindy/39feabd7c7b4f3d780d2b8c5232456a1 to your computer and use it in GitHub Desktop.
Save zdealveindy/39feabd7c7b4f3d780d2b8c5232456a1 to your computer and use it in GitHub Desktop.
# Single test ----
# Scenarion 1: correlation between two randomly generated variables (null hypothesis is true) ----
x.r <- rnorm (100)
y.r <- rnorm (100)
cor.test (x, y)$p.value
# Scenario 2: correlation between two dependent variables (null hypothesis is false), single test ----
x.d <- rnorm (100)
y.d <- 0.2 * x.d + rnorm (100)
cor.test (x.d, y.d)$p.value
# Multiple tests (100 relationships, 10 traits and 10 environmental variables) ----
# Scenario 1: all null hypotheses are true ----
png (file = 'multiple_null_true_1.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (321)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- rnorm (100)
plot (y ~ x, pch = 16, col = 'grey', cex = .4, axes = F, main = i)
box ()
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# repeat
png (file = 'multiple_null_true_2.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (3210)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- rnorm (100)
plot (y ~ x, pch = 16, col = 'grey', cex = .4, axes = F, main = i)
box ()
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# Scenario 2: all null hypotheses are false ----
png (file = 'multiple_null_false_1.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (1234)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- 0.2 * x + rnorm (100) # y is dependent on x + added noise
plot (y ~ x, axes = F, main = i, type = 'n')
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680")
points (y ~ x, pch = 16, col = 'grey', cex = .4)
box (col = 'blue')
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# repeat
png (file = 'multiple_null_false_2.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (12345)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- 0.2 * x + rnorm (100) # y is dependent on x + added noise
plot (y ~ x, axes = F, main = i, type = 'n')
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680")
points (y ~ x, pch = 16, col = 'grey', cex = .4)
box (col = 'blue')
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# Scenario 3: first 10 combnations with null false, other 90 with null true ----
png (file = 'multiple_null_false_true_1.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (345)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
b <- c(rep (0.2, 10), rep (0, 90))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- b[i] * x + rnorm (100) # y is dependent on x (but only for b > 0) + added noise
plot (y ~ x, axes = F, main = i, type = 'n')
if (b[i] > 0)
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680")
points (y ~ x, pch = 16, col = 'grey', cex = .4)
box (col = if (b[i] > 0) 'blue' else 'black')
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
png (file = 'multiple_null_false_true_2.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (3456)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
b <- c(rep (0.2, 10), rep (0, 90))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- b[i] * x + rnorm (100) # y is dependent on x (but only for b > 0) + added noise
plot (y ~ x, axes = F, main = i, type = 'n')
if (b[i] > 0)
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680")
points (y ~ x, pch = 16, col = 'grey', cex = .4)
box (col = if (b[i] > 0) 'blue' else 'black')
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
if (p.value < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# Apply Bonferroni ----
png (file = 'multiple_null_false_true_bonf.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (3456)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
b <- c(rep (0.2, 10), rep (0, 90))
for (i in seq (1, 100))
{
x <- rnorm (100)
y <- b[i] * x + rnorm (100) # y is dependent on x (but only for b > 0) + added noise
plot (y ~ x, axes = F, main = i, type = 'n')
if (b[i] > 0)
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680")
points (y ~ x, pch = 16, col = 'grey', cex = .4)
box (col = if (b[i] > 0) 'blue' else 'black')
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
p.adj <- p.adjust (p.value, n = 100, method = 'bonferroni')
if (p.adj < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# Icrease sample size (power) and apply Bonferroni ----
png (file = 'multiple_null_false_true_bonf_increased-samplesize.png', width = 6, height = 6, res = 300, units = 'in', pointsize = 12)
set.seed (345678)
par (mfrow = c(10, 10))
par (mar = c(1,1,1,1))
b <- c(rep (0.2, 10), rep (0, 90))
for (i in seq (1, 100))
{
x <- rnorm (500)
y <- b[i] * x + rnorm (500) # y is dependent on x (but only for b > 0) + added noise
plot (y ~ x, axes = F, main = i, type = 'n')
if (b[i] > 0)
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#ADD8E680")
points (y ~ x, pch = 16, col = 'grey', cex = .4)
box (col = if (b[i] > 0) 'blue' else 'black')
lm.res <- lm (y ~ x)
p.value <- anova (lm.res)$'Pr(>F)'[1]
p.adj <- p.adjust (p.value, n = 100, method = 'bonferroni')
if (p.adj < 0.05) abline (lm.res, col = 'red', lwd = 3)
}
dev.off ()
# Estimate the power of the test
# for regression with b = 0.2 and 100 samples
P.val.100 <- replicate (10000, {
x <- rnorm (100)
y <- 0.2 * x + rnorm (100)
anova (lm (y ~ x))$'Pr(>F)'[1]
})
sum (P.val.100 < 0.05)/10000 # 0.504
# for regression with b = 0.2 and 100 samples
P.val.500 <- replicate (10000, {
x <- rnorm (500)
y <- 0.2 * x + rnorm (500)
anova (lm (y ~ x))$'Pr(>F)'[1]
})
sum (P.val.500 < 0.05)/10000 # 0.993
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment