# cbrown5/why-plot-your-data.R

## why-plot-your-data.R
# Why you should plot data before doing statistical tests
# CJ Brown 2020-11-06
#More at www.conservationhackers.org

library(ggplot2)

#
# Make some data
#
# Simulation settings.
n <- 50          # number of observations in each group
x <- seq_len(n)  # predictor: the integers 1, 2, ..., n
sd <- 1          # standard deviation of the Gaussian noise

# Two quadratic trends in x that are mirror images of each other, each with
# its own independent noise. y1 is drawn before y2, so the random number
# stream is consumed in that order.
y1 <- (x - 0.02 * x^2 - 8) + rnorm(n, sd = sd)
y2 <- (-x + 0.02 * x^2 + 8) + rnorm(n, sd = sd)

# Long-format data frame: both series stacked, with a label column that
# records which series each row came from.
dat <- data.frame(
  x = rep(x, times = 2),
  y = c(y1, y2),
  grp = rep(c("y1", "y2"), each = n)
)

# The wrong way round: run a two-sample t-test before looking at the data.
# (With its default settings t.test() does not assume equal variances.)
t.test(x = y1, y = y2)
# The test typically reports no 'significant' difference between the mean of
# y1 and the mean of y2.

# Peek at the first rows of the combined data frame.
head(dat, n = 6L)

# Now look at the data: scatter plot of y against x, coloured by group, with
# a smoothed trend line for each group.
ggplot(data = dat, mapping = aes(x = x, y = y, color = grp)) +
  geom_point() +
  stat_smooth()

# The plot makes it obvious that y1 and y2 follow different trends.
	# Why you should plot data before doing statistical tests
	# CJ Brown 2020-11-06
	#More at www.conservationhackers.org

	library(ggplot2)

	#
	# Make some data
	#
	# Simulation settings.
	n <- 50 # Sample size per group
	x <- 1:n # Predictor: the integers 1, 2, ..., n
	sd <- 1 # SD for errors

	# Two quadratic trends in x that are mirror images of each other, each with
	# independent Gaussian noise. R has no implicit multiplication, so every
	# coefficient must be joined to x with an explicit `*`.
	y1 <- x - 0.02 * x^2 - 8 + rnorm(n, sd = sd)
	y2 <- -1 * x + 0.02 * x^2 + 8 + rnorm(n, sd = sd)

	# Long-format data frame: both series stacked, with a label column that
	# records which series each row came from.
	dat <- data.frame(x = c(x, x), y = c(y1, y2),
	                  grp = rep(c("y1", "y2"), each = n))

	# The wrong way round: run a two-sample t-test before looking at the data.
	# (With its default settings t.test() does not assume equal variances.)
	t.test(x = y1, y = y2)
	# The test typically reports no 'significant' difference between the mean of
	# y1 and the mean of y2.

	# Peek at the first rows of the combined data frame.
	head(dat, n = 6L)

	# Now look at the data: scatter plot of y against x, coloured by group, with
	# a smoothed trend line for each group.
	ggplot(data = dat, mapping = aes(x = x, y = y, color = grp)) +
	  geom_point() +
	  stat_smooth()

	# The plot makes it obvious that y1 and y2 follow different trends.