Skip to content

Instantly share code, notes, and snippets.

@mamhamed
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mamhamed/7a3e980e8c8a9bf56003 to your computer and use it in GitHub Desktop.
Save mamhamed/7a3e980e8c8a9bf56003 to your computer and use it in GitHub Desktop.
### load libraries ------------------------------------------------------
library('ggplot2')
library('gridExtra')
### set the theme for ggplot --------------------------------------------
theme_set(theme_gray(base_size = 18))
### visualize the Anscombe's quartet datase ------------------------------
p1 <- ggplot(anscombe[,c("x1","y1")], aes(x1,y1)) + geom_point(size=4, col="blue") + ggtitle('set 1')
p2 <- ggplot(anscombe[,c("x2","y2")], aes(x2,y2)) + geom_point(size=4, col="blue") + ggtitle('set 2')
p3 <- ggplot(anscombe[,c("x3","y3")], aes(x3,y3)) + geom_point(size=4, col="blue") + ggtitle('set 3')
p4 <- ggplot(anscombe[,c("x4","y4")], aes(x4,y4)) + geom_point(size=4, col="blue") + ggtitle('set 4')
grid.arrange(p1, p2, p3, p4, ncol=2)
### load data ------------------------------------------------------
load("cat-base.Rda")
### plot cats -----------------------------------------------------
p <- ggplot(cats2, aes(x=Bwt, y=Hwt))
p <- p+geom_point()
p + stat_smooth(method=lm, level=.95) +
ggtitle("cats-base dataset with linear model and the confidence interval") +
xlab("Bwt (kg)") +
ylab("Hwt (g)")
### create linear model ----------------------------------------------
# for cats
cats.lm <- lm(Hwt ~ Bwt, cats2)
print(summary(cats.lm)$r.squared)
print(anova(cats.lm)$'Pr(>F)'[1])
# for x^2
x = seq(-3,3, length.out=50)
y = x^2 + rnorm(length(x))
ggplot(data.frame(x=x,y=y), aes(x=x, y=y)) +
geom_point(size=4, col="blue") +
ggtitle("y=x*x dataset: the GoF is small for this data")
xy.lm <- lm(y ~ x, data = data.frame(x=x,y=y))
print(summary(xy.lm)$r.squared)
print(anova(xy.lm)$'Pr(>F)'[1])
### confidence ------------------------------------------------------
p <- p + geom_abline(intercept = coef(cats.lm), slope = coef(cats.lm), col="red")
x = 3.1
res95 <- predict.lm(cats.lm, data.frame(Bwt =x), interval="confidence", level=.95)
print(res95)
p95 <- p +
geom_errorbar(data=data.frame(Bwt=x, Hwt =res95[1]), ymin=res95[2], ymax=res95[3], width=0.05, col='red') +
ggtitle("95% confidence interval for prediction in ") +
xlab("Bwt (kg)") +
ylab("Hwt (g)")
x = 2.2
res95 = predict.lm(cats.lm, data.frame(Bwt =x), interval="confidence", level=.95)
p95 <- p95 +
geom_errorbar(data=data.frame(Bwt=x, Hwt =res95[1]), ymin=res95[2], ymax=res95[3], width=0.05, col='red')
print(p95)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment