Last active
August 29, 2015 14:26
-
-
Save mamhamed/7a3e980e8c8a9bf56003 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### load libraries ------------------------------------------------------ | |
library('ggplot2') | |
library('gridExtra') | |
### set the theme for ggplot -------------------------------------------- | |
theme_set(theme_gray(base_size = 18)) | |
### visualize the Anscombe's quartet datase ------------------------------ | |
p1 <- ggplot(anscombe[,c("x1","y1")], aes(x1,y1)) + geom_point(size=4, col="blue") + ggtitle('set 1') | |
p2 <- ggplot(anscombe[,c("x2","y2")], aes(x2,y2)) + geom_point(size=4, col="blue") + ggtitle('set 2') | |
p3 <- ggplot(anscombe[,c("x3","y3")], aes(x3,y3)) + geom_point(size=4, col="blue") + ggtitle('set 3') | |
p4 <- ggplot(anscombe[,c("x4","y4")], aes(x4,y4)) + geom_point(size=4, col="blue") + ggtitle('set 4') | |
grid.arrange(p1, p2, p3, p4, ncol=2) | |
### load data ------------------------------------------------------ | |
load("cat-base.Rda") | |
### plot cats ----------------------------------------------------- | |
p <- ggplot(cats2, aes(x=Bwt, y=Hwt)) | |
p <- p+geom_point() | |
p + stat_smooth(method=lm, level=.95) + | |
ggtitle("cats-base dataset with linear model and the confidence interval") + | |
xlab("Bwt (kg)") + | |
ylab("Hwt (g)") | |
### create linear model ---------------------------------------------- | |
# for cats | |
cats.lm <- lm(Hwt ~ Bwt, cats2) | |
print(summary(cats.lm)$r.squared) | |
print(anova(cats.lm)$'Pr(>F)'[1]) | |
# for x^2 | |
x = seq(-3,3, length.out=50) | |
y = x^2 + rnorm(length(x)) | |
ggplot(data.frame(x=x,y=y), aes(x=x, y=y)) + | |
geom_point(size=4, col="blue") + | |
ggtitle("y=x*x dataset: the GoF is small for this data") | |
xy.lm <- lm(y ~ x, data = data.frame(x=x,y=y)) | |
print(summary(xy.lm)$r.squared) | |
print(anova(xy.lm)$'Pr(>F)'[1]) | |
### confidence ------------------------------------------------------ | |
p <- p + geom_abline(intercept = coef(cats.lm), slope = coef(cats.lm), col="red") | |
x = 3.1 | |
res95 <- predict.lm(cats.lm, data.frame(Bwt =x), interval="confidence", level=.95) | |
print(res95) | |
p95 <- p + | |
geom_errorbar(data=data.frame(Bwt=x, Hwt =res95[1]), ymin=res95[2], ymax=res95[3], width=0.05, col='red') + | |
ggtitle("95% confidence interval for prediction in ") + | |
xlab("Bwt (kg)") + | |
ylab("Hwt (g)") | |
x = 2.2 | |
res95 = predict.lm(cats.lm, data.frame(Bwt =x), interval="confidence", level=.95) | |
p95 <- p95 + | |
geom_errorbar(data=data.frame(Bwt=x, Hwt =res95[1]), ymin=res95[2], ymax=res95[3], width=0.05, col='red') | |
print(p95) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment