Skip to content

Instantly share code, notes, and snippets.

@t-redactyl
Created November 4, 2015 05:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save t-redactyl/5e0e274cb65716260946 to your computer and use it in GitHub Desktop.
Save t-redactyl/5e0e274cb65716260946 to your computer and use it in GitHub Desktop.
Code associate with blog post
mtcars$am.f <- as.factor(mtcars$am); levels(mtcars$am.f) <- c("Automatic", "Manual")
mtcars$cyl.f <- as.factor(mtcars$cyl); levels(mtcars$cyl.f) <- c("4 cyl", "6 cyl", "8 cyl")
mtcars$vs.f <- as.factor(mtcars$vs); levels(mtcars$vs.f) <- c("V engine", "Straight engine")
mtcars$gear.f <- as.factor(mtcars$gear); levels(mtcars$gear.f) <- c("3 gears", "4 gears", "5 gears")
mtcars$carb.f <- as.factor(mtcars$carb)
cor.list <- c()
outcome.cor <- function(predictor, ...) {
x <- cor(mtcars$mpg, predictor)
cor.list <- c(cor.list, x)
}
cor.list <- sapply(mtcars[ , c(3, 6:8, 10:11)], outcome.cor)
sort(abs(cor.list), decreasing = TRUE)
final.model <- lm(mpg ~ am.f + wt + am.f * wt, data = mtcars)
par(mfrow = c(2,2))
plot(final.model)
library(ggplot2); library(GGally)
mtcars <- mtcars[ , c(1, 9, 2:8, 10:16)]
g = ggpairs(mtcars[ , 1:11], lower = list(continuous = "smooth", params = c(method = "loess")))
g
mtcars$dfbetas <- round(dfbetas(final.model)[, 2], 3)
top.influence = sort(round(abs(dfbetas(final.model)[, 2]), 3), decreasing = TRUE)
head(top.influence)
model8 <- lm(mpg ~ am.f + wt + cyl.f + am.f * wt, data = mtcars)
lmfits <- lmfit.table(model3, model8)
model9 <- lm(mpg ~ am.f + wt + cyl.f + am.f * wt + am.f * cyl.f, data = mtcars)
lmfits <- lmfit.table(model8, model9)
model10 <- lm(mpg ~ am.f + wt + am.f * wt, data = mtcars)
lmfits <- lmfit.table(model2, model10)
kable(lmfits[7:9, ])
mtcars$name <- row.names(mtcars)
mtcars$hatvalues <- round(hatvalues(final.model), 3)
top.leverage = sort(round(hatvalues(final.model), 3), decreasing = TRUE)
head(top.leverage)
rm(list = ls())
data(mtcars)
lmfits <- data.frame()
lmfit.table <- function(model1, model2, ...) {
models <- sub("Model 1: ", "", attr(anova(model1, model2), "heading")[2])
x <- c(sub("\\n.*", "", models),
sub(".*Model 2: ", "", models),
round(anova(model1, model2)$"Pr(>F)"[2], 3))
lmfits <- rbind(lmfits, x)
}
model1 <- lm(mpg ~ am.f, data = mtcars)
model2 <- lm(mpg ~ am.f + wt, data = mtcars)
lmfits <- lmfit.table(model1, model2)
for (i in 1:3) {
lmfits[ , i] <- as.character(lmfits[ , i])
}
names(lmfits) <- c("Model 1", "Model 2", "p-value of model improvement")
model3 <- lm(mpg ~ am.f + wt + cyl.f, data = mtcars)
lmfits <- lmfit.table(model2, model3)
model4 <- lm(mpg ~ am.f + wt + cyl.f + drat, data = mtcars)
lmfits <- lmfit.table(model3, model4)
model5 <- lm(mpg ~ am.f + wt + cyl.f + carb.f, data = mtcars)
lmfits <- lmfit.table(model3, model5)
model6 <- lm(mpg ~ am.f + wt + cyl.f + gear.f, data = mtcars)
lmfits <- lmfit.table(model3, model6)
model7 <- lm(mpg ~ am.f + wt + cyl.f + qsec, data = mtcars)
lmfits <- lmfit.table(model3, model7)
require(knitr)
kable(lmfits)
# First build a plot of the model.
library(ggplot2)
gp <- ggplot(data=mtcars, aes(x=wt, y=mpg, colour=am.f)) +
geom_point(alpha = 0.7) +
geom_abline(intercept = coef(final.model)[1], slope = coef(final.model)[3],
size = 1, color = "#FF3721") +
geom_abline(intercept = coef(final.model)[1] + coef(final.model)[2],
slope = coef(final.model)[3] + coef(final.model)[4],
size = 1, color = "#4271AE") +
scale_colour_manual(name="Transmission", values =c("#FF3721", "#4271AE")) +
ylab("Miles per gallon") +
xlab("Weight (`000 lbs)") +
theme_bw()
# Leverage plot
g1 <- gp + geom_text(data=subset(mtcars, abs(hatvalues) >= top.leverage[3]),
aes(wt,mpg,label=name), size = 4, hjust=1, vjust=0) +
ggtitle("Three Points with Highest Leverage (hatvalues)")
# Influence plot
g2 <- gp + geom_text(data=subset(mtcars, abs(dfbetas) == top.influence[1]),
aes(wt,mpg,label=name), size = 4, hjust = 1, vjust = 0) +
geom_text(data=subset(mtcars, abs(dfbetas) == top.influence[2] |
abs(dfbetas) == top.influence[3]),
aes(wt,mpg,label=name), size = 4, hjust = 0, vjust = 0) +
ggtitle("Three Points with Highest Influence (dfbetas)")
library(gridExtra)
grid.arrange(g1, g2, nrow = 1, ncol = 2)
require(car)
round(summary(model10)$r.squared, 3)
vif(model10)
round(summary(model8)$r.squared, 3)
vif(model8)[ , 1]
# Function written by Joshua Wiley
spec.cor <- function (dat, r, ...) {
x <- cor(dat, ...)
x[upper.tri(x, TRUE)] <- NA
i <- which(abs(x) >= r, arr.ind = TRUE)
data.frame(matrix(colnames(x)[as.vector(i)], ncol = 2), value = x[i])
}
spec.cor(mtcars[ , 2:11], 0.8)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment