Skip to content

Instantly share code, notes, and snippets.

@aschleg
Created March 14, 2015 21:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save aschleg/396c2351d1e1a8b6569d to your computer and use it in GitHub Desktop.
Save aschleg/396c2351d1e1a8b6569d to your computer and use it in GitHub Desktop.
Quick examples of different types of regression using R. The datasets used for each are available in all R distributions.
# Linear regression on the Ginzberg depression data ----
# Fits an ordinary lm() on a 50/50 split, then refits the same model through
# caret::train() with 10-fold cross-validation and compares hold-out error.
library(caret)

# NOTE(review): absolute, machine-specific path -- point it at your own copy.
ginzberg <- read.csv('D:/Google_Drive/Google_Drive/Resources/Datasets/Rdata/car/Ginzberg.csv')
# Keep columns 2-4 only; below, the first two of these are used as predictors
# and `depression` as the response.
ginzberg <- ginzberg[2:4]

# Seed BEFORE partitioning: createDataPartition() samples at random, so an
# unseeded split makes every number below irreproducible.
set.seed(1000)
train <- createDataPartition(
  y = ginzberg$depression,
  p = 0.50,
  list = FALSE
)
training <- ginzberg[train, ]
testing <- ginzberg[-train, ]

# Baseline: plain least squares on the training half.
model.all <- lm(depression ~ ., data = training)
summary(model.all)

# Hold-out performance (RMSE, R-squared, MAE) of the baseline model.
regprediction <- predict(model.all, testing)
modelvalues <- data.frame(obs = testing$depression, pred = regprediction)
defaultSummary(modelvalues)

# Same model via caret with 10-fold CV for an honest resampled error estimate.
control <- trainControl(method = "cv", number = 10)
set.seed(1000)
x.training <- training[1:2]
x.testing <- testing[1:2]
# Fit on the TRAINING rows. Fitting on the testing rows and then scoring on
# those same rows would report in-sample error as if it were hold-out error.
model <- train(
  x = x.training,
  y = training$depression,
  method = "lm",
  trControl = control
)
model

# Hold-out performance of the caret model on rows it has never seen.
prediction <- predict(model, x.testing)
new.modelvalues <- data.frame(obs = testing$depression, pred = prediction)
defaultSummary(new.modelvalues)

# Observed vs. predicted on the hold-out rows.
xyplot(new.modelvalues$obs ~ new.modelvalues$pred,
  type = c("p", "g"),
  xlab = "Predicted", ylab = "Observed"
)
# Residuals vs. fitted values on the rows the model was trained on.
xyplot(resid(model) ~ predict(model),
  type = c("p", "g"),
  xlab = "Predicted", ylab = "Residuals"
)
# Linear regression and partial least squares on the longley data ----
library(caret)
library(car)
library(pls)

# NOTE(review): absolute, machine-specific path -- point it at your own copy.
longley <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/datasets/longley.csv')
plot(longley)

# Seed BEFORE partitioning so both random splits below are reproducible.
set.seed(100)

# Split stratified on GNP, then regress GNP on every other column.
train <- createDataPartition(
  y = longley$GNP,
  p = 0.50,
  list = FALSE
)
training <- longley[train, ]
testing <- longley[-train, ]
model <- lm(GNP ~ ., data = training)
summary(model)
avPlots(model)

# Split stratified on Employed, then regress Employed on every other column.
emp.train <- createDataPartition(
  y = longley$Employed,
  p = 0.50,
  list = FALSE
)
# Index with emp.train (not train); otherwise the Employed-stratified
# partition computed just above is never used.
emp.training <- longley[emp.train, ]
emp.testing <- longley[-emp.train, ]
emp.model <- lm(Employed ~ ., data = emp.training)
summary(emp.model)
avPlots(emp.model)

# PLS regression of Employed on all other columns.
# NOTE(review): the PLS fits below use the GNP-stratified `training`/`testing`
# split; switch to emp.training/emp.testing if the Employed split was intended.
pls.model <- plsr(Employed ~ ., data = training)
summary(pls.model)
# Fitted values using 1 and 2 components.
predict(pls.model, ncomp = 1:2)

# Tune the number of PLS components with repeated cross-validation
# (3 repeats), centring and scaling the predictors first.
control <- trainControl(method = "repeatedcv", repeats = 3)
set.seed(100)
model.new <- train(Employed ~ .,
  data = training,
  method = "pls",
  tuneLength = 20,
  trControl = control,
  preProc = c("center", "scale")
)
model.new

# Predictions for the hold-out rows.
pls.predict <- predict(model.new, newdata = testing)
plot(pls.predict)

# Diagnostics on the training rows: observed vs. fitted, residuals vs. fitted.
xyplot(training$Employed ~ predict(model.new),
  type = c("p", "g"),
  xlab = "Predicted", ylab = "Observed"
)
xyplot(resid(model.new) ~ predict(model.new),
  type = c("p", "g"),
  xlab = "Predicted", ylab = "Residuals"
)
# Partial least squares regression on the Ginzberg depression data ----
library(caret)
library(car)
library(pls)

# NOTE(review): absolute, machine-specific path -- point it at your own copy.
ginzberg <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/datasets/Ginzberg.csv')
# NOTE(review): unlike the other Ginzberg examples in this file, no column
# subset (ginzberg[2:4]) is taken here, so every column in the CSV becomes a
# predictor of `depression` -- confirm that is intended.

# Seed BEFORE partitioning: createDataPartition() samples at random, so an
# unseeded split makes the fits below irreproducible.
set.seed(100)
train <- createDataPartition(
  y = ginzberg$depression,
  p = 0.50,
  list = FALSE
)
training <- ginzberg[train, ]
testing <- ginzberg[-train, ]

# Direct PLS fit with the pls package, and its fitted values.
pls.model <- plsr(depression ~ ., data = training)
summary(pls.model)
predict(pls.model)

# Tune the number of PLS components with 10-fold cross-validation,
# centring and scaling the predictors first.
control <- trainControl(method = "cv", number = 10)
set.seed(100)
model.new <- train(depression ~ .,
  data = training,
  method = "pls",
  tuneLength = 20,
  trControl = control,
  preProc = c("center", "scale")
)
model.new

# Predictions for the hold-out rows.
pls.predict <- predict(model.new, newdata = testing)
plot(pls.predict)

# Diagnostics on the training rows: observed vs. fitted, residuals vs. fitted.
xyplot(training$depression ~ predict(model.new),
  type = c("p", "g"),
  xlab = "Predicted", ylab = "Observed"
)
xyplot(resid(model.new) ~ predict(model.new),
  type = c("p", "g"),
  xlab = "Predicted", ylab = "Residuals"
)
# Ridge, lasso and elastic-net regression on the Ginzberg depression data ----
# caret supplies createDataPartition(), trainControl() and train(); load it
# here so this script also runs on its own.
library(caret)
library(elasticnet)

# NOTE(review): absolute, machine-specific path -- point it at your own copy.
ginzberg <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/car/Ginzberg.csv')
# Keep columns 2-4 only; below, the first two of these are used as predictors
# and `depression` as the response.
ginzberg <- ginzberg[2:4]

# Seed BEFORE partitioning: createDataPartition() samples at random, so an
# unseeded split makes the fits below irreproducible.
set.seed(100)
train <- createDataPartition(
  y = ginzberg$depression,
  p = 0.50,
  list = FALSE
)
training <- ginzberg[train, ]
testing <- ginzberg[-train, ]
x.training <- training[1:2]
x.testing <- testing[1:2]

# Ridge fit via enet(): `lambda` is the quadratic penalty, and s = 1 in
# "fraction" mode requests the fit at the end of the solution path.
model <- enet(
  x = as.matrix(x.training), y = training$depression,
  lambda = 0.001
)
modelPrediction <- predict(model,
  newx = as.matrix(x.training), s = 1,
  mode = "fraction",
  type = "fit"
)
head(modelPrediction$fit)

# Tune the ridge penalty over 15 values in [0, 0.1] with 10-fold CV.
control <- trainControl(method = "cv", number = 10)
grid <- data.frame(.lambda = seq(0, .1, length = 15))
set.seed(100)
modelRegFit <- train(x.training, training$depression,
  method = "ridge",
  tuneGrid = grid,
  trControl = control,
  preProc = c("center", "scale")
)
modelRegFit

# NOTE(review): with lambda = 0.1 this is an elastic-net fit rather than a
# pure lasso (enet() gives the lasso at lambda = 0); value kept as written.
lasso.model <- enet(
  x = as.matrix(x.training), y = training$depression,
  lambda = 0.1, normalize = TRUE
)
# Fitted values at fraction s = 0.1 along the solution path.
lassoPred <- predict(lasso.model,
  newx = as.matrix(x.training),
  s = .1, mode = "fraction",
  type = "fit"
)
names(lassoPred)
head(lassoPred$fit)
# Coefficients at the same point on the path.
modelCoef <- predict(lasso.model,
  newx = as.matrix(x.training),
  s = .1, mode = "fraction",
  type = "coefficients"
)
tail(modelCoef$coefficients)

# Tune the elastic net over 3 ridge penalties x 20 path fractions using the
# same 10-fold CV control as above.
grid1 <- expand.grid(
  .lambda = c(0, 0.01, .1),
  .fraction = seq(.05, 1, length = 20)
)
set.seed(100)
modeltune <- train(x.training, training$depression,
  method = "enet",
  tuneGrid = grid1,
  trControl = control,
  preProc = c("center", "scale")
)
plot(modeltune)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment