# aschleg/linear_regression_R_example

## linear_regression_R_example
# Linear regression on the Ginzberg data.
# Fits an lm() baseline on a 50/50 train/test split, then fits the same model
# through caret::train() with 10-fold cross-validation and evaluates both on
# the held-out rows.
library(caret)

ginzberg <- read.csv('D:/Google_Drive/Google_Drive/Resources/Datasets/Rdata/car/Ginzberg.csv')
# Keep columns 2-4 only. Presumably column 1 is a row identifier and
# `depression` is the last of the three kept columns -- confirm against the CSV.
ginzberg <- ginzberg[2:4]

# Seed before the random split so the partition itself is reproducible.
set.seed(1000)
train <- createDataPartition(y = ginzberg$depression,
                             p = 0.50,
                             list = FALSE)

training <- ginzberg[train, ]
testing <- ginzberg[-train, ]

# Baseline: ordinary least squares fitted on the training half.
model.all <- lm(depression ~ ., data = training)
summary(model.all)

regprediction <- predict(model.all, testing)

# Observed vs. predicted values on the held-out half.
modelvalues <- data.frame(obs = testing$depression, pred = regprediction)
defaultSummary(modelvalues)

control <- trainControl(method = "cv", number = 10)

set.seed(1000)

# Fit on the training rows only. The model was previously fitted on the
# testing rows and then scored on those same rows, which made the reported
# test metrics in-sample.
x.training <- training[1:2]
x.testing <- testing[1:2]
model <- train(x = x.training, y = training$depression, method = "lm",
               trControl = control)
model

# Held-out performance of the cross-validated model.
prediction <- predict(model, testing)
new.modelvalues <- data.frame(obs = testing$depression, pred = prediction)
defaultSummary(new.modelvalues)

# Observed vs. predicted on the test rows.
xyplot(new.modelvalues$obs ~ new.modelvalues$pred,
       type = c("p", "g"),
       xlab = "Predicted", ylab = "Observed")
# Residuals vs. fitted values for the rows the model was trained on.
xyplot(resid(model) ~ predict(model),
       type = c("p", "g"),
       xlab = "Predicted", ylab = "Residuals")

## partial_least_squares_R_example
# Partial least squares on the longley data.
# Fits lm() models for GNP and Employed, then a plsr() model and a
# caret-tuned PLS model for Employed, and plots the results.
library(caret)
library(car)
library(pls)

longley <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/datasets/longley.csv')

# Pairwise scatterplots of every column.
plot(longley)

# Seed before the random splits so both partitions are reproducible.
set.seed(100)
train <- createDataPartition(y = longley$GNP,
                             p = 0.50,
                             list = FALSE)

training <- longley[train, ]
testing <- longley[-train, ]

model <- lm(GNP ~ ., data = training)

summary(model)

avPlots(model)

emp.train <- createDataPartition(y = longley$Employed,
                                 p = 0.50,
                                 list = FALSE)

# Index with emp.train. These subsets were previously built from `train`
# (the GNP-based split), which left emp.train unused.
emp.training <- longley[emp.train, ]
emp.testing <- longley[-emp.train, ]

emp.model <- lm(Employed ~ ., data = emp.training)

summary(emp.model)

avPlots(emp.model)


# PLS fit for Employed on the GNP-based training split.
pls.model <- plsr(Employed ~ ., data = training)
summary(pls.model)

# Fitted values using 1 and 2 components.
predict(pls.model, ncomp = 1:2)

control <- trainControl(method = "repeatedcv", repeats = 3)

set.seed(100)
# `preProcess` is spelled out in full instead of relying on partial matching
# of `preProc`.
model.new <- train(Employed ~ .,
                   data = training,
                   method = "pls",
                   tuneLength = 20,
                   trControl = control,
                   preProcess = c("center", "scale"))

model.new

pls.predict <- predict(model.new, newdata = testing)
plot(pls.predict)

# Diagnostics on the rows model.new was trained on.
xyplot(training$Employed ~ predict(model.new),
       type = c("p", "g"),
       xlab = "Predicted", ylab = "Observed")
xyplot(resid(model.new) ~ predict(model.new),
       type = c("p", "g"),
       xlab = "Predicted", ylab = "Residuals")

## partial_least_squares_R_example2
# Partial least squares on the Ginzberg data.
# Fits a plsr() model and a caret-tuned PLS model for `depression` on a
# 50/50 train/test split.
library(caret)
library(car)
library(pls)

ginzberg <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/datasets/Ginzberg.csv')
# NOTE(review): unlike the other Ginzberg sections in this file, the data is
# not reduced to columns 2-4 here, so `depression ~ .` uses every remaining
# column of the CSV as a predictor -- confirm this is intended.

# Seed before the random split so the partition is reproducible.
set.seed(100)
train <- createDataPartition(y = ginzberg$depression,
                             p = 0.50,
                             list = FALSE)

training <- ginzberg[train, ]
testing <- ginzberg[-train, ]

pls.model <- plsr(depression ~ ., data = training)
summary(pls.model)

# Fitted values for the training rows.
predict(pls.model)

control <- trainControl(method = "cv", number = 10)

set.seed(100)
# `preProcess` is spelled out in full instead of relying on partial matching
# of `preProc`.
model.new <- train(depression ~ .,
                   data = training,
                   method = "pls",
                   tuneLength = 20,
                   trControl = control,
                   preProcess = c("center", "scale"))

model.new

pls.predict <- predict(model.new, newdata = testing)
plot(pls.predict)

# Diagnostics on the rows model.new was trained on.
xyplot(training$depression ~ predict(model.new),
       type = c("p", "g"),
       xlab = "Predicted", ylab = "Observed")
xyplot(resid(model.new) ~ predict(model.new),
       type = c("p", "g"),
       xlab = "Predicted", ylab = "Residuals")

## penalized_regression_R_example
# Penalized regression on the Ginzberg data.
# Fits enet() models directly, then tunes ridge and elastic-net models
# through caret::train() with 10-fold cross-validation.
# caret is loaded here because this section calls createDataPartition(),
# trainControl() and train() and should not depend on an earlier section
# having attached it.
library(caret)
library(elasticnet)

ginzberg <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/car/Ginzberg.csv')

# Keep columns 2-4 only. Presumably column 1 is a row identifier and
# `depression` is the last of the three kept columns -- confirm against the CSV.
ginzberg <- ginzberg[2:4]

# Seed before the random split so the partition is reproducible.
set.seed(100)
train <- createDataPartition(y = ginzberg$depression,
                             p = 0.50,
                             list = FALSE)

training <- ginzberg[train, ]
testing <- ginzberg[-train, ]

x.training <- training[1:2]
# NOTE(review): x.testing is never used below; every prediction in this
# section is made on x.training -- confirm whether held-out predictions
# were intended.
x.testing <- testing[1:2]

# enet() takes a matrix of predictors, hence as.matrix().
model <- enet(x = as.matrix(x.training), y = training$depression,
              lambda = 0.001)

modelPrediction <- predict(model, newx = as.matrix(x.training), s = 1,
                           mode = "fraction",
                           type = "fit")
head(modelPrediction$fit)

control <- trainControl(method = "cv", number = 10)
# `length.out` is spelled out in full instead of the partially matched
# `length`.
grid <- data.frame(.lambda = seq(0, .1, length.out = 15))
set.seed(100)
# `preProcess` is spelled out in full instead of relying on partial matching
# of `preProc`.
modelRegFit <- train(x.training, training$depression,
                     method = "ridge",
                     tuneGrid = grid,
                     trControl = control,
                     preProcess = c("center", "scale"))
modelRegFit


# NOTE(review): named "lasso" but fitted with lambda = 0.1 -- confirm the
# intended penalty.
lasso.model <- enet(x = as.matrix(x.training), y = training$depression,
                    lambda = 0.1, normalize = TRUE)

lassoPred <- predict(lasso.model, newx = as.matrix(x.training),
                     s = .1, mode = "fraction",
                     type = "fit")
names(lassoPred)
head(lassoPred$fit)

modelCoef <- predict(lasso.model, newx = as.matrix(x.training),
                     s = .1, mode = "fraction",
                     type = "coefficients")
tail(modelCoef$coefficients)

# Tuning grid: every combination of the three lambda values and 20 fractions.
grid1 <- expand.grid(.lambda = c(0, 0.01, .1),
                     .fraction = seq(.05, 1, length.out = 20))
set.seed(100)
modeltune <- train(x.training, training$depression,
                   method = "enet",
                   tuneGrid = grid1,
                   trControl = control,
                   preProcess = c("center", "scale"))
plot(modeltune)
	# Linear regression on the Ginzberg data.
	# NOTE(review): this tab-indented section repeats the linear regression
	# example found earlier in this file -- consider removing one copy.
	# Fits an lm() baseline on a 50/50 train/test split, then fits the same
	# model through caret::train() with 10-fold cross-validation and evaluates
	# both on the held-out rows.
	library(caret)

	ginzberg <- read.csv('D:/Google_Drive/Google_Drive/Resources/Datasets/Rdata/car/Ginzberg.csv')
	# Keep columns 2-4 only. Presumably column 1 is a row identifier and
	# `depression` is the last of the three kept columns -- confirm.
	ginzberg <- ginzberg[2:4]

	# Seed before the random split so the partition itself is reproducible.
	set.seed(1000)
	train <- createDataPartition(y = ginzberg$depression,
	                             p = 0.50,
	                             list = FALSE)

	training <- ginzberg[train, ]
	testing <- ginzberg[-train, ]

	# Baseline: ordinary least squares fitted on the training half.
	model.all <- lm(depression ~ ., data = training)
	summary(model.all)

	regprediction <- predict(model.all, testing)

	# Observed vs. predicted values on the held-out half.
	modelvalues <- data.frame(obs = testing$depression, pred = regprediction)
	defaultSummary(modelvalues)

	control <- trainControl(method = "cv", number = 10)

	set.seed(1000)

	# Fit on the training rows only. The model was previously fitted on the
	# testing rows and then scored on those same rows, which made the reported
	# test metrics in-sample.
	x.training <- training[1:2]
	x.testing <- testing[1:2]
	model <- train(x = x.training, y = training$depression, method = "lm",
	               trControl = control)
	model

	# Held-out performance of the cross-validated model.
	prediction <- predict(model, testing)
	new.modelvalues <- data.frame(obs = testing$depression, pred = prediction)
	defaultSummary(new.modelvalues)

	# Observed vs. predicted on the test rows.
	xyplot(new.modelvalues$obs ~ new.modelvalues$pred,
	       type = c("p", "g"),
	       xlab = "Predicted", ylab = "Observed")
	# Residuals vs. fitted values for the rows the model was trained on.
	xyplot(resid(model) ~ predict(model),
	       type = c("p", "g"),
	       xlab = "Predicted", ylab = "Residuals")
	# Partial least squares on the longley data.
	# NOTE(review): this tab-indented section repeats the longley PLS example
	# found earlier in this file -- consider removing one copy.
	# Fits lm() models for GNP and Employed, then a plsr() model and a
	# caret-tuned PLS model for Employed, and plots the results.
	library(caret)
	library(car)
	library(pls)

	longley <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/datasets/longley.csv')

	# Pairwise scatterplots of every column.
	plot(longley)

	# Seed before the random splits so both partitions are reproducible.
	set.seed(100)
	train <- createDataPartition(y = longley$GNP,
	                             p = 0.50,
	                             list = FALSE)

	training <- longley[train, ]
	testing <- longley[-train, ]

	model <- lm(GNP ~ ., data = training)

	summary(model)

	avPlots(model)

	emp.train <- createDataPartition(y = longley$Employed,
	                                 p = 0.50,
	                                 list = FALSE)

	# Index with emp.train. These subsets were previously built from `train`
	# (the GNP-based split), which left emp.train unused.
	emp.training <- longley[emp.train, ]
	emp.testing <- longley[-emp.train, ]

	emp.model <- lm(Employed ~ ., data = emp.training)

	summary(emp.model)

	avPlots(emp.model)


	# PLS fit for Employed on the GNP-based training split.
	pls.model <- plsr(Employed ~ ., data = training)
	summary(pls.model)

	# Fitted values using 1 and 2 components.
	predict(pls.model, ncomp = 1:2)

	control <- trainControl(method = "repeatedcv", repeats = 3)

	set.seed(100)
	# `preProcess` is spelled out in full instead of relying on partial
	# matching of `preProc`.
	model.new <- train(Employed ~ .,
	                   data = training,
	                   method = "pls",
	                   tuneLength = 20,
	                   trControl = control,
	                   preProcess = c("center", "scale"))

	model.new

	pls.predict <- predict(model.new, newdata = testing)
	plot(pls.predict)

	# Diagnostics on the rows model.new was trained on.
	xyplot(training$Employed ~ predict(model.new),
	       type = c("p", "g"),
	       xlab = "Predicted", ylab = "Observed")
	xyplot(resid(model.new) ~ predict(model.new),
	       type = c("p", "g"),
	       xlab = "Predicted", ylab = "Residuals")
	# Penalized regression on the Ginzberg data.
	# NOTE(review): this tab-indented section repeats the penalized regression
	# example found earlier in this file -- consider removing one copy.
	# Fits enet() models directly, then tunes ridge and elastic-net models
	# through caret::train() with 10-fold cross-validation.
	# caret is loaded here because this section calls createDataPartition(),
	# trainControl() and train() and should not depend on an earlier section
	# having attached it.
	library(caret)
	library(elasticnet)

	ginzberg <- read.csv('C:/Users/Aaron/Google_Drive/Resources/Datasets/Rdata/car/Ginzberg.csv')

	# Keep columns 2-4 only. Presumably column 1 is a row identifier and
	# `depression` is the last of the three kept columns -- confirm.
	ginzberg <- ginzberg[2:4]

	# Seed before the random split so the partition is reproducible.
	set.seed(100)
	train <- createDataPartition(y = ginzberg$depression,
	                             p = 0.50,
	                             list = FALSE)

	training <- ginzberg[train, ]
	testing <- ginzberg[-train, ]

	x.training <- training[1:2]
	# NOTE(review): x.testing is never used below; every prediction in this
	# section is made on x.training -- confirm whether held-out predictions
	# were intended.
	x.testing <- testing[1:2]

	# enet() takes a matrix of predictors, hence as.matrix().
	model <- enet(x = as.matrix(x.training), y = training$depression,
	              lambda = 0.001)

	modelPrediction <- predict(model, newx = as.matrix(x.training), s = 1,
	                           mode = "fraction",
	                           type = "fit")
	head(modelPrediction$fit)

	control <- trainControl(method = "cv", number = 10)
	# `length.out` is spelled out in full instead of the partially matched
	# `length`.
	grid <- data.frame(.lambda = seq(0, .1, length.out = 15))
	set.seed(100)
	# `preProcess` is spelled out in full instead of relying on partial
	# matching of `preProc`.
	modelRegFit <- train(x.training, training$depression,
	                     method = "ridge",
	                     tuneGrid = grid,
	                     trControl = control,
	                     preProcess = c("center", "scale"))
	modelRegFit


	# NOTE(review): named "lasso" but fitted with lambda = 0.1 -- confirm the
	# intended penalty.
	lasso.model <- enet(x = as.matrix(x.training), y = training$depression,
	                    lambda = 0.1, normalize = TRUE)

	lassoPred <- predict(lasso.model, newx = as.matrix(x.training),
	                     s = .1, mode = "fraction",
	                     type = "fit")
	names(lassoPred)
	head(lassoPred$fit)

	modelCoef <- predict(lasso.model, newx = as.matrix(x.training),
	                     s = .1, mode = "fraction",
	                     type = "coefficients")
	tail(modelCoef$coefficients)

	# Tuning grid: every combination of the three lambda values and 20
	# fractions.
	grid1 <- expand.grid(.lambda = c(0, 0.01, .1),
	                     .fraction = seq(.05, 1, length.out = 20))
	set.seed(100)
	modeltune <- train(x.training, training$depression,
	                   method = "enet",
	                   tuneGrid = grid1,
	                   trControl = control,
	                   preProcess = c("center", "scale"))
	plot(modeltune)