# tobigithub/regression-machine-learning.R

## regression-machine-learning.R
##
## Fits a linear regression of Old Faithful eruption duration on waiting time
## twice: once with lm() on the full data set, and once with caret's train()
## on a 60% training partition. It then plots the fitted lines and computes
## the root mean square error (RMSE) on the training and test partitions.
library(caret)

data(faithful)

# Fix the RNG seed so the random train/test partition below is reproducible.
set.seed(333)

# Plot data.
plot(x = faithful$waiting, y = faithful$eruptions)

# Calculate linear model.
fit <- lm(eruptions ~ waiting, data = faithful)

# Plot trend line.
abline(fit)

# Now do it again, this time with machine learning.
# Put 60% of the rows in the training set; the remaining rows form the test set.
inTrain <- createDataPartition(y = faithful$eruptions, p = 0.6, list = FALSE)
training <- faithful[inTrain, ]
test <- faithful[-inTrain, ]

# Plot training data.
plot(x = training$waiting, y = training$eruptions)

# Train linear model (the coefficient values should be very similar to the
# first linear model fit that we made above). `faithful` has only the columns
# `eruptions` and `waiting`, so `eruptions ~ .` regresses on `waiting` alone.
fit2 <- train(eruptions ~ ., data = training, method = "lm")

# Plot trend line from trained model.
results <- predict(fit2, newdata = training)
lines(x = training$waiting, y = results)

# Calculate error with RMSE (root mean square error).
# RMSE is the square root of the *mean* squared error; averaging (rather than
# summing) keeps the value in the units of `eruptions` and makes the training
# and test errors comparable even though the partitions differ in size.
trainRMSE <- sqrt(mean((fit2$finalModel$fitted.values - training$eruptions)^2))

# Now predict on the test set and draw a new line.
plot(x = test$waiting, y = test$eruptions)
# Draw the predicted regression line on the test set. It should match the data
# points pretty closely, even though the model was trained only on the
# training set.
results <- predict(fit2, newdata = test)
lines(x = test$waiting, y = results)

# Calculate error with RMSE (root mean square error) on the held-out test set.
testRMSE <- sqrt(mean((results - test$eruptions)^2))
	# NOTE(review): this tab-indented section repeats the script above
	# statement for statement; it looks like an accidental duplicate --
	# confirm and remove one copy.
	library(caret)

	data(faithful)

	# Fix the RNG seed so the random train/test partition below is reproducible.
	set.seed(333)

	# Plot data.
	plot(x = faithful$waiting, y = faithful$eruptions)

	# Calculate linear model.
	fit <- lm(eruptions ~ waiting, data = faithful)

	# Plot trend line.
	abline(fit)

	# Now do it again, this time with machine learning.
	# Put 60% of the rows in the training set; the remaining rows form the test set.
	inTrain <- createDataPartition(y = faithful$eruptions, p = 0.6, list = FALSE)
	training <- faithful[inTrain, ]
	test <- faithful[-inTrain, ]

	# Plot training data.
	plot(x = training$waiting, y = training$eruptions)

	# Train linear model (the coefficient values should be very similar to the
	# first linear model fit that we made above). `faithful` has only the columns
	# `eruptions` and `waiting`, so `eruptions ~ .` regresses on `waiting` alone.
	fit2 <- train(eruptions ~ ., data = training, method = "lm")

	# Plot trend line from trained model.
	results <- predict(fit2, newdata = training)
	lines(x = training$waiting, y = results)

	# Calculate error with RMSE (root mean square error).
	# RMSE is the square root of the *mean* squared error; averaging (rather than
	# summing) keeps the value in the units of `eruptions` and makes the training
	# and test errors comparable even though the partitions differ in size.
	trainRMSE <- sqrt(mean((fit2$finalModel$fitted.values - training$eruptions)^2))

	# Now predict on the test set and draw a new line.
	plot(x = test$waiting, y = test$eruptions)
	# Draw the predicted regression line on the test set. It should match the data
	# points pretty closely, even though the model was trained only on the
	# training set.
	results <- predict(fit2, newdata = test)
	lines(x = test$waiting, y = results)

	# Calculate error with RMSE (root mean square error) on the held-out test set.
	testRMSE <- sqrt(mean((results - test$eruptions)^2))