Skip to content

Instantly share code, notes, and snippets.

@primaryobjects
Last active October 10, 2018 08:08
Show Gist options
  • Save primaryobjects/41c4230e43e11029cff1 to your computer and use it in GitHub Desktop.
Save primaryobjects/41c4230e43e11029cff1 to your computer and use it in GitHub Desktop.
# Quiz 4
# Question 1.
# Fit a random forest and a boosted-tree model to the vowel training data,
# predict on the vowel test data, then stack the two sets of predictions with
# a second random forest and compare the three confusion matrices.
library(ElemStatLearn)
library(randomForest)
library(caret)

data(vowel.train)
data(vowel.test)

# Convert the outcome to a factor so the models are fit as classifiers.
vowel.train$y <- as.factor(vowel.train$y)
vowel.test$y <- as.factor(vowel.test$y)

set.seed(33833)
fit1 <- train(y ~ ., data = vowel.train, method = "rf")
fit2 <- train(y ~ ., data = vowel.train, method = "gbm")

results1 <- predict(fit1, newdata = vowel.test)
results2 <- predict(fit2, newdata = vowel.test)

# Stack the base-model predictions and combine them with random forests.
# NOTE(review): the stacked model is trained and scored on the same test-set
# rows, so its accuracy (c3) is optimistic compared with c1 and c2.
combo <- data.frame(results1, results2, y = vowel.test$y)
fit3 <- train(y ~ ., data = combo, method = "rf")

# fit3's predictors are results1/results2, which are columns of combo, so
# predict from combo. Passing vowel.test here would only work if those names
# happened to be resolved outside of newdata.
results3 <- predict(fit3, newdata = combo)

c1 <- confusionMatrix(results1, vowel.test$y)
c2 <- confusionMatrix(results2, vowel.test$y)
c3 <- confusionMatrix(results3, combo$y)
# Question 2.
# Predict Alzheimer's diagnosis with random forests, boosting and LDA, then
# stack the three sets of test-set predictions with a random forest.
library(caret)
library(gbm)
library(AppliedPredictiveModeling)

set.seed(3433)

# Load data.
data(AlzheimerDisease)
adData <- data.frame(diagnosis, predictors)
inTrain <- createDataPartition(adData$diagnosis, p = 3 / 4)[[1]]
training <- adData[inTrain, ]
testing <- adData[-inTrain, ]

set.seed(62433)

# Train using 3 different models.
fit1 <- train(diagnosis ~ ., data = training, method = "rf")
fit2 <- train(diagnosis ~ ., data = training, method = "gbm")
fit3 <- train(diagnosis ~ ., data = training, method = "lda")

# Run models on testing data.
results1 <- predict(fit1, newdata = testing)
results2 <- predict(fit2, newdata = testing)
results3 <- predict(fit3, newdata = testing)

# Stack models together and combine with random forests.
# NOTE(review): the stacked model is trained and scored on the same testing
# rows, so its accuracy is optimistic compared with the base models.
combo <- data.frame(results1, results2, results3,
                    diagnosis = testing$diagnosis)
fit4 <- train(diagnosis ~ ., data = combo, method = "rf")

# Run stacked model on the stacked predictions. fit4's predictors are
# results1..results3, which are columns of combo rather than of testing, so
# combo is the correct newdata.
results4 <- predict(fit4, newdata = combo)

# Accuracy figures below are the values recorded by the original author.
# random forests = 0.78
c1 <- confusionMatrix(results1, testing$diagnosis)
# boosting = 0.80
c2 <- confusionMatrix(results2, testing$diagnosis)
# lda = 0.77
c3 <- confusionMatrix(results3, testing$diagnosis)
# Stacked models = 0.82
c4 <- confusionMatrix(results4, combo$diagnosis)
# Question 3.
# Fit a lasso model to the concrete data and plot the coefficient paths.
library(caret)
library(AppliedPredictiveModeling)
# plot.enet() comes from elasticnet; attach it explicitly rather than relying
# on train(method = "lasso") to attach it as a side effect.
library(elasticnet)

set.seed(3523)
data(concrete)
inTrain <- createDataPartition(concrete$CompressiveStrength, p = 3 / 4)[[1]]
training <- concrete[inTrain, ]
testing <- concrete[-inTrain, ]

set.seed(233)
fit1 <- train(CompressiveStrength ~ ., data = training, method = "lasso")

# Plot the paths against the penalty so that the order in which coefficients
# are set to zero as the penalty increases can be read directly off the x-axis.
plot.enet(fit1$finalModel, xvar = "penalty", use.color = TRUE)
# Question 4.
# Fit a BATS model to the pre-2012 Tumblr visit counts, forecast the remaining
# period, and report the percentage of test points that fall inside the 95%
# prediction interval.
library(lubridate) # For year() function below
library(forecast)
library(quantmod)

url <- 'https://d396qusza40orc.cloudfront.net/predmachlearn/gaData.csv'
fileName <- basename(url)

# Download the data only if it is not already cached in the working directory.
if (!file.exists(fileName)) {
  download.file(url, fileName, method = 'curl')
}

dat <- read.csv(fileName, na.strings = c('', 'NA'))
training <- dat[year(dat$date) < 2012, ]
testing <- dat[year(dat$date) > 2011, ]

# Create a time-series.
tstrain <- ts(training$visitsTumblr)

# Create a model using bats.
fit <- bats(tstrain)

# Count the length of the test set, so we can predict for this many points
# beyond the training data.
start <- nrow(testing)

# Create forecast model for the remaining points beyond training (up to the
# testing count), use a 95% prediction interval bound.
fcast <- forecast(fit, level = 95, h = start)

# Check accuracy.
accuracy(fcast, testing$visitsTumblr)

# For how many of the testing points is the true value within the 95%
# prediction interval bounds? Only one level was requested, so the bounds are
# flattened to plain vectors and compared element-wise (strictly inside).
lower <- as.numeric(fcast$lower)
upper <- as.numeric(fcast$upper)
actual <- testing$visitsTumblr[seq_along(lower)]
result <- lower < actual & actual < upper

# Percentage of test points covered by the interval.
mean(result) * 100
# Question 5.
# Fit a support vector machine to the concrete data and report its error
# measures on the held-out test set.
library(AppliedPredictiveModeling)
library(e1071)
library(caret)
# accuracy() comes from the forecast package; load it here so this section
# does not depend on an earlier section having attached it.
library(forecast)

set.seed(3523)
data(concrete)
inTrain <- createDataPartition(concrete$CompressiveStrength, p = 3 / 4)[[1]]
training <- concrete[inTrain, ]
testing <- concrete[-inTrain, ]

set.seed(325)

# Train an svm.
fit <- svm(CompressiveStrength ~ ., data = training)

# Run svm on test set.
results <- predict(fit, testing)

# Check accuracy.
accuracy(results, testing$CompressiveStrength)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment