Skip to content

Instantly share code, notes, and snippets.

@nuthanmunaiah
Created April 2, 2016 18:50
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save nuthanmunaiah/e1d8085d6c3d228086db75391cbd54c9 to your computer and use it in GitHub Desktop.
R scripts accompanying the class notes for Week 10 of the Applied Multivariate Statistical Analysis course.
# Initialize Libraries
library("MASS")
library("ROCR")
# Function Definitions
# Evaluate how well a fitted classifier reproduces the known class labels.
#
# Arguments:
#   model    - fitted model whose predict() result has a `class` component
#              (this script passes MASS lda/qda fits).
#   data     - data frame holding the predictors and the true class labels.
#   response - optional name of the column in `data` with the true labels.
#              When NULL, the name is taken from the response of
#              `model$terms` if the model carries one, otherwise "y".
#
# Returns:
#   A list with `accuracy` (fraction of the rows of `data` that were
#   classified correctly) and `error` (1 - accuracy).
evaluate.performance <- function(model, data, response = NULL) {
  if (is.null(response)) {
    # Models fitted through a formula keep their terms; use them so that a
    # response named something other than "y" (e.g. brain.y) is still found.
    model.terms <- model$terms
    has.response <- !is.null(model.terms) &&
      isTRUE(attr(model.terms, "response") > 0)
    response <- if (has.response) all.vars(model.terms)[1L] else "y"
  }
  stopifnot(is.character(response), length(response) == 1L)
  if (!response %in% names(data)) {
    stop("response column '", response, "' not found in data", call. = FALSE)
  }

  actual <- data[[response]]
  yhat <- predict(model, newdata = data)$class

  # Compare labels element-wise instead of summing the diagonal of
  # table(actual, yhat): the diagonal only lines up when the table is square
  # with identically ordered rows and columns, which breaks as soon as a
  # class is absent from `data`.
  hits <- as.character(yhat) == as.character(actual)
  accuracy <- sum(hits, na.rm = TRUE) / nrow(data)

  list("accuracy" = accuracy, "error" = 1 - accuracy)
}
# Add a horizontal, borderless class legend to the current plot.
#
# Arguments:
#   dataset  - data frame with a `y` column of class labels; 2 is added to
#              the labels to pick the fill colours, so they must be numeric.
#   location - position keyword handed to legend() (default "bottomright").
#
# Returns:
#   The value of the legend() call.
show.legend <- function(dataset, location = "bottomright") {
  # One legend entry per distinct class, in ascending order.
  classes <- sort(unique(dataset$y))
  legend(
    location,
    legend = classes,
    fill = classes + 2,
    title = "Class",
    horiz = TRUE,
    bty = "n",
    inset = 0.03
  )
}
# Tuesday, 29 of March, 2016
# Banana Data Set
# Load the data and plot every column except the third, colouring each point
# by its class label offset by 2.
banana <- read.csv(file = "banana-shaped-data-1.csv", header = TRUE)
plot(banana[, -3], col = banana$y + 2)
show.legend(dataset = banana)

# LDA: fit on the full data set and score it on that same data
lda.banana <- lda(y ~ ., data = banana)
lda.banana.performance <- evaluate.performance(
  model = lda.banana, data = banana
)

# QDA: same procedure with a quadratic discriminant
qda.banana <- qda(y ~ ., data = banana)
qda.banana.performance <- evaluate.performance(
  model = qda.banana, data = banana
)
# Four Corners Data Set
# Load the data and plot every column except the third, colouring each point
# by its class label offset by 2; the legend goes at the top for this plot.
fourcorners <- read.csv(file = "four-corners-data-1.csv", header = TRUE)
plot(fourcorners[, -3], col = fourcorners$y + 2)
show.legend(dataset = fourcorners, location = "top")

# LDA: fit on the full data set and score it on that same data
lda.fourcorners <- lda(y ~ ., data = fourcorners)
lda.fourcorners.performance <- evaluate.performance(
  model = lda.fourcorners, data = fourcorners
)

# QDA: same procedure with a quadratic discriminant
qda.fourcorners <- qda(y ~ ., data = fourcorners)
qda.fourcorners.performance <- evaluate.performance(
  model = qda.fourcorners, data = fourcorners
)
# Easy Doughnut Data Set
# Load the data and plot every column except the third, colouring each point
# by its class label offset by 2.
easydoughnut <- read.csv(file = "doughnuts-easy.csv", header = TRUE)
plot(easydoughnut[, -3], col = easydoughnut$y + 2)
show.legend(dataset = easydoughnut)

# LDA: fit on the full data set and score it on that same data
lda.easydoughnut <- lda(y ~ ., data = easydoughnut)
lda.easydoughnut.performance <- evaluate.performance(
  model = lda.easydoughnut, data = easydoughnut
)

# QDA: same procedure with a quadratic discriminant
qda.easydoughnut <- qda(y ~ ., data = easydoughnut)
qda.easydoughnut.performance <- evaluate.performance(
  model = qda.easydoughnut, data = easydoughnut
)
# Doughnut Data Set
# Load the data and plot every column except the third, colouring each point
# by its class label offset by 2.
doughnut <- read.csv(file = "doughnuts.csv", header = TRUE)
plot(doughnut[, -3], col = doughnut$y + 2)
show.legend(dataset = doughnut)

# LDA: fit on the full data set and score it on that same data
lda.doughnut <- lda(y ~ ., data = doughnut)
lda.doughnut.performance <- evaluate.performance(
  model = lda.doughnut, data = doughnut
)

# QDA: same procedure with a quadratic discriminant
qda.doughnut <- qda(y ~ ., data = doughnut)
qda.doughnut.performance <- evaluate.performance(
  model = qda.doughnut, data = doughnut
)
# Brain Cancer Data Set
brain <- read.csv(file = "brain-cancer-1.csv", header = TRUE)

# Variance-covariance matrix of every column except the first
# (presumably the first column is the response brain.y - TODO confirm).
S <- cov(brain[, -1])

# Generalized variance is det(S); per the class notes it is expected to be
# zero here, indicating that S is ill-conditioned/singular.
generalized.variance <- det(S)

# NOTE(review): the response in this data set is brain.y rather than y;
# confirm that evaluate.performance looks up the matching label column.

# LDA: fit on the full data set and score it on that same data
lda.brain <- lda(brain.y ~ ., data = brain)
lda.brain.performance <- evaluate.performance(
  model = lda.brain, data = brain
)

# QDA: same procedure with a quadratic discriminant
qda.brain <- qda(brain.y ~ ., data = brain)
qda.brain.performance <- evaluate.performance(
  model = qda.brain, data = brain
)
# Thursday, 31 of March, 2016
# ROC curve for a QDA classifier on the Four Corners data.
dataset <- read.csv(file = "four-corners-data-1.csv", header = TRUE)
qda.model <- qda(y ~ ., data = dataset)

# Score each row with the second column of the posterior matrix and pair the
# scores with the true labels to get true/false positive rates.
# (The original referenced an undefined `qad.model` here, a typo for
# `qda.model` that stops the script with "object not found".)
qda.model.performance <- performance(
  prediction(predict(qda.model, dataset)$posterior[, 2], dataset$y),
  measure = "tpr", x.measure = "fpr"
)

plot(
  qda.model.performance,
  main = "ROC Curve",
  xlab = "False Positive Rate", ylab = "True Positive Rate",
  col = "forestgreen", lwd = 2
)

# Dashed diagonal: the reference line labelled "Random" in the legend.
abline(a = 0, b = 1, lwd = 2, lty = 2)
legend(
  "bottomright",
  legend = c("Random", "Optimal"),
  col = c("black", "forestgreen"), lty = c(2, 1), lwd = 2,
  bty = "n", horiz = TRUE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment