@bryangoodrich
Created August 26, 2014 05:45
Regularized Gradient Descent Logistic Classifier with Decision Boundary
# Fit an L2-regularized logistic regression by minimizing its cost with
# optim (BFGS by default) over degree-6 polynomial features, then plot the
# resulting decision boundary over the data.

# Returns the example data as a 118 x 3 matrix: two features (columns 1-2)
# and a binary 0/1 class label (column 3).
loadInput <- function() {
structure(c(0.051267, -0.092742, -0.21371, -0.375, -0.51325,
-0.52477, -0.39804, -0.30588, 0.016705, 0.13191, 0.38537, 0.52938,
0.63882, 0.73675, 0.54666, 0.322, 0.16647, -0.046659, -0.17339,
-0.47869, -0.60541, -0.62846, -0.59389, -0.42108, -0.11578, 0.20104,
0.46601, 0.67339, -0.13882, -0.29435, -0.26555, -0.16187, -0.17339,
-0.28283, -0.36348, -0.30012, -0.23675, -0.06394, 0.062788, 0.22984,
0.2932, 0.48329, 0.64459, 0.46025, 0.6273, 0.57546, 0.72523,
0.22408, 0.44297, 0.322, 0.13767, -0.0063364, -0.092742, -0.20795,
-0.20795, -0.43836, -0.21947, -0.13882, 0.18376, 0.22408, 0.29896,
0.50634, 0.61578, 0.60426, 0.76555, 0.92684, 0.82316, 0.96141,
0.93836, 0.86348, 0.89804, 0.85196, 0.82892, 0.79435, 0.59274,
0.51786, 0.46601, 0.35081, 0.28744, 0.085829, 0.14919, -0.13306,
-0.40956, -0.39228, -0.74366, -0.69758, -0.75518, -0.69758, -0.4038,
-0.38076, -0.50749, -0.54781, 0.10311, 0.057028, -0.10426, -0.081221,
0.28744, 0.39689, 0.63882, 0.82316, 0.67339, 1.0709, -0.046659,
-0.23675, -0.15035, -0.49021, -0.46717, -0.28859, -0.61118, -0.66302,
-0.59965, -0.72638, -0.83007, -0.72062, -0.59389, -0.48445, -0.0063364,
0.63265, 0.69956, 0.68494, 0.69225, 0.50219, 0.46564, 0.2098,
0.034357, -0.19225, -0.40424, -0.51389, -0.56506, -0.5212, -0.24342,
-0.18494, 0.48757, 0.5826, 0.53874, 0.81652, 0.69956, 0.63377,
0.59722, 0.33406, 0.005117, -0.27266, -0.39693, -0.60161, -0.53582,
-0.53582, 0.54605, 0.77997, 0.96272, 0.8019, 0.64839, 0.47295,
0.31213, 0.027047, -0.21418, -0.18494, -0.16301, -0.41155, -0.2288,
-0.18494, -0.14108, 0.012427, 0.15863, 0.26827, 0.44371, 0.52412,
0.67032, 0.69225, 0.57529, 0.39985, 0.55336, 0.35599, 0.17325,
0.21711, -0.016813, -0.27266, 0.93348, 0.77997, 0.61915, 0.75804,
0.7288, 0.59722, 0.50219, 0.3633, 0.27558, 0.085526, 0.012427,
-0.082602, -0.20687, -0.36769, -0.5212, -0.55775, -0.7405, -0.5943,
-0.41886, -0.57968, -0.76974, -0.75512, -0.57968, -0.4481, -0.41155,
-0.25804, -0.25804, 0.041667, 0.2902, 0.68494, 0.70687, 0.91886,
0.90424, 0.70687, 0.77997, 0.91886, 0.99196, 1.1089, 1.087, 0.82383,
0.88962, 0.66301, 0.64108, 0.10015, -0.57968, -0.63816, -0.36769,
-0.3019, -0.13377, -0.060673, -0.067982, -0.21418, -0.41886,
-0.082602, 0.31213, 0.53874, 0.49488, 0.99927, 0.99927, -0.030612,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(118L, 3L))
}
# The Important Functions
# Despite the name, this minimizes the regularized logistic cost with
# optim() (BFGS by default) using the analytic gradient, rather than a
# hand-rolled gradient descent loop.
gradientDescent <- function(X, y, initial_theta = NULL, lambda = 0, method = "BFGS", ...) {
    if (is.null(initial_theta)) initial_theta <- matrix(0, nrow = ncol(X))
    m <- nrow(y)
    sigmoid <- function(x) 1 / (1 + exp(-x))

    # Gradient of the regularized cost. The intercept theta[1] is not
    # penalized, so its entry in the regularization term is zeroed out.
    gradFunction <- function(theta) {
        treg <- theta
        treg[1] <- 0
        est <- sigmoid(X %*% theta)
        grad <- (1/m) * (t(X) %*% (est - y))
        grad + (lambda/m) * treg
    }

    # Regularized cross-entropy cost; again, theta[1] is excluded from
    # the penalty.
    costFunction <- function(theta) {
        treg <- theta
        treg[1] <- 0
        est <- sigmoid(X %*% theta)
        J <- (1/m) * (t(-y) %*% log(est) - t(1 - y) %*% log(1 - est))
        J + (lambda / (2*m)) * (t(treg) %*% treg)
    }

    optim(initial_theta, costFunction, gradFunction, method = method, ...)
}
# Expand two feature vectors into all polynomial terms x^(i-j) * y^j for
# i = 1..degree, j = 0..i, plus a leading intercept column. With the
# default degree = 6 this produces 28 columns.
mapFeatures <- function(x, y, degree = 6) {
    x <- as.matrix(x)
    y <- as.matrix(y)
    out <- matrix(1, nrow = nrow(x))  # intercept column
    for (i in 1:degree)
        for (j in 0:i)
            out <- cbind(out, x^(i-j) * y^j)
    out
}
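# A quick sanity check of the expansion (an addition, not in the original
# gist): a single (x, y) point maps onto a 1 x 28 row of polynomial terms.
stopifnot(identical(dim(mapFeatures(0.5, -0.5)), c(1L, 28L)))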
# Scatter the two raw features colored by class, then overlay contours of
# the fitted linear predictor; the decision boundary is the z = 0 contour.
plotBoundary <- function(theta, x, y) {
    cpal <- c("red", "blue")
    ppal <- c("+", "*")
    plot(x[, 1], x[, 2], col = cpal[y + 1], pch = ppal[y + 1])
    u <- seq(-1, 1.5, length = 50)
    v <- seq(-1, 1.5, length = 50)
    z <- matrix(0, nrow = length(u), ncol = length(v))
    for (i in 1:length(u))
        for (j in 1:length(v))
            z[i, j] <- mapFeatures(u[i], v[j], degree = 6) %*% theta
    contour(u, v, z, add = TRUE)
}
# The Computations
input <- loadInput()
X <- cbind(1, input[, 1:2])
y <- cbind(input[, 3])
# This would be a good time to plot the raw data, but that step is omitted
# here; plotBoundary draws the same scatter before overlaying the boundary.
Xexpanded <- mapFeatures(X[, 2], X[, 3])
initial_theta <- matrix(0, nrow = ncol(X)) # Not actually used; gradientDescent defaults to the same zero initialization
# The Actual Work.
# Try lambda = 0, 1, and 100 to see overfitting, a good fit, and underfitting, respectively.
res <- gradientDescent(Xexpanded, y, lambda = 1, method = "BFGS", control = list(maxit = 10000))
plotBoundary(res$par, Xexpanded[, 2:3], y)
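# A sketch of the lambda comparison suggested above (an addition, not part
# of the original gist; assumes an interactive graphics device so each
# plot can be inspected in turn):
for (lam in c(0, 1, 100)) {
    fit <- gradientDescent(Xexpanded, y, lambda = lam, control = list(maxit = 10000))
    plotBoundary(fit$par, Xexpanded[, 2:3], y)
    title(main = paste("lambda =", lam))
}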
@bryangoodrich (Author):
Not every line of code here is needed. For example, adding the column vector of 1's to X is redundant, since mapFeatures already prepends an intercept column, and pre-defining initial_theta is unnecessary because gradientDescent defaults to the same zero initialization.
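
For instance, the setup could be trimmed to something like this (a sketch, not the code above):

input <- loadInput()
Xexpanded <- mapFeatures(input[, 1], input[, 2]) # intercept column added here
y <- cbind(input[, 3])
res <- gradientDescent(Xexpanded, y, lambda = 1, control = list(maxit = 10000))
plotBoundary(res$par, Xexpanded[, 2:3], y)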

Also, per advice from my colleague Dason at TalkStats.com, I could have given gradFunction and costFunction a data parameter and passed X directly into optim with that additional argument ("data = X"), since optim forwards extra named arguments to both the cost and gradient functions. My approach here was to define grad and cost in the same function scope where optim is called, so they capture X and y through lexical scoping. Cheap, I know. But I never do optimization in R!
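
A sketch of that refactor (hypothetical standalone version of the cost function; the names and the extra lambda argument are mine, not Dason's exact suggestion):

costFunction <- function(theta, X, y, lambda) {
    m <- nrow(y)
    treg <- theta
    treg[1] <- 0
    est <- 1 / (1 + exp(-(X %*% theta)))
    (1/m) * (t(-y) %*% log(est) - t(1 - y) %*% log(1 - est)) +
        (lambda / (2*m)) * (t(treg) %*% treg)
}
# optim forwards extra named arguments to fn (and to gr, if one is supplied):
res <- optim(matrix(0, nrow = ncol(Xexpanded)), costFunction,
             X = Xexpanded, y = y, lambda = 1, method = "BFGS",
             control = list(maxit = 10000))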

"The More You Know" http://braintrustmusic.files.wordpress.com/2014/05/the-more-you-know2.png
