@bryangoodrich
Created August 26, 2014 05:45
Regularized Gradient Descent Logistic Classifier with Decision Boundary
# Fit an L2-regularized logistic regression by minimizing its cost with
# optim (BFGS by default) over degree-6 polynomial features, then plot the
# resulting decision boundary over the data.

# Returns the example data as a 118 x 3 matrix: two features (columns 1-2)
# and a binary 0/1 class label (column 3).
loadInput <- function() {
structure(c(0.051267, -0.092742, -0.21371, -0.375, -0.51325,
-0.52477, -0.39804, -0.30588, 0.016705, 0.13191, 0.38537, 0.52938,
0.63882, 0.73675, 0.54666, 0.322, 0.16647, -0.046659, -0.17339,
-0.47869, -0.60541, -0.62846, -0.59389, -0.42108, -0.11578, 0.20104,
0.46601, 0.67339, -0.13882, -0.29435, -0.26555, -0.16187, -0.17339,
-0.28283, -0.36348, -0.30012, -0.23675, -0.06394, 0.062788, 0.22984,
0.2932, 0.48329, 0.64459, 0.46025, 0.6273, 0.57546, 0.72523,
0.22408, 0.44297, 0.322, 0.13767, -0.0063364, -0.092742, -0.20795,
-0.20795, -0.43836, -0.21947, -0.13882, 0.18376, 0.22408, 0.29896,
0.50634, 0.61578, 0.60426, 0.76555, 0.92684, 0.82316, 0.96141,
0.93836, 0.86348, 0.89804, 0.85196, 0.82892, 0.79435, 0.59274,
0.51786, 0.46601, 0.35081, 0.28744, 0.085829, 0.14919, -0.13306,
-0.40956, -0.39228, -0.74366, -0.69758, -0.75518, -0.69758, -0.4038,
-0.38076, -0.50749, -0.54781, 0.10311, 0.057028, -0.10426, -0.081221,
0.28744, 0.39689, 0.63882, 0.82316, 0.67339, 1.0709, -0.046659,
-0.23675, -0.15035, -0.49021, -0.46717, -0.28859, -0.61118, -0.66302,
-0.59965, -0.72638, -0.83007, -0.72062, -0.59389, -0.48445, -0.0063364,
0.63265, 0.69956, 0.68494, 0.69225, 0.50219, 0.46564, 0.2098,
0.034357, -0.19225, -0.40424, -0.51389, -0.56506, -0.5212, -0.24342,
-0.18494, 0.48757, 0.5826, 0.53874, 0.81652, 0.69956, 0.63377,
0.59722, 0.33406, 0.005117, -0.27266, -0.39693, -0.60161, -0.53582,
-0.53582, 0.54605, 0.77997, 0.96272, 0.8019, 0.64839, 0.47295,
0.31213, 0.027047, -0.21418, -0.18494, -0.16301, -0.41155, -0.2288,
-0.18494, -0.14108, 0.012427, 0.15863, 0.26827, 0.44371, 0.52412,
0.67032, 0.69225, 0.57529, 0.39985, 0.55336, 0.35599, 0.17325,
0.21711, -0.016813, -0.27266, 0.93348, 0.77997, 0.61915, 0.75804,
0.7288, 0.59722, 0.50219, 0.3633, 0.27558, 0.085526, 0.012427,
-0.082602, -0.20687, -0.36769, -0.5212, -0.55775, -0.7405, -0.5943,
-0.41886, -0.57968, -0.76974, -0.75512, -0.57968, -0.4481, -0.41155,
-0.25804, -0.25804, 0.041667, 0.2902, 0.68494, 0.70687, 0.91886,
0.90424, 0.70687, 0.77997, 0.91886, 0.99196, 1.1089, 1.087, 0.82383,
0.88962, 0.66301, 0.64108, 0.10015, -0.57968, -0.63816, -0.36769,
-0.3019, -0.13377, -0.060673, -0.067982, -0.21418, -0.41886,
-0.082602, 0.31213, 0.53874, 0.49488, 0.99927, 0.99927, -0.030612,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(118L, 3L))
}
# The Important Functions
# Despite the name, this minimizes the regularized logistic cost with
# optim() (BFGS by default) using the analytic gradient, rather than a
# hand-rolled gradient descent loop.
gradientDescent <- function(X, y, initial_theta = NULL, lambda = 0, method = "BFGS", ...) {
    if (is.null(initial_theta)) initial_theta <- matrix(0, nrow = ncol(X))
    m <- nrow(y)
    sigmoid <- function(x) 1 / (1 + exp(-x))

    # Gradient of the regularized cost. The intercept theta[1] is not
    # penalized, so its entry in the regularization term is zeroed out.
    gradFunction <- function(theta) {
        treg <- theta
        treg[1] <- 0
        est <- sigmoid(X %*% theta)
        grad <- (1/m) * (t(X) %*% (est - y))
        grad + (lambda/m) * treg
    }

    # Regularized cross-entropy cost; again, theta[1] is excluded from
    # the penalty.
    costFunction <- function(theta) {
        treg <- theta
        treg[1] <- 0
        est <- sigmoid(X %*% theta)
        J <- (1/m) * (t(-y) %*% log(est) - t(1 - y) %*% log(1 - est))
        J + (lambda / (2*m)) * (t(treg) %*% treg)
    }

    optim(initial_theta, costFunction, gradFunction, method = method, ...)
}
# Expand two feature vectors into all polynomial terms x^(i-j) * y^j for
# i = 1..degree, j = 0..i, plus a leading intercept column. With the
# default degree = 6 this produces 28 columns.
mapFeatures <- function(x, y, degree = 6) {
    x <- as.matrix(x)
    y <- as.matrix(y)
    out <- matrix(1, nrow = nrow(x))  # intercept column
    for (i in 1:degree)
        for (j in 0:i)
            out <- cbind(out, x^(i-j) * y^j)
    out
}
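# A quick sanity check of the expansion (an addition, not in the original
# gist): a single (x, y) point maps onto a 1 x 28 row of polynomial terms.
stopifnot(identical(dim(mapFeatures(0.5, -0.5)), c(1L, 28L)))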
# Scatter the two raw features colored by class, then overlay contours of
# the fitted linear predictor; the decision boundary is the z = 0 contour.
plotBoundary <- function(theta, x, y) {
    cpal <- c("red", "blue")
    ppal <- c("+", "*")
    plot(x[, 1], x[, 2], col = cpal[y + 1], pch = ppal[y + 1])
    u <- seq(-1, 1.5, length = 50)
    v <- seq(-1, 1.5, length = 50)
    z <- matrix(0, nrow = length(u), ncol = length(v))
    for (i in 1:length(u))
        for (j in 1:length(v))
            z[i, j] <- mapFeatures(u[i], v[j], degree = 6) %*% theta
    contour(u, v, z, add = TRUE)
}
# The Computations
input <- loadInput()
X <- cbind(1, input[, 1:2])
y <- cbind(input[, 3])
# This would be a good time to plot the raw data, but that step is omitted
# here; plotBoundary draws the same scatter before overlaying the boundary.
Xexpanded <- mapFeatures(X[, 2], X[, 3])
initial_theta <- matrix(0, nrow = ncol(X)) # Not actually used; gradientDescent defaults to the same zero initialization
# The Actual Work.
# Try lambda = 0, 1, and 100 to see overfitting, a good fit, and underfitting, respectively.
res <- gradientDescent(Xexpanded, y, lambda = 1, method = "BFGS", control = list(maxit = 10000))
plotBoundary(res$par, Xexpanded[, 2:3], y)
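# A sketch of the lambda comparison suggested above (an addition, not part
# of the original gist; assumes an interactive graphics device so each
# plot can be inspected in turn):
for (lam in c(0, 1, 100)) {
    fit <- gradientDescent(Xexpanded, y, lambda = lam, control = list(maxit = 10000))
    plotBoundary(fit$par, Xexpanded[, 2:3], y)
    title(main = paste("lambda =", lam))
}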
@bryangoodrich (Author):
Not every line of code here is needed. For example, adding the column vector of 1's to X is redundant, since mapFeatures already prepends an intercept column, and pre-defining initial_theta is unnecessary because gradientDescent defaults to the same zero initialization.
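
For instance, the setup could be trimmed to something like this (a sketch, not the code above):

input <- loadInput()
Xexpanded <- mapFeatures(input[, 1], input[, 2]) # intercept column added here
y <- cbind(input[, 3])
res <- gradientDescent(Xexpanded, y, lambda = 1, control = list(maxit = 10000))
plotBoundary(res$par, Xexpanded[, 2:3], y)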

Also, per advice from my colleague Dason at TalkStats.com, I could have given gradFunction and costFunction a data parameter and passed X directly into optim with that additional argument ("data = X"), since optim forwards extra named arguments to both the cost and gradient functions. My approach here was to define grad and cost in the same function scope where optim is called, so they capture X and y through lexical scoping. Cheap, I know. But I never do optimization in R!
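
A sketch of that refactor (hypothetical standalone version of the cost function; the names and the extra lambda argument are mine, not Dason's exact suggestion):

costFunction <- function(theta, X, y, lambda) {
    m <- nrow(y)
    treg <- theta
    treg[1] <- 0
    est <- 1 / (1 + exp(-(X %*% theta)))
    (1/m) * (t(-y) %*% log(est) - t(1 - y) %*% log(1 - est)) +
        (lambda / (2*m)) * (t(treg) %*% treg)
}
# optim forwards extra named arguments to fn (and to gr, if one is supplied):
res <- optim(matrix(0, nrow = ncol(Xexpanded)), costFunction,
             X = Xexpanded, y = y, lambda = 1, method = "BFGS",
             control = list(maxit = 10000))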

"The More You Know" http://braintrustmusic.files.wordpress.com/2014/05/the-more-you-know2.png
