emhart/linear_gd.R

## linear_gd.R
#' A function to do gradient descent on arbitrarily sized linear models
#' @param Y a vector of response variables
#' @param X a design matrix of predictor variables
#' @param alpha the gradient descent step size
#' @param init the seed to start off for estimating parameters, must be the same size as the columns in X
#' @param failure the number of iterations after which to say convergence has failed
#' @param epsilon the difference in size between loss function iterations after which to say convergence has been reached.

linear_gd <- function(Y, X, alpha, init = runif(dim(X)[2],-10,10),failure = 10000,epsilon = 0.0001){
  theta_new <- init
  j <- 2
  cost <- rep(NA,failure)
  cost[1] <-  (1/(2*dim(X)[2]))*sum((X%*%theta_new-Y)^2)
  ## Little hack to kick off the while loop
  cost[2] <- cost[1] + epsilon + 1

  while(abs(cost[j - 1] - cost[j])  > epsilon){
    j <- j+1
    if(failure <= j){stop(paste("algorithm failed to converge after",failure, "iterations",sep=" "))}
    theta_old <- theta_new
    for(i in 1:length(theta_old)){
      theta_new[i] <- theta_old[i] - (alpha*mean((X%*%theta_old-Y)*X[,i]))
    }
    cost[j] <- (1/(2*dim(X)[2]))*sum((X%*%theta_new-Y)^2)

}
cat(paste("Algorithm converged after",j,"iterations.",sep=" "))
cost <- cost[!is.na(cost)]
return(list(fit=theta_new,loss=cost))
}

### Simple example

# Generate some data
X <- cbind(rep(1,100),matrix(rnorm(300),ncol=3,nrow=100))
p1 <- c(2,3,-3,4)
Y <- X%*%p1
# Plot for fun
plot(Y~X[,2])

# Fit
out <- linear_gd(Y,X,.1,init = c(1,1,1,1))
# Compare outputs to fit
cbind(out$fit,p1)

# plot loss function
plot(out$loss,type='l')


### Ok now let's take it up a notch where our design matrix is 100 X 100
X <- cbind(rep(1,100), matrix(rnorm(9900),ncol=99,nrow=100))
p1 <- runif(100,-5,5)
Y <- X%*%p1
plot(Y~X[,2])
out <- linear_gd(Y,X,.1)
plot(out$loss,type='l')
cbind(round(out$fit,2),round(p1,2))
	#' A function to do gradient descent on arbitrarily sized linear models
	#' @param Y a vector of response variables
	#' @param X a design matrix of predictor variables
	#' @param alpha the gradient descent step size
	#' @param init the seed to start off for estimating parameters, must be the same size as the columns in X
	#' @param failure the number of iterations after which to say convergence has failed
	#' @param epsilon the difference in size between loss function iterations after which to say convergence has been reached.

	linear_gd <- function(Y, X, alpha, init = runif(dim(X)[2],-10,10),failure = 10000,epsilon = 0.0001){
	theta_new <- init
	j <- 2
	cost <- rep(NA,failure)
	cost[1] <- (1/(2dim(X)[2]))sum((X%*%theta_new-Y)^2)
	## Little hack to kick off the while loop
	cost[2] <- cost[1] + epsilon + 1

	while(abs(cost[j - 1] - cost[j]) > epsilon){
	j <- j+1
	if(failure <= j){stop(paste("algorithm failed to converge after",failure, "iterations",sep=" "))}
	theta_old <- theta_new
	for(i in 1:length(theta_old)){
	theta_new[i] <- theta_old[i] - (alphamean((X%%theta_old-Y)*X[,i]))
	}
	cost[j] <- (1/(2dim(X)[2]))sum((X%*%theta_new-Y)^2)

	}
	cat(paste("Algorithm converged after",j,"iterations.",sep=" "))
	cost <- cost[!is.na(cost)]
	return(list(fit=theta_new,loss=cost))
	}

	### Simple example

	# Generate some data
	X <- cbind(rep(1,100),matrix(rnorm(300),ncol=3,nrow=100))
	p1 <- c(2,3,-3,4)
	Y <- X%*%p1
	# Plot for fun
	plot(Y~X[,2])

	# Fit
	out <- linear_gd(Y,X,.1,init = c(1,1,1,1))
	# Compare outputs to fit
	cbind(out$fit,p1)

	# plot loss function
	plot(out$loss,type='l')


	### Ok now let's take it up a notch where our design matrix is 100 X 100
	X <- cbind(rep(1,100), matrix(rnorm(9900),ncol=99,nrow=100))
	p1 <- runif(100,-5,5)
	Y <- X%*%p1
	plot(Y~X[,2])
	out <- linear_gd(Y,X,.1)
	plot(out$loss,type='l')
	cbind(round(out$fit,2),round(p1,2))