## logit -- gradient descent for logistic regression
#' Euclidean Norm
#'
#' @param x numeric vector
norm_vec <- function(x) {
  return(sqrt(sum(x^2)))
}
#' Gradient Step
#'
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x current parameter estimate
gradient_step <- function(gradf, t, x) {
  return(x - t * gradf(x))
}
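## Sanity check: on f(x) = x^2 (gradient 2x), one step from x = 1 with
## t = 0.1 lands at 1 - 0.1 * 2 = 0.8.
stopifnot(all.equal(gradient_step(function(x) 2 * x, 0.1, 1), 0.8))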
#' Gradient Descent (Fixed Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param t step-size
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent_fixed <- function(fx, gradf, t, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- numeric(0)
  ytrace <- numeric(0)
  vals <- numeric(0)
  b <- x0
  for (i in 1:max_iter) {
    ## pass the gradient function itself (not its value) into gradient_step
    b[i+1] <- gradient_step(gradf, t, b[i])
    xtrace[i] <- b[i+1] / b[i]
    ytrace[i] <- fx(b[i+1]) / fx(b[i])
    vals[i] <- fx(b[i+1]) - fx(b[i])
    ## stop once the change in the objective falls below the tolerance
    ## (isTRUE guards against NaN from a divergent run)
    if (isTRUE(abs(vals[i]) <= tol)) {
      break
    }
  }
  result <- list(x = b[length(b)],
                 fvals = sapply(b, fx),   # objective value at each iterate
                 grad_norm = norm_vec(gradf(b[length(b)])),
                 xtrace = xtrace, ytrace = ytrace, vals = vals)
  return(result)
}
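## Minimal check of the fixed-step solver on the convex quadratic (x - 3)^2,
## whose minimizer is x = 3; with t = 0.1 the iterates contract toward 3.
quad_fit <- gradient_descent_fixed(function(x) (x - 3)^2,
                                   function(x) 2 * (x - 3),
                                   t = 0.1, x0 = 0)
## quad_fit$x should be close to 3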
#' Backtracking Line Search
#'
#' @param fx handle to function that returns objective function values
#' @param t current step-size
#' @param x current parameter estimate
#' @param df handle to function that returns gradient of objective function
#' @param alpha the sufficient-decrease parameter
#' @param beta the decrementing multiplier
backtrack <- function(fx, t, x, df, alpha=0.5, beta=0.9) {
  g <- df(x)
  ## shrink t until the Armijo sufficient-decrease condition holds
  while (fx(x - t * g) > fx(x) - alpha * t * norm_vec(g)^2 &&
         t > .Machine$double.eps) {
    t <- beta * t
  }
  return(t)
}
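## Quick check of backtrack: on f(x) = x^4 at x = 2, a unit step overshoots
## the minimum at 0, so the returned step-size should be strictly below 1.
t_small <- backtrack(function(x) x^4, t = 1, x = 2, df = function(x) 4 * x^3)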
#' Gradient Descent (Backtracking Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent_backtrack <- function(fx, gradf, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- numeric(0)
  ytrace <- numeric(0)
  vals <- numeric(0)
  t <- 1
  b <- x0
  for (i in 1:max_iter) {
    ## choose a step-size by backtracking, then take the gradient step
    step <- backtrack(fx, t, b[i], gradf)
    b[i+1] <- gradient_step(gradf, step, b[i])
    xtrace[i] <- b[i+1] / b[i]
    ytrace[i] <- fx(b[i+1]) / fx(b[i])
    vals[i] <- fx(b[i+1]) - fx(b[i])
    ## stop once the change in the objective falls below the tolerance
    if (isTRUE(abs(vals[i]) <= tol)) {
      break
    }
  }
  result <- list(x = b[length(b)],
                 fvals = sapply(b, fx),   # objective value at each iterate
                 grad_norm = norm_vec(gradf(b[length(b)])),
                 xtrace = xtrace, ytrace = ytrace, vals = vals)
  return(result)
}
#' Gradient Descent
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param t step-size
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent <- function(fx, gradf, x0, t=NULL, max_iter=1e2, tol=1e-3) {
  ## wrapper: dispatch on whether a fixed step-size was supplied
  if (is.null(t)) {
    return(gradient_descent_backtrack(fx, gradf, x0, max_iter, tol))
  } else {
    return(gradient_descent_fixed(fx, gradf, t, x0, max_iter, tol))
  }
}
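## Omitting t selects backtracking; supplying it selects the fixed step-size.
## A minimal illustration on the toy quadratic (x - 3)^2:
fit_bt <- gradient_descent(function(x) (x - 3)^2, function(x) 2 * (x - 3), x0 = 0)
fit_fixed <- gradient_descent(function(x) (x - 3)^2, function(x) 2 * (x - 3), x0 = 0, t = 0.1)
## both fit_bt$x and fit_fixed$x should be close to 3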
#' Objective Function for Logistic Regression
#'
#' Negative log-likelihood with an optional ridge penalty, for a
#' single-predictor (no-intercept) model.
#'
#' @param y binary response
#' @param X predictor vector (one column of the design matrix)
#' @param beta regression coefficient
#' @param lambda regularization parameter
fx_logistic <- function(y, X, beta, lambda=0) {
  n <- length(X)
  val <- 0
  for (i in 1:n) {
    val <- val - y[i] * X[i] * beta + log(1 + exp(X[i] * beta))
  }
  ## squared norm, so the penalty's gradient is lambda * beta (see gradf_logistic)
  return(val + (lambda / 2) * norm_vec(beta)^2)
}
#' Gradient for Logistic Regression
#'
#' @param y binary response
#' @param X predictor vector (one column of the design matrix)
#' @param beta regression coefficient
#' @param lambda regularization parameter
gradf_logistic <- function(y, X, beta, lambda=0) {
  n <- length(X)
  val <- 0
  for (i in 1:n) {
    ## (sigmoid(X[i] * beta) - y[i]) * X[i]
    val <- val + (exp(X[i] * beta) / (1 + exp(X[i] * beta)) - y[i]) * X[i]
  }
  return(val + lambda * beta)
}
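## Sanity check (a standard finite-difference test; the test data and eps
## below are arbitrary choices): the analytic gradient should agree with a
## central difference of the objective.
check_grad <- function(beta, eps=1e-6) {
  y_t <- c(0, 1, 1)
  x_t <- c(-0.5, 0.2, 1.3)
  num <- (fx_logistic(y_t, x_t, beta + eps) -
          fx_logistic(y_t, x_t, beta - eps)) / (2 * eps)
  c(analytic = gradf_logistic(y_t, x_t, beta), numeric = num)
}
## check_grad(0.7) should return two nearly identical numbers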
## step 7
## initialize data sequences for testing
set.seed(12345)
n <- 100
p <- 2
X <- matrix(rnorm(n*p),n,p)
x1 <- X[,1]
x2 <- X[,2]
beta0 <- matrix(rnorm(p),p,1)
beta1 <- beta0[1]
beta2 <- beta0[2]
y <- (runif(n) <= plogis(X %*% beta0)) + 0  # Bernoulli draws from the true logistic model
#' Wrapper: logistic regression objective as a function of beta only (uses x1)
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda=0))
}
#' Wrapper: logistic regression gradient as a function of beta only
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda=0))
}
t <- 1
x0 <- x1[1]
result1 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e2, tol=1e-3)
## suboptimality relative to the final iterate
L1 <- result1$fvals - result1$fvals[length(result1$fvals)]
plot(seq_along(L1), L1, xlab="iterations", ylab="Logistic Result 1", type="o", col="blue")
##
## step 8
result2 <- gradient_descent_backtrack(fx, gradf, x0)
L2 <- result2$fvals - result2$fvals[length(result2$fvals)]
plot(seq_along(L2), L2, xlab="iterations", ylab="Logistic Result 2", type="o", col="red")
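## Cross-check against R's built-in solver: for this unpenalized
## single-predictor, no-intercept model, glm solves the same problem, so the
## backtracking estimate result2$x should land near coef(glm_fit).
## (as.vector strips y's matrix shape for the formula interface.)
glm_fit <- glm(as.vector(y) ~ x1 - 1, family=binomial)
## compare coef(glm_fit) with result2$x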
## step 9
#' Wrapper: ridge-penalized logistic regression objective (lambda = 10)
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda=10))
}
#' Wrapper: ridge-penalized logistic regression gradient (lambda = 10)
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda=10))
}
result3 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e3)
L3 <- result3$fvals - result3$fvals[length(result3$fvals)]
plot(seq_along(L3), L3, xlab="iterations", ylab="Logistic Result 3", type="o", col="green")