Skip to content

Instantly share code, notes, and snippets.

@GabrielHoffman
Last active July 22, 2020 14:37
Show Gist options
  • Save GabrielHoffman/c9719d5545bf931ba67dbabf839c9f64 to your computer and use it in GitHub Desktop.
Save GabrielHoffman/c9719d5545bf931ba67dbabf839c9f64 to your computer and use it in GitHub Desktop.
#' @title Sparse Principal Component Analysis (spca).
#
#' @description Implementation of SPCA, using variable projection as an optimization strategy.
#
#' @details
#' Adapted from sparsepca package to use initial values to speed up convergence.
#'
#' Sparse principal component analysis is a modern variant of PCA. Specifically, SPCA attempts to find sparse
#' weight vectors (loadings), i.e., a weight vector with only a few 'active' (nonzero) values. This approach
#' leads to an improved interpretability of the model, because the principal components are formed as a
#' linear combination of only a few of the original variables. Further, SPCA avoids overfitting in a
#' high-dimensional data setting where the number of variables \eqn{p} is greater than the number of
#' observations \eqn{n}.
#'
#' Such a parsimonious model is obtained by introducing prior information like sparsity promoting regularizers.
#' More concretely, given an \eqn{(n,p)} data matrix \eqn{X}, SPCA attempts to minimize the following
#' objective function:
#'
#' \deqn{ f(A,B) = \frac{1}{2} \| X - X B A^\top \|^2_F + \psi(B) }
#'
#' where \eqn{B} is the sparse weight (loadings) matrix and \eqn{A} is an orthonormal matrix.
#' \eqn{\psi} denotes a sparsity inducing regularizer such as the LASSO (\eqn{\ell_1}{l1} norm) or the elastic net
#' (a combination of the \eqn{\ell_1}{l1} and \eqn{\ell_2}{l2} norm). The principal components \eqn{Z} are formed as
#'
#' \deqn{ Z = X B }{Z = X * B}
#'
#' and the data can be approximately rotated back as
#'
#' \deqn{ \tilde{X} = Z A^\top }{ X = Z t(A)}
#'
#' The print and summary method can be used to present the results in a nice format.
#'
#' @param X array_like; \cr
#' a real \eqn{(n, p)} input matrix (or data frame) to be decomposed.
#'
#' @param k integer; \cr
#' specifies the target rank, i.e., the number of components to be computed.
#'
#' @param alpha float; \cr
#' Sparsity controlling parameter. Higher values lead to sparser components.
#'
#' @param beta float; \cr
#' Amount of ridge shrinkage to apply in order to improve conditioning.
#'
#' @param center bool; \cr
#' logical value which indicates whether the variables should be
#' shifted to be zero centered (TRUE by default).
#'
#' @param scale bool; \cr
#' logical value which indicates whether the variables should
#' be scaled to have unit variance (FALSE by default).
#'
#' @param max_iter integer; \cr
#' maximum number of iterations to perform before exiting.
#'
#' @param tol float; \cr
#' stopping tolerance for the convergence criterion.
#'
#' @param verbose bool; \cr
#' logical value which indicates whether progress is printed.
#'
#' @param init initial values for parameter estimates used for "warm start" to accelerate convergence.
#'
#'
#'@return \code{spca} returns a list containing the following three components:
#'\item{loadings}{ array_like; \cr
#' sparse loadings (weight) vector; \eqn{(p, k)} dimensional array.
#'}
#'
#'\item{transform}{ array_like; \cr
#' the approximated inverse transform; \eqn{(p, k)} dimensional array.
#'}
#'
#'\item{scores}{ array_like; \cr
#' the principal component scores; \eqn{(n, k)} dimensional array.
#'}
#'
#'\item{eigenvalues}{ array_like; \cr
#' the approximated eigenvalues; \eqn{(k)} dimensional array.
#'}
#'
#'\item{center, scale}{ array_like; \cr
#' the centering and scaling used.
#'}
#'
#'
#' @references
#' \itemize{
#'
#' \item [1] N. B. Erichson, P. Zheng, K. Manohar, S. Brunton, J. N. Kutz, A. Y. Aravkin.
#' "Sparse Principal Component Analysis via Variable Projection."
#' Submitted to IEEE Journal of Selected Topics on Signal Processing (2018).
#' (available at arXiv: \url{https://arxiv.org/abs/1804.00341}).
#' }
#'
#' @author N. Benjamin Erichson, Peng Zheng, and Sasha Aravkin. Very slightly adapted by Gabriel Hoffman
#'
#' @seealso \code{\link{spca_path}}
#'
#' @examples
#'
#' # Create artificial data
#' m <- 10000
#' V1 <- rnorm(m, 0, 290)
#' V2 <- rnorm(m, 0, 300)
#' V3 <- -0.1*V1 + 0.1*V2 + rnorm(m,0,100)
#'
#' X <- cbind(V1,V1,V1,V1, V2,V2,V2,V2, V3,V3)
#' X <- X + matrix(rnorm(length(X),0,1), ncol = ncol(X), nrow = nrow(X))
#'
#' # Compute SPCA
#' out <- .spca(X, k=3, alpha=1e-3, beta=1e-3, center = TRUE, scale = FALSE, verbose=0)
#' print(out)
#' summary(out)
#'
.spca <- function(X, k=NULL, alpha=1e-4, beta=1e-4, center=TRUE, scale=FALSE, max_iter=1000, tol=1e-5, verbose=TRUE, init=NULL){
# Sparse PCA via variable projection (Erichson et al., 2018), adapted from the
# sparsepca package so that `init` (a previous fit's $loadings and $transform)
# can warm-start the solver. Minimizes
#   0.5*||X - X B t(A)||_F^2 + alpha*||B||_1 + 0.5*beta*||B||_F^2
# over a sparse loadings matrix B (p x k) and an orthonormal matrix A.
# Returns a list of class "spca" with: loadings (B), transform (A),
# scores (X %*% B), eigenvalues, objective (per-iteration trace), sdev, var,
# and the centering/scaling applied.
X <- as.matrix(X)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Checks
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Rows containing any NA are dropped entirely (listwise deletion).
if (any(is.na(X))) {
warning("Missing values are omitted: na.omit(X).")
X <- stats::na.omit(X)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Init rpca object
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
spcaObj = list(loadings = NULL,
transform = NULL,
scores = NULL,
eigenvalues = NULL,
center = center,
scale = scale)
n <- nrow(X)
p <- ncol(X)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set target rank
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# k defaults to, and is silently clamped at, the maximum possible rank.
if(is.null(k)) k <- min(n,p)
if(k > min(n,p)) k <- min(n,p)
if(k<1) stop("Target rank is not valid!")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Center/Scale data
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# When requested, the column means / column scales actually used are stored
# on the returned object; otherwise the slot is left as FALSE.
if(center == TRUE) {
spcaObj$center <- colMeans(X)
X <- sweep(X, MARGIN = 2, STATS = spcaObj$center, FUN = "-", check.margin = TRUE)
} else { spcaObj$center <- FALSE }
if(scale == TRUE) {
# Root-mean-square of each (already centered, if center=TRUE) column,
# with an n-1 denominator. Near-zero scales are clamped to 1 to avoid
# division blow-up; the complex branch handles complex-valued X.
spcaObj$scale <- sqrt(colSums(X**2) / (n-1))
if(is.complex(spcaObj$scale)) { spcaObj$scale[Re(spcaObj$scale) < 1e-8 ] <- 1+0i
} else {spcaObj$scale[spcaObj$scale < 1e-8] <- 1}
X <- sweep(X, MARGIN = 2, STATS = spcaObj$scale, FUN = "/", check.margin = TRUE)
} else { spcaObj$scale <- FALSE }
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Compute SVD for initialization of the Variable Projection Solver
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# A single SVD of X is taken up front; all subsequent iterations work in the
# right-singular-vector basis (V, d) rather than touching X again.
svd_init <- svd(X)
Dmax <- svd_init$d[1] # l2 norm
if( is.null(init) ){
# Cold start: both A and B begin at the leading k right singular vectors.
A <- svd_init$v[,1:k]
B <- svd_init$v[,1:k]
V <- svd_init$v
}else{
# Warm start: reuse the loadings/transform from a previous fit (e.g. the
# previous alpha on a regularization path) to accelerate convergence.
# spcaObj$loadings <- B
# spcaObj$transform <- A
B <- init$loadings
A <- init$transform
V <- svd_init$v
}
# VD = V diag(d) and VD2 = V diag(d^2); with these, X'X = VD2 %*% t(V) and
# the gradient/residual below never need X itself.
VD = sweep(V, MARGIN = 2, STATS = svd_init$d, FUN = "*", check.margin = TRUE)
VD2 = sweep(V, MARGIN = 2, STATS = svd_init$d**2, FUN = "*", check.margin = TRUE)
#--------------------------------------------------------------------
# Set Tuning Parameters
#--------------------------------------------------------------------
# alpha/beta are rescaled by the squared spectral norm so the user-facing
# values are comparable across data sets. nu is the proximal-gradient step
# size 1/(Dmax^2 + beta); kappa is the resulting soft-threshold level.
alpha <- alpha * Dmax**2
beta <- beta * Dmax**2
nu <- 1.0 / (Dmax**2 + beta)
kappa <- nu * alpha
obj <- c()
improvement <- Inf
#--------------------------------------------------------------------
# Apply Variable Projection Solver
#--------------------------------------------------------------------
# Alternate until the relative objective decrease falls below `tol` or
# `max_iter` is reached:
#   1) A-step: orthogonal Procrustes — given B, the optimal orthonormal A is
#      U t(V') from the SVD of X'X B.
#   2) B-step: one proximal gradient step on the smooth part, followed by
#      l1 soft-thresholding at level kappa.
noi <- 1
while (noi <= max_iter && improvement > tol) {
# Update A: X'XB = UDV'
# Z <- VD2 %*% (t(V) %*% B)
Z <- VD2 %*% crossprod(V, B)
svd_update <- svd(Z)
# A <- svd_update$u %*% t(svd_update$v)
A <- tcrossprod(svd_update$u, svd_update$v)
# Proximal Gradient Descent to Update B:
# grad <- VD2 %*% (t(V) %*% (A - B)) - beta * B
grad <- VD2 %*% crossprod(V, A - B) - beta * B
B_temp <- B + nu * grad
# l1 soft-threshold
# Entries within [-kappa, kappa] are set exactly to zero; the rest are
# shrunk toward zero by kappa. This is what produces sparse loadings.
idxH <- which(B_temp > kappa)
idxL <- which(B_temp <= -kappa)
B = matrix(0, nrow = nrow(B_temp), ncol = ncol(B_temp))
B[idxH] <- B_temp[idxH] - kappa
B[idxL] <- B_temp[idxL] + kappa
# compute residual
# R <- t(VD) - (t(VD) %*% B) %*% t(A)
# Residual expressed in the V basis: since U is orthonormal,
# ||X - X B t(A)||_F = ||D t(V) - D t(V) B t(A)||_F.
R <- t(VD) - tcrossprod(crossprod(VD, B), A)
# compute objective function
# Note: alpha and beta here are the rescaled values from above.
obj <- c(obj, 0.5 * sum(R**2) + alpha * sum(abs(B)) + 0.5 * beta * sum(B**2))
# Break if obj is not improving anymore
if(noi > 1){
improvement <- (obj[noi-1] - obj[noi]) / obj[noi]
}
# Trace
# NOTE(review): `(noi-1) %% 1 == 0` is always TRUE, so this gates on
# `verbose` alone — the modulo looks like a leftover print-every-m-iters
# knob with m hard-coded to 1.
if(verbose > 0 && (noi-1) %% 1 == 0) {
print(sprintf("Iteration: %4d, Objective: %1.5e, Relative improvement %1.5e", noi, obj[noi], improvement))
}
# Next iter
noi <- noi + 1
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Update spca object
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
spcaObj$loadings <- B
spcaObj$transform <- A
spcaObj$scores <- X %*% B
# Eigenvalues come from the singular values of the final A-step SVD.
# NOTE(review): `svd_update` is undefined if the loop body never ran
# (max_iter < 1) — this would error; confirm max_iter >= 1 is assumed.
spcaObj$eigenvalues <- svd_update$d / (n - 1)
spcaObj$objective <- obj
rownames(spcaObj$loadings) = colnames(X)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Explained variance and explained variance ratio
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
spcaObj$sdev <- sqrt( spcaObj$eigenvalues )
# Total variance of the (centered/scaled) data; real and imaginary parts
# are summed separately for complex input.
spcaObj$var <- sum( apply( Re(X) , 2, stats::var ) )
if(is.complex(X)) spcaObj$var <- Re(spcaObj$var + sum( apply( Im(X) , 2, stats::var ) ))
class(spcaObj) <- "spca"
return( spcaObj )
}
#' Class spcaPath
#'
#' Class \code{spcaPath} stores result of spca_path()
#'
#' @name spcaPath-class
#' @rdname spcaPath-class
#' @exportClass spcaPath
# An spcaPath behaves as a list (one .spca fit per alpha value) and carries
# the non-zero threshold `eps` used when counting active features.
setClass(
  Class = "spcaPath",
  contains = "list",
  slots = c(eps = "numeric")
)
#' Fit sparse PCA path
#'
#' Fit sparse PCA path for many levels of sparsity
#'
#' @param X array_like; \cr
#' a real \eqn{(n, p)} input matrix (or data frame) to be decomposed.
#'
#' @param k integer; \cr
#' specifies the target rank, i.e., the number of components to be computed.
#'
#' @param alpha array; \cr
#' array of values for sparsity controlling parameter. Higher values lead to sparser components.
#'
#' @param beta float; \cr
#' Amount of ridge shrinkage to apply in order to improve conditioning.
#'
#' @param center bool; \cr
#' logical value which indicates whether the variables should be
#' shifted to be zero centered (TRUE by default).
#'
#' @param scale bool; \cr
#' logical value which indicates whether the variables should
#' be scaled to have unit variance (FALSE by default).
#'
#' @param max_iter integer; \cr
#' maximum number of iterations to perform before exiting.
#'
#' @param tol float; \cr
#' stopping tolerance for the convergence criterion.
#'
#' @param verbose bool; \cr
#' logical value which indicates whether progress is printed.
#'
#' @param eps values greater than eps are considered non-zero
#'
#' @description Adapted from sparsepca package to use a "warm start" approach that uses the parameter estimates from the previous value of alpha as initial values
#'
#' @seealso \code{\link{plotPath}}
#' @examples
#'
#' n = 10000
#' p = 100
#' X = matrix(rnorm(n*p), n, p)
#'
#' res = spca_path(X, k=2, center=TRUE, scale=TRUE, alpha=10^-seq(1, 4, length.out=10))
#'
#' plotPath( res )
#'
#' @export
spca_path <- function(X, k=NULL, alpha=10^-seq(1, 4, length.out=100), beta=1e-4, center=TRUE, scale=FALSE, max_iter=1000, tol=1e-5, eps=1e-8, verbose=TRUE){
  # Fit a sparse PCA regularization path over the given alpha values.
  #
  # Fits are run in ascending alpha order so the least-sparse (fullest) model
  # comes first; each subsequent, sparser fit is warm-started from the
  # previous fit's loadings/transform. Returns an spcaPath object (a list of
  # .spca results, one per fitted alpha, with $alpha recorded on each).
  alpha <- sort(alpha)
  spcaRes <- list()
  if (verbose) message("Run sparse PCA regularization path")
  if (verbose) message(sprintf("%-8s%-10s%-8s", "index", "alpha", "# active"))
  for (i in seq_along(alpha)) {
    # Warm start: reuse estimates from the previous (denser) solution.
    init <- if (i > 1) spcaRes[[i - 1]] else NULL
    # Bug fix: max_iter was previously accepted but never forwarded to .spca.
    spcaRes[[i]] <- .spca(X = X, k = k, alpha = alpha[i], beta = beta,
                          center = center, scale = scale, max_iter = max_iter,
                          tol = tol, verbose = FALSE, init = init)
    spcaRes[[i]]$alpha <- alpha[i]
    count <- countNonzero(spcaRes[[i]], eps)
    # Bug fix: the progress line was printed unconditionally; it now
    # respects `verbose`, matching the header messages above.
    if (verbose) {
      message(sprintf("%-8s%-10s%-8s", i,
                      format(alpha[i], digits = 3, scientific = TRUE), count))
    }
    # Once no feature is active, any larger alpha stays empty — stop early.
    if (count == 0) break
  }
  new("spcaPath", spcaRes, eps = eps)
}
# Count the features (rows of the loadings matrix) that are "active", i.e.
# whose largest absolute loading across all components exceeds `eps`.
countNonzero <- function(res, eps = 1e-8) {
  largest_per_feature <- apply(abs(res$loadings), 1, max)
  sum(largest_per_feature > eps)
}
#' Plot sparse PCA regularization path
#'
#' Plot sparse PCA regularization path
#'
#' @param regPath object of class \code{spcaPath}, as returned by \code{spca_path()}
#' @param eps values greater than eps are considered non-zero
#'
#' @seealso \code{\link{spca_path}}
#' @examples
#'
#' n = 10000
#' p = 100
#' X = matrix(rnorm(n*p), n, p)
#'
#' regPath = spca_path(X, k=2, center=TRUE, scale=TRUE, alpha=10^-seq(1, 4, length.out=10))
#'
#' plotPath( regPath )
#'
#' @import ggplot2
#' @export
# Plot the number of active features against alpha for each fit on a sparse
# PCA regularization path produced by spca_path().
plotPath <- function(regPath, eps = 1e-8) {
  # Only accept results produced by spca_path()
  if (!is(regPath, "spcaPath")) {
    stop("Object must be spcaPath")
  }
  # Dummy bindings so R CMD check does not flag the aes() columns as globals
  alpha <- nactive <- NA
  # One row per regularization level: alpha and its count of active features
  path_df <- do.call(rbind, lapply(regPath, function(fit) {
    data.frame(alpha = fit$alpha, nactive = countNonzero(fit, eps))
  }))
  ggplot(path_df, aes(alpha, nactive)) +
    geom_point() +
    geom_line() +
    theme_bw() +
    theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5)) +
    ylab("# active features") +
    ggtitle("Sparse PCA regularization path")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment