fdabl/variable-selection-comparison.R

## variable-selection-comparison.R
library('doParallel')
registerDoParallel(cores = 4)


#' Spike-and-Slab Regression using Gibbs Sampling for p > 1 predictors
#'
#' @param y: vector of responses
#' @param X: matrix of predictor values
#' @param nr_samples: indicates number of samples drawn
#' @param a1: parameter a1 of Gamma prior on variance sigma2e
#' @param a2: parameter a2 of Gamma prior on variance sigma2e
#' @param theta: parameter of prior over mixture weight
#' @param burnin: number of samples we discard ('burnin samples')
#'
#' @returns matrix of posterior samples from parameters pi, beta, tau2, sigma2e, theta
ss_regress <- function(
  y, X, a1 = .01, a2 = .01, theta = .5,
  a = 1, b = 1, s = 1/2, nr_samples = 4000, nr_burnin = round(nr_samples / 4, 2)
) {

  p <- ncol(X)
  n <- nrow(X)

  # res is where we store the posterior samples
  res <- matrix(NA, nrow = nr_samples, ncol = 2*p + 1 + 1 + 1)

  colnames(res) <- c(
    paste0('pi', seq(p)),
    paste0('beta', seq(p)),
    'sigma2e', 'tau2', 'theta'
  )

  # take the MLE estimate as the values for the first sample
  m <- lm(y ~ X - 1)
  res[1, ] <- c(rep(0, p), coef(m), var(predict(m) - y), 1, .5)

  # compute only once
  XtX <- t(X) %*% X
  Xty <- t(X) %*% y
  var_y <- as.numeric(var(y))

  # we start running the Gibbs sampler
  for (i in seq(2, nr_samples)) {

    # first, get all the values of the previous time point
    pi_prev <- res[i-1, seq(p)]
    beta_prev <- res[i-1, seq(p + 1, 2*p)]
    sigma2e_prev <- res[i-1, ncol(res) - 2]
    tau2_prev <- res[i-1, ncol(res) - 1]
    theta_prev <- res[i-1, ncol(res)]

    ## Start sampling from the conditional posterior distributions
    ##############################################################

    # sample theta from a Beta
    theta_new <- rbeta(1, a + sum(pi_prev), b + sum(1 - pi_prev))

    # sample sigma2e from an Inverse-Gamma
    err <- y - X %*% beta_prev
    sigma2e_new <- 1 / rgamma(1, a1 + n/2, a2 + t(err) %*% err / 2)

    # sample tau2 from an Inverse Gamma
    tau2_new <- 1 / rgamma(
      1, 1/2 + 1/2 * sum(pi_prev),
      s^2/2 + t(beta_prev) %*% beta_prev / (2*var_y)
    )

    # sample beta from multivariate Gaussian
    beta_cov <- qr.solve((1/sigma2e_new) * XtX + diag(1/(tau2_new*var_y), p))
    beta_mean <- beta_cov %*% Xty * (1/sigma2e_new)
    beta_new <- mvtnorm::rmvnorm(1, beta_mean, beta_cov)

    # sample each pi_j in random order
    for (j in sample(seq(p))) {

      # get the betas for which beta_j is zero
      pi0 <- pi_prev
      pi0[j] <- 0
      bp0 <- t(beta_new * pi0)

      # compute the z variables and the conditional variance
      xj <- X[, j]
      z <- y - X %*% bp0
      cond_var <- (sum(xj^2) + sigma2e_new/(tau2_new*var_y))

      # compute chance parameter of the conditional posterior of pi_j (Bernoulli)
      l0 <- log(1 - theta_new)
      l1 <- (
        log(theta_new) - .5 * log(tau2_new) +
          sum(xj*z)^2 / (2*sigma2e_new*cond_var) + .5 * log(sigma2e_new / cond_var)
      )

      # sample pi_j from a Bernoulli
      pi_prev[j] <- rbinom(1, 1, exp(l1) / (exp(l1) + exp(l0)))
    }

    pi_new <- pi_prev

    # add new samples
    res[i, ] <- c(pi_new, beta_new*pi_new, sigma2e_new, tau2_new, theta_new)
  }

  # remove the first nr_burnin number of samples
  res[-seq(nr_burnin), ]
}


#' Calls the ss_regress function in parallel
#'
#' @params same as ss_regress
#' @params nr_cores: numeric, number of cores to run ss_regress2 in parallel
#' @returns a list with nr_cores entries which are posterior samples
ss_regressm <- function(
  y, X, a1 = .01, a2 = .01, theta = .5,
  a = 1, b = 1, s = 1/2, nr_samples = 4000,
  nr_burnin = round(nr_samples / 4, 2), nr_cores = 4
) {

  samples <- foreach(i = seq(nr_cores), .combine = rbind) %dopar% {
    ss_regress(
      y = y, X = X, a1 = a1, a2 = a2, theta = theta,
      a = a, b = b, s = s, nr_samples = nr_samples,
      nr_burnin = nr_burnin
    )
  }

  samples
}


data(attitude)
std <- function(x) (x - mean(x)) / sd(x)

attitude_z <- data.frame(apply(attitude, 2, std))
yz <- attitude_z[, 1]
Xz <- attitude_z[, -1]


### Spike-and-Slab Results
###########################
samples <- ss_regressm(
  y = yz, X = as.matrix(Xz), a1 = .01, a2 = .01,
  a = 1, b = 1, s = 1/2, nr_cores = 4, nr_samples = 4000
)

res_table <- cbind(
  post_means[grepl('beta', names(post_means))],
  post_means[grepl('pi', names(post_means))]
)
rownames(res_table) <- colnames(Xz)
colnames(res_table) <- c('Post. Mean', 'Post. Inclusion')

round(res_table, 3)


### BayesFactor Results (Liang et al., 2008)
############################################
library('BayesFactor')
m_bf <- regressionBF(rating ~ ., data = attitude_z)
m_bf
plot(head(m_bf, 10))

### BAS Results (Li & Clyde, 2018)
##################################
library('BAS')
m_bas <- bas.lm(rating ~ ., data = attitude_z, prior = 'ZS-null', modelprior=uniform(), method = 'BAS')
coef(m_bas)
	library('doParallel')
	registerDoParallel(cores = 4)


	#' Spike-and-Slab Regression using Gibbs Sampling for p > 1 predictors
	#'
	#' @param y: vector of responses
	#' @param X: matrix of predictor values
	#' @param nr_samples: indicates number of samples drawn
	#' @param a1: parameter a1 of Gamma prior on variance sigma2e
	#' @param a2: parameter a2 of Gamma prior on variance sigma2e
	#' @param theta: parameter of prior over mixture weight
	#' @param burnin: number of samples we discard ('burnin samples')
	#'
	#' @returns matrix of posterior samples from parameters pi, beta, tau2, sigma2e, theta
	ss_regress <- function(
	y, X, a1 = .01, a2 = .01, theta = .5,
	a = 1, b = 1, s = 1/2, nr_samples = 4000, nr_burnin = round(nr_samples / 4, 2)
	) {

	p <- ncol(X)
	n <- nrow(X)

	# res is where we store the posterior samples
	res <- matrix(NA, nrow = nr_samples, ncol = 2*p + 1 + 1 + 1)

	colnames(res) <- c(
	paste0('pi', seq(p)),
	paste0('beta', seq(p)),
	'sigma2e', 'tau2', 'theta'
	)

	# take the MLE estimate as the values for the first sample
	m <- lm(y ~ X - 1)
	res[1, ] <- c(rep(0, p), coef(m), var(predict(m) - y), 1, .5)

	# compute only once
	XtX <- t(X) %*% X
	Xty <- t(X) %*% y
	var_y <- as.numeric(var(y))

	# we start running the Gibbs sampler
	for (i in seq(2, nr_samples)) {

	# first, get all the values of the previous time point
	pi_prev <- res[i-1, seq(p)]
	beta_prev <- res[i-1, seq(p + 1, 2*p)]
	sigma2e_prev <- res[i-1, ncol(res) - 2]
	tau2_prev <- res[i-1, ncol(res) - 1]
	theta_prev <- res[i-1, ncol(res)]

	## Start sampling from the conditional posterior distributions
	##############################################################

	# sample theta from a Beta
	theta_new <- rbeta(1, a + sum(pi_prev), b + sum(1 - pi_prev))

	# sample sigma2e from an Inverse-Gamma
	err <- y - X %*% beta_prev
	sigma2e_new <- 1 / rgamma(1, a1 + n/2, a2 + t(err) %*% err / 2)

	# sample tau2 from an Inverse Gamma
	tau2_new <- 1 / rgamma(
	1, 1/2 + 1/2 * sum(pi_prev),
	s^2/2 + t(beta_prev) %% beta_prev / (2var_y)
	)

	# sample beta from multivariate Gaussian
	beta_cov <- qr.solve((1/sigma2e_new) * XtX + diag(1/(tau2_new*var_y), p))
	beta_mean <- beta_cov %% Xty (1/sigma2e_new)
	beta_new <- mvtnorm::rmvnorm(1, beta_mean, beta_cov)

	# sample each pi_j in random order
	for (j in sample(seq(p))) {

	# get the betas for which beta_j is zero
	pi0 <- pi_prev
	pi0[j] <- 0
	bp0 <- t(beta_new * pi0)

	# compute the z variables and the conditional variance
	xj <- X[, j]
	z <- y - X %*% bp0
	cond_var <- (sum(xj^2) + sigma2e_new/(tau2_new*var_y))

	# compute chance parameter of the conditional posterior of pi_j (Bernoulli)
	l0 <- log(1 - theta_new)
	l1 <- (
	log(theta_new) - .5 * log(tau2_new) +
	sum(xjz)^2 / (2sigma2e_newcond_var) + .5 log(sigma2e_new / cond_var)
	)

	# sample pi_j from a Bernoulli
	pi_prev[j] <- rbinom(1, 1, exp(l1) / (exp(l1) + exp(l0)))
	}

	pi_new <- pi_prev

	# add new samples
	res[i, ] <- c(pi_new, beta_new*pi_new, sigma2e_new, tau2_new, theta_new)
	}

	# remove the first nr_burnin number of samples
	res[-seq(nr_burnin), ]
	}


	#' Calls the ss_regress function in parallel
	#'
	#' @params same as ss_regress
	#' @params nr_cores: numeric, number of cores to run ss_regress2 in parallel
	#' @returns a list with nr_cores entries which are posterior samples
	ss_regressm <- function(
	y, X, a1 = .01, a2 = .01, theta = .5,
	a = 1, b = 1, s = 1/2, nr_samples = 4000,
	nr_burnin = round(nr_samples / 4, 2), nr_cores = 4
	) {

	samples <- foreach(i = seq(nr_cores), .combine = rbind) %dopar% {
	ss_regress(
	y = y, X = X, a1 = a1, a2 = a2, theta = theta,
	a = a, b = b, s = s, nr_samples = nr_samples,
	nr_burnin = nr_burnin
	)
	}

	samples
	}



	data(attitude)
	std <- function(x) (x - mean(x)) / sd(x)

	attitude_z <- data.frame(apply(attitude, 2, std))
	yz <- attitude_z[, 1]
	Xz <- attitude_z[, -1]


	### Spike-and-Slab Results
	###########################
	samples <- ss_regressm(
	y = yz, X = as.matrix(Xz), a1 = .01, a2 = .01,
	a = 1, b = 1, s = 1/2, nr_cores = 4, nr_samples = 4000
	)

	res_table <- cbind(
	post_means[grepl('beta', names(post_means))],
	post_means[grepl('pi', names(post_means))]
	)
	rownames(res_table) <- colnames(Xz)
	colnames(res_table) <- c('Post. Mean', 'Post. Inclusion')

	round(res_table, 3)


	### BayesFactor Results (Liang et al., 2008)
	############################################
	library('BayesFactor')
	m_bf <- regressionBF(rating ~ ., data = attitude_z)
	m_bf
	plot(head(m_bf, 10))

	### BAS Results (Li & Clyde, 2018)
	##################################
	library('BAS')
	m_bas <- bas.lm(rating ~ ., data = attitude_z, prior = 'ZS-null', modelprior=uniform(), method = 'BAS')
	coef(m_bas)