jonlachmann/var_shapr.R

## var_shapr.R
# Generate mock data (two variable which go from 1-100 and 101-200 with some random noise).
data <- matrix(rnorm(200, 1:200, 0.5), 100, 2)

K <- ncol(data) # Number of variables in model
horizon <- 12 # Forecast horizon
p <- 2 # Lag order of model

# Create the lagged matrix of data X
X <- embed(data, p+1)[,-seq_len(K)]
X <- cbind(X, 1)
colnames(X) <- c("x1_1", "x1_2", "x2_1", "x2_2", "intercept")

# Create the matrix for the dependent variable Y
Y <- tail(data, -p)
colnames(Y) <- c("x1", "x2")

# Estimate a basic VAR model
coefs <- qr.solve(X, Y)

#' Basic predict function for a VAR model.
#' @param model The model to use, should contain coefficients.
#' @param X The vector of data to use to start the prediction from, i.e. the last observation of the lagged data.
#' @param h The forecast horizon.
#' @return A matrix where each row contains subsequent steps ahead and the columns are the different variables.
pred_var <- function (model, X, h=1) {
  res <- matrix(NA, h, model$k)
  for (i in seq_len(h)) {
    pred <- as.numeric(X) %*% model$coefs
    res[i, ] <- pred
    X <- c(pred, X[c(1, 2, 5)])
  }
  return(res)
}

#' Predict a specific variable from a VAR model using multiple X vectors.
#' @param model The model to use for predictions.
#' Should have a property "step" to select forecast horizon and a property "variable" to select which variable to forecast.
#' @param newdata A data.table where each row contains an X vector to use for prediction.
#' @return A vector where each scalar is a prediction produced from a specific X vector.
pred_specific_var <- function (model, newdata) {
  step <- model$step
  variable <- model$variable
  res <- as.data.frame(matrix(NA, nrow(newdata), step))
  for (i in seq_len(nrow(newdata))) {
    res[i, ] <- pred_var(model, as.numeric(newdata[i, ]), step)[, variable]
  }
  return(res)
}

# Assemble the "model" object
model <- list(coefs=coefs, k=K, step=horizon, variable=1)
class(model) <- "var"

# Create a matrix of the last observation seen before the prediction as the X we want to explain.
x_explain <- matrix(c(Y[nrow(Y), ], X[nrow(X), c(1, 2, 5)]), nrow=1)
colnames(x_explain) <- c("x1_1", "x1_2", "x2_1", "x2_2", "intercept")

# Group X by variable.
groups <- list(x1=c("x1_1", "x1_2"), x2=c("x2_1", "x2_2"), intercept="intercept")

# Load the shapr package on the branch output_size (you should be inside the shapr folder when running this).
devtools::load_all()

# Create the explanation, note that prediction zero is of length "output_size" here, but not necessarily containing a repeated value.
explanation <- explain(
  model = model,
  x_explain = x_explain,
  x_train = X,
  approach = "empirical",
  prediction_zero = rep(Y[98,1], 12),
  predict_model = pred_specific_var,
  group = groups,
  output_size = 12,
  parallel = FALSE
)

# Print the explanation (plotting does not work yet).
print(explanation)
	# Generate mock data (two variable which go from 1-100 and 101-200 with some random noise).
	data <- matrix(rnorm(200, 1:200, 0.5), 100, 2)

	K <- ncol(data) # Number of variables in model
	horizon <- 12 # Forecast horizon
	p <- 2 # Lag order of model

	# Create the lagged matrix of data X
	X <- embed(data, p+1)[,-seq_len(K)]
	X <- cbind(X, 1)
	colnames(X) <- c("x1_1", "x1_2", "x2_1", "x2_2", "intercept")

	# Create the matrix for the dependent variable Y
	Y <- tail(data, -p)
	colnames(Y) <- c("x1", "x2")

	# Estimate a basic VAR model
	coefs <- qr.solve(X, Y)

	#' Basic predict function for a VAR model.
	#' @param model The model to use, should contain coefficients.
	#' @param X The vector of data to use to start the prediction from, i.e. the last observation of the lagged data.
	#' @param h The forecast horizon.
	#' @return A matrix where each row contains subsequent steps ahead and the columns are the different variables.
	pred_var <- function (model, X, h=1) {
	res <- matrix(NA, h, model$k)
	for (i in seq_len(h)) {
	pred <- as.numeric(X) %*% model$coefs
	res[i, ] <- pred
	X <- c(pred, X[c(1, 2, 5)])
	}
	return(res)
	}

	#' Predict a specific variable from a VAR model using multiple X vectors.
	#' @param model The model to use for predictions.
	#' Should have a property "step" to select forecast horizon and a property "variable" to select which variable to forecast.
	#' @param newdata A data.table where each row contains an X vector to use for prediction.
	#' @return A vector where each scalar is a prediction produced from a specific X vector.
	pred_specific_var <- function (model, newdata) {
	step <- model$step
	variable <- model$variable
	res <- as.data.frame(matrix(NA, nrow(newdata), step))
	for (i in seq_len(nrow(newdata))) {
	res[i, ] <- pred_var(model, as.numeric(newdata[i, ]), step)[, variable]
	}
	return(res)
	}

	# Assemble the "model" object
	model <- list(coefs=coefs, k=K, step=horizon, variable=1)
	class(model) <- "var"

	# Create a matrix of the last observation seen before the prediction as the X we want to explain.
	x_explain <- matrix(c(Y[nrow(Y), ], X[nrow(X), c(1, 2, 5)]), nrow=1)
	colnames(x_explain) <- c("x1_1", "x1_2", "x2_1", "x2_2", "intercept")

	# Group X by variable.
	groups <- list(x1=c("x1_1", "x1_2"), x2=c("x2_1", "x2_2"), intercept="intercept")

	# Load the shapr package on the branch output_size (you should be inside the shapr folder when running this).
	devtools::load_all()

	# Create the explanation, note that prediction zero is of length "output_size" here, but not necessarily containing a repeated value.
	explanation <- explain(
	model = model,
	x_explain = x_explain,
	x_train = X,
	approach = "empirical",
	prediction_zero = rep(Y[98,1], 12),
	predict_model = pred_specific_var,
	group = groups,
	output_size = 12,
	parallel = FALSE
	)

	# Print the explanation (plotting does not work yet).
	print(explanation)