andybega/predict-ebma.r

## predict-ebma.r
# EBMA ensemble prediction
#
# Aggregate a set of predictions into an ensemble prediction using a fitted
# EBMA model. Recreates the functionality used to calculate test predictions
# during a EBMA calibration call.
#
# ebma - A fitted EBMA object; only works with logit EBMA.
# inputs - Matrix or data frame in which each column is a series of
#   probabilities.
# outcome - Vector of observed outcomes. Lenght must match rows in inputs.
# useModelParams - Should the constant/predictor parameters of EBMA be used, or
#   only the bias reduction transformation? Both, by default.

# Function for logit EBMA that will take input predictions and aggregate
# them to an EBMA prediction.
predict.ebma <- function(ebma, inputs, outcome, useModelParams=TRUE) {
  if (!class(ebma)=="FDatFitLogit") stop("Only works with logit EBMA")

  inputs <- as.matrix(inputs)
  exp <- ebma@exp
  modelParams <- ebma@modelParams[, , 1]
  W <- ebma@modelWeights

  if (useModelParams == TRUE) {
    .adjPred <- .makeAdj(inputs, exp)
    inputsAdj <- array(NA, dim = c(nrow(inputs), ncol(inputs)))

    for (k in 1:ncol(inputs)) {
      inputsAdj[, k] <- affineTransform(.adjPred[, k], modelParams[, k])
      inputsAdj[, k] <- plogis(inputsAdj[, k])
    }
  }
  if (useModelParams == FALSE) {
    .adjPred <- .makeAdj(inputs)
    .adjPred[outcome == 0, ] <- (1 - plogis(.adjPred[outcome == 0, ]))
    .adjPred[outcome == 1, ] <- (plogis(.adjPred[outcome == 1, ]))
    inputsAdj <- .adjPred
  }
  # Weight inputs, with handling for missing predictions
  if (!any(is.na(inputsAdj))) {
    bma <- inputsAdj %*% W
  } else {
    # Weigh non-missing components, then adjust denominator to account for
    # weights not used because of missing values.
    inputsAdjNA <- inputsAdj
    inputsAdjNA[is.na(inputsAdjNA)] <- 0
    bma <- inputsAdjNA %*% W
    denom <- (!is.na(inputsAdj) * 1) %*% W
    denom[denom==0] <- NA  # prevent /0 if entire row is NA
    bma <- bma/denom
  }
  as.vector(bma)
}

# Input adjustment
.makeAdj <- function(x, exp) {
  .adjPred <- qlogis(x)
  .negative <- .adjPred < 0
  .pos <- .adjPred > 1
  .adjPred <- ((1 + abs(.adjPred))^(1/exp)) - 1
  .miss <- is.na(.adjPred)
  .negative[.miss] <- FALSE
  .adjPred[.negative] <- .adjPred[.negative] * (-1)
  .adjPred[.miss] <- NA
  .adjPred
}

# Recreate glm() transformation
affineTransform <- function(x, b) {
  # Affine transform of y = b1 + x*b2
  y <- cbind(1, x) %*% b
  y
}


# Testing -----------------------------------------------------------------
#
#   Make sure output replicates original EBMA output
#   Check handling of NA's in input predictions
#

setwd("~/Work/ebma-ex/fcast-func")

# Example with known solution.
# `pr_test` has 7 input streams of predicitons, as well as ebma prediction
# that was calculated as the test portion of the EBMA calibration, thus
# using the EBMAforecast internal functions.
# https://dl.dropboxusercontent.com/u/46758293/data/ebma-rig.rda
load("ebma-rig.rda")

# Pull out components
inputs    <- pr_test[, grep("i[0-9]", colnames(pr_test))]
outcome   <- pr_test[, "y"]
ebma_orig <- pr_test[, "ebma"]

# Does the redo match the original EBMA calculations?
ebma_redo <- predict.ebma(ensemble, inputs, outcome)

isTRUE(all.equal(ebma_orig, ebma_redo))

# Handling NA's
# Scattered NA's, not complete row
inputs_pNA <- inputs
inputs_pNA[1, 1] <- NA
inputs_pNA[2, c(1:3)] <- NA

ebma_redo2 <- predict.ebma(ensemble, inputs_pNA, outcome)
head(cbind(ebma_orig, ebma_redo2))

# Complete row of NA's
inputs_rowNA <- inputs
inputs_rowNA[1, ] <- NA

ebma_redo3 <- predict.ebma(ensemble, inputs_rowNA, outcome)
head(cbind(ebma_orig, ebma_redo3))
	# EBMA ensemble prediction
	#
	# Aggregate a set of predictions into an ensemble prediction using a fitted
	# EBMA model. Recreates the functionality used to calculate test predictions
	# during a EBMA calibration call.
	#
	# ebma - A fitted EBMA object; only works with logit EBMA.
	# inputs - Matrix or data frame in which each column is a series of
	# probabilities.
	# outcome - Vector of observed outcomes. Lenght must match rows in inputs.
	# useModelParams - Should the constant/predictor parameters of EBMA be used, or
	# only the bias reduction transformation? Both, by default.

	# Function for logit EBMA that will take input predictions and aggregate
	# them to an EBMA prediction.
	predict.ebma <- function(ebma, inputs, outcome, useModelParams=TRUE) {
	if (!class(ebma)=="FDatFitLogit") stop("Only works with logit EBMA")

	inputs <- as.matrix(inputs)
	exp <- ebma@exp
	modelParams <- ebma@modelParams[, , 1]
	W <- ebma@modelWeights

	if (useModelParams == TRUE) {
	.adjPred <- .makeAdj(inputs, exp)
	inputsAdj <- array(NA, dim = c(nrow(inputs), ncol(inputs)))

	for (k in 1:ncol(inputs)) {
	inputsAdj[, k] <- affineTransform(.adjPred[, k], modelParams[, k])
	inputsAdj[, k] <- plogis(inputsAdj[, k])
	}
	}
	if (useModelParams == FALSE) {
	.adjPred <- .makeAdj(inputs)
	.adjPred[outcome == 0, ] <- (1 - plogis(.adjPred[outcome == 0, ]))
	.adjPred[outcome == 1, ] <- (plogis(.adjPred[outcome == 1, ]))
	inputsAdj <- .adjPred
	}
	# Weight inputs, with handling for missing predictions
	if (!any(is.na(inputsAdj))) {
	bma <- inputsAdj %*% W
	} else {
	# Weigh non-missing components, then adjust denominator to account for
	# weights not used because of missing values.
	inputsAdjNA <- inputsAdj
	inputsAdjNA[is.na(inputsAdjNA)] <- 0
	bma <- inputsAdjNA %*% W
	denom <- (!is.na(inputsAdj) * 1) %*% W
	denom[denom==0] <- NA # prevent /0 if entire row is NA
	bma <- bma/denom
	}
	as.vector(bma)
	}

	# Input adjustment
	.makeAdj <- function(x, exp) {
	.adjPred <- qlogis(x)
	.negative <- .adjPred < 0
	.pos <- .adjPred > 1
	.adjPred <- ((1 + abs(.adjPred))^(1/exp)) - 1
	.miss <- is.na(.adjPred)
	.negative[.miss] <- FALSE
	.adjPred[.negative] <- .adjPred[.negative] * (-1)
	.adjPred[.miss] <- NA
	.adjPred
	}

	# Recreate glm() transformation
	affineTransform <- function(x, b) {
	# Affine transform of y = b1 + x*b2
	y <- cbind(1, x) %*% b
	y
	}


	# Testing -----------------------------------------------------------------
	#
	# Make sure output replicates original EBMA output
	# Check handling of NA's in input predictions
	#

	setwd("~/Work/ebma-ex/fcast-func")

	# Example with known solution.
	# `pr_test` has 7 input streams of predicitons, as well as ebma prediction
	# that was calculated as the test portion of the EBMA calibration, thus
	# using the EBMAforecast internal functions.
	# https://dl.dropboxusercontent.com/u/46758293/data/ebma-rig.rda
	load("ebma-rig.rda")

	# Pull out components
	inputs <- pr_test[, grep("i[0-9]", colnames(pr_test))]
	outcome <- pr_test[, "y"]
	ebma_orig <- pr_test[, "ebma"]

	# Does the redo match the original EBMA calculations?
	ebma_redo <- predict.ebma(ensemble, inputs, outcome)

	isTRUE(all.equal(ebma_orig, ebma_redo))

	# Handling NA's
	# Scattered NA's, not complete row
	inputs_pNA <- inputs
	inputs_pNA[1, 1] <- NA
	inputs_pNA[2, c(1:3)] <- NA

	ebma_redo2 <- predict.ebma(ensemble, inputs_pNA, outcome)
	head(cbind(ebma_orig, ebma_redo2))

	# Complete row of NA's
	inputs_rowNA <- inputs
	inputs_rowNA[1, ] <- NA

	ebma_redo3 <- predict.ebma(ensemble, inputs_rowNA, outcome)
	head(cbind(ebma_orig, ebma_redo3))