Skip to content

Instantly share code, notes, and snippets.

@JWiley
Created January 8, 2013 02:05
Show Gist options
  • Save JWiley/4480442 to your computer and use it in GitHub Desktop.
Save JWiley/4480442 to your computer and use it in GitHub Desktop.
# import generics
# (cheap version of importFrom() in a package)
fixef <- nlme:::fixef
ranef <- nlme:::ranef
# define generic prediction function
# but using my functions/methods
predict2 <- function (object, ...) {
UseMethod("predict2")
}
# extract fixed and random effect parameter names
# from an MCMCglmm object
paramNames.MCMCglmm <- function(object, ...) {
fNames <- as.character(attr(terms(object$Fixed$formula), "variables"))[-c(1, 2)]
rNames <- NULL
if (!is.null(object$Random$formula)) {
rNames <- as.character(attr(terms(object$Random$formula), "variables"))[-c(1)]
}
if ("(Intercept)" %in% colnames(object[["Sol"]])) {
fNames <- c("(Intercept)", fNames)
}
list(fixed = fNames, random = rNames)
}
# extract the levels of the random effects
# from an MCMCglmm object
ranefLevels <- function(object, data, ...) {
n <- paramNames.MCMCglmm(object)$random
res <- lapply(n, function(n) {
levels(data[, n])
})
names(res) <- n
return(res)
}
# extract the fixed and random effects portions from an MCMCglmm object
# note for the random, these are the estimates themselves
# not the variability in the estimates
# two use options let you get either just the posterior mean
# or all the posterior samples
.extractEffects <- function(object, use = c("all", "mean"),
which = c("fixed", "random"), ...) {
use <- match.arg(use)
which <- match.arg(which)
b <- object[["Sol"]]
eff <- switch(which,
fixed = {
# this does not work because the intercept term contains
# special characters (), that screw with the regular expressions
# cannot use fixed matching because factor variables can
# expand with arbitrarily named levels
#unlist(lapply(paramNames.MCMCglmm(object)$fixed, function(n) {
# grep(paste0("^", n, ".*$"), colnames(b), value = TRUE)
#}))
object$X@Dimnames[[2]]
},
random = {
unlist(lapply(paramNames.MCMCglmm(object)$random, function(n) {
grep(paste0("^", n, "\\..*$"), colnames(b), value = TRUE)
}))
}
)
b <- b[, eff]
switch(use,
all = t(b),
mean = as.matrix(colMeans(b)))
}
# methods for fixef and ranef to extract the fixed and random effects
# for an MCMCglmm model object
fixef.MCMCglmm <- function(object, use = c("all", "mean"), ...) {
.extractEffects(object = object, use = use, which = "fixed", ...)
}
ranef.MCMCglmm <- function(object, use = c("all", "mean"), ...) {
.extractEffects(object = object, use = use, which = "random", ...)
}
# this is the main workhorse
# a predict function for MCMCglmm objects
# takes a few core arguments
# the model object
# and data matrices, X (fixed effects) and Z (random effects)
# if X and Z are missing, attempts to fill in from the
# model object (which optionally saves them)
# If X and Z are specified or NULL, they are not used
# this is useful either for out of sample predictions
# or to use just the fixed effects, for example
# use can use all posterior samples or the mean
# all is nice because it lets you construct HPD intervals around the predicted
# values, rather than just get an estimate
# the mean is nice because if that is all you care about, it is much much faster
# type is either lp for linear predictor or response for the predicted values
# on the response scale. Currently this is only meant for ordinal models,
# theoretically could be extended but the code is a pain
# varepsilon is the residual variance (probably could get it from the model object??)
# which I usually fix at 1 for probit/logit models
predict2.MCMCglmm <- function(object, X, Z, use = c("all", "mean"),
type = c("lp", "response"), varepsilon, ...) {
use <- match.arg(use)
type <- match.arg(type)
if (!is.null(object$X) && missing(X)) {
X <- object$X
}
if (!is.null(object$Z) && missing(Z)) {
Z <- object$Z
}
if (missing(X)) X <- NULL
if (missing(Z)) Z <- NULL
Xb <- Zu <- 0L
if (!is.null(X)) {
Xb <- X %*% as.matrix(fixef(object, use = use))
}
if (!is.null(Z)) {
Zu <- Z %*% as.matrix(ranef(object, use = use))
}
res <- t(as.matrix(Xb + Zu))
dimnames(res) <- list(switch(use,
all = paste0("Rep.", 1:nrow(res)), mean = NULL), NULL)
# get predicted probabilities for each category from an ordered probit model
# set the resulting object class to MCMCglmlmPredictedProbs
# which I wrote a summary method for
if (type == "response") {
if (all(object$family %in% c("ordinal"))) {
if (missing(varepsilon)) {
varepsilon <- 1
}
CP <- switch(use,
all = object$CP,
mean = colMeans(object$CP))
CP <- as.list(as.data.frame(CP))
CP <- lapply(CP, function(x) {do.call("cbind", rep(list(x), dim(res)[2L]))})
CP <- c(-Inf, 0, CP, Inf)
p <- function(x) {pnorm(x - res, 0, sqrt(varepsilon + 1))}
q <- vector("list", length(CP) - 2)
for (i in 2:(length(CP) - 1)) {
q[[i - 1]] <- mcmc(p(CP[[i + 1]]) - p(CP[[i]]))
}
q <- c(list(mcmc(1 - Reduce(`+`, q[1:(i - 1)]))), q)
class(q) <- c("list", "MCMCglmmPredictedProbs")
res <- q
} else {
stop("Function does not support response type for families beside ordinal")
}
}
return(res)
}
# simple little function to generate confusion matrices
# I use it for one column predicted class, one column actual
# then look at percentage correctly classified
confusion <- function(formula, data) {
res <- xtabs(formula = formula, data = data)
print(res)
res/sum(res)
}
# summary method for predicted probabilities
# optionally first marginalizes across all observations
# by taking the row means
# If the predicted values only used the posterior means
# cannot generate intervals, so only the means are returned
# otherwise, calculates the mean predicted probability, as well as
# the HPD interval
# this can either be per observation or marginalized
summary.MCMCglmmPredictedProbs <- function(object,
marginalize = FALSE, level = .95, ...) {
if (nrow(object[[1]]) > 1) {
intervals <- TRUE
} else {
intervals <- FALSE
}
if (marginalize) {
object <- lapply(object, rowMeans)
}
if (intervals) {
res <- lapply(object, function(x) {
if (!is.null(ncol(x))) {
res <- cbind(colMeans(x), HPDinterval(mcmc(x), prob = level))
} else {
res <- cbind(mean(x), HPDinterval(mcmc(x), prob = level))
}
colnames(res) <- c("M", "LL", "UL")
return(res)
})
} else {
res <- lapply(object, mean)
}
return(res)
}
# recycler wraps most the previous functions to calculate
# the change in predicted probabilities for a twiddle change in the predictor
# or for discrete predictors, can use values so it is the change from 0 to 1
# or whatever
# the result is itself a MCMCglmmPredictedProbs (of course a difference but still)
# object, so it can be summarized using my method for the summary function
# this gives average marginal recycled predicted probabilities, as well as HPD intervals
recycler <- function(object, index = 2, twiddle, values) {
L1 <- missing(twiddle)
L2 <- missing(values)
if (!L1 && !L2) stop("Please specify either a twiddle value or actual values, not both")
X1 <- X2 <- object$X
if (L1 && L2) {
twiddle <- .01
}
if (L2) {
X2[, index] <- X2[, index] + twiddle
} else {
stopifnot(length(values) == 2L)
X1[, index] <- values[1]
X2[, index] <- values[2]
twiddle <- diff(values)
}
p1 <- predict2(object, X = X1, use = "all", type = "response", varepsilon = 1)
p2 <- predict2(object, X = X2, use = "all", type = "response", varepsilon = 1)
pdelta <- lapply(1:3, function(i) {
(p2[[i]] - p1[[i]])/twiddle
})
class(pdelta) <- c("list", "MCMCglmmPredictedProbs")
return(pdelta)
}
# sample models I used as test cases for developing the code
if (FALSE) {
require(MCMCglmm)
set.seed(10)
dat <- mtcars[sample(1:32, 1000, replace = TRUE), ]
dat <- within(dat, {
qsec <- scale(qsec)
hp <- scale(hp)
mpg <- scale(mpg)
disp <- scale(disp)
})
dat$ID <- factor(rep(letters, length.out = 1000))
dat$cyl <- factor(dat$cyl)
m <- MCMCglmm(cyl ~ qsec + mpg + drat, random = ~ ID, family = "ordinal",
data = dat, prior = list(
B = list(mu = c(0, 0, 0, 0), V = diag(4) * 1e2),
R = list(V = 1, fix = 1),
G = list(G1 = list(V = 1, nu = .002))), pr=TRUE,
nitt = 510000, thin = 200, burnin = 10000, verbose=TRUE)
## model summary
summary(m)
## predictor means
colMeans(dat[, c("qsec", "mpg", "drat")])
## predictor ranges
lapply(dat[, c("qsec", "mpg", "drat")], range)
## new data for prediction
myX <- as.matrix(data.frame("(Intercept)" = 1, qsec = 0, mpg = 0, drat = seq(from = 2.76, to = 4.93, by = .1), check.names=FALSE))
## get the predicted values (fixed effects only)
pred1 <- predict2(m, X = myX, Z = NULL, use = "all", type = "response", varepsilon = 1)
## summarize them
(spred1 <- summary(pred1))
## can combine predicted probs + HPD intervals with prediction data
## to be able to plot
preddat <- as.data.frame(cbind(do.call(rbind, rep(list(myX), 3)), do.call(rbind, spred1)))
## need an indicator for which level of the outcome
preddat$outcome <- factor(rep(1:3, each = nrow(myX)))
## look at the data
head(preddat)
## plot it
require(ggplot2)
ggplot(preddat, aes(x = drat, y = M, colour = outcome)) +
geom_ribbon(aes(ymin = LL, ymax = UL, fill = outcome), alpha = .25) +
geom_line(size=2)
## now get average marginal recycled probabilities
## note that this can be a bit slow
tmpres <- lapply(c("qsec", "mpg", "drat"), function(i) {
tmp <- recycler(m, index = i, twiddle = .01)
tmp <- summary(tmp, marginalize = TRUE)
tmp <- do.call("rbind", tmp)
(tmp2 <- sprintf("%0.2f [%0.2f, %0.2f]", tmp[, 1], tmp[, 2], tmp[, 3]))
})
## setup final "output" table
finaltable <- matrix(NA, nrow = 3, ncol = 4, dimnames = list(
NULL, c("Variable", "L1", "L2", "L3")))
finaltable[1, ] <- c("Var1", tmpres[[1]])
finaltable[2, ] <- c("Var2", tmpres[[2]])
finaltable[3, ] <- c("Var3", tmpres[[3]])
finaltable[,1] <- c("qsec", "mpg", "disp")
finaltable[] <- gsub("0\\.", ".", finaltable)
## these are the average changes in probability
## of membership in each group, for a one unit change
## of the predictor, on average across the sample
## with 95% HPD intervals
finaltable
## write to clipboard on Windows (e.g., for collaborators who want in Excel)
write.table(finaltable, file = "clipboard", na = "", sep = "\t", row.names=FALSE, col.names=TRUE)
#### other example models ####
m2 <- MCMCglmm(dv4 ~ hp, random = ~ ID, family = "ordinal",
data = dat, prior = list(
B = list(mu = c(0, 0), V = diag(2) * 1e10),
R = list(V = 1, fix = 1),
G = list(G1 = list(V = 1, nu = .002))), pr=TRUE,
nitt = 55000, thin = 100, burnin = 5000, verbose=FALSE)
msimp <- MCMCglmm(cyl ~ qsec, family = "ordinal",
data = dat, prior = list(
B = list(mu = c(0, 0), V = diag(2) * 1e10),
R = list(V = 1, fix = 1)),
nitt = 55000, thin = 100, burnin = 5000, verbose=FALSE)
malt <- MASS::polr(cyl ~ qsec, data = dat, method = "probit", Hess=TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment