Created
January 8, 2013 02:05
-
-
Save JWiley/4480442 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import generics | |
# (cheap version of importFrom() in a package) | |
fixef <- nlme:::fixef | |
ranef <- nlme:::ranef | |
# define generic prediction function | |
# but using my functions/methods | |
predict2 <- function (object, ...) { | |
UseMethod("predict2") | |
} | |
# extract fixed and random effect parameter names | |
# from an MCMCglmm object | |
paramNames.MCMCglmm <- function(object, ...) { | |
fNames <- as.character(attr(terms(object$Fixed$formula), "variables"))[-c(1, 2)] | |
rNames <- NULL | |
if (!is.null(object$Random$formula)) { | |
rNames <- as.character(attr(terms(object$Random$formula), "variables"))[-c(1)] | |
} | |
if ("(Intercept)" %in% colnames(object[["Sol"]])) { | |
fNames <- c("(Intercept)", fNames) | |
} | |
list(fixed = fNames, random = rNames) | |
} | |
# extract the levels of the random effects | |
# from an MCMCglmm object | |
ranefLevels <- function(object, data, ...) { | |
n <- paramNames.MCMCglmm(object)$random | |
res <- lapply(n, function(n) { | |
levels(data[, n]) | |
}) | |
names(res) <- n | |
return(res) | |
} | |
# extract the fixed and random effects portions from an MCMCglmm object | |
# note for the random, these are the estimates themselves | |
# not the variability in the estimates | |
# two use options let you get either just the posterior mean | |
# or all the posterior samples | |
.extractEffects <- function(object, use = c("all", "mean"), | |
which = c("fixed", "random"), ...) { | |
use <- match.arg(use) | |
which <- match.arg(which) | |
b <- object[["Sol"]] | |
eff <- switch(which, | |
fixed = { | |
# this does not work because the intercept term contains | |
# special characters (), that screw with the regular expressions | |
# cannot use fixed matching because factor variables can | |
# expand with arbitrarily named levels | |
#unlist(lapply(paramNames.MCMCglmm(object)$fixed, function(n) { | |
# grep(paste0("^", n, ".*$"), colnames(b), value = TRUE) | |
#})) | |
object$X@Dimnames[[2]] | |
}, | |
random = { | |
unlist(lapply(paramNames.MCMCglmm(object)$random, function(n) { | |
grep(paste0("^", n, "\\..*$"), colnames(b), value = TRUE) | |
})) | |
} | |
) | |
b <- b[, eff] | |
switch(use, | |
all = t(b), | |
mean = as.matrix(colMeans(b))) | |
} | |
# methods for fixef and ranef to extract the fixed and random effects | |
# for an MCMCglmm model object | |
fixef.MCMCglmm <- function(object, use = c("all", "mean"), ...) { | |
.extractEffects(object = object, use = use, which = "fixed", ...) | |
} | |
ranef.MCMCglmm <- function(object, use = c("all", "mean"), ...) { | |
.extractEffects(object = object, use = use, which = "random", ...) | |
} | |
# this is the main workhorse | |
# a predict function for MCMCglmm objects | |
# takes a few core arguments | |
# the model object | |
# and data matrices, X (fixed effects) and Z (random effects) | |
# if X and Z are missing, attempts to fill in from the | |
# model object (which optionally saves them) | |
# If X and Z are specified or NULL, they are not used | |
# this is useful either for out of sample predictions | |
# or to use just the fixed effects, for example | |
# use can use all posterior samples or the mean | |
# all is nice because it lets you construct HPD intervals around the predicted | |
# values, rather than just get an estimate | |
# the mean is nice because if that is all you care about, it is much much faster | |
# type is either lp for linear predictor or response for the predicted values | |
# on the response scale. Currently this is only meant for ordinal models, | |
# theoretically could be extended but the code is a pain | |
# varepsilon is the residual variance (probably could get it from the model object??) | |
# which I usually fix at 1 for probit/logit models | |
predict2.MCMCglmm <- function(object, X, Z, use = c("all", "mean"), | |
type = c("lp", "response"), varepsilon, ...) { | |
use <- match.arg(use) | |
type <- match.arg(type) | |
if (!is.null(object$X) && missing(X)) { | |
X <- object$X | |
} | |
if (!is.null(object$Z) && missing(Z)) { | |
Z <- object$Z | |
} | |
if (missing(X)) X <- NULL | |
if (missing(Z)) Z <- NULL | |
Xb <- Zu <- 0L | |
if (!is.null(X)) { | |
Xb <- X %*% as.matrix(fixef(object, use = use)) | |
} | |
if (!is.null(Z)) { | |
Zu <- Z %*% as.matrix(ranef(object, use = use)) | |
} | |
res <- t(as.matrix(Xb + Zu)) | |
dimnames(res) <- list(switch(use, | |
all = paste0("Rep.", 1:nrow(res)), mean = NULL), NULL) | |
# get predicted probabilities for each category from an ordered probit model | |
# set the resulting object class to MCMCglmlmPredictedProbs | |
# which I wrote a summary method for | |
if (type == "response") { | |
if (all(object$family %in% c("ordinal"))) { | |
if (missing(varepsilon)) { | |
varepsilon <- 1 | |
} | |
CP <- switch(use, | |
all = object$CP, | |
mean = colMeans(object$CP)) | |
CP <- as.list(as.data.frame(CP)) | |
CP <- lapply(CP, function(x) {do.call("cbind", rep(list(x), dim(res)[2L]))}) | |
CP <- c(-Inf, 0, CP, Inf) | |
p <- function(x) {pnorm(x - res, 0, sqrt(varepsilon + 1))} | |
q <- vector("list", length(CP) - 2) | |
for (i in 2:(length(CP) - 1)) { | |
q[[i - 1]] <- mcmc(p(CP[[i + 1]]) - p(CP[[i]])) | |
} | |
q <- c(list(mcmc(1 - Reduce(`+`, q[1:(i - 1)]))), q) | |
class(q) <- c("list", "MCMCglmmPredictedProbs") | |
res <- q | |
} else { | |
stop("Function does not support response type for families beside ordinal") | |
} | |
} | |
return(res) | |
} | |
# simple little function to generate confusion matrices | |
# I use it for one column predicted class, one column actual | |
# then look at percentage correctly classified | |
confusion <- function(formula, data) { | |
res <- xtabs(formula = formula, data = data) | |
print(res) | |
res/sum(res) | |
} | |
# summary method for predicted probabilities | |
# optionally first marginalizes across all observations | |
# by taking the row means | |
# If the predicted values only used the posterior means | |
# cannot generate intervals, so only the means are returned | |
# otherwise, calculates the mean predicted probability, as well as | |
# the HPD interval | |
# this can either be per observation or marginalized | |
summary.MCMCglmmPredictedProbs <- function(object, | |
marginalize = FALSE, level = .95, ...) { | |
if (nrow(object[[1]]) > 1) { | |
intervals <- TRUE | |
} else { | |
intervals <- FALSE | |
} | |
if (marginalize) { | |
object <- lapply(object, rowMeans) | |
} | |
if (intervals) { | |
res <- lapply(object, function(x) { | |
if (!is.null(ncol(x))) { | |
res <- cbind(colMeans(x), HPDinterval(mcmc(x), prob = level)) | |
} else { | |
res <- cbind(mean(x), HPDinterval(mcmc(x), prob = level)) | |
} | |
colnames(res) <- c("M", "LL", "UL") | |
return(res) | |
}) | |
} else { | |
res <- lapply(object, mean) | |
} | |
return(res) | |
} | |
# recycler wraps most the previous functions to calculate | |
# the change in predicted probabilities for a twiddle change in the predictor | |
# or for discrete predictors, can use values so it is the change from 0 to 1 | |
# or whatever | |
# the result is itself a MCMCglmmPredictedProbs (of course a difference but still) | |
# object, so it can be summarized using my method for the summary function | |
# this gives average marginal recycled predicted probabilities, as well as HPD intervals | |
recycler <- function(object, index = 2, twiddle, values) { | |
L1 <- missing(twiddle) | |
L2 <- missing(values) | |
if (!L1 && !L2) stop("Please specify either a twiddle value or actual values, not both") | |
X1 <- X2 <- object$X | |
if (L1 && L2) { | |
twiddle <- .01 | |
} | |
if (L2) { | |
X2[, index] <- X2[, index] + twiddle | |
} else { | |
stopifnot(length(values) == 2L) | |
X1[, index] <- values[1] | |
X2[, index] <- values[2] | |
twiddle <- diff(values) | |
} | |
p1 <- predict2(object, X = X1, use = "all", type = "response", varepsilon = 1) | |
p2 <- predict2(object, X = X2, use = "all", type = "response", varepsilon = 1) | |
pdelta <- lapply(1:3, function(i) { | |
(p2[[i]] - p1[[i]])/twiddle | |
}) | |
class(pdelta) <- c("list", "MCMCglmmPredictedProbs") | |
return(pdelta) | |
} | |
# sample models I used as test cases for developing the code | |
if (FALSE) { | |
require(MCMCglmm) | |
set.seed(10) | |
dat <- mtcars[sample(1:32, 1000, replace = TRUE), ] | |
dat <- within(dat, { | |
qsec <- scale(qsec) | |
hp <- scale(hp) | |
mpg <- scale(mpg) | |
disp <- scale(disp) | |
}) | |
dat$ID <- factor(rep(letters, length.out = 1000)) | |
dat$cyl <- factor(dat$cyl) | |
m <- MCMCglmm(cyl ~ qsec + mpg + drat, random = ~ ID, family = "ordinal", | |
data = dat, prior = list( | |
B = list(mu = c(0, 0, 0, 0), V = diag(4) * 1e2), | |
R = list(V = 1, fix = 1), | |
G = list(G1 = list(V = 1, nu = .002))), pr=TRUE, | |
nitt = 510000, thin = 200, burnin = 10000, verbose=TRUE) | |
## model summary | |
summary(m) | |
## predictor means | |
colMeans(dat[, c("qsec", "mpg", "drat")]) | |
## predictor ranges | |
lapply(dat[, c("qsec", "mpg", "drat")], range) | |
## new data for prediction | |
myX <- as.matrix(data.frame("(Intercept)" = 1, qsec = 0, mpg = 0, drat = seq(from = 2.76, to = 4.93, by = .1), check.names=FALSE)) | |
## get the predicted values (fixed effects only) | |
pred1 <- predict2(m, X = myX, Z = NULL, use = "all", type = "response", varepsilon = 1) | |
## summarize them | |
(spred1 <- summary(pred1)) | |
## can combine predicted probs + HPD intervals with prediction data | |
## to be able to plot | |
preddat <- as.data.frame(cbind(do.call(rbind, rep(list(myX), 3)), do.call(rbind, spred1))) | |
## need an indicator for which level of the outcome | |
preddat$outcome <- factor(rep(1:3, each = nrow(myX))) | |
## look at the data | |
head(preddat) | |
## plot it | |
require(ggplot2) | |
ggplot(preddat, aes(x = drat, y = M, colour = outcome)) + | |
geom_ribbon(aes(ymin = LL, ymax = UL, fill = outcome), alpha = .25) + | |
geom_line(size=2) | |
## now get average marginal recycled probabilities | |
## note that this can be a bit slow | |
tmpres <- lapply(c("qsec", "mpg", "drat"), function(i) { | |
tmp <- recycler(m, index = i, twiddle = .01) | |
tmp <- summary(tmp, marginalize = TRUE) | |
tmp <- do.call("rbind", tmp) | |
(tmp2 <- sprintf("%0.2f [%0.2f, %0.2f]", tmp[, 1], tmp[, 2], tmp[, 3])) | |
}) | |
## setup final "output" table | |
finaltable <- matrix(NA, nrow = 3, ncol = 4, dimnames = list( | |
NULL, c("Variable", "L1", "L2", "L3"))) | |
finaltable[1, ] <- c("Var1", tmpres[[1]]) | |
finaltable[2, ] <- c("Var2", tmpres[[2]]) | |
finaltable[3, ] <- c("Var3", tmpres[[3]]) | |
finaltable[,1] <- c("qsec", "mpg", "disp") | |
finaltable[] <- gsub("0\\.", ".", finaltable) | |
## these are the average changes in probability | |
## of membership in each group, for a one unit change | |
## of the predictor, on average across the sample | |
## with 95% HPD intervals | |
finaltable | |
## write to clipboard on Windows (e.g., for collaborators who want in Excel) | |
write.table(finaltable, file = "clipboard", na = "", sep = "\t", row.names=FALSE, col.names=TRUE) | |
#### other example models #### | |
m2 <- MCMCglmm(dv4 ~ hp, random = ~ ID, family = "ordinal", | |
data = dat, prior = list( | |
B = list(mu = c(0, 0), V = diag(2) * 1e10), | |
R = list(V = 1, fix = 1), | |
G = list(G1 = list(V = 1, nu = .002))), pr=TRUE, | |
nitt = 55000, thin = 100, burnin = 5000, verbose=FALSE) | |
msimp <- MCMCglmm(cyl ~ qsec, family = "ordinal", | |
data = dat, prior = list( | |
B = list(mu = c(0, 0), V = diag(2) * 1e10), | |
R = list(V = 1, fix = 1)), | |
nitt = 55000, thin = 100, burnin = 5000, verbose=FALSE) | |
malt <- MASS::polr(cyl ~ qsec, data = dat, method = "probit", Hess=TRUE) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment