Skip to content

Instantly share code, notes, and snippets.

@rubenarslan
Last active August 29, 2015 14:17
Show Gist options
  • Save rubenarslan/aeacdd306b3d061819a6 to your computer and use it in GitHub Desktop.
Save rubenarslan/aeacdd306b3d061819a6 to your computer and use it in GitHub Desktop.
reproducible example MCMCglmm prediction for zero-altered poisson outcome
fact_levels = c("[0,25]","(25,30]","(30,35]")
cases = 5e4
exampledata = data.frame(idParents = sample(1:(cases/10), size = cases, replace = T),
paternalage.factor = factor(sample(fact_levels, size = cases, replace=T),levels = fact_levels))
exampledata$outcome = rpois(cases,lambda = log(30 + (as.numeric(exampledata$paternalage.factor))*-0.2 + 0.8 *rnorm(cases)))
exampledata$outcome = ifelse(plogis(log(as.numeric(exampledata$paternalage.factor))-2 + rnorm(cases)) > 0.5, 0, exampledata$outcome)
samples = 1000; thin = 10; burnin = 3000; nitt = samples * thin + burnin
prior <- list(
R=list(V=diag(2), nu=1.002, fix=2),
G=list(G1=list(V=diag(2), nu=1, alpha.mu=c(0,0), alpha.V=diag(2)*1000))
)
start <- list(
liab=c(rnorm( nrow(exampledata)*2 )),
R = list(R1 = rIW(diag(2), 10 , fix = 2)),
G = list(G1 = rIW(diag(2), 10 ))
)
m1 = MCMCglmm( outcome ~ trait * paternalage.factor,
rcov=~idh(trait):units,
random=~idh(trait):idParents,
family="zapoisson",
start = start,
prior = prior,
data=exampledata,
pr = F, saveX = F, saveZ = F,
nitt=nitt,thin=thin,burnin=burnin)
summary(m1)
normal.evd<-function(x, mu, sd){
exp(-exp(x))*dnorm(x, mu, sd)
}
normal.zt<-function(x, mu, sd){
exp(x)/(1-exp(-exp(x)))*dnorm(x, mu, sd)
}
pred<-function(a_1, a_2, sd_1, sd_2){
prob<-1-integrate(normal.evd, qnorm(0.0001, a_2,sd_2),
qnorm(0.9999, a_2,sd_2), a_2,sd_2)[[1]]
meanc<-integrate(normal.zt, qnorm(0.0001, a_1,sd_1),
qnorm(0.9999, a_1,sd_1), a_1,sd_1)[[1]]
prob*meanc
}
HPDpredict_za = function(object, predictors) {
library(MCMCglmm); library(coda); library(formr); library(reshape2);
if(class(object) != "MCMCglmm") {
if(length( object[[1]]$Residual$nrt )>1) {
object = lapply(object,FUN=function(x) { x$Residual$nrt<-2;x })
}
Sol = mcmc.list(lapply(object,FUN=function(x) { x$Sol}))
VCV = mcmc.list(lapply(object,FUN=function(x) { x$VCV}))
vars = colnames(Sol[[1]])
} else {
Sol = as.data.frame(object$Sol)
VCV = as.data.frame(object$VCV)
vars = names(Sol)
}
za_intercept_name = vars[ ! vars %contains% ":" & vars %begins_with% "traitza_"]
outcome_name = stringr::str_sub(za_intercept_name,stringr::str_length("traitza__"))
vcv_1_name = paste0(outcome_name,".units")
vcv_2_name = paste0("za_",outcome_name,".units")
intercept_name = "(Intercept)"
intercept = Sol[,intercept_name]
za_intercept = Sol[, za_intercept_name]
sd_1 = VCV[, vcv_1_name]
sd_2 = VCV[, vcv_2_name]
if(class(object) != "MCMCglmm") {
intercept = unlist(intercept)
za_intercept = unlist(za_intercept)
sd_1 = unlist(sd_1)
sd_2 = unlist(sd_2)
}
sd_1 = sqrt(sd_1)
sd_2 = sqrt(sd_2)
samples = length(unlist(intercept))
df = data.frame(matrix(NA_real_, nrow = samples, ncol = length(predictors)))
pred_vars = paste0(names(predictors), sapply(predictors, FUN = function(value) {
if(value == 1) { "" } else { paste0("_",as.numeric(value)) }
}))
names(df) = pred_vars
for(i in seq_along(predictors)) {
predictor = names(predictors)[i]
value = predictors[i]
za_predictor = vars[ vars == paste0(za_intercept_name,":", predictor) ]
if(value != 0) {
l1 = Sol[, predictor ]
l2 = Sol[, za_predictor ]
if(is.list(object)) {
l1 = unlist(l1)
l2 = unlist(l2)
}
l1 = value * l1
l2 = value * l2
} else {
l1 = l2 = rep(0,times = samples)
}
at_predictor = numeric(samples)
for(j in 1:samples) {
at_predictor[j] = pred(a_1 = intercept[j] + l1[j],
a_2 = za_intercept[i] + l2[j], sd_1 = sd_1[j], sd_2 = sd_2[j])
}
if(value == 1) { colname = predictor } else { colname = paste0(predictor,"_",value) }
df[, colname] = at_predictor
}
mstrapped = suppressMessages(melt(df))
invisible(mstrapped)
}
predictors = structure(c(0, 1, 1), .Names = c("paternalage.factor[0,25]",
"paternalage.factor(25,30]", "paternalage.factor(30,35]"))
coefs = HPDpredict_za(m1, predictors)
ggplot(coefs, aes_string(x = "variable", y = "value")) +
geom_violin(colour = "transparent", fill = "#5ea16e", alpha = 0.3) +
geom_pointrange(stat = "summary", fun.data = "mean_sdl")+
geom_smooth(aes(weight = 1/sd(value, na.rm = T), group = 1),method = "lm", se = F, lty = "dashed")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment