Last active
April 20, 2016 15:58
-
-
Save jwdink/c7db9c69ebfa4fd9b9e7063c0e828259 to your computer and use it in GitHub Desktop.
Attempt to build a survival model in (start, stop] format in JAGS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library('dplyr') | |
library('survival') | |
## Make the Data: ----- | |
set.seed(3) | |
n_sub <- 1000 | |
current_date <- 365*2 | |
true_shape <- 2 | |
true_scale <- 365 | |
dat <- data_frame(person = 1:n_sub, | |
true_duration = rweibull(n = n_sub, shape = true_shape, scale = true_scale), | |
person_start_time = runif(n_sub, min= 0, max= true_scale*2), | |
person_censored = (person_start_time + true_duration) > current_date, | |
person_duration = ifelse(person_censored, current_date - person_start_time, true_duration) | |
) | |
## Split into multiple observations per person: -------- | |
cens_point <- 300 # <----- try changing to 0 for no split; if so, model correctly estimates | |
dat_split <- dat %>% | |
group_by(person) %>% | |
do(data_frame( | |
split = ifelse(.$person_duration > cens_point, cens_point, .$person_duration), | |
START = c(0, split[1]), | |
END = c(split[1], .$person_duration), | |
TINTERVAL = c(split[1], .$person_duration - split[1]), | |
CENS = c(ifelse(.$person_duration > cens_point, 1, .$person_censored), .$person_censored), # <— edited original post here due to bug; but problem still present when fixing bug | |
TINTERVAL_CENS = ifelse(CENS, NA, TINTERVAL), | |
END_CENS = ifelse(CENS, NA, END) | |
)) %>% | |
filter(TINTERVAL != 0) | |
## Set-Up JAGS Model ------- | |
dat_jags <- as.list(dat_split) | |
dat_jags$N <- length(dat_jags$TINTERVAL) | |
dat_jags$Np <- n_distinct(dat_jags$person) | |
inits <- replicate(n = 2, simplify = FALSE, expr = { | |
list(TINTERVAL_CENS = with(dat_jags, ifelse(CENS, TINTERVAL + 1, NA)), | |
END_CENS = with(dat_jags, ifelse(CENS, END + 1, NA)) ) | |
}) | |
# Model *without* random-effects for person: ---- | |
# consistently overestimates scale parameter | |
model_string <- | |
" | |
model { | |
# | |
log_a ~ dnorm(0, .01) | |
log(a) <- log_a | |
log_b ~ dnorm(0, .01) | |
log(b) <- log_b | |
nu <- a | |
lambda <- (1/b)^a | |
for (i in 1:N) { | |
# Estimate Subject-Durations: | |
CENS[i] ~ dinterval(END_CENS[i], END[i]) | |
END_CENS[i] ~ dweibull( nu, lambda )T(START[i],) | |
} | |
} | |
" | |
# Model *with* random-effect for person: ---- | |
# gives crazy estimates for nu parameter | |
model_string_re <- | |
" | |
model { | |
# | |
log_a ~ dnorm(0, .01) | |
log(a) <- log_a | |
log_b ~ dnorm(0, .01) | |
log(b) <- log_b | |
# | |
nu <- a | |
lambda <- (1/b)^a | |
sigma_re_a ~ dnorm(0, 10) I(0,) | |
sigma_re_b ~ dnorm(0, 10) I(0,) | |
for (p in 1:Np) { | |
log_a_offset_for_p[p] ~ dnorm(0, 1/sigma_re_a^2) | |
log_b_offset_for_p[p] ~ dnorm(0, 1/sigma_re_b^2) | |
this_log_a[p] <- log_a + log_a_offset_for_p[p] | |
this_log_b[p] <- log_b + log_b_offset_for_p[p] | |
log(this_a[p]) <- this_log_a[p] | |
log(this_b[p]) <- this_log_b[p] | |
this_nu[p] <- this_a[p] | |
this_lambda[p] <- (1/this_b[p])^this_a[p] | |
} | |
for (i in 1:N) { | |
# Estimate Subject-Durations: | |
CENS[i] ~ dinterval(END_CENS[i], END[i]) | |
END_CENS[i] ~ dweibull( this_nu[person[i]], this_lambda[person[i]] )T(START[i],) | |
} | |
} | |
" | |
library('runjags') | |
param_monitors <- c('a', 'b', 'nu', 'lambda') | |
fit_jags <- run.jags(model = model_string_re, | |
burnin = 1000, sample = 1000, | |
monitor = param_monitors, | |
n.chains = 2, data = dat_jags, inits = inits) | |
# model_string_re does not converge as quickly. but the estimates do not get better if you let it converge: | |
# fit_jags <- autoextend.jags(fit_jags) | |
# estimate: | |
fit_jags | |
# actual: | |
c(true_shape, true_scale) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment