jwdink/survival_counting_SO.R

## survival_counting_SO.R
library('dplyr')
library('survival')

## Make the Data: -----
set.seed(3)
n_sub <- 1000
current_date <- 365*2

true_shape <- 2
true_scale <- 365

dat <- data_frame(person = 1:n_sub,
                  true_duration = rweibull(n = n_sub, shape = true_shape, scale = true_scale),
                  person_start_time = runif(n_sub, min= 0, max= true_scale*2),
                  person_censored = (person_start_time + true_duration) > current_date,
                  person_duration = ifelse(person_censored, current_date - person_start_time, true_duration)
)

## Split into multiple observations per person: --------
cens_point <- 300 # <----- try changing to 0 for no split; if so, model correctly estimates
dat_split <- dat %>%
  group_by(person) %>%
  do(data_frame(
    split = ifelse(.$person_duration > cens_point, cens_point, .$person_duration),
    START = c(0, split[1]),
    END = c(split[1], .$person_duration),
    TINTERVAL = c(split[1], .$person_duration - split[1]),
    CENS = c(ifelse(.$person_duration > cens_point, 1, .$person_censored), .$person_censored), # <— edited original post here due to bug; but problem still present when fixing bug
    TINTERVAL_CENS = ifelse(CENS, NA, TINTERVAL),
    END_CENS = ifelse(CENS, NA, END)
  )) %>%
  filter(TINTERVAL != 0)

## Set-Up JAGS Model -------
dat_jags <- as.list(dat_split)
dat_jags$N <- length(dat_jags$TINTERVAL)
dat_jags$Np <- n_distinct(dat_jags$person)
inits <- replicate(n = 2, simplify = FALSE, expr = {
  list(TINTERVAL_CENS = with(dat_jags, ifelse(CENS, TINTERVAL + 1, NA)),
       END_CENS = with(dat_jags, ifelse(CENS, END + 1, NA)) )
})

# Model *without* random-effects for person: ----
# consistently overestimates scale parameter
model_string <-
  "
model {
  #
  log_a ~ dnorm(0, .01)
  log(a) <- log_a
  log_b ~ dnorm(0, .01)
  log(b) <- log_b
  nu <- a
  lambda <- (1/b)^a

  for (i in 1:N) {
    # Estimate Subject-Durations:
    CENS[i] ~ dinterval(END_CENS[i], END[i])
    END_CENS[i] ~ dweibull( nu, lambda )T(START[i],)
  }
}
"

# Model *with* random-effect for person: ----
# gives crazy estimates for nu parameter
model_string_re <-
"
  model {
    #
    log_a ~ dnorm(0, .01)
    log(a) <- log_a
    log_b ~ dnorm(0, .01)
    log(b) <- log_b

    #
    nu <- a
    lambda <- (1/b)^a

    sigma_re_a ~ dnorm(0, 10) I(0,)
    sigma_re_b ~ dnorm(0, 10) I(0,)
    for (p in 1:Np) {
      log_a_offset_for_p[p] ~ dnorm(0, 1/sigma_re_a^2)
      log_b_offset_for_p[p] ~ dnorm(0, 1/sigma_re_b^2)

      this_log_a[p] <- log_a + log_a_offset_for_p[p]
      this_log_b[p] <- log_b + log_b_offset_for_p[p]

      log(this_a[p]) <- this_log_a[p]
      log(this_b[p]) <- this_log_b[p]

      this_nu[p] <- this_a[p]
      this_lambda[p] <- (1/this_b[p])^this_a[p]
    }

    for (i in 1:N) {
      # Estimate Subject-Durations:
      CENS[i] ~ dinterval(END_CENS[i], END[i])
      END_CENS[i] ~ dweibull( this_nu[person[i]], this_lambda[person[i]] )T(START[i],)
    }

  }
"

library('runjags')
param_monitors <- c('a', 'b', 'nu', 'lambda')
fit_jags <- run.jags(model = model_string_re,
                     burnin = 1000, sample = 1000,
                     monitor = param_monitors,
                     n.chains = 2, data = dat_jags, inits = inits)
# model_string_re does not converge as quickly. but the estimates do not get better if you let it converge:
# fit_jags <- autoextend.jags(fit_jags)

# estimate:
fit_jags
# actual:
c(true_shape, true_scale)
	library('dplyr')
	library('survival')

	## Make the Data: -----
	set.seed(3)
	n_sub <- 1000
	current_date <- 365*2

	true_shape <- 2
	true_scale <- 365

	dat <- data_frame(person = 1:n_sub,
	true_duration = rweibull(n = n_sub, shape = true_shape, scale = true_scale),
	person_start_time = runif(n_sub, min= 0, max= true_scale*2),
	person_censored = (person_start_time + true_duration) > current_date,
	person_duration = ifelse(person_censored, current_date - person_start_time, true_duration)
	)

	## Split into multiple observations per person: --------
	cens_point <- 300 # <----- try changing to 0 for no split; if so, model correctly estimates
	dat_split <- dat %>%
	group_by(person) %>%
	do(data_frame(
	split = ifelse(.$person_duration > cens_point, cens_point, .$person_duration),
	START = c(0, split[1]),
	END = c(split[1], .$person_duration),
	TINTERVAL = c(split[1], .$person_duration - split[1]),
	CENS = c(ifelse(.$person_duration > cens_point, 1, .$person_censored), .$person_censored), # <— edited original post here due to bug; but problem still present when fixing bug
	TINTERVAL_CENS = ifelse(CENS, NA, TINTERVAL),
	END_CENS = ifelse(CENS, NA, END)
	)) %>%
	filter(TINTERVAL != 0)

	## Set-Up JAGS Model -------
	dat_jags <- as.list(dat_split)
	dat_jags$N <- length(dat_jags$TINTERVAL)
	dat_jags$Np <- n_distinct(dat_jags$person)
	inits <- replicate(n = 2, simplify = FALSE, expr = {
	list(TINTERVAL_CENS = with(dat_jags, ifelse(CENS, TINTERVAL + 1, NA)),
	END_CENS = with(dat_jags, ifelse(CENS, END + 1, NA)) )
	})

	# Model without random-effects for person: ----
	# consistently overestimates scale parameter
	model_string <-
	"
	model {
	#
	log_a ~ dnorm(0, .01)
	log(a) <- log_a
	log_b ~ dnorm(0, .01)
	log(b) <- log_b
	nu <- a
	lambda <- (1/b)^a

	for (i in 1:N) {
	# Estimate Subject-Durations:
	CENS[i] ~ dinterval(END_CENS[i], END[i])
	END_CENS[i] ~ dweibull( nu, lambda )T(START[i],)
	}
	}
	"

	# Model with random-effect for person: ----
	# gives crazy estimates for nu parameter
	model_string_re <-
	"
	model {
	#
	log_a ~ dnorm(0, .01)
	log(a) <- log_a
	log_b ~ dnorm(0, .01)
	log(b) <- log_b

	#
	nu <- a
	lambda <- (1/b)^a

	sigma_re_a ~ dnorm(0, 10) I(0,)
	sigma_re_b ~ dnorm(0, 10) I(0,)
	for (p in 1:Np) {
	log_a_offset_for_p[p] ~ dnorm(0, 1/sigma_re_a^2)
	log_b_offset_for_p[p] ~ dnorm(0, 1/sigma_re_b^2)

	this_log_a[p] <- log_a + log_a_offset_for_p[p]
	this_log_b[p] <- log_b + log_b_offset_for_p[p]

	log(this_a[p]) <- this_log_a[p]
	log(this_b[p]) <- this_log_b[p]

	this_nu[p] <- this_a[p]
	this_lambda[p] <- (1/this_b[p])^this_a[p]
	}

	for (i in 1:N) {
	# Estimate Subject-Durations:
	CENS[i] ~ dinterval(END_CENS[i], END[i])
	END_CENS[i] ~ dweibull( this_nu[person[i]], this_lambda[person[i]] )T(START[i],)
	}

	}
	"

	library('runjags')
	param_monitors <- c('a', 'b', 'nu', 'lambda')
	fit_jags <- run.jags(model = model_string_re,
	burnin = 1000, sample = 1000,
	monitor = param_monitors,
	n.chains = 2, data = dat_jags, inits = inits)
	# model_string_re does not converge as quickly. but the estimates do not get better if you let it converge:
	# fit_jags <- autoextend.jags(fit_jags)

	# estimate:
	fit_jags
	# actual:
	c(true_shape, true_scale)