Skip to content

Instantly share code, notes, and snippets.

@mevers
Last active March 29, 2020 02:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mevers/04a9de33cda14007d0f42de95f41ac1c to your computer and use it in GitHub Desktop.
## Gist for https://discourse.mc-stan.org/t/dose-response-model-with-partial-pooling/13823
##
## Version 28 March 2020
##
## Changelog:
## - Priors on `c` and `d` are now lognormals (see @arya's comments)
## - Fixed priors on the hyperparameters for `d` (see @FJCC's comments)
#
# _Originally_
## mu_d ~ normal(max(y), 5);
## sigma_d ~ cauchy(0, 2.5);
## which leads to estimates
# summary(fit, pars = c("d", "mu_d", "sigma_d"))$summary
# # mean se_mean sd 2.5% 25% 50% 75%
# #d[1] 3489.865 0.5093660 31.741119 3427.630 3467.723 3490.117 3511.441
# #d[2] 2127.745 1.1523709 46.468022 2038.463 2096.270 2127.392 2158.768
# #d[3] 2576.638 0.7579097 38.674776 2501.041 2549.911 2575.692 2602.964
# #mu_d 3474.843 0.0646205 4.871406 3465.109 3471.562 3474.840 3478.181
# #sigma_d 3779.928 40.9239714 1960.299861 1791.430 2589.539 3245.744 4337.591
# # 97.5% n_eff Rhat
# #d[1] 3550.748 3883.154 0.9997746
# #d[2] 2221.764 1626.012 1.0003123
# #d[3] 2653.945 2603.878 1.0004396
# #mu_d 3484.382 5682.874 0.9996138
# #sigma_d 8859.847 2294.507 1.0022283
# mu_d is very tight and its density is basically the same as that of the prior
# So the prior on mu_d was way too informative!
# We need to make the prior on mu_d less informative
#
# _Updated_
# mu_d ~ normal(log(max(y)), sqrt(log(max(y))));
# sigma_d ~ cauchy(0, 2.5);
# For a log-normal the median of the distribution is given by exp(mu), and
# the mean by exp(mu + sigma^2/2); so log(max(y)) should be a decent
# approximation for the median of `d`, around which we center our prior of
# mu_d.
# summary(fit, pars = c("d", "mu_d", "sigma_d"))$summary
# # mean se_mean sd 2.5% 25%
# #d[1] 3488.2285124 0.57652702 32.0965491 3426.8387930 3466.1003872
# #d[2] 2131.2525434 0.95521781 46.5966448 2044.6011600 2098.7158998
# #d[3] 2578.4156212 0.80747561 38.3812483 2503.9065302 2552.3074723
# #mu_d 7.9019570 0.01449615 0.4900183 6.8205748 7.7476104
# #sigma_d 0.6473466 0.02048502 0.6634329 0.1541901 0.2814838
# # 50% 75% 97.5% n_eff Rhat
# #d[1] 3487.8134198 3509.058015 3553.375104 3099.398 1.0002373
# #d[2] 2129.2165140 2162.491249 2226.975477 2379.603 1.0001828
# #d[3] 2578.2221784 2603.964276 2652.073853 2259.328 0.9996485
# #mu_d 7.9031171 8.063158 8.800187 1142.665 1.0041365
# #sigma_d 0.4324705 0.728732 2.638164 1048.869 1.0039242
## Data ----
## 33 measurements: 11 dose levels, each observed once per drug (J = 3).
## The dose and drug-index columns are fully regular, so build them with
## rep() instead of spelling out all 33 entries; the counts `y` stay literal.
data_stan <- list(
  N = 33L,                      # total number of measurements
  J = 3L,                       # number of drugs
  drug = rep(1:3, times = 11L), # drug index, cycling 1,2,3 per dose level
  x = rep(                      # dose, each level shared by the 3 drugs
    c(2e-07, 1e-07, 5e-08, 2.5e-08, 1.25e-08, 6.25e-09,
      3.13e-09, 1.56e-09, 7.79e-10, 3.9e-10, 1.95e-10),
    each = 3L
  ),
  y = c(                        # observed response counts
    13L, 13L, 9L, 9L, 18L, 27L, 85L, 50L, 37L, 149L, 119L, 147L,
    183L, 38L, 167L, 716L, 375L, 585L, 2600L, 763L, 997L, 3472L,
    1288L, 2013L, 3438L, 1563L, 2334L, 3475L, 2092L, 2262L, 3343L,
    2032L, 2575L
  )
)
## Stan model ----
## Four-parameter log-logistic (LL4) dose-response model with partial
## pooling across the J drugs:
##   mu_y = c + (d - c) / (1 + exp(b * (log(x) - ehat)))
## where, per drug: b = slope, c = lower asymptote, d = upper asymptote,
## ehat = log(IC50). Counts y are modelled as Poisson with mean mu_y
## (mu_y > 0 is guaranteed since c, d > 0 by the <lower=0> constraints).
## Per-drug parameters are pooled through shared hyperpriors: c and d get
## lognormal priors (hence the <lower=0> constraints), b and ehat normal
## priors. mu_d is centred on log(max(y)) because for a lognormal the
## median is exp(mu), so max(y) anchors the upper-asymptote scale (see
## the changelog discussion above). The transformed parameters block
## additionally exposes e = exp(ehat) (the IC50 on the natural scale) and
## log10 versions of the sigma hyperparameters for reporting; they do not
## affect the posterior.
## NOTE(review): the model is kept verbatim as a string literal; any edit
## here changes the compiled Stan program.
model_code <- "
functions {
real LL4(real x, real b, real c, real d, real ehat) {
return c + (d - c) / (1 + exp(b * (log(x) - ehat)));
}
}
data {
int<lower=1> N; // Measurements
int<lower=1> J; // Number of drugs
int<lower=1,upper=J> drug[N]; // Drug of measurement
vector[N] x; // Dose values
int y[N]; // Response values for drug
}
// Transformed data
transformed data {
}
// Sampling space
// We need to impose parameter constraints on the LL4 parameters:
// Since c,d ~ LogNormal(), we need <lower=0> for c,d
parameters {
vector<lower=0>[J] b; // slope
vector<lower=0>[J] c; // lower asymptote
vector<lower=0>[J] d; // upper asymptote
vector[J] ehat; // loge(IC50)
// Hyperparameters
real<lower=0> mu_b;
real<lower=0> sigma_b;
real mu_c;
real<lower=0> sigma_c;
real mu_d;
real<lower=0> sigma_d;
real mu_ehat;
real<lower=0> sigma_ehat;
}
// Transform parameters before calculating the posterior
transformed parameters {
vector<lower=0>[J] e;
real log_sigma_b;
real log_sigma_c;
real log_sigma_d;
real log_sigma_ehat;
// IC50
e = exp(ehat);
log_sigma_b = log10(sigma_b);
log_sigma_c = log10(sigma_c);
log_sigma_d = log10(sigma_d);
log_sigma_ehat = log10(sigma_ehat);
}
// Calculate posterior
model {
// Declare mu_y in model to make it local (i.e. we don't want mu_y to show
// up in the output)
vector[N] mu_y;
// Parameter priors
c ~ lognormal(mu_c, sigma_c);
d ~ lognormal(mu_d, sigma_d);
ehat ~ normal(mu_ehat, sigma_ehat);
b ~ normal(mu_b, sigma_b);
// Priors for hyperparameters
mu_c ~ normal(0, 5);
sigma_c ~ cauchy(0, 2.5);
mu_d ~ normal(log(max(y)), sqrt(log(max(y))));
sigma_d ~ cauchy(0, 2.5);
mu_ehat ~ normal(mean(log(x)), 5);
sigma_ehat ~ cauchy(0, 2.5);
mu_b ~ normal(1, 2);
sigma_b ~ cauchy(0, 2.5);
for (i in 1:N) {
mu_y[i] = LL4(x[i], b[drug[i]], c[drug[i]], d[drug[i]], ehat[drug[i]]);
}
y ~ poisson(mu_y);
}
"
## Model fit ----
## Compile the Stan program from the string above and sample with rstan's
## defaults (4 chains, 2000 iterations each incl. warmup).
library(rstan)
fit <- stan(
  model_code = model_code,
  data = data_stan
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment