Initial pass at a bivariate Poisson model in Stan
## Simple bivariate Poisson model in stan | |
## following parameterization in Karlis and Ntzoufras 2003 | |
## Simulate data | |
n <- 50 | |
# indpendent Poisson components | |
theta <- c(2, 3, 1) | |
X_i <- array(dim=c(n, 3)) | |
for (i in 1:3){ | |
X_i[,i] <- rpois(n, theta[i]) | |
} | |
# summation to produce bivariate Poisson RVs | |
Y_1 <- X_i[, 1] + X_i[, 3] | |
Y_2 <- X_i[, 2] + X_i[, 3] | |
Y <- matrix(c(Y_1, Y_2), | |
ncol=n, | |
byrow=T) | |
# Trick to generate a ragged array-esque set of indices for interior sum | |
# we want to use a matrix operation to calculate the inner sum | |
# (from 0 to min(y_1[i], y_2[i])) for each of i observations | |
minima <- apply(Y, 2, min) | |
u <- NULL | |
which_n <- NULL | |
for (i in 1:n){ | |
indices <- 0:minima[i] | |
u <- c(u, indices) | |
which_n <- c(which_n, | |
rep(i, length(indices)) | |
) | |
} | |
length_u <- length(u) # should be sum(minima) + n | |
# construct matrix of indicators to ensure proper summation | |
u_mat <- array(dim=c(n, length_u)) | |
for (i in 1:n){ | |
u_mat[i, ] <- ifelse(which_n == i, 1, 0) | |
} | |
# plot data | |
library(epade) | |
par(mai=c(1.5, 1, 1, 1)) | |
bar3d.ade(Y_1, Y_2, wall=3) | |
cat( | |
" | |
data { | |
int<lower=1> n; // number of observations | |
vector<lower=0>[n] Y[2]; // observations | |
int length_u; // number of u indices to get inner summand | |
vector<lower=0>[length_u] u; // u indices | |
int<lower=0> which_n[length_u]; // corresponding site indices | |
matrix[n, length_u] u_mat; // matrix of indicators to facilitate summation | |
} | |
parameters { | |
vector<lower=0>[3] theta; // expected values for component responses | |
} | |
transformed parameters { | |
real theta_sum; | |
vector[3] log_theta; | |
vector[n] u_sum; | |
vector[length_u] u_terms; | |
theta_sum <- sum(theta); | |
log_theta <- log(theta); | |
for (i in 1:length_u){ | |
u_terms[i] <- exp(binomial_coefficient_log(Y[1, which_n[i]], u[i]) + | |
binomial_coefficient_log(Y[2, which_n[i]], u[i]) + | |
lgamma(u[i] + 1) + | |
u[i] * (log_theta[3] - log_theta[1] - log_theta[2])); | |
} | |
u_sum <- u_mat * u_terms; | |
} | |
model { | |
vector[n] loglik_el; | |
real loglik; | |
for (i in 1:n){ | |
loglik_el[i] <- Y[1, i] * log_theta[1] - lgamma(Y[1, i] + 1) + | |
Y[2, i] * log_theta[2] - lgamma(Y[2, i] + 1) + | |
log(u_sum[i]); | |
} | |
loglik <- sum(loglik_el) - n*theta_sum; | |
increment_log_prob(loglik); | |
} | |
", file="mvpois.stan") | |
# estimate parameters | |
library(rstan) | |
d <- list(Y=Y, n=n, length_u=length_u, u=u, u_mat=u_mat, which_n=which_n) | |
init <- stan("mvpois.stan", data = d, chains=0) | |
mod <- stan(fit=init, | |
data=d, | |
iter=2000, chains=3, | |
pars=c("theta")) | |
traceplot(mod, "theta", inc_warmup=F) | |
# evaluate parameter recovery | |
post <- extract(mod) | |
par(mfrow=c(1, 3)) | |
for (i in 1:3){ | |
plot(density(post$theta[, i])) | |
abline(v=theta[i], col="red") | |
} | |
par(mfrow=c(1, 1)) | |
library(scales) | |
pairs(rbind(post$theta, theta), | |
cex=c(rep(1, nrow(post$theta)), 2), | |
pch=c(rep(1, nrow(post$theta)), 19), | |
col=c(rep(alpha("black", .2), nrow(post$theta)), "red"), | |
labels=c(expression(theta[1]), expression(theta[2]), expression(theta[3])) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment