Skip to content

Instantly share code, notes, and snippets.

@mbjoseph
Last active September 2, 2018 18:12
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mbjoseph/100a41d73901764a00a7 to your computer and use it in GitHub Desktop.
Save mbjoseph/100a41d73901764a00a7 to your computer and use it in GitHub Desktop.
Initial pass at a bivariate Poisson model in Stan
## Simple bivariate Poisson model in stan
## following parameterization in Karlis and Ntzoufras 2003
## Simulate data
n <- 50
# indpendent Poisson components
theta <- c(2, 3, 1)
X_i <- array(dim=c(n, 3))
for (i in 1:3){
X_i[,i] <- rpois(n, theta[i])
}
# summation to produce bivariate Poisson RVs
Y_1 <- X_i[, 1] + X_i[, 3]
Y_2 <- X_i[, 2] + X_i[, 3]
Y <- matrix(c(Y_1, Y_2),
ncol=n,
byrow=T)
# Trick to generate a ragged array-esque set of indices for interior sum
# we want to use a matrix operation to calculate the inner sum
# (from 0 to min(y_1[i], y_2[i])) for each of i observations
minima <- apply(Y, 2, min)
u <- NULL
which_n <- NULL
for (i in 1:n){
indices <- 0:minima[i]
u <- c(u, indices)
which_n <- c(which_n,
rep(i, length(indices))
)
}
length_u <- length(u) # should be sum(minima) + n
# construct matrix of indicators to ensure proper summation
u_mat <- array(dim=c(n, length_u))
for (i in 1:n){
u_mat[i, ] <- ifelse(which_n == i, 1, 0)
}
# plot data
library(epade)
par(mai=c(1.5, 1, 1, 1))
bar3d.ade(Y_1, Y_2, wall=3)
cat(
"
data {
int<lower=1> n; // number of observations
vector<lower=0>[n] Y[2]; // observations
int length_u; // number of u indices to get inner summand
vector<lower=0>[length_u] u; // u indices
int<lower=0> which_n[length_u]; // corresponding site indices
matrix[n, length_u] u_mat; // matrix of indicators to facilitate summation
}
parameters {
vector<lower=0>[3] theta; // expected values for component responses
}
transformed parameters {
real theta_sum;
vector[3] log_theta;
vector[n] u_sum;
vector[length_u] u_terms;
theta_sum <- sum(theta);
log_theta <- log(theta);
for (i in 1:length_u){
u_terms[i] <- exp(binomial_coefficient_log(Y[1, which_n[i]], u[i]) +
binomial_coefficient_log(Y[2, which_n[i]], u[i]) +
lgamma(u[i] + 1) +
u[i] * (log_theta[3] - log_theta[1] - log_theta[2]));
}
u_sum <- u_mat * u_terms;
}
model {
vector[n] loglik_el;
real loglik;
for (i in 1:n){
loglik_el[i] <- Y[1, i] * log_theta[1] - lgamma(Y[1, i] + 1) +
Y[2, i] * log_theta[2] - lgamma(Y[2, i] + 1) +
log(u_sum[i]);
}
loglik <- sum(loglik_el) - n*theta_sum;
increment_log_prob(loglik);
}
", file="mvpois.stan")
# estimate parameters
library(rstan)
d <- list(Y=Y, n=n, length_u=length_u, u=u, u_mat=u_mat, which_n=which_n)
init <- stan("mvpois.stan", data = d, chains=0)
mod <- stan(fit=init,
data=d,
iter=2000, chains=3,
pars=c("theta"))
traceplot(mod, "theta", inc_warmup=F)
# evaluate parameter recovery
post <- extract(mod)
par(mfrow=c(1, 3))
for (i in 1:3){
plot(density(post$theta[, i]))
abline(v=theta[i], col="red")
}
par(mfrow=c(1, 1))
library(scales)
pairs(rbind(post$theta, theta),
cex=c(rep(1, nrow(post$theta)), 2),
pch=c(rep(1, nrow(post$theta)), 19),
col=c(rep(alpha("black", .2), nrow(post$theta)), "red"),
labels=c(expression(theta[1]), expression(theta[2]), expression(theta[3]))
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment