mbjoseph/bivar_pois.R

## bivar_pois.R
## Simple bivariate Poisson model in stan
## following parameterization in Karlis and Ntzoufras 2003
## Simulate data
n <- 50

# indpendent Poisson components
theta <- c(2, 3, 1)

X_i <- array(dim=c(n, 3))
for (i in 1:3){
  X_i[,i] <- rpois(n, theta[i])
}

# summation to produce bivariate Poisson RVs
Y_1 <- X_i[, 1] + X_i[, 3]
Y_2 <- X_i[, 2] + X_i[, 3]

Y <- matrix(c(Y_1, Y_2),
            ncol=n,
            byrow=T)

# Trick to generate a ragged array-esque set of indices for interior sum
# we want to use a matrix operation to calculate the inner sum
# (from 0 to min(y_1[i], y_2[i])) for each of i observations
minima <- apply(Y, 2, min)
u <- NULL
which_n <- NULL
for (i in 1:n){
  indices <- 0:minima[i]
  u <- c(u, indices)
  which_n <- c(which_n,
               rep(i, length(indices))
               )
}

length_u <- length(u) # should be sum(minima) + n

# construct matrix of indicators to ensure proper summation
u_mat <- array(dim=c(n, length_u))
for (i in 1:n){
  u_mat[i, ] <- ifelse(which_n == i, 1, 0)
}

# plot data
library(epade)
par(mai=c(1.5, 1, 1, 1))
bar3d.ade(Y_1, Y_2, wall=3)

cat(
  "
data {
  int<lower=1> n;                 // number of observations
  vector<lower=0>[n] Y[2];        // observations
  int length_u;                   // number of u indices to get inner summand
  vector<lower=0>[length_u] u;    // u indices
  int<lower=0> which_n[length_u]; // corresponding site indices
  matrix[n, length_u] u_mat;      // matrix of indicators to facilitate summation
}

parameters {
  vector<lower=0>[3] theta;       // expected values for component responses
}

transformed parameters {
  real theta_sum;
  vector[3] log_theta;
  vector[n] u_sum;
  vector[length_u] u_terms;

  theta_sum <- sum(theta);
  log_theta <- log(theta);

  for (i in 1:length_u){
    u_terms[i] <- exp(binomial_coefficient_log(Y[1, which_n[i]], u[i]) +
                      binomial_coefficient_log(Y[2, which_n[i]], u[i]) +
                      lgamma(u[i] + 1) +
                      u[i] * (log_theta[3] - log_theta[1] - log_theta[2]));
  }
  u_sum <- u_mat * u_terms;
}

model {
  vector[n] loglik_el;
  real loglik;
  for (i in 1:n){
    loglik_el[i] <- Y[1, i] * log_theta[1] - lgamma(Y[1, i] + 1) +
                  Y[2, i] * log_theta[2] - lgamma(Y[2, i] + 1) +
                  log(u_sum[i]);
  }
  loglik <- sum(loglik_el) - n*theta_sum;
  increment_log_prob(loglik);
}
  ", file="mvpois.stan")


# estimate parameters
library(rstan)
d <- list(Y=Y, n=n, length_u=length_u, u=u, u_mat=u_mat, which_n=which_n)
init <- stan("mvpois.stan", data = d, chains=0)
mod <- stan(fit=init,
            data=d,
            iter=2000, chains=3,
            pars=c("theta"))
traceplot(mod, "theta", inc_warmup=F)


# evaluate parameter recovery
post <- extract(mod)
par(mfrow=c(1, 3))
for (i in 1:3){
  plot(density(post$theta[, i]))
  abline(v=theta[i], col="red")
}
par(mfrow=c(1, 1))

library(scales)
pairs(rbind(post$theta, theta),
      cex=c(rep(1, nrow(post$theta)), 2),
      pch=c(rep(1, nrow(post$theta)), 19),
      col=c(rep(alpha("black", .2), nrow(post$theta)), "red"),
      labels=c(expression(theta[1]), expression(theta[2]), expression(theta[3]))
      )
	## Simple bivariate Poisson model in stan
	## following parameterization in Karlis and Ntzoufras 2003
	## Simulate data
	n <- 50

	# indpendent Poisson components
	theta <- c(2, 3, 1)

	X_i <- array(dim=c(n, 3))
	for (i in 1:3){
	X_i[,i] <- rpois(n, theta[i])
	}

	# summation to produce bivariate Poisson RVs
	Y_1 <- X_i[, 1] + X_i[, 3]
	Y_2 <- X_i[, 2] + X_i[, 3]

	Y <- matrix(c(Y_1, Y_2),
	ncol=n,
	byrow=T)

	# Trick to generate a ragged array-esque set of indices for interior sum
	# we want to use a matrix operation to calculate the inner sum
	# (from 0 to min(y_1[i], y_2[i])) for each of i observations
	minima <- apply(Y, 2, min)
	u <- NULL
	which_n <- NULL
	for (i in 1:n){
	indices <- 0:minima[i]
	u <- c(u, indices)
	which_n <- c(which_n,
	rep(i, length(indices))
	)
	}

	length_u <- length(u) # should be sum(minima) + n

	# construct matrix of indicators to ensure proper summation
	u_mat <- array(dim=c(n, length_u))
	for (i in 1:n){
	u_mat[i, ] <- ifelse(which_n == i, 1, 0)
	}

	# plot data
	library(epade)
	par(mai=c(1.5, 1, 1, 1))
	bar3d.ade(Y_1, Y_2, wall=3)

	cat(
	"
	data {
	int<lower=1> n; // number of observations
	vector<lower=0>[n] Y[2]; // observations
	int length_u; // number of u indices to get inner summand
	vector<lower=0>[length_u] u; // u indices
	int<lower=0> which_n[length_u]; // corresponding site indices
	matrix[n, length_u] u_mat; // matrix of indicators to facilitate summation
	}

	parameters {
	vector<lower=0>[3] theta; // expected values for component responses
	}

	transformed parameters {
	real theta_sum;
	vector[3] log_theta;
	vector[n] u_sum;
	vector[length_u] u_terms;

	theta_sum <- sum(theta);
	log_theta <- log(theta);

	for (i in 1:length_u){
	u_terms[i] <- exp(binomial_coefficient_log(Y[1, which_n[i]], u[i]) +
	binomial_coefficient_log(Y[2, which_n[i]], u[i]) +
	lgamma(u[i] + 1) +
	u[i] * (log_theta[3] - log_theta[1] - log_theta[2]));
	}
	u_sum <- u_mat * u_terms;
	}

	model {
	vector[n] loglik_el;
	real loglik;
	for (i in 1:n){
	loglik_el[i] <- Y[1, i] * log_theta[1] - lgamma(Y[1, i] + 1) +
	Y[2, i] * log_theta[2] - lgamma(Y[2, i] + 1) +
	log(u_sum[i]);
	}
	loglik <- sum(loglik_el) - n*theta_sum;
	increment_log_prob(loglik);
	}
	", file="mvpois.stan")


	# estimate parameters
	library(rstan)
	d <- list(Y=Y, n=n, length_u=length_u, u=u, u_mat=u_mat, which_n=which_n)
	init <- stan("mvpois.stan", data = d, chains=0)
	mod <- stan(fit=init,
	data=d,
	iter=2000, chains=3,
	pars=c("theta"))
	traceplot(mod, "theta", inc_warmup=F)


	# evaluate parameter recovery
	post <- extract(mod)
	par(mfrow=c(1, 3))
	for (i in 1:3){
	plot(density(post$theta[, i]))
	abline(v=theta[i], col="red")
	}
	par(mfrow=c(1, 1))

	library(scales)
	pairs(rbind(post$theta, theta),
	cex=c(rep(1, nrow(post$theta)), 2),
	pch=c(rep(1, nrow(post$theta)), 19),
	col=c(rep(alpha("black", .2), nrow(post$theta)), "red"),
	labels=c(expression(theta[1]), expression(theta[2]), expression(theta[3]))
	)