Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Bernoulli observation occupancy model in Stan
library(tidyverse)
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
# Simulate true occupancy states ------------------------------------------
# define a design matrix for site-level occupancy
n_site <- 200
m_psi <- 3 # m_psi is the number of columns in the site level design matrix
X_psi <- matrix(c(rep(1, n_site), rnorm((m_psi - 1) * n_site)),
ncol = m_psi)
stopifnot(ncol(X_psi) >= 1)
# get occupancy probabilities
beta_psi <- rnorm(m_psi)
psi <- plogis(X_psi %*% beta_psi)
z <- rbinom(n_site, size = 1, prob = psi)
# Simulate imperfect detection/nondetection data --------------------------
# determine number of surveys per site
n_survey <- rpois(n_site, lambda = 4)
total_surveys <- sum(n_survey)
# define a survey-level design matrix for detection probabilities
m_p <- 2 # m_p is the number of survey-level covariates
beta_p <- rnorm(m_p)
X_p <- matrix(c(rep(1, total_surveys), rnorm((m_p - 1) * total_surveys)),
ncol = m_p)
stopifnot(ncol(X_p) >= 1)
p <- plogis(X_p %*% beta_p)
# simulate observations
survey_df <- tibble(site = rep(1:n_site, n_survey)) %>%
mutate(y = rbinom(n = total_surveys, size = 1, prob = z[site] * p))
# get start and end indices to extract slices of y for each site
start_idx <- rep(0, n_site)
end_idx <- rep(0, n_site)
for (i in 1:n_site) {
if (n_survey[i] > 0) {
site_indices <- which(survey_df$site == i)
start_idx[i] <- site_indices[1]
end_idx[i] <- site_indices[n_survey[i]]
}
}
# create vector of whether any positive observations were seen at each site
any_seen <- rep(0, n_site)
for (i in 1:n_site) {
if (n_survey[i] > 0) {
any_seen[i] <- max(survey_df$y[start_idx[i]:end_idx[i]])
}
}
# Bundle data for Stan ----------------------------------------------------
stan_d <- list(n_site = n_site,
m_psi = m_psi,
X_psi = X_psi,
total_surveys = total_surveys,
m_p = m_p,
X_p = X_p,
site = survey_df$site,
y = survey_df$y,
start_idx = start_idx,
end_idx = end_idx,
any_seen = any_seen,
n_survey = n_survey)
# Fit model ---------------------------------------------------------------
m_init <- stan_model('bernoulli-occupancy.stan')
m_fit <- sampling(m_init, data = stan_d, pars = c('beta_psi', 'beta_p'))
# compare fits to true values
m_fit
beta_psi
beta_p
data {
// site-level occupancy covariates
int<lower = 1> n_site;
int<lower = 1> m_psi;
matrix[n_site, m_psi] X_psi;
// survey-level detection covariates
int<lower = 1> total_surveys;
int<lower = 1> m_p;
matrix[total_surveys, m_p] X_p;
// survey level information
int<lower = 1, upper = n_site> site[total_surveys];
int<lower = 0, upper = 1> y[total_surveys];
int<lower = 0, upper = total_surveys> start_idx[n_site];
int<lower = 0, upper = total_surveys> end_idx[n_site];
// summary of whether species is known to be present at each site
int<lower = 0, upper = 1> any_seen[n_site];
// number of surveys at each site
int<lower = 0> n_survey[n_site];
}
parameters {
vector[m_psi] beta_psi;
vector[m_p] beta_p;
}
transformed parameters {
vector[total_surveys] logit_p = X_p * beta_p;
vector[n_site] logit_psi = X_psi * beta_psi;
}
model {
vector[n_site] log_psi = log_inv_logit(logit_psi);
vector[n_site] log1m_psi = log1m_inv_logit(logit_psi);
beta_psi ~ normal(0, 1);
beta_p ~ normal(0, 1);
for (i in 1:n_site) {
if (n_survey[i] > 0) {
if (any_seen[i]) {
// site is occupied
target += log_psi[i]
+ bernoulli_logit_lpmf(y[start_idx[i]:end_idx[i]] |
logit_p[start_idx[i]:end_idx[i]]);
} else {
// site may or may not be occupied
target += log_sum_exp(
log_psi[i] + bernoulli_logit_lpmf(y[start_idx[i]:end_idx[i]] |
logit_p[start_idx[i]:end_idx[i]]),
log1m_psi[i]
);
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment