Created
August 23, 2018 20:08
-
-
Save mbjoseph/a8970189b83aaab4c6776e4e8d3fde4d to your computer and use it in GitHub Desktop.
Bernoulli observation occupancy model in Stan
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rstan) | |
options(mc.cores = parallel::detectCores()) | |
rstan_options(auto_write = TRUE) | |
# Simulate true occupancy states ------------------------------------------ | |
# define a design matrix for site-level occupancy | |
n_site <- 200 | |
m_psi <- 3 # m_psi is the number of columns in the site level design matrix | |
X_psi <- matrix(c(rep(1, n_site), rnorm((m_psi - 1) * n_site)), | |
ncol = m_psi) | |
stopifnot(ncol(X_psi) >= 1) | |
# get occupancy probabilities | |
beta_psi <- rnorm(m_psi) | |
psi <- plogis(X_psi %*% beta_psi) | |
z <- rbinom(n_site, size = 1, prob = psi) | |
# Simulate imperfect detection/nondetection data -------------------------- | |
# determine number of surveys per site | |
n_survey <- rpois(n_site, lambda = 4) | |
total_surveys <- sum(n_survey) | |
# define a survey-level design matrix for detection probabilities | |
m_p <- 2 # m_p is the number of survey-level covariates | |
beta_p <- rnorm(m_p) | |
X_p <- matrix(c(rep(1, total_surveys), rnorm((m_p - 1) * total_surveys)), | |
ncol = m_p) | |
stopifnot(ncol(X_p) >= 1) | |
p <- plogis(X_p %*% beta_p) | |
# simulate observations | |
survey_df <- tibble(site = rep(1:n_site, n_survey)) %>% | |
mutate(y = rbinom(n = total_surveys, size = 1, prob = z[site] * p)) | |
# get start and end indices to extract slices of y for each site | |
start_idx <- rep(0, n_site) | |
end_idx <- rep(0, n_site) | |
for (i in 1:n_site) { | |
if (n_survey[i] > 0) { | |
site_indices <- which(survey_df$site == i) | |
start_idx[i] <- site_indices[1] | |
end_idx[i] <- site_indices[n_survey[i]] | |
} | |
} | |
# create vector of whether any positive observations were seen at each site | |
any_seen <- rep(0, n_site) | |
for (i in 1:n_site) { | |
if (n_survey[i] > 0) { | |
any_seen[i] <- max(survey_df$y[start_idx[i]:end_idx[i]]) | |
} | |
} | |
# Bundle data for Stan ---------------------------------------------------- | |
stan_d <- list(n_site = n_site, | |
m_psi = m_psi, | |
X_psi = X_psi, | |
total_surveys = total_surveys, | |
m_p = m_p, | |
X_p = X_p, | |
site = survey_df$site, | |
y = survey_df$y, | |
start_idx = start_idx, | |
end_idx = end_idx, | |
any_seen = any_seen, | |
n_survey = n_survey) | |
# Fit model --------------------------------------------------------------- | |
m_init <- stan_model('bernoulli-occupancy.stan') | |
m_fit <- sampling(m_init, data = stan_d, pars = c('beta_psi', 'beta_p')) | |
# compare fits to true values | |
m_fit | |
beta_psi | |
beta_p |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
// site-level occupancy covariates | |
int<lower = 1> n_site; | |
int<lower = 1> m_psi; | |
matrix[n_site, m_psi] X_psi; | |
// survey-level detection covariates | |
int<lower = 1> total_surveys; | |
int<lower = 1> m_p; | |
matrix[total_surveys, m_p] X_p; | |
// survey level information | |
int<lower = 1, upper = n_site> site[total_surveys]; | |
int<lower = 0, upper = 1> y[total_surveys]; | |
int<lower = 0, upper = total_surveys> start_idx[n_site]; | |
int<lower = 0, upper = total_surveys> end_idx[n_site]; | |
// summary of whether species is known to be present at each site | |
int<lower = 0, upper = 1> any_seen[n_site]; | |
// number of surveys at each site | |
int<lower = 0> n_survey[n_site]; | |
} | |
parameters { | |
vector[m_psi] beta_psi; | |
vector[m_p] beta_p; | |
} | |
transformed parameters { | |
vector[total_surveys] logit_p = X_p * beta_p; | |
vector[n_site] logit_psi = X_psi * beta_psi; | |
} | |
model { | |
vector[n_site] log_psi = log_inv_logit(logit_psi); | |
vector[n_site] log1m_psi = log1m_inv_logit(logit_psi); | |
beta_psi ~ normal(0, 1); | |
beta_p ~ normal(0, 1); | |
for (i in 1:n_site) { | |
if (n_survey[i] > 0) { | |
if (any_seen[i]) { | |
// site is occupied | |
target += log_psi[i] | |
+ bernoulli_logit_lpmf(y[start_idx[i]:end_idx[i]] | | |
logit_p[start_idx[i]:end_idx[i]]); | |
} else { | |
// site may or may not be occupied | |
target += log_sum_exp( | |
log_psi[i] + bernoulli_logit_lpmf(y[start_idx[i]:end_idx[i]] | | |
logit_p[start_idx[i]:end_idx[i]]), | |
log1m_psi[i] | |
); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment