Skip to content

Instantly share code, notes, and snippets.

@njtierney
Last active November 17, 2023 03:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save njtierney/79520d2b265c25ff2da21932cb5f1503 to your computer and use it in GitHub Desktop.
Save njtierney/79520d2b265c25ff2da21932cb5f1503 to your computer and use it in GitHub Desktop.
n.pixel <- 1000
n.other.spec <- 20
spec.names <- letters[1:(n.other.spec+1)]

## Geographic covariates affecting species abundance
x <- matrix(rnorm(2*n.pixel),nrow=n.pixel)

## Geographic covariate causing selection bias (correlated with x1)
z <- scale(x[,1] + rnorm(n.pixel)*sqrt(.95^(-2)-1))

## Intercept and slopes for abundance rate
alpha <- c(-2,.3*rnorm(n.other.spec)-2)
beta <- cbind(c(1,-.5),matrix(rnorm(n.other.spec*2)/2,2))

## Intercept and slope for selection bias
gamma <- -4
delta <- -.3

## PO data is impacted by selection bias
po.count <- matrix(rpois(n.pixel*(n.other.spec+1),
                         lambda=exp(rep(alpha,each=n.pixel) +
                                      x %*% beta + gamma + c(z) * delta)),
                   n.pixel,dimnames=list(NULL,spec.names))

PO.list <- lapply(spec.names,
                  function(sp)
                    data.frame(x1=x[,1],
                               x2=x[,2],
                               z=z)[rep(1:n.pixel,po.count[,sp]),])
names(PO.list) <- spec.names
BG <- data.frame(x1=x[1,],x2=x[,2],z=z)

## PA data is unbiased
n.sites <- 500
pa.samp <- sample(1:n.pixel,n.sites)
pa.count <- matrix(rpois(n.sites*(n.other.spec+1),
                         lambda=exp(rep(alpha,each=n.sites) +
                                      x[pa.samp,] %*% beta)),
                   n.sites,dimnames=list(NULL,spec.names))

## PA data doesn't need biasing covariates
PA <- cbind(data.frame(x1=x[pa.samp,1],
                       x2=x[pa.samp,2]),
            as.data.frame(1*(pa.count>0)))

# combine and record true parameters
true_parameters <- list(
  alpha = alpha,
  beta = beta,
  gamma = gamma,
  delta = delta
)

library(multispeciesPP)

full.mod <- multispeciesPP(sdm.formula = ~x1 + x2,
                           bias.formula = ~z,
                           PA = PA,
                           PO = PO.list,
                           BG = BG,
                           region.size=n.pixel)

library(greta)
#> 
#> Attaching package: 'greta'
#> The following objects are masked from 'package:stats':
#> 
#>     binomial, cov2cor, poisson
#> The following objects are masked from 'package:base':
#> 
#>     %*%, apply, backsolve, beta, chol2inv, colMeans, colSums, diag,
#>     eigen, forwardsolve, gamma, identity, rowMeans, rowSums, sweep,
#>     tapply

# numbers of covariates in use
n_cov_abund <- ncol(x)
n_cov_bias <- ncol(z)

# number of species to model
n_species <- n.other.spec + 1

# tidy up PA data
pa <- 1 * (pa.count > 0)

# define parameters with normal priors, matching the ridge regression setup in
# multispeciesPP defaults
penalty.l2.sdm <- penalty.l2.bias <- 0.1
penalty.l2.intercept <- 1e-4

intercept_sd <- sqrt(1 / penalty.l2.intercept)
beta_sd <- sqrt(1 / penalty.l2.sdm)
delta_sd <- sqrt(1 / penalty.l2.bias)

# intercept and shared slope for selection bias
gamma <- normal(0, intercept_sd, dim = n_species)
#> ℹ Initialising python and checking dependencies, this may take a moment.
#> ✔ Initialising python and checking dependencies ... done!
#> 
delta <- normal(0, delta_sd, dim = c(n_cov_bias))

# intercept and slopes for abundance rate
alpha <- normal(0, intercept_sd, dim = n_species)
beta <- normal(0, beta_sd, dim = c(n_cov_abund, n_species))

# log rates across all sites
log_lambda <- sweep(x %*% beta, 2, alpha, FUN = "+")
# can easily replace this model with something more interesting, like a low-rank
# GP on covariate space or something mechanistic

# bias across pixels (shared coefficient) and species (different intercepts)
log_bias_coef <- sweep(zeros(n.pixel, n_species), 1, z %*% delta, FUN = "+")
log_bias <- sweep(log_bias_coef, 2, gamma, FUN = "+")

# rates across all sites and species
lambda <- exp(log_lambda)
bias <- exp(log_bias)

# define likelihoods

# compute probability of presence (and detection) at the PA sites, assuming
# area/effort of 1 in all these sites, for identifiability
area_pa <- 1
p <- icloglog(log_lambda[pa.samp, ] + log(area_pa))
distribution(pa) <- bernoulli(p)

# compute (biased) expected numbers of presence-only observations across all
# presence and background sites, assuming presence-only count aggregation area
# of 1 (as in multispeciesPP definition). Not that when these are all the same,
# this value only changes all the gamma parameters by a fixed amount, and these
# are not the parameters of interest
area_po <- 1
# combine on log scale, for less risk of numerical overflow (in greta
# likelihood)
po_rate <- exp(log_lambda + log_bias + log(area_po))
distribution(po.count) <- poisson(po_rate)

# define and fit the model by MAP and MCMC
m <- model(alpha, beta, gamma, delta)
map_adam <- opt(m, optimiser = adam(), max_iterations = 500)
map_adam
#> $par
#> $par$alpha
#>  [1] -2.158414 -1.717301 -1.655889 -2.369948 -2.269595 -2.655683 -2.355184
#>  [8] -1.374276 -1.875163 -2.214878 -2.906947 -1.929629 -2.141988 -1.882339
#> [15] -2.410990 -1.683956 -2.271506 -1.819378 -1.658445 -2.145785 -1.391807
#> 
#> $par$beta
#>            [,1]        [,2]       [,3]         [,4]       [,5]        [,6]
#> [1,]  1.1747439 -0.06816036  0.6905093  0.008544741 -0.1996454 -0.89537121
#> [2,] -0.4787865  0.10546636 -0.5209281 -0.981803402  0.6954817 -0.05671385
#>           [,7]       [,8]       [,9]     [,10]      [,11]      [,12]      [,13]
#> [1,] 0.4047998  0.2167707 -0.2017651 0.6093427  0.9249531 -0.4087397 -0.5599243
#> [2,] 0.1798583 -0.7412663  0.2191588 1.0943162 -0.1140470  0.4200365  0.6116124
#>          [,14]     [,15]       [,16]      [,17]       [,18]      [,19]
#> [1,] 0.3235006 0.8430573 -0.84651819 -0.8654151  0.57052270  0.9557502
#> [2,] 0.7922393 0.2115289  0.08529118 -0.7333018 -0.05448101 -0.5329787
#>            [,20]       [,21]
#> [1,] 0.007520128  0.47184668
#> [2,] 0.011473701 -0.01468451
#> 
#> $par$gamma
#>  [1] -3.668019 -4.330503 -4.242165 -3.525359 -3.577892 -3.894189 -3.433758
#>  [8] -3.523621 -4.163104 -3.511888 -3.715117 -4.194679 -4.231803 -3.604484
#> [15] -3.871895 -4.127582 -4.013269 -3.572987 -4.109916 -3.507235 -4.050488
#> 
#> $par$delta
#> [1] -0.3998419
#> 
#> 
#> $value
#> [1] 4804.524
#> 
#> $iterations
#> [1] 500
#> 
#> $convergence
#> [1] 1
map_grad <- opt(m, optimiser = gradient_descent(), max_iterations = 500)
#> Error in `self$run_minimiser()` at greta/R/optimiser_class.R:71:6:
#> ! Detected numerical overflow during optimisation
#> Please try one of the following:
#> ℹ Using different initial values
#> ℹ Using another optimiser. (E.g., instead of `gradient_descent()`, try
#>   `adam()`)
#> Backtrace:
#>      ▆
#>   1. └─greta::opt(m, optimiser = gradient_descent(), max_iterations = 500)
#>   2.   ├─base::with(...) at greta/R/inference.R:893:2
#>   3.   ├─reticulate:::with.python.builtin.object(...)
#>   4.   │ ├─base::tryCatch(...)
#>   5.   │ │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>   6.   │ └─base::force(expr)
#>   7.   └─object$run() at greta/R/inference.R:916:4
#>   8.     └─self$run_minimiser(self$free_state) at greta/R/optimiser_class.R:71:6
#>   9.       └─cli::cli_abort(...) at greta/R/optimiser_class.R:163:12
#>  10.         └─rlang::abort(...)
map_grad
#> Error in eval(expr, envir, enclos): object 'map_grad' not found

Created on 2023-11-17 with reprex v2.0.2

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.2 (2023-10-31)
#>  os       macOS Sonoma 14.0
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Australia/Hobart
#>  date     2023-11-17
#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package        * version    date (UTC) lib source
#>  abind            1.4-5      2016-07-21 [1] CRAN (R 4.3.0)
#>  backports        1.4.1      2021-12-13 [1] CRAN (R 4.3.0)
#>  base64enc        0.1-3      2015-07-28 [1] CRAN (R 4.3.0)
#>  callr            3.7.3      2022-11-02 [1] CRAN (R 4.3.0)
#>  cli              3.6.1      2023-03-23 [1] CRAN (R 4.3.0)
#>  coda             0.19-4     2020-09-30 [1] CRAN (R 4.3.0)
#>  codetools        0.2-19     2023-02-01 [1] CRAN (R 4.3.2)
#>  crayon           1.5.2      2022-09-29 [1] CRAN (R 4.3.0)
#>  digest           0.6.33     2023-07-07 [1] CRAN (R 4.3.0)
#>  evaluate         0.23       2023-11-01 [1] CRAN (R 4.3.1)
#>  fansi            1.0.5      2023-10-08 [1] CRAN (R 4.3.1)
#>  fastmap          1.1.1      2023-02-24 [1] CRAN (R 4.3.0)
#>  fs               1.6.3      2023-07-20 [1] CRAN (R 4.3.0)
#>  future           1.33.0     2023-07-01 [1] CRAN (R 4.3.0)
#>  globals          0.16.2     2022-11-21 [1] CRAN (R 4.3.0)
#>  glue             1.6.2      2022-02-24 [1] CRAN (R 4.3.0)
#>  greta          * 0.4.3.9000 2023-11-17 [1] local
#>  hms              1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
#>  htmltools        0.5.7      2023-11-03 [1] CRAN (R 4.3.1)
#>  jsonlite         1.8.7      2023-06-29 [1] CRAN (R 4.3.0)
#>  knitr            1.45       2023-10-30 [1] CRAN (R 4.3.1)
#>  lattice          0.21-9     2023-10-01 [1] CRAN (R 4.3.2)
#>  lifecycle        1.0.3      2022-10-07 [1] CRAN (R 4.3.0)
#>  listenv          0.9.0      2022-12-16 [1] CRAN (R 4.3.0)
#>  magrittr         2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
#>  Matrix           1.6-1.1    2023-09-18 [1] CRAN (R 4.3.2)
#>  multispeciesPP * 1.0        2023-11-09 [1] Github (wfithian/multispeciespp@abdbd6f)
#>  parallelly       1.36.0     2023-05-26 [1] CRAN (R 4.3.0)
#>  pillar           1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
#>  pkgconfig        2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
#>  png              0.1-8      2022-11-29 [1] CRAN (R 4.3.0)
#>  prettyunits      1.2.0      2023-09-24 [1] CRAN (R 4.3.1)
#>  processx         3.8.2      2023-06-30 [1] CRAN (R 4.3.0)
#>  progress         1.2.2      2019-05-16 [1] CRAN (R 4.3.0)
#>  ps               1.7.5      2023-04-18 [1] CRAN (R 4.3.0)
#>  purrr            1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
#>  R.cache          0.16.0     2022-07-21 [1] CRAN (R 4.3.0)
#>  R.methodsS3      1.8.2      2022-06-13 [1] CRAN (R 4.3.0)
#>  R.oo             1.25.0     2022-06-12 [1] CRAN (R 4.3.0)
#>  R.utils          2.12.2     2022-11-11 [1] CRAN (R 4.3.0)
#>  R6               2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
#>  Rcpp             1.0.11     2023-07-06 [1] CRAN (R 4.3.0)
#>  reprex           2.0.2      2022-08-17 [1] CRAN (R 4.3.0)
#>  reticulate       1.34.0     2023-10-12 [1] CRAN (R 4.3.1)
#>  rlang            1.1.1      2023-04-28 [1] CRAN (R 4.3.0)
#>  rmarkdown        2.25       2023-09-18 [1] CRAN (R 4.3.1)
#>  rstudioapi       0.15.0     2023-07-07 [1] CRAN (R 4.3.0)
#>  sessioninfo      1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
#>  styler           1.9.1      2023-03-04 [1] CRAN (R 4.3.0)
#>  tensorflow       2.14.0     2023-09-28 [1] CRAN (R 4.3.1)
#>  tfautograph      0.3.2      2021-09-17 [1] CRAN (R 4.3.0)
#>  tfruns           1.5.1      2022-09-05 [1] CRAN (R 4.3.0)
#>  utf8             1.2.4      2023-10-22 [1] CRAN (R 4.3.1)
#>  vctrs            0.6.4      2023-10-12 [1] CRAN (R 4.3.1)
#>  whisker          0.4.1      2022-12-05 [1] CRAN (R 4.3.0)
#>  withr            2.5.2      2023-10-30 [1] CRAN (R 4.3.1)
#>  xfun             0.41       2023-11-01 [1] CRAN (R 4.3.1)
#>  yaml             2.3.7      2023-01-23 [1] CRAN (R 4.3.0)
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
#> 
#> ─ Python configuration ───────────────────────────────────────────────────────
#>  python:         /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/bin/python
#>  libpython:      /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/lib/libpython3.8.dylib
#>  pythonhome:     /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2:/Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2
#>  version:        3.8.15 | packaged by conda-forge | (default, Nov 22 2022, 08:49:06)  [Clang 14.0.6 ]
#>  numpy:          /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/lib/python3.8/site-packages/numpy
#>  numpy_version:  1.23.2
#>  tensorflow:     /Users/nick/Library/r-miniconda-arm64/envs/greta-env-tf2/lib/python3.8/site-packages/tensorflow
#>  
#>  NOTE: Python version was forced by use_python() function
#> 
#> ──────────────────────────────────────────────────────────────────────────────
@njtierney
Copy link
Author

I was able to get this to work locally with adam by setting the seed like so:

set.seed(2023-11-17)
map_adam <- opt(m, optimiser = adam(), max_iterations = 500)
map_adam
> map_adam
$par
$par$alpha
 [1] -1.892168 -2.415316 -1.722257 -1.917637 -2.243829 -1.711220 -1.649922 -1.910114 -2.572665
[10] -2.172112 -1.377770 -1.831335 -1.792131 -2.283858 -2.149486 -1.533865 -2.331798 -1.840868
[19] -1.738571 -2.308914 -1.951792

$par$beta
           [,1]        [,2]       [,3]       [,4]        [,5]       [,6]       [,7]        [,8]
[1,]  1.0144221  0.04249253 -0.6711081 -0.3332472  0.40619519 -0.5696343 -0.6677961 -0.27307830
[2,] -0.4283954 -0.32406999 -1.2319437 -0.2677327 -0.06442352  0.2540046 -0.1244514 -0.06542376
           [,9]      [,10]     [,11]      [,12]      [,13]      [,14]      [,15]      [,16]
[1,] -0.4393974 0.02858945 0.1860126 -0.3419654 -0.4106163 -0.5082233 -0.3933723 0.05038508
[2,]  0.5111339 0.45749115 0.3049356 -0.1637799  0.4381465  0.3076999 -0.2278838 0.69123770
          [,17]       [,18]      [,19]      [,20]       [,21]
[1,]  0.8678443 -0.04536905 -0.1540227  0.4809335  0.78672266
[2,] -1.2923567 -0.16217939  1.0650733 -0.4453859 -0.02999493

$par$gamma
 [1] -3.387107 -3.434340 -3.888169 -3.826441 -3.973671 -4.165250 -3.947936 -4.138011 -3.783865
[10] -3.481507 -3.606825 -4.023796 -4.694467 -3.965478 -3.621374 -3.660128 -3.713810 -3.308112
[19] -4.151255 -3.169481 -3.725449

$par$delta
[1] -0.2740553


$value
[1] 4961.417

$iterations
[1] 500

$convergence
[1] 1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment