Skip to content

Instantly share code, notes, and snippets.

@damianooldoni
Last active January 24, 2024 15:15
Show Gist options
  • Save damianooldoni/d3b5617011e762f16efadadad6c08ee3 to your computer and use it in GitHub Desktop.
Save damianooldoni/d3b5617011e762f16efadadad6c08ee3 to your computer and use it in GitHub Desktop.
Options for downloading GBIF occurrence data with rgbif, including how to find out which species from the input list have no occurrences in the retrieved data.
library(rgbif)
library(tidyverse)
# Taxa we are interested in: one scientific name per row.
taxa <- tribble(
  ~species_name,
  "Amelanchier lamarckii",
  "Mareca sibilatrix",
  "Ursus maritimus"
)
# Query settings shared by both download options below.

# Countries we are interested in (ISO 3166-1 alpha-2 codes).
country <- c("BE", "LU")

# All types of occurrences, except `FOSSIL_SPECIMEN` and `LIVING_SPECIMEN`,
# which can have misleading location information (e.g. location of captive
# animals).
# NOTE(review): GBIF has revised its basisOfRecord vocabulary over time;
# confirm that all the values below are still accepted by the API.
basis_of_record <- c(
  "OBSERVATION",
  "HUMAN_OBSERVATION",
  "MATERIAL_SAMPLE",
  "LITERATURE",
  "PRESERVED_SPECIMEN",
  "UNKNOWN",
  "MACHINE_OBSERVATION"
)

# Occurrences with a valid year: from 1000 up to the current year.
year_begin <- 1000
# Base R only: `year()` comes from lubridate, which is not attached by
# `library(tidyverse)` in tidyverse versions older than 2.0.0.
year_end <- as.numeric(format(Sys.Date(), "%Y"))

# Occurrences with valid geographic coordinates.
hasCoordinate <- TRUE

# We want only presences.
occ_status <- "PRESENT"
# Option 1: get data directly in your R session (attention: hard limit of
# 100k occurrences).

# Look up the GBIF key of the first suggestion for every name. A name without
# any suggestion gets `NA` instead of silently turning the result into a list
# (which is what `sapply()` does when one lookup returns `NULL`).
taxa$key <- vapply(
  taxa$species_name,
  function(x) {
    # `[[` returns NULL quietly when the (empty) result has no `key` column.
    suggested_keys <- name_suggest(x)$data[["key"]]
    if (length(suggested_keys) == 0) {
      NA_integer_
    } else {
      as.integer(suggested_keys[1])
    }
  },
  FUN.VALUE = integer(1),
  USE.NAMES = FALSE
)

# Report the names GBIF could not match, and keep only valid keys for querying.
unmatched_names <- taxa$species_name[is.na(taxa$key)]
if (length(unmatched_names) > 0) {
  warning(
    "No GBIF match found for: ", paste(unmatched_names, collapse = ", "),
    call. = FALSE
  )
}
keys <- taxa$key[!is.na(taxa$key)]
if (length(keys) == 0) {
  stop("None of the names could be matched to a GBIF key.", call. = FALSE)
}

# It could take a lot of time! Better to use it for previews (default `limit`
# argument value = 500) or if you expect a very low amount of data. In my case
# it is ~ 8k occurrences.
# Values joined by ";" are sent as one single request; "begin,end" is a range.
occs <- occ_data(
  taxonKey = paste0(keys, collapse = ";"),
  country = paste0(country, collapse = ";"),
  occurrenceStatus = occ_status, # Actually "PRESENT" is also the default value
  hasCoordinate = hasCoordinate,
  basisOfRecord = paste0(basis_of_record, collapse = ";"),
  year = paste(c(year_begin, year_end), collapse = ","),
  limit = 50000 # high enough, but less than 100k
)

# Data are in the `data` slot.
occs$data

# Which species from the original list are not present in our data?
# First, get all taxon keys we have in the data.
accepted_synonyms_subtaxa_keys <- unique(
  c(
    occs$data$taxonKey,
    occs$data$speciesKey,
    occs$data$acceptedTaxonKey
    # add genusKey if you are providing one or more genera as taxa
  )
)

# Original keys not present in our collection of keys from the data.
absent_keys <- keys[!keys %in% accepted_synonyms_subtaxa_keys]

# And so the species: those without occurrences, plus those GBIF could not
# match to a key at all.
taxa %>% filter(is.na(.data$key) | .data$key %in% absent_keys)
# Option 2: trigger a download. Note: GBIF credentials are required.
# The predicates mirror the filters used in option 1, including the
# occurrence status (presences only).
gbif_download_key <- occ_download(
  pred_in("taxonKey", keys),
  pred_in("country", country),
  pred_in("basisOfRecord", basis_of_record),
  pred_gte("year", year_begin),
  pred_lte("year", year_end),
  pred("hasCoordinate", hasCoordinate),
  pred("occurrenceStatus", occ_status),
  # The three arguments below are needed if not saved in the R environment.
  user = rstudioapi::askForPassword("GBIF username"),
  pwd = rstudioapi::askForPassword("GBIF password"),
  email = rstudioapi::askForPassword("Email address for notification")
)

# Check status of download.
metadata <- occ_download_meta(key = gbif_download_key)
metadata$key
metadata$status

# Poll GBIF until the download leaves the pending states. The metadata must be
# fetched again on every iteration, otherwise the status never changes and the
# loop never ends.
pending_statuses <- c("PREPARING", "RUNNING", "SUSPENDED")
while (metadata$status %in% pending_statuses) {
  message(paste("Download not ready. Status", metadata$status))
  Sys.sleep(time = 60)
  metadata <- occ_download_meta(key = gbif_download_key)
}

# GBIF reports a finished download as "SUCCEEDED"; any other final status
# (e.g. "FAILED", "KILLED", "CANCELLED") means there is nothing to fetch.
if (metadata$status == "SUCCEEDED") {
  path_dwc_occs <- "./" # current directory: change it as you wish
  occ_download_get(
    metadata$key,
    path = path_dwc_occs # default: current directory
  )
} else {
  stop(
    "GBIF download ", metadata$key, " ended with status ", metadata$status,
    call. = FALSE
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment