Last active
January 24, 2024 15:15
-
-
Save damianooldoni/d3b5617011e762f16efadadad6c08ee3 to your computer and use it in GitHub Desktop.
Options for downloading GBIF occurrence data with rgbif. It also shows how to find out which of the requested species are not present in the returned data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rgbif) | |
library(tidyverse) | |
# Taxa we are interested in: one scientific name per row.
taxa <- tribble(
  ~species_name,
  "Amelanchier lamarckii",
  "Mareca sibilatrix",
  "Ursus maritimus"
)
# Search filters shared by both download options below ----

# Countries we are interested in (ISO 3166-1 alpha-2 codes).
country <- c("BE", "LU")

# All types of occurrences, except `FOSSIL_SPECIMEN` and `LIVING_SPECIMEN`,
# which can have misleading location information (e.g. location of captive
# animals).
# NOTE(review): GBIF's basisOfRecord vocabulary has changed over time -- verify
# this list against the current enumeration before relying on it.
basis_of_record <- c(
  "OBSERVATION",
  "HUMAN_OBSERVATION",
  "MATERIAL_SAMPLE",
  "LITERATURE",
  "PRESERVED_SPECIMEN",
  "UNKNOWN",
  "MACHINE_OBSERVATION"
)

# Occurrences with a valid year: from 1000, up to now.
year_begin <- 1000
# Base R is used to get the current year so this line does not depend on
# lubridate being attached (`library(tidyverse)` attaches it only from
# tidyverse 2.0.0 onwards).
year_end <- as.numeric(format(Sys.Date(), "%Y"))

# Occurrences with valid geographic coordinates.
hasCoordinate <- TRUE

# We want only presences.
occ_status <- "PRESENT"
# Option 1: get data directly in your R session (attention: 100k occs max,
# hard limit).

# Look up a GBIF taxon key for every species name, taking the first suggestion
# returned by `name_suggest()`.
# NOTE(review): the first suggestion is not guaranteed to be an exact match
# for the name -- check the resulting keys before trusting them.
keys <- vapply(
  taxa$species_name,
  function(x) {
    hits <- name_suggest(x)$data
    # No suggestion at all: mark as missing instead of returning NULL, so the
    # result stays a plain integer vector.
    if (NROW(hits) == 0) NA_integer_ else as.integer(hits$key[1])
  },
  integer(1),
  USE.NAMES = FALSE
)

# Fail early and explicitly if a name could not be matched: a missing key
# would otherwise end up in the search query below.
if (anyNA(keys)) {
  stop(
    "No GBIF taxon key found for: ",
    paste(taxa$species_name[is.na(keys)], collapse = ", "),
    call. = FALSE
  )
}

# Add keys to taxa. Useful later.
taxa$key <- keys
# This can take a lot of time! Better to use it for previews (default `limit`
# argument value = 500) or if you expect a very low amount of data.
# In my case it is ~ 8k occs.
# Multiple values for one parameter are joined with ";"; the year range is
# passed as "begin,end".
occs <- occ_data(
  taxonKey = paste(taxa$key, collapse = ";"),
  country = paste(country, collapse = ";"),
  occurrenceStatus = occ_status, # Actually "PRESENT" is also the default value
  hasCoordinate = hasCoordinate, # use the setting defined above, not a literal
  basisOfRecord = paste(basis_of_record, collapse = ";"),
  year = paste(c(year_begin, year_end), collapse = ","),
  limit = 50000 # high enough, but less than 100k
)

# Data are in the `data` slot.
occs$data
# Which species from the original list are not present in our data?

# Step 1: pool every taxon key that occurs in the data (the key of the record
# itself, of its species and of its accepted taxon), without duplicates.
# Add `occs$data$genusKey` if one or more of your taxa are genera.
accepted_synonyms_subtaxa_keys <- unique(c(
  occs$data$taxonKey,
  occs$data$speciesKey,
  occs$data$acceptedTaxonKey
))

# Step 2: original keys that do not appear in that collection of keys.
absent_keys <- keys[is.na(match(keys, accepted_synonyms_subtaxa_keys))]

# Step 3: the species those keys belong to.
filter(taxa, .data$key %in% absent_keys)
# Option 2: trigger a download. Note: GBIF credentials are required.
# The predicates mirror the filters used in Option 1, including the
# occurrence status, so both options return presences only.
gbif_download_key <- occ_download(
  pred_in("taxonKey", keys),
  pred_in("country", country),
  pred_in("basisOfRecord", basis_of_record),
  pred_gte("year", year_begin),
  pred_lte("year", year_end),
  pred("hasCoordinate", hasCoordinate),
  pred("occurrenceStatus", occ_status),
  # The three arguments below are needed only if the credentials are not
  # saved in the R environment.
  user = rstudioapi::askForPassword("GBIF username"),
  pwd = rstudioapi::askForPassword("GBIF password"),
  email = rstudioapi::askForPassword("Email address for notification")
)
# Check status of download
metadata <- occ_download_meta(key = gbif_download_key)
metadata$key
metadata$status

# Poll once a minute while GBIF is still preparing or running the download.
# The metadata must be fetched again on every iteration, otherwise the status
# never changes and the loop never ends.
while (metadata$status %in% c("PREPARING", "RUNNING")) {
  message(paste("Download not ready. Status", metadata$status))
  Sys.sleep(time = 60)
  metadata <- occ_download_meta(key = gbif_download_key)
}

# GBIF reports a finished download as "SUCCEEDED"; "COMPLETED" is still
# accepted so the original check keeps working.
if (metadata$status %in% c("SUCCEEDED", "COMPLETED")) {
  # The download is ready: fetch the data.
  path_dwc_occs <- "./" # current directory: change it as you wish
  occ_download_get(
    metadata$key,
    path = path_dwc_occs # default: current directory
  )
} else {
  # Any other status (e.g. failed, killed, cancelled) is final: stop instead
  # of waiting forever.
  stop(
    "GBIF download ", metadata$key, " ended with status ", metadata$status,
    call. = FALSE
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment