# Get large amounts of GBIF.org occurrence data
#Download occurrence data from GBIF - for few to many spp.
#3/25/2019 KG Turner with assistance from S. Chamberlain, rOpenSci.org | |
#Register at gbif.org. You will need to use this email address, user name, and password in the following script. | |
#NB: DON'T COMMIT YOUR PASSWORDS. | |
# R version 3.5.3 (2019-03-11) | |
library(rgbif) #1.2.0 | |
####make spp. list#### | |
# Example species list (scientific names)
smallList <- c(
  "Chorispora tenella",
  "Centaurea diffusa",
  "Lupinus texensis"
)
# For each name, keep the key of the first name_suggest() result
smallList_key <- sapply(
  smallList,
  function(sp_name) name_suggest(sp_name)$key[1],
  USE.NAMES = FALSE
)
####GBIF request for a single list (at most 100 spp)#### | |
#you will need to register with GBIF to get username etc. | |
# Submit one download request covering every key in smallList_key, limited to
# preserved-specimen or literature records that have coordinates and no
# geospatial issue flagged. Replace the "***" placeholders with your own GBIF
# credentials before running.
occ_download(
  sprintf("taxonKey = %s", paste(smallList_key, collapse = ",")),
  "basisOfRecord = PRESERVED_SPECIMEN,LITERATURE",
  "hasCoordinate = true",
  "hasGeospatialIssue = false",
  user = "***",
  pwd = "***",
  email = "***"
)
###for 3 species testlist: | |
# <<gbif download>> | |
# Username: *** | |
# E-mail: *** | |
# Download key: 0002607-190320150433242 | |
#check status of request | |
# Query GBIF for the current state of a download request. The key is the
# "Download key" reported for the 3-species test list, so the call matches the
# example metadata output shown below it.
occ_download_meta(key = "0002607-190320150433242")
# <<gbif download metadata>> | |
# Status: PREPARING | |
# Format: DWCA | |
# Download key: 0002607-190320150433242 | |
# Created: 2019-03-25T20:15:22.624+0000 | |
# Modified: 2019-03-25T20:15:43.495+0000 | |
# Download link: http://api.gbif.org/v1/occurrence/download/request/0002607-190320150433242.zip | |
# Total records: 1706 | |
# Request: | |
# type: and | |
# predicates: | |
# > type: or | |
# predicates: | |
# - type: equals, key: TAXON_KEY, value: 3044349 | |
# - type: equals, key: TAXON_KEY, value: 3128962 | |
# - type: equals, key: TAXON_KEY, value: 2963880 | |
# > type: or | |
# predicates: | |
# - type: equals, key: BASIS_OF_RECORD, value: PRESERVED_SPECIMEN | |
# - type: equals, key: BASIS_OF_RECORD, value: LITERATURE | |
# > type: equals, key: HAS_COORDINATE, value: true | |
# > type: equals, key: HAS_GEOSPATIAL_ISSUE, value: false | |
####For larger numbers of species...#### | |
#break up long species key lists into ~100 spp. lists | |
#GBIF restrictions: URL call limit 12K characters. Limit 3 requests at a time.
####large vector splitting function | |
library(plyr) #1.8.4 | |
# Split a vector into consecutive chunks of at most `n` elements.
#
# Args:
#   d: vector (or list) to split; may be empty.
#   n: maximum chunk size; a single number >= 1.
#
# Returns:
#   An unnamed list of sub-vectors of `d`, in the original order. Every chunk
#   holds `n` elements except possibly the last, which holds the remainder.
#   An empty `d` gives an empty list.
#
# Uses base R only: positions 1..length(d) are grouped into blocks of `n` and
# `d` is split on that grouping. The earlier hand-rolled version could return a
# last chunk of n + 1 elements (whenever length(d) %% n == 1) and failed for
# inputs of length 0 or 1; this version keeps every chunk within `n`.
plyrChunks <- function(d, n) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 1)
  if (length(d) == 0L) {
    return(list())
  }
  # Zero-based block number of each position: 0 for 1..n, 1 for n+1..2n, ...
  chunk_id <- (seq_along(d) - 1L) %/% n
  # split() names the pieces by block number; drop the names so callers get a
  # plain positional list
  unname(split(d, chunk_id))
}
#plyrChunks(d = vector, n = size_of_chunks) | |
# Demo: split the integers 1..2030 into chunks of 100
d <- seq_len(2030)
n <- 100
chunkList <- plyrChunks(d = d, n = n) # list of smaller vectors
# Real use: break a key vector of more than 300 spp. into ~100 spp. sub-lists.
# NOTE(review): bigList_key is not defined in this script; presumably it is
# built the same way as smallList_key from a longer species list - confirm.
chunkList <- plyrChunks(d = bigList_key, n = 100)
####GBIF request queueing function for >300 spp.####
#for fewer than 300 spp., it is faster to call them individually as above
#GBIF will only accept three requests from a single user at a time.
#This function queues your requests and submits #4 when one of #1-3 is done.
#For 400 - 500 spp., specify occ_download() calls within the occ_download_queue() call
# Example queue: four separate download requests handed to a single
# occ_download_queue() call; the result is stored in `output`.
# Replace the "***" placeholders with your own GBIF credentials.
output <- occ_download_queue(
  occ_download(
    "taxonKey = 3119195",
    "year = 1976",
    user = "***",
    pwd = "***",
    email = "***"
  ),
  occ_download(
    "taxonKey = 3119195",
    "year = 2001",
    "month <= 8",
    user = "***",
    pwd = "***",
    email = "***"
  ),
  occ_download(
    "country = NZ",
    "year = 1999",
    "month = 3",
    user = "***",
    pwd = "***",
    email = "***"
  ),
  occ_download(
    "catalogNumber = Bird.27847588",
    "year = 1998",
    "month = 2",
    user = "***",
    pwd = "***",
    email = "***"
  )
)
# Fetch the data for every request held in `output`
lapply(X = output, FUN = occ_download_get)
####run gbif queueing for really large spp. list#### | |
#loop through many occ_download() calls | |
#input name of list of short key vectors, i.e. chunkList above | |
# Submit one GBIF download request per chunk of taxon keys in chunkList and
# collect every result.
#
# Two fixes relative to the earlier loop:
#  * it iterates over all chunks actually present in chunkList; indexing with
#    chunkList[1:n] used the chunk size `n`, which runs past the end of the
#    list and sends requests with an empty taxonKey;
#  * results from every iteration are kept, so the metadata lookup at the end
#    covers all chunks rather than only the last one.
#
# Replace the "***" placeholders with your own GBIF credentials.
queued <- vector("list", length(chunkList))
for (chunk_idx in seq_along(chunkList)) {
  # `i` holds the taxon keys of the current chunk; it is referenced inside the
  # occ_download() call below
  i <- chunkList[[chunk_idx]]
  queued[[chunk_idx]] <- occ_download_queue(
    occ_download(
      paste0("taxonKey = ", paste0(i, collapse = ",")),
      "basisOfRecord = PRESERVED_SPECIMEN,LITERATURE",
      "hasCoordinate = true",
      "hasGeospatialIssue = false",
      user = "***",
      pwd = "***",
      email = "***"
    )
  )
  print(queued[[chunk_idx]])
}
# Flatten the per-chunk results into one list of download requests
output <- do.call(c, queued)
lapply(output, occ_download_meta)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment