Get large amounts of GBIF.org occurrence data
#Download occurrence data from GBIF - for few to many spp.
#3/25/2019 KG Turner, with assistance from S. Chamberlain, rOpenSci.org
#Register at gbif.org. You will need that email address, username, and password in the following script.
#NB: DON'T COMMIT YOUR PASSWORDS.
# R version 3.5.3 (2019-03-11)
library(rgbif) #1.2.0
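####optional: keep credentials out of the script####
#A minimal sketch using base R only: store GBIF_USER, GBIF_PWD, and GBIF_EMAIL in
#your ~/.Renviron file and read them at runtime instead of typing passwords inline.
#(These variable names are this sketch's convention, not anything rgbif requires.)
gbif_user  <- Sys.getenv("GBIF_USER")
gbif_pwd   <- Sys.getenv("GBIF_PWD")
gbif_email <- Sys.getenv("GBIF_EMAIL")
#then pass user = gbif_user, pwd = gbif_pwd, email = gbif_email to occ_download()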
####make spp. list####
smallList <- c("Chorispora tenella", "Centaurea diffusa", "Lupinus texensis")
#resolve names (including synonyms) to GBIF taxon keys - takes the first suggested match
smallList_key <- sapply(smallList, function(x) name_suggest(x)$key[1], USE.NAMES = FALSE)
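#An alternative sketch: name_backbone() returns the single best GBIF backbone match,
#which can be safer than taking the first name_suggest() hit unchecked. (usageKey is
#the backbone match's key field; spot-check matches for critical species.)
smallList_key2 <- sapply(smallList, function(x) name_backbone(name = x)$usageKey,
                         USE.NAMES = FALSE)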
####GBIF request for a single list (at most 100 spp.)####
#you will need to register with GBIF to get a username etc.
occ_download(paste0("taxonKey = ", paste0(smallList_key, collapse = ",")),
             "basisOfRecord = PRESERVED_SPECIMEN,LITERATURE",
             "hasCoordinate = true",
             "hasGeospatialIssue = false",
             user = "***",
             pwd = "***",
             email = "***"
)
###for the 3-species test list:
# <<gbif download>>
# Username: ***
# E-mail: ***
# Download key: 0002607-190320150433242
#check status of request, using the download key returned above
occ_download_meta(key = "0002607-190320150433242")
# <<gbif download metadata>>
# Status: PREPARING
# Format: DWCA
# Download key: 0002607-190320150433242
# Created: 2019-03-25T20:15:22.624+0000
# Modified: 2019-03-25T20:15:43.495+0000
# Download link: http://api.gbif.org/v1/occurrence/download/request/0002607-190320150433242.zip
# Total records: 1706
# Request:
#   type: and
#   predicates:
#     > type: or
#       predicates:
#         - type: equals, key: TAXON_KEY, value: 3044349
#         - type: equals, key: TAXON_KEY, value: 3128962
#         - type: equals, key: TAXON_KEY, value: 2963880
#     > type: or
#       predicates:
#         - type: equals, key: BASIS_OF_RECORD, value: PRESERVED_SPECIMEN
#         - type: equals, key: BASIS_OF_RECORD, value: LITERATURE
#     > type: equals, key: HAS_COORDINATE, value: true
#     > type: equals, key: HAS_GEOSPATIAL_ISSUE, value: false
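#Once Status reaches SUCCEEDED, fetch and read the archive. A minimal sketch
#(the path is an assumption; any writable directory works):
dl <- occ_download_get("0002607-190320150433242", path = ".", overwrite = TRUE)
dat <- occ_download_import(dl)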
####For larger numbers of species...####
#break up long species key lists into ~100 spp. lists
#GBIF restrictions: URL call limit of 12K characters; limit of 3 requests at a time.
####large vector splitting function####
library(plyr) #1.8.4
plyrChunks <- function(d, n){
  #chunk start positions, every n elements
  is <- seq(from = 1, to = length(d), by = n)
  #make sure the last position is a break point
  if(tail(is, 1) != length(d)) {
    is <- c(is, length(d))
  }
  #slice d between consecutive break points
  chunks <- llply(head(seq_along(is), -1),
                  function(i){
                    start <- is[i]
                    end <- is[i+1] - 1
                    d[start:end]
                  })
  #the slices above stop one element short of the end, so append the final element
  lc <- length(chunks)
  td <- tail(d, 1)
  chunks[[lc]] <- c(chunks[[lc]], td)
  return(chunks)
}
#plyrChunks(d = vector, n = size_of_chunks)
d <- 1:2030
n <- 100
chunkList <- plyrChunks(d,n) #gives list of smaller vectors
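#A quick sanity check (a sketch using base R): the chunks should jointly cover d
length(chunkList)                    #number of chunks, ~21 for 2030 keys
sum(lengths(chunkList)) == length(d) #TRUE if nothing was dropped or duplicated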
#break a key vector of more than 300 spp. into a list of smaller vector chunks
#(bigList_key stands in for your own long vector of taxon keys)
chunkList <- plyrChunks(bigList_key, 100) #make ~100 spp. sub-lists for a really long species list
####GBIF request queueing function for >300 spp.####
#for fewer than 300 spp., it is faster to call them individually as above
#GBIF will only accept three requests from a single user at a time.
#This function queues your requests and submits request #4 when one of #1-3 finishes.
#For 400-500 spp., specify occ_download() calls within the occ_download_queue() call
output <- occ_download_queue(
  occ_download('taxonKey = 3119195', "year = 1976",
               user = "***", pwd = "***", email = "***"),
  occ_download('taxonKey = 3119195', "year = 2001", "month <= 8",
               user = "***", pwd = "***", email = "***"),
  occ_download("country = NZ", "year = 1999", "month = 3",
               user = "***", pwd = "***", email = "***"),
  occ_download("catalogNumber = Bird.27847588", "year = 1998", "month = 2",
               user = "***", pwd = "***", email = "***")
)
#download data (save the handles so the archives can be imported below)
downloads <- lapply(output, occ_download_get)
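#Read each archive into R (a sketch): occ_download_import() parses the objects
#returned by occ_download_get() directly into data frames.
data_list <- lapply(downloads, occ_download_import)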
####run gbif queueing for a really large spp. list####
#loop through many occ_download() calls, one queued request per chunk
#input: a list of short key vectors, i.e. chunkList above
#NB: output is overwritten each iteration, so collect the results as you go
queued <- list()
for (i in seq_along(chunkList)){
  output <- occ_download_queue(
    occ_download(paste0("taxonKey = ", paste0(chunkList[[i]], collapse = ",")),
                 "basisOfRecord = PRESERVED_SPECIMEN,LITERATURE",
                 "hasCoordinate = true",
                 "hasGeospatialIssue = false",
                 user = "***",
                 pwd = "***",
                 email = "***")
  )
  queued <- c(queued, output)
  print(output)
}
#check metadata for all queued requests
metas <- lapply(queued, occ_download_meta)
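#A sketch (the status field name is assumed from the GBIF API response): pull out
#just the status string to see which queued requests have finished.
sapply(metas, function(m) m$status) #e.g. "PREPARING", "RUNNING", "SUCCEEDED"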