library(dplyr)
library(readr)
library(rgbif)
library(taxize)
library(CoordinateCleaner)
# 4 Odonata scientific names
name_list = c(
  "Cordulegaster charpentieri Kolenati, 1846",
  "Cordulegaster talaria Tennessen, 2004",
  "Calopteryx splendens Harris, 1780",
  "Epiophlebia laidlawi Tillyard, 1921"
)
# match names to GBIF taxonkeys
gbif_taxon_keys = name_list %>%
  taxize::get_gbifid_(method = "backbone") %>% # match each name against the GBIF backbone
  bind_rows() %>%
  filter(matchtype == "EXACT" & status == "ACCEPTED") %>% # keep only exact matches to accepted names
  filter(order == "Odonata") %>% # remove anything that might have matched to a non-dragonfly
  pull(usagekey) # get the GBIF taxonkeys
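# Optional check (sketch): inspect the full match table before filtering, to
# spot names that only matched FUZZY or resolved to a synonym; the columns
# selected here come from the GBIF species-match API via get_gbifid_()
name_list %>%
  taxize::get_gbifid_(method = "backbone") %>%
  bind_rows() %>%
  select(scientificname, matchtype, status, order) %>%
  glimpse()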
user="jwaller" # your GBIF user name
pwd="" # your GBIF password
email="jwaller@gbif.org" # your email
gbif_download_key = occ_download(
  type = "and",
  pred_in("taxonKey", gbif_taxon_keys),
  pred("hasGeospatialIssue", FALSE),
  pred("hasCoordinate", TRUE),
  format = "SIMPLE_CSV",
  user = user, pwd = pwd, email = email
)
# <<gbif download>>
# Username: jwaller
# E-mail: jwaller@gbif.org
# Format: SIMPLE_CSV
# Download key: 0253330-200613084148143
## Wait ~10-15 min; the download must finish on GBIF's side before the next part will work
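# Optional (sketch): let the script wait for GBIF to finish preparing the
# download instead of pausing manually; occ_download_wait() polls the
# download status until it is done
rgbif::occ_download_wait(gbif_download_key)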
gbif_download_key = "0253330-200613084148143"
path_to_download = "C:/Users/ftw712/Desktop/"
# download the file to your machine
rgbif::occ_download_get(gbif_download_key, path = path_to_download, overwrite = FALSE)
# It is sometimes easier to just grab the download from your GBIF user profile:
# https://www.gbif.org/user/download
# You can do that "manually"; the goal here is just a script that runs without stopping
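# Related (sketch): check the status and metadata of a download from R rather
# than on the website; occ_download_meta() takes the download key
rgbif::occ_download_meta(gbif_download_key)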
zip_file = paste0(path_to_download, gbif_download_key, ".zip")
extract_dir = paste0(path_to_download, gbif_download_key)
unzip(zip_file, exdir = extract_dir) # unzip the download into its own folder
# read in the download. data.table::fread() is recommended to avoid parsing errors that sometimes occur with other csv readers
gbif_download = data.table::fread(paste0(path_to_download, gbif_download_key, "/", gbif_download_key, ".csv")) %>%
  glimpse()
# Post-processing the GBIF download
gbif_clean_data = gbif_download %>%
  setNames(tolower(names(.))) %>% # lowercase column names to work with CoordinateCleaner
  filter(occurrencestatus == "PRESENT") %>% # keep presence records only
  filter(!is.na(decimallongitude)) %>% # drop records without coordinates
  filter(!is.na(decimallatitude)) %>%
  filter(!basisofrecord %in% c("FOSSIL_SPECIMEN","LIVING_SPECIMEN")) %>% # drop fossils and captive/cultivated records
  filter(!establishmentmeans %in% c("MANAGED", "INTRODUCED", "INVASIVE", "NATURALISED")) %>% # drop non-wild records
  filter(year >= 1900) %>% # drop very old records
  filter(coordinateprecision < 0.01 | is.na(coordinateprecision)) %>% # keep precise coordinates (< ~1 km) or unknown precision
  filter(coordinateuncertaintyinmeters < 10000 | is.na(coordinateuncertaintyinmeters)) %>% # keep uncertainty < 10 km or unknown
  filter(!coordinateuncertaintyinmeters %in% c(301, 3036, 999, 9999)) %>% # drop known default/placeholder uncertainty values
  filter(!(decimallatitude == 0 & decimallongitude == 0)) %>% # drop points at exactly (0,0)
  cc_cen(buffer = 2000) %>% # remove country centroids within 2km
  cc_cap(buffer = 2000) %>% # remove capital centroids within 2km
  cc_inst(buffer = 2000) %>% # remove zoo and herbaria within 2km
  cc_sea() %>% # remove points in the ocean
  distinct(decimallongitude, decimallatitude, specieskey, datasetkey, .keep_all = TRUE) %>% # remove duplicates; this removes a lot of records!
  glimpse() # look at the results of the pipeline
# 168,593 records before cleaning
# 54,938 records after cleaning
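# Sketch: re-check the before/after counts and save the cleaned table;
# the output filename is an assumption, not part of the original workflow
nrow(gbif_download) # records in the raw download
nrow(gbif_clean_data) # records surviving the cleaning pipeline
readr::write_tsv(gbif_clean_data, paste0(path_to_download, "odonata_clean.tsv"))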