Last active
April 16, 2021 13:05
-
-
Save jhnwllr/2a931e2e34a51e20f71b9720945bd2b6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(readr) | |
library(rgbif) | |
library(taxize) | |
library(CoordinateCleaner) | |
# 4 Odonata scientific names to resolve against the GBIF backbone taxonomy
name_list <- c(
  "Cordulegaster charpentieri Kolenati, 1846",
  "Cordulegaster talaria Tennessen, 2004",
  "Calopteryx splendens Harris, 1780",
  "Epiophlebia laidlawi Tillyard, 1921"
)

# Match names to GBIF taxonkeys.
# get_gbifid_() returns one data frame of candidate matches per input name;
# bind_rows() stacks them, then we keep only unambiguous, accepted matches.
gbif_taxon_keys <- name_list %>%
  taxize::get_gbifid_(method = "backbone") %>% # query the GBIF backbone
  bind_rows() %>%
  filter(matchtype == "EXACT" & status == "ACCEPTED") %>%
  filter(order == "Odonata") %>% # drop anything that matched a non-dragonfly
  pull(usagekey) # vector of GBIF taxonkeys used for the download predicate
# GBIF credentials.
# NOTE(security): avoid committing credentials to scripts — rgbif recommends
# storing GBIF_USER / GBIF_PWD / GBIF_EMAIL in your .Renviron and reading
# them with Sys.getenv(); fill these in locally only.
user <- "jwaller"           # your GBIF user name
pwd <- ""                   # your GBIF password (do not commit!)
email <- "jwaller@gbif.org" # your email

# Launch an asynchronous download job on GBIF's servers.
# Predicates are combined with AND: the requested taxa, no known geospatial
# issues, and coordinates present. Returns the job's download key.
gbif_download_key <- occ_download(
  type = "and",
  pred_in("taxonKey", gbif_taxon_keys),
  pred("hasGeospatialIssue", FALSE),
  pred("hasCoordinate", TRUE),
  format = "SIMPLE_CSV",
  user = user, pwd = pwd, email = email
)
# <<gbif download>>
# Username: jwaller
# E-mail: jwaller@gbif.org
# Format: SIMPLE_CSV
# Download key: 0253330-200613084148143
#
# The download is prepared server-side; wait ~10-15 min for it to finish
# before running the retrieval step below.
gbif_download_key <- "0253330-200613084148143"
path_to_download <- "C:/Users/ftw712/Desktop/"

# Fetch the finished download archive to the local machine.
# overwrite = FALSE keeps an already-downloaded copy intact on re-runs.
# Alternatively, grab it manually from your GBIF profile:
# https://www.gbif.org/user/download
rgbif::occ_download_get(gbif_download_key, path = path_to_download, overwrite = FALSE)

# The archive is named <download key>.zip; extract it into a same-named folder.
zip_file <- paste0(path_to_download, gbif_download_key, ".zip")
extract_dir <- paste0(path_to_download, gbif_download_key)
unzip(zip_file, exdir = extract_dir)
# Read in the download. data.table::fread() is recommended here because it
# avoids parsing errors that can occur with other CSV readers on GBIF exports.
gbif_download <- data.table::fread(paste0(path_to_download, gbif_download_key, "/", gbif_download_key, ".csv")) %>%
  glimpse()
# Post-processing of the GBIF download: remove absence records, fossils,
# captive/introduced individuals, imprecise or suspicious coordinates, and
# common geocoding artifacts (centroids, institutions, ocean points).
gbif_clean_data <- gbif_download %>%
  setNames(tolower(names(.))) %>% # lowercase column names to work with CoordinateCleaner
  filter(occurrencestatus == "PRESENT") %>%
  filter(!is.na(decimallongitude)) %>%
  filter(!is.na(decimallatitude)) %>%
  filter(!basisofrecord %in% c("FOSSIL_SPECIMEN", "LIVING_SPECIMEN")) %>%
  filter(!establishmentmeans %in% c("MANAGED", "INTRODUCED", "INVASIVE", "NATURALISED")) %>%
  filter(year >= 1900) %>% # drop old, typically poorly georeferenced records
  filter(coordinateprecision > 0.01 | is.na(coordinateprecision)) %>%
  filter(coordinateuncertaintyinmeters < 10000 | is.na(coordinateuncertaintyinmeters)) %>%
  # these exact uncertainty values are known placeholder/default artifacts
  filter(!coordinateuncertaintyinmeters %in% c(301, 3036, 999, 9999)) %>%
  # drop "null island" records sitting at exactly (0, 0)
  filter(!(decimallatitude == 0 & decimallongitude == 0)) %>%
  cc_cen(buffer = 2000) %>% # remove country centroids within 2 km
  cc_cap(buffer = 2000) %>% # remove capital centroids within 2 km
  cc_inst(buffer = 2000) %>% # remove zoos and herbaria within 2 km
  cc_sea() %>% # remove points falling in the ocean
  # deduplicate by location/species/dataset; this removes a lot of records!
  distinct(decimallongitude, decimallatitude, specieskey, datasetkey, .keep_all = TRUE) %>%
  glimpse() # look at results of pipeline
# 168,593 records before cleaning
#  54,938 records after
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment