Last active
October 3, 2019 15:36
-
-
Save hemprichbennett/4cdf5b0bb4d4db9d40d5906cb45fb436 to your computer and use it in GitHub Desktop.
Get the species names of all GBIF records from Malaysia, Indonesia and Brunei
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Header #### | |
## Project: NA | |
## Script purpose: Get all the species of bats with GBIF records from Malaysia, | |
## Indonesia and Brunei | |
## Date: | |
## Author: Dave Hemprich-Bennett (hemprich.bennett@gmail.com) | |
## Notes | |
################################################## | |
# Code repurposed from the fantastic lesson at https://ourcodingclub.github.io/2016/01/01/data-synthesis.html | |
library(dplyr) | |
library(tidyr) | |
library(ggplot2) | |
library(ggthemes) | |
library(rgbif) | |
library(purrr) | |
# Make a custom function to use in the map call below
# Query GBIF for georeferenced occurrence records of one taxon in one country.
#
# Args:
#   desired_country: country code passed to occ_search() as `country`.
#   desired_taxa: name passed to occ_search() as `scientificName`.
#
# Returns:
#   A data frame with the columns species, name, decimalLongitude,
#   decimalLatitude, year, individualCount, country and taxon (the latter set
#   to `desired_taxa`), or NULL when the query yields no data frame or when
#   any of those columns is missing from the result.
gbif_querying <- function(desired_country, desired_taxa) {
  cat(as.character(Sys.time()), desired_country, desired_taxa, "starting\n")

  # The `return = "data"` argument is not used here: recent rgbif releases
  # ignore it and always hand back the full result object, so the occurrence
  # table is pulled out explicitly below instead.
  output <- occ_search(
    scientificName = desired_taxa, limit = 10000,
    country = desired_country,
    hasCoordinate = TRUE
  )

  # occ_search may return the complete result (a list holding a `data`
  # component) rather than the occurrence table itself; unwrap it if so.
  if (!is.data.frame(output) && is.list(output)) {
    output <- output[["data"]]
  }

  # If there are no matches there is no occurrence data frame to work with
  # (depending on the rgbif version this may be NULL or a message string).
  if (!is.data.frame(output)) {
    return(NULL)
  }

  # Some result sets don't have species names etc. These break the column
  # selection, are no use to us and should be discarded.
  required_cols <- c(
    "species", "name", "decimalLongitude",
    "decimalLatitude", "year",
    "individualCount", "country"
  )
  if (!all(required_cols %in% names(output))) {
    return(NULL)
  }

  # Simplify the occurrence data frame to a fixed set of columns. This is
  # necessary as GBIF won't return empty columns, and this can mess with any
  # attempts at combining data frames later.
  output <- output[required_cols] %>%
    mutate(taxon = desired_taxa)

  cat(as.character(Sys.time()), desired_country, desired_taxa, "done\n")
  return(output)
}
# Make a vector of shortnames for Malaysia, Brunei and Indonesia (GBIF doesn't
# accept the full name of a country, but instead its 2-letter ISO-3166-1 code,
# see http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 for details)
bornean_countries <- c("MY", "BN", "ID")

# Make a vector of desired taxonomic groups
taxa_vec <- c("Chiroptera")

# Make a dataframe with all possible combinations of the two desired variables.
# This makes it easier to use purrr later
combinations <- crossing(bornean_countries, taxa_vec)

# Use our custom function on the above dataframe to query for every combination
# of country and taxonomic group; the per-query results are row-bound together
occurrence_df <- purrr::map2_df(
  combinations$bornean_countries,
  combinations$taxa_vec,
  gbif_querying
)

# If every query came back empty the combined data frame has no columns at
# all, and the filter below would fail with an unhelpful "object not found"
# error. Stop with a clear message instead.
if (!all(c("species", "country") %in% names(occurrence_df))) {
  stop(
    "No usable GBIF records were returned for any country/taxon combination",
    call. = FALSE
  )
}

occurrence_df <- occurrence_df %>%
  filter(!is.na(species)) %>% # get rid of the NAs
  select(species, country) %>% # delete superfluous columns
  distinct() %>% # filter out the non-distinct rows
  arrange(species) # order the rows alphabetically by species

# Save the output, creating the target directory first if it does not exist
# (write.csv does not create missing directories)
output_path <- "se_asian_bats/bat_species.csv"
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
write.csv(occurrence_df,
  output_path,
  row.names = FALSE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ha, I am not! This sounds potentially much easier...