Skip to content

Instantly share code, notes, and snippets.

@arcaravaggi
Created April 3, 2023 10:28
Show Gist options
  • Save arcaravaggi/bbd248159b05e85c395e471e3bee69f2 to your computer and use it in GitHub Desktop.
Save arcaravaggi/bbd248159b05e85c395e471e3bee69f2 to your computer and use it in GitHub Desktop.
A wrapper for download and cleaning of IOC bird Master Lists
# A wrapper for download and cleaning of IOC bird lists
# This function downloads a given list, defined by the URL,
# completes rows with relevant information, and outputs a data frame.
# Further functions may be added and a small package developed, in time.
#
# There's almost certainly a more elegant way of doing this.
#
# earl = URL of the IOC Master List .xlsx file (see https://www.worldbirdnames.org/new/ioc-lists/master-list-2); defaults to v13.1
#
# E.g.
# df <- IOCmasteR()
IOCmasteR <- function(earl = "https://worldbirdnames.org/master_ioc_list_v13.1.xlsx") {
  # Download an IOC Master List workbook, fill the taxonomy columns that are
  # left blank in the spreadsheet, and return the result as one data frame.
  #
  # Args:
  #   earl: URL of the .xlsx workbook, as a single character string.
  #
  # Returns:
  #   A data frame (as produced by merge()) sorted by infraclass, order,
  #   family_scientific, genus, species_scientific and subspecies.
  #
  # Errors:
  #   Stops if `earl` is not a single non-empty string, if a required package
  #   is not installed, or if the download reports a non-zero status.

  # Validate first so a bad argument fails before any package or network work.
  if (!is.character(earl) || length(earl) != 1L || is.na(earl) ||
      !nzchar(earl)) {
    stop("`earl` must be a single, non-empty URL string", call. = FALSE)
  }

  # library() errors when a package is missing; require() would only return
  # FALSE and let the function fail later with a less helpful message.
  library(tidyverse)
  library(janitor)
  library(readxl)

  # Download Master List; the temporary file is removed when the function
  # exits, whether it succeeds or fails.
  temp <- tempfile(fileext = ".xlsx")
  on.exit(unlink(temp), add = TRUE)
  status <- download.file(earl, destfile = temp, mode = "wb")
  if (status != 0) {
    stop("Download of '", earl, "' failed with status ", status,
         call. = FALSE)
  }

  # NOTE(review): skip = 3 assumes three preamble rows above the header row,
  # as in the default v13.1 workbook -- confirm for other versions.
  birds <- read_excel(temp,
                      col_names = TRUE,
                      skip = 3)

  # Clean column names
  birds <- clean_names(birds)

  # Fill relevant columns and thin to single-row records, only.
  # Note that certain fields need to be excluded to prevent erroneous
  # duplication of discrete data.
  birds2 <- birds %>%
    select(-c(parvclass,
              authority,
              breeding_range_subregion_s,
              nonbreeding_range,
              code,
              comment)) %>%
    fill(infraclass) %>%
    group_by(infraclass) %>%
    fill(order:species_scientific, .direction = "downup") %>%
    group_by(across(infraclass:species_scientific)) %>%
    # Per group and column: keep the non-missing values, or a single NA when
    # the whole column is missing for that group.
    reframe(across(everything(), ~if (all(is.na(.x))) NA else na.omit(.x)))

  # Create comparative df for merge, containing reference columns and those
  # previously excluded
  birds3 <- birds %>%
    fill(c("infraclass",
           "order",
           "family_scientific",
           "family_english",
           "genus",
           "species_scientific",
           "species_english"), .direction = "down") %>%
    mutate(name = paste(family_scientific, genus,
                        " ", species_scientific, subspecies))

  # Create the same reference column in the filled data frame; the key must be
  # built identically on both sides for the merge below to match rows.
  birds2 <- birds2 %>%
    mutate(name = paste(family_scientific, genus,
                        " ", species_scientific, subspecies))

  # Merge data frames. With all.x = FALSE only keys present in both frames are
  # kept; rows that share a key are matched many-to-many by merge().
  b <- merge(birds2, birds3[c("name",
                              "comment",
                              "code",
                              "breeding_range_subregion_s",
                              "authority",
                              "nonbreeding_range")],
             by = "name",
             all.x = FALSE)

  # Re-order columns to something more logical. Column 1 is the `name` key
  # (merge() puts the `by` column first) and is dropped here.
  # NOTE(review): the remaining positions assume the column layout of the
  # v13.1 workbook -- confirm before pointing `earl` at another version.
  b <- b[, c(2:7, 9, 8, 14, 10, 13, 15, 12, 11)]

  # Sort data
  b <- b[order(b$infraclass,
               b$order,
               b$family_scientific,
               b$genus,
               b$species_scientific,
               b$subspecies), ]

  b
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment