Skip to content

Instantly share code, notes, and snippets.

@sckott
Created March 12, 2016 00:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sckott/c0437a71a889793e30d5 to your computer and use it in GitHub Desktop.
Save sckott/c0437a71a889793e30d5 to your computer and use it in GitHub Desktop.
## ------------------------------------------------------------------------
pop <- function(x, y) x[!y == names(x)]
## ------------------------------------------------------------------------
if (!requireNamespace("xml2", quietly = TRUE)) {
install.packages("xml2")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
install.packages("dplyr")
}
library("xml2")
library("dplyr")
## ------------------------------------------------------------------------
file <- "master_ioc-names_xml.xml"
download.file("http://www.worldbirdnames.org/master_ioc-names_xml.xml", destfile = file)
## ------------------------------------------------------------------------
xml <- xml2::read_xml(file)
orders <- xml2::xml_find_all(xml, "//order")
out <- lapply(orders, function(z) {
zch <- xml_children(z)
zch <- pop(as.list(setNames(xml_text(zch), xml_name(zch))), "family")
fams <- xml2::xml_find_all(z, "family")
famres <- lapply(fams, function(w) {
wch <- xml_children(w)
wch <- pop(as.list(setNames(xml_text(wch), xml_name(wch))), "genus")
gens <- xml2::xml_find_all(w, "genus")
genres <- lapply(gens, function(v) {
vch <- xml_children(v)
vch <- pop(as.list(setNames(xml_text(vch), xml_name(vch))), "species")
spp <- xml2::xml_find_all(v, "species")
sppres <- lapply(spp, function(g) {
gch <- xml_children(g)
gch <- pop(as.list(setNames(xml_text(gch), xml_name(gch))), "subspecies")
subspp <- xml2::xml_find_all(g, "subspecies")
subres <- lapply(subspp, function(a) {
a <- xml_children(a)
as.list(setNames(xml_text(a), xml_name(a)))
})
c(gch, list(subspecies = subres))
})
c(vch, list(species = sppres))
})
c(wch, list(genus = genres))
})
c(zch, list(family = famres))
})
## ------------------------------------------------------------------------
(df <- rbind_all(lapply(out, function(z) {
df <- rbind_all(lapply(z$family, function(w) {
data.frame(family = w$latin_name,
genus = vapply(w$genus, "[[", "", "latin_name"),
stringsAsFactors = FALSE)
}))
df$order <- z$latin_name
df
})) %>% select(order, everything()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment