Skip to content

Instantly share code, notes, and snippets.

@sckott
Created July 9, 2021 20:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sckott/c5eb2b96218d10b942657546cb5df43c to your computer and use it in GitHub Desktop.
Save sckott/c5eb2b96218d10b942657546cb5df43c to your computer and use it in GitHub Desktop.
library(taxizedb)
## Registered S3 method overwritten by 'hoardr':
##   method           from
##   print.cache_info httr
library(dplyr)

Download NCBI taxonomy database

# Make sure a local copy of the NCBI taxonomy database is available; the
# output below shows an existing file being reused and its path returned.
db_download_ncbi()
## Database already exists, returning old file
## [1] "~/Library/Caches/R/taxizedb/NCBI.sql"

get NCBI id for Asteraceae

# Look up the NCBI taxonomy id of the family whose subgroups are tallied below.
id <- name2taxid("Asteraceae", db = "ncbi")

Subfamilies

# Count the species recorded under each subfamily of the taxon in `id`.
#
# The subfamily table is split into one-row data frames with split() instead
# of apply(): apply() first coerces a data frame to a character matrix,
# formatting every column as text (numeric ids can end up blank-padded)
# before the ids are handed back to downstream().
subf <- downstream(id, db = "ncbi", downto = "subfamily")
subf_rows <- subf[[1]]
subf_list <- unname(split(subf_rows, seq_len(nrow(subf_rows))))
subf_out <- lapply(subf_list, function(z) {
  # `z` is a one-row data frame; as in the original script, its first two
  # columns are taken to be the subfamily's id and name.
  df <- downstream(z$childtaxa_id, db = "ncbi", downto = "species")[[1]]
  # mutate() recycles the length-one values to nrow(df), so a subfamily with
  # no species contributes zero rows instead of raising the row-count
  # mismatch error that cbind() gives for a zero-row data frame.
  mutate(df, subfamily_id = z[[1]], subfamily_name = z[[2]])
})
subf_tab <- as_tibble(bind_rows(subf_out))
# One row per subfamily, with the number of species in `n_species`.
count(subf_tab, subfamily_name, name = "n_species")
## # A tibble: 15 x 2
##    subfamily_name     n_species
##    <chr>                  <int>
##  1 Asteroideae             7966
##  2 Barnadesioideae           71
##  3 Carduoideae             1754
##  4 Cichorioideae           2021
##  5 Corymbioideae              5
##  6 Dicomoideae               34
##  7 Famatinanthoideae          1
##  8 Gochnatioideae            68
##  9 Gymnarrhenoideae           1
## 10 Hecastocleidoideae         1
## 11 Mutisioideae             303
## 12 Pertyoideae               40
## 13 Stifftioideae             12
## 14 Tarchonanthoideae         13
## 15 Wunderlichioideae         10

Tribes

# Count the species recorded under each tribe of the taxon in `id`.
#
# The tribe table is split into one-row data frames with split() instead of
# apply(): apply() first coerces a data frame to a character matrix,
# formatting every column as text (numeric ids can end up blank-padded)
# before the ids are handed back to downstream().
trib <- downstream(id, db = "ncbi", downto = "tribe")
trib_rows <- trib[[1]]
trib_list <- unname(split(trib_rows, seq_len(nrow(trib_rows))))
trib_out <- lapply(trib_list, function(z) {
  # `z` is a one-row data frame; as in the original script, its first two
  # columns are taken to be the tribe's id and name.
  df <- downstream(z$childtaxa_id, db = "ncbi", downto = "species")[[1]]
  # mutate() recycles the length-one values to nrow(df), so a tribe with no
  # species contributes zero rows instead of raising the row-count mismatch
  # error that cbind() gives for a zero-row data frame.
  mutate(df, tribe_id = z[[1]], tribe_name = z[[2]])
})
trib_tab <- as_tibble(bind_rows(trib_out))
# One row per tribe, with the number of species in `n_species`.
count(trib_tab, tribe_name, name = "n_species")
## # A tibble: 43 x 2
##    tribe_name    n_species
##    <chr>             <int>
##  1 Anthemideae        1149
##  2 Arctotideae         137
##  3 Astereae           1559
##  4 Athroismeae          48
##  5 Bahieae              56
##  6 Calenduleae          42
##  7 Cardueae           1754
##  8 Chaenactideae         5
##  9 Cichorieae         1365
## 10 Coreopsideae        207
## # … with 33 more rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment