Skip to content

Instantly share code, notes, and snippets.

@fbreitwieser
Created September 27, 2016 14:46
Show Gist options
  • Save fbreitwieser/8127619d193d19804f7a68f74eb34623 to your computer and use it in GitHub Desktop.
Save fbreitwieser/8127619d193d19804f7a68f74eb34623 to your computer and use it in GitHub Desktop.
Get the NCBI taxonomy ID (taxID) for a nucleotide accession (AC), along with its full taxonomic path
library(rentrez)
library(taxize)
library(XML)
#' Extract the text of every DocSum item carrying a given name
#'
#' @param top Parsed eSummary XML (as returned by `fetch_docsum_xml_root()`);
#'   handed unchanged to `getNodeSet()`.
#' @param val Value of the `Name` attribute selecting the `Item` elements.
#' @return Whatever `sapply()` yields when `xmlValue()` is applied to each
#'   matched `Item` node.
get_docsum_val <- function(top, val) {
  # XPath selecting /eSummaryResult/DocSum/Item elements whose Name is `val`.
  item_xpath <- sprintf("/eSummaryResult/DocSum/Item[@Name='%s']", val)
  matched_items <- getNodeSet(top, item_xpath)
  sapply(matched_items, xmlValue)
}
#' List the names of all items present in a DocSum
#'
#' @param top Parsed eSummary XML; handed unchanged to `getNodeSet()`.
#' @return The `Name` attribute of every `/eSummaryResult/DocSum/Item` node,
#'   collected with `sapply()`.
list_docsum_items <- function(top) {
  item_nodes <- getNodeSet(top, "/eSummaryResult/DocSum/Item")
  sapply(item_nodes, function(node) xmlGetAttr(node, name = "Name"))
}
#' Fetch the document summary of a nucleotide record and return its XML root
#'
#' @param ac Identifier(s) passed as `id` to `entrez_fetch()` against the
#'   "nucleotide" database (e.g. an accession such as "NC_000883").
#' @return The root node (`xmlRoot()`) of the parsed docsum XML.
fetch_docsum_xml_root <- function(ac) {
  docsum_xml <- entrez_fetch(
    db = "nucleotide",
    id = ac,
    rettype = "docsum",
    retmode = "xml"
  )
  # The response is XML text, never a path: say so explicitly instead of
  # letting xmlTreeParse() guess, so a malformed or unexpected response fails
  # as a parse error rather than as a file/URL lookup.
  parsed <- xmlTreeParse(docsum_xml, asText = TRUE)
  xmlRoot(parsed)
}
#' Look up the taxonomy ID recorded for a nucleotide accession
#'
#' @param ac Identifier(s) forwarded to `fetch_docsum_xml_root()`.
#' @return The "TaxId" item value(s) of the fetched docsum, as returned by
#'   `get_docsum_val()`.
get_taxid_for_ac <- function(ac) {
  get_docsum_val(fetch_docsum_xml_root(ac), "TaxId")
}
# Example: resolve the taxonomy of the accession NC_000883.
top <- fetch_docsum_xml_root("NC_000883")
taxid <- get_docsum_val(top, "TaxId")
# Show which items the docsum offers (auto-printed when run interactively).
list_docsum_items(top)

# Taxonomic lineage for each taxid, looked up in the NCBI taxonomy.
tax_path <- taxize::classification(taxid, db = "ncbi")

# Pull one column (`field`) of the species-rank row out of a lineage table.
# Always returns a length-one character value: NA when the entry is not a
# data frame (presumably what taxize yields for an id it cannot resolve --
# TODO confirm) or when the lineage has no row ranked "species".
species_field <- function(lineage, field) {
  if (!is.data.frame(lineage)) {
    return(NA_character_)
  }
  # which() drops NA ranks instead of propagating them into the subset.
  species_rows <- which(lineage$rank == "species")
  if (length(species_rows) == 0L) {
    return(NA_character_)
  }
  as.character(lineage[[field]][[species_rows[[1L]]]])
}

# vapply() keeps both results plain character vectors (named like tax_path)
# even when some lineage lacks a species entry.
Species_name <- vapply(tax_path, species_field, character(1), field = "name")
Species_taxid <- vapply(tax_path, species_field, character(1), field = "id")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment