Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taylorreiter/50c55359393bf2bd91a07953d6e2f9c8 to your computer and use it in GitHub Desktop.
Save taylorreiter/50c55359393bf2bd91a07953d6e2f9c8 to your computer and use it in GitHub Desktop.
library(dplyr)
library(readr)
library(metacoder)
# read in gdtb rs207 lineage/taxonomy information and format
# csv available here: https://osf.io/p6z3w/
gtdb_lineages <- read_csv("gtdb-rs207.taxonomy.csv.gz") %>% # read in gtdb rs207 lineage information
mutate(full_lineage = paste(sep = ";", superkingdom, phylum, class, order, family, genus, species)) %>% # format lineage into single string
select(ident, full_lineage) # keep columns that will be used by metacoder
# csv generated by running sourmash gather on e.g. a metagenome
gather_results <- read_csv("xxx.csv", col_types = "ddddddddcccddddcccd") %>% # read in gather results
mutate(ident = gsub(" .*", "", name)) %>% # format name string to ident
left_join(gtdb_lineages, by = "ident") %>% # join to lineage information
select(ident, full_lineage, f_unique_weighted) # keep columns that will be used by metacoder
# parse gather and gtdb information into a metacoder object
obj <- parse_tax_data(gather_results,
class_cols = "full_lineage", # the column that contains taxonomic information
class_sep = ";", # The character used to separate taxa in the classification
class_regex = "^(.+)__(.+)$", # Regex identifying where the data for each taxon is
class_key = c(tax_rank = "info", # A key describing each regex capture group
tax_name = "taxon_name"))
# set varimp at taxon level instead of just species level
# note: We only use "f_unique_weighted". Any numeric data could be included.
# It would need to be added below, to the cols parameter, and above in the
# select() function.
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = c("f_unique_weighted"))
heat_tree(obj,
node_label = taxon_names,
node_size = obj$data$tax_abund$f_unique_weighted,
node_color = obj$data$tax_abund$f_unique_weighted,
#node_label_size_range = c(0.01, 0.03), # uncomment to finely control text size
node_color_axis_label = "color",
node_size_axis_label = "size")
# output_file = "figures/metacoder_gather.png") # uncomment to save file upon running
# customizing a metacoder plot --------------------------------------------
# only label up to genus (useful for plots with many strains)
taxa_to_label <- c("Haemophilus_D", "Streptococcus")
to_label <- unlist(supertaxa(obj, include_input = TRUE)[taxon_names(obj) %in% taxa_to_label])
heat_tree(obj,
node_label = ifelse(taxon_indexes %in% to_label, taxon_names, ''),
node_size = obj$data$tax_abund$f_unique_weighted,
node_color = obj$data$tax_abund$f_unique_weighted,
node_color_axis_label = "color",
node_size_axis_label = "size")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment