Created
May 9, 2022 15:29
-
-
Save taylorreiter/50c55359393bf2bd91a07953d6e2f9c8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(readr) | |
library(metacoder) | |
# read in gdtb rs207 lineage/taxonomy information and format | |
# csv available here: https://osf.io/p6z3w/ | |
gtdb_lineages <- read_csv("gtdb-rs207.taxonomy.csv.gz") %>% # read in gtdb rs207 lineage information | |
mutate(full_lineage = paste(sep = ";", superkingdom, phylum, class, order, family, genus, species)) %>% # format lineage into single string | |
select(ident, full_lineage) # keep columns that will be used by metacoder | |
# csv generated by running sourmash gather on e.g. a metagenome | |
gather_results <- read_csv("xxx.csv", col_types = "ddddddddcccddddcccd") %>% # read in gather results | |
mutate(ident = gsub(" .*", "", name)) %>% # format name string to ident | |
left_join(gtdb_lineages, by = "ident") %>% # join to lineage information | |
select(ident, full_lineage, f_unique_weighted) # keep columns that will be used by metacoder | |
# parse gather and gtdb information into a metacoder object | |
obj <- parse_tax_data(gather_results, | |
class_cols = "full_lineage", # the column that contains taxonomic information | |
class_sep = ";", # The character used to separate taxa in the classification | |
class_regex = "^(.+)__(.+)$", # Regex identifying where the data for each taxon is | |
class_key = c(tax_rank = "info", # A key describing each regex capture group | |
tax_name = "taxon_name")) | |
# set varimp at taxon level instead of just species level | |
# note: We only use "f_unique_weighted". Any numeric data could be included. | |
# It would need to be added below, to the cols parameter, and above in the | |
# select() function. | |
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = c("f_unique_weighted")) | |
heat_tree(obj, | |
node_label = taxon_names, | |
node_size = obj$data$tax_abund$f_unique_weighted, | |
node_color = obj$data$tax_abund$f_unique_weighted, | |
#node_label_size_range = c(0.01, 0.03), # uncomment to finely control text size | |
node_color_axis_label = "color", | |
node_size_axis_label = "size") | |
# output_file = "figures/metacoder_gather.png") # uncomment to save file upon running | |
# customizing a metacoder plot -------------------------------------------- | |
# only label up to genus (useful for plots with many strains) | |
taxa_to_label <- c("Haemophilus_D", "Streptococcus") | |
to_label <- unlist(supertaxa(obj, include_input = TRUE)[taxon_names(obj) %in% taxa_to_label]) | |
heat_tree(obj, | |
node_label = ifelse(taxon_indexes %in% to_label, taxon_names, ''), | |
node_size = obj$data$tax_abund$f_unique_weighted, | |
node_color = obj$data$tax_abund$f_unique_weighted, | |
node_color_axis_label = "color", | |
node_size_axis_label = "size") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment