Skip to content

Instantly share code, notes, and snippets.

@audy
Created October 13, 2022 04:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save audy/d8f30921d0a2ac9d23192dd99cc6a395 to your computer and use it in GitHub Desktop.
Save audy/d8f30921d0a2ac9d23192dd99cc6a395 to your computer and use it in GitHub Desktop.
import taxonomy
# Module-level taxonomy, loaded when this file is executed/imported.
# NOTE(review): "ncbi_taxdump/" is presumably a directory holding an NCBI
# taxdump, resolved relative to the current working directory - confirm.
tax = taxonomy.Taxonomy.from_ncbi("ncbi_taxdump/")
# Rank names in the order graft_lineage_to_taxonomy walks them: a newly created
# node is given the rank that follows its parent's rank in this list.
FULL_RANKS = [ 'superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
def get_scalar(d: dict, key: str):
    """Return the only element stored under ``key``, or ``None``.

    ``d`` maps keys to sized, indexable collections. The single element is
    returned only when ``key`` is present and its collection holds exactly
    one item; a missing key, an empty collection, or a collection with
    several items all yield ``None``.
    """
    # Membership test (rather than d[key] directly) so that a defaultdict is
    # never asked to create an entry for an unknown key.
    if key not in d:
        return None
    candidates = d[key]
    return candidates[0] if len(candidates) == 1 else None
def _child_rank(parent_rank):
    """Return the rank that directly follows ``parent_rank`` in ``FULL_RANKS``.

    Raises:
        ValueError: ``parent_rank`` is not one of ``FULL_RANKS``.
        IndexError: ``parent_rank`` is the last entry of ``FULL_RANKS``.
    """
    try:
        position = FULL_RANKS.index(parent_rank)
    except ValueError:
        raise ValueError(
            f"cannot infer a child rank: parent rank {parent_rank!r} "
            f"is not one of {FULL_RANKS}"
        ) from None
    if position + 1 >= len(FULL_RANKS):
        raise IndexError(
            f"cannot add a node below rank {parent_rank!r}: "
            "it is the last rank in FULL_RANKS"
        )
    return FULL_RANKS[position + 1]


def graft_lineage_to_taxonomy(tax, lineage_names, name_to_nodes=None):
    """Graft a lineage onto ``tax``, creating whichever nodes are missing.

    This will mutate your taxonomy!

    ``lineage_names`` is walked in order and each name is treated as a child
    of the one before it. A name that resolves to exactly one known node
    becomes the parent for the names that follow. Any other name (unknown, or
    shared by several nodes, see ``get_scalar``) is added as a new child of
    the current parent, with the rank that follows the parent's rank in
    ``FULL_RANKS``.

    Args:
        tax: Taxonomy to extend. The function iterates it to obtain tax IDs,
            indexes it by tax ID to obtain nodes (using their ``id``, ``name``
            and ``rank`` attributes), and calls ``add_node(parent_id, tax_id)``
            and ``edit_node(tax_id, name=..., rank=...)`` on it.
        lineage_names: Iterable of node names, ancestors first.
        name_to_nodes: Optional mapping of node name -> list of nodes that is
            reused as a cache across calls. When omitted, the mapping is built
            by looking up every node in ``tax``. A supplied mapping is updated
            in place with every node created here.

    Returns:
        The last node that was created, or ``None`` when every name already
        existed (or ``lineage_names`` was empty).

    Raises:
        ValueError: A node must be created before any existing node has been
            seen ("orphan node"), or the parent's rank is not in
            ``FULL_RANKS``.
        IndexError: The parent already has the last rank in ``FULL_RANKS``.
    """
    build_index = name_to_nodes is None
    if build_index:
        # A plain dict with setdefault keeps this block free of extra imports
        # and behaves the same for whatever mapping type a caller passes in.
        name_to_nodes = {}

    # New IDs are allocated from 1e9 upward, or from just past the largest
    # existing ID when that is higher, so a new node never reuses an ID.
    next_tax_id = int(1e9)
    for tax_id in tax:
        next_tax_id = max(next_tax_id, int(tax_id) + 1)
        if build_index:
            node = tax[tax_id]
            name_to_nodes.setdefault(node.name, []).append(node)

    parent_node = None
    new_node = None
    for name in lineage_names:
        existing = get_scalar(name_to_nodes, name)
        if existing is not None:
            # Name is already in the taxonomy: nothing to create, just descend.
            parent_node = existing
            continue

        # Need to create a new node. Validate before touching the taxonomy so
        # a failure never leaves a half-configured node behind.
        if parent_node is None:
            raise ValueError(
                f"orphan node: {name!r} has no existing ancestor to attach to"
            )
        rank = _child_rank(parent_node.rank)

        new_id = str(next_tax_id)
        tax.add_node(parent_node.id, new_id)
        tax.edit_node(new_id, name=name, rank=rank)
        new_node = tax[new_id]
        print(f"added new node {new_node}")

        name_to_nodes.setdefault(name, []).append(new_node)
        parent_node = new_node
        next_tax_id += 1
    return new_node
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment