Skip to content

Instantly share code, notes, and snippets.

@BaeoMaltinsky
Last active July 15, 2019 23:49
Show Gist options
  • Save BaeoMaltinsky/1dce3dbb1686d60b2a8ffe8873fff348 to your computer and use it in GitHub Desktop.
Save BaeoMaltinsky/1dce3dbb1686d60b2a8ffe8873fff348 to your computer and use it in GitHub Desktop.
"""
A simple script illustrating how to generate a CSV for Anki to learn phylogenies
"""
import requests
import io
import xml.etree.ElementTree as ET
import csv
# Download the tree-structure XML served by tolweb.org for node_id=70802.
# The timeout keeps the script from hanging forever on an unresponsive
# server, and raise_for_status() stops an HTTP error page from being handed
# to the XML parser, where it would only surface as a confusing parse error.
response = requests.get(
    "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id=70802",
    timeout=30,
)
response.raise_for_status()
# ET.parse() wants a file name or file object, so wrap the response bytes.
file = io.BytesIO(response.content)
tree = ET.parse(file)
def relations(tree):
    """Collect the parent/child relationships stored in a parsed XML tree.

    The document root must contain a ``NODE`` element. Every ``NODE`` is
    expected to carry its name in a ``NAME`` child and its descendants in an
    optional ``NODES`` child.

    Args:
        tree: A parsed tree exposing ``getroot()``, such as the object
            returned by ``xml.etree.ElementTree.parse``.

    Returns:
        A dict with two lists, both filled in depth-first pre-order:

        * ``"Parent-Child"``: one ``[name, [child_name, ...]]`` entry for
          every visited node that has a ``NODES`` element.
        * ``"Child-Parent"``: one ``[name, parent_name]`` entry for every
          visited node below the top-level ``NODE``.

        Children whose name is missing or has no text are skipped, together
        with everything below them.
    """
    relationships = {"Parent-Child": [], "Child-Parent": []}

    def node_name(node):
        """Return the text of the node's NAME child, or None if it has none."""
        name_element = node.find("NAME")
        return None if name_element is None else name_element.text

    def subtree_relations(parent_name, node):
        """Record the relationships of *node*, then recurse into its children."""
        name = node_name(node)
        # Only the top-level node is visited with parent_name=None.
        if parent_name is not None:
            relationships["Child-Parent"].append([name, parent_name])

        nodes = node.find("NODES")
        if nodes is None:
            return

        # Iterating an Element yields its direct children. This replaces
        # Element.getchildren(), which was removed in Python 3.9.
        named_children = []
        for child in nodes:
            child_name = node_name(child)
            if child_name is not None:
                named_children.append((child_name, child))

        relationships["Parent-Child"].append(
            [name, [child_name for child_name, _ in named_children]]
        )
        for _, child in named_children:
            subtree_relations(name, child)

    # Start the recursion at the top-level NODE element.
    subtree_relations(None, tree.getroot().find("NODE"))
    return relationships
rels = relations(tree)
# newline="" lets the csv module control line endings itself; without it,
# platforms that translate "\n" on write end up with blank rows between
# records. The explicit UTF-8 encoding keeps non-ASCII names from depending
# on the platform's default encoding.
with open("out.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # column names
    writer.writerow(["Field 1", "Field 2", "Relationship Type"])
    # One row per parent, with all of its children joined into one field.
    for parent, children in rels["Parent-Child"]:
        writer.writerow([parent, ", ".join(children), "Parent-Child"])
    # One row per child, pointing back at its parent.
    for rel in rels["Child-Parent"]:
        writer.writerow(rel + ["Child-Parent"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment