Last active
July 15, 2019 23:49
-
-
Save BaeoMaltinsky/1dce3dbb1686d60b2a8ffe8873fff348 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A simple script illustrating how to generate a CSV for Anki to learn phylogenies
""" | |
import requests | |
import io | |
import xml.etree.ElementTree as ET | |
import csv | |
# Download the Tree of Life XML description of the subtree rooted at node 70802.
# A timeout keeps the script from hanging forever on an unresponsive server, and
# raise_for_status() stops early on an HTTP error instead of handing an error
# page to the XML parser.
response = requests.get(
    "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id=70802",
    timeout=30,
)
response.raise_for_status()

# Wrap the raw bytes in a file-like object so ElementTree can parse them and
# honour the encoding declared inside the XML document itself.
file = io.BytesIO(response.content)
tree = ET.parse(file)
def relations(tree):
    """Collect parent/child relationships from a parsed XML tree.

    The document root is expected to contain a ``NODE`` element. Each ``NODE``
    carries a ``NAME`` element and, optionally, a ``NODES`` element whose
    children are the node's child ``NODE`` elements.

    Args:
        tree: an object with ``getroot()`` returning the document's root
            element (e.g. ``xml.etree.ElementTree.ElementTree``).

    Returns:
        A dict with two lists, both in depth-first pre-order:

        * ``"Parent-Child"``: one ``[parent_name, [child_name, ...]]`` entry
          for every visited node that has a ``NODES`` element.
        * ``"Child-Parent"``: one ``[child_name, parent_name]`` entry for
          every visited node whose parent has a name.

        Children without a usable name (missing ``NAME`` element or empty
        text) are skipped together with their subtrees. If the document has
        no top-level ``NODE``, both lists are empty.
    """
    relationships = {"Parent-Child": [], "Child-Parent": []}

    def node_name(node):
        """Return the text of the node's NAME element, or None if unavailable."""
        name_element = node.find("NAME")
        return None if name_element is None else name_element.text

    root = tree.getroot().find("NODE")
    if root is None:
        # Nothing to walk: a tree without nodes has no relationships.
        return relationships

    # Explicit stack instead of recursion so that very deep trees cannot hit
    # the interpreter's recursion limit. Entries are (parent_name, node).
    pending = [(None, root)]
    while pending:
        parent_name, node = pending.pop()
        name = node_name(node)

        # Record the link to the parent (the root has no parent).
        if parent_name is not None:
            relationships["Child-Parent"].append([name, parent_name])

        nodes = node.find("NODES")
        if nodes is None:
            continue

        # Iterate the element directly; Element.getchildren() no longer
        # exists on Python 3.9+.
        children = [child for child in nodes if node_name(child) is not None]
        relationships["Parent-Child"].append(
            [name, [node_name(child) for child in children]]
        )

        # Push in reverse so children are popped, and therefore visited, in
        # document order, matching a recursive pre-order walk.
        pending.extend((name, child) for child in reversed(children))

    return relationships
# Extract the relationship lists from the parsed tree.
rels = relations(tree)

# newline="" is what the csv module requires of files it writes to; without it
# extra blank rows appear on platforms that translate "\n" to "\r\n".
# An explicit UTF-8 encoding keeps non-ASCII names intact regardless of the
# platform's default encoding.
with open("out.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # column names
    writer.writerow(["Field 1", "Field 2", "Relationship Type"])
    # One row per parent: the parent's name and a comma-separated list of its children.
    for parent, children in rels["Parent-Child"]:
        writer.writerow([parent, ", ".join(children), "Parent-Child"])
    # One row per child: the child's name and the name of its parent.
    for child, parent in rels["Child-Parent"]:
        writer.writerow([child, parent, "Child-Parent"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment