Skip to content

Instantly share code, notes, and snippets.

@djpillen
Last active July 13, 2016 15:13
Show Gist options
  • Save djpillen/a580862bfdae4ca202b1840b9a64a65e to your computer and use it in GitHub Desktop.
Save djpillen/a580862bfdae4ca202b1840b9a64a65e to your computer and use it in GitHub Desktop.
from lxml import etree
import os
from os.path import join
# Match item ids listed in the reel map files against <unitid> values found
# in a directory of EAD files, and report the item ids that never matched.

# Directory holding the EAD XML files to scan.
ead_dir = "path/to/eads"

# Collect the raw lines of every file in the "reelmapfiles" directory.
unsplit_lines = []
for filename in os.listdir("reelmapfiles"):
    with open(join("reelmapfiles", filename), "r") as f:
        unsplit_lines.extend(f.readlines())

# Each usable line is "<item id> <handle>": the first whitespace-separated
# field is the item id and the second is the handle. Lines with fewer than
# two fields (e.g. blank lines) are skipped instead of raising IndexError.
# If an item id appears more than once, the last occurrence wins.
itemid_handle_map = {}
for line in unsplit_lines:
    fields = line.split()
    if len(fields) < 2:
        continue
    itemid_handle_map[fields[0]] = fields[1]

# Item ids not yet claimed by any unitid. A set gives O(1) membership tests
# and removal, and behaves the same on Python 2 and Python 3.
itemids_to_match = set(itemid_handle_map)

for filename in os.listdir(ead_dir):
    tree = etree.parse(join(ead_dir, filename))
    for unitid in tree.xpath("//unitid"):
        # .text is None for an empty <unitid/> element.
        unitid_text = unitid.text or ""
        if "sr" not in unitid_text.lower():
            continue

        identifier = unitid_text.strip("[] ")
        daolink = ""
        # Try the full identifier first; on a miss, drop the last
        # "-"-separated segment and retry until a match is found or nothing
        # is left. A matched item id is consumed so it is used only once.
        while not daolink and identifier:
            if identifier in itemids_to_match:
                daolink = itemid_handle_map[identifier]
                itemids_to_match.remove(identifier)
            else:
                identifier = "-".join(identifier.split("-")[:-1])

        if not daolink:
            continue

        # Third ancestor of the <unitid>.
        # NOTE(review): presumably unitid -> did -> component -> parent
        # component; confirm against the EAD structure being processed.
        parent_component = unitid.getparent().getparent().getparent()
        if not parent_component.xpath("./did/dao"):
            # make the dao
            # TODO: creation of the <dao> element (using daolink) is not
            # implemented yet; `pass` keeps this branch syntactically valid.
            pass

# There are still unmatched identifiers that need to be investigated
if itemids_to_match:
    # Single-argument print(...) works on both Python 2 and Python 3;
    # sorting makes the report deterministic.
    print("\n".join(sorted(itemids_to_match)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment