Last active
July 13, 2016 15:13
-
-
Save djpillen/a580862bfdae4ca202b1840b9a64a65e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Match <unitid> values found in a directory of EAD XML files against the
# item ids listed in the files under "reelmapfiles", then report any item ids
# that were never matched.
#
# Each reel-map line is expected to hold two space-separated fields: an item id
# followed by the value this script uses as the dao link (named "handle" here;
# presumably a digital-object handle -- confirm against the data files).
from lxml import etree
import os
from os.path import join

ead_dir = "path/to/eads"


def _ancestor(element, levels):
    """Return the ancestor `levels` steps above `element`, or None.

    None is returned when the tree is not deep enough, instead of raising
    AttributeError from chaining getparent() on None.
    """
    node = element
    for _ in range(levels):
        if node is None:
            return None
        node = node.getparent()
    return node


# Gather every line from every reel-map file.
unsplit_lines = []
for filename in os.listdir("reelmapfiles"):
    with open(join("reelmapfiles", filename), "r") as f:
        unsplit_lines.extend(f)

# item id -> handle. Lines with fewer than two space-separated fields (for
# example blank lines) are skipped rather than raising IndexError.
itemid_handle_map = {}
for line in unsplit_lines:
    fields = line.split(" ")
    if len(fields) < 2:
        continue
    itemid_handle_map[fields[0].strip()] = fields[1].strip()

# Item ids that have not been matched yet. A set gives O(1) membership tests
# and removal, and works the same on Python 2 and Python 3.
itemids_to_match = set(itemid_handle_map)

for filename in os.listdir(ead_dir):
    tree = etree.parse(join(ead_dir, filename))
    for unitid in tree.xpath("//unitid"):
        # .text is None when the element has no leading text.
        unitid_text = unitid.text or ""
        if "sr" not in unitid_text.lower():
            continue

        identifier = unitid_text.strip("[] ")
        daolink = ""
        # Try the full identifier first, then drop the last hyphen-delimited
        # segment and retry until something matches or nothing is left.
        # Only still-unmatched ids are considered, so each id is used once.
        while not daolink and identifier:
            if identifier in itemids_to_match:
                daolink = itemid_handle_map[identifier]
                itemids_to_match.discard(identifier)
            else:
                identifier = "-".join(identifier.split("-")[:-1])

        if not daolink:
            continue

        # Three levels above the <unitid> element.
        parent_component = _ancestor(unitid, 3)
        if parent_component is None:
            continue
        if not parent_component.xpath("./did/dao"):
            # make the dao
            # TODO: dao creation was never implemented in the original script;
            # `daolink` holds the value it should point at.
            pass

# There are still unmatched identifiers that need to be investigated
if itemids_to_match:
    # Sorted so the report is deterministic between runs.
    print("\n".join(sorted(itemids_to_match)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment