Skip to content

Instantly share code, notes, and snippets.

View walkerdb's full-sized avatar

Walker Boyle walkerdb

View GitHub Profile
def test_split_extent_with_commas(self):
    """Check that a comma-separated extent statement is split correctly.

    Reads ``self.extent_with_commas_raw_text`` (the input) and
    ``self.extent_with_commas_target_output`` (the expected list); both
    must already be set on the instance when this test runs.
    """
    split_extent_text = split_extents(self.extent_with_commas_raw_text)
    assert split_extent_text == self.extent_with_commas_target_output
# Fixture for the comma-separated case: three extents joined by commas,
# with "and" before the last one.
# NOTE(review): these lines assign to `self`, so they presumably belong
# inside the test class's setUp method -- confirm placement.
self.extent_with_commas_raw_text = "3 linear ft., 1 oversize volume, and 5 motion picture reels"
# Expected result: one list entry per extent, with the separating commas
# and the "and" removed. The period in "ft." stays with its extent.
self.extent_with_commas_target_output = ["3 linear ft.", "1 oversize volume", "5 motion picture reels"]
import unittest # Python's testing module
from extent_splitter import split_extents # import the function we're testing
class TestExtentSplitter(unittest.TestCase):
# "setUp" is a hook method that unittest.TestCase calls automatically
# before each test method in the class (not just once). Use it to
# define, as attributes on self, the values the tests will share.
# NOTE(review): the body of setUp is not shown in this snippet.
def setUp(self):
def split_extents(extent_text):
    """Split an extent statement into a list of its individual extents.

    Extents may be joined by " and ", by ", ", or by ", and ":

        "1 oversize volume and 5 motion picture reels"
            -> ["1 oversize volume", "5 motion picture reels"]
        "3 linear ft., 1 oversize volume, and 5 motion picture reels"
            -> ["3 linear ft.", "1 oversize volume", "5 motion picture reels"]

    Args:
        extent_text: The raw extent statement as a single string.

    Returns:
        A list of extent strings with surrounding whitespace removed.
        Blank pieces are dropped, so an empty statement gives an empty list.
    """
    # Reduce every separator to ", ". The ", and " form is handled first so
    # that replacing " and " afterwards cannot leave an empty piece behind.
    normalized = extent_text.replace(", and ", ", ").replace(" and ", ", ")
    # Splitting on ", " (not a bare ",") keeps numbers such as "1,200" intact.
    pieces = (piece.strip() for piece in normalized.split(", "))
    return [piece for piece in pieces if piece]
# Second test case: two extents joined only by the word "and".
extent_2_raw_text = "1 oversize volume and 5 motion picture reels"
# The list we expect back: one entry per extent, with the " and " removed.
extent_2_target_output = ["1 oversize volume", "5 motion picture reels"]
# Run the splitter on the raw text and keep the result for comparison.
split_extent_text = split_extents(extent_2_raw_text)
# Raises AssertionError if the actual output differs from the target.
assert split_extent_text == extent_2_target_output
def split_extents(extent_text):
    """Return a fixed two-item extent list, ignoring ``extent_text``.

    This is a stub: the argument is never read and the result is
    hard-coded, so only the "4 linear feet" / "1 oversize volume" case
    can pass against it.

    Args:
        extent_text: The raw extent statement (currently unused).

    Returns:
        Always ``["4 linear feet", "1 oversize volume"]``.
    """
    return ["4 linear feet", "1 oversize volume"]
from extent_splitter import split_extents
def split_extents(extent_text):
    """Placeholder for the extent splitter; it does nothing yet.

    Args:
        extent_text: The raw extent statement (currently unused).

    Returns:
        None. No splitting logic has been written at this stage.
    """
# First, define the original extent statement: the kind of text we
# might find in an average EAD <extent> tag.
# NOTE(review): this raw text ends with a period that the target output
# below omits -- confirm whether split_extents is expected to strip it.
basic_extent_raw_text = "4 linear feet and 1 oversize volume."
# Then define the list of strings we want that statement turned into.
basic_extent_target_output = ["4 linear feet", "1 oversize volume"]
# Run the (currently unwritten) code to transform the input text,
# and store the result in a new variable so it can be compared with
# the target output.
split_extent_text = split_extents(basic_extent_raw_text)
# Create a parser that discards whitespace-only text between elements.
# Without this, the original indentation is kept as text nodes and
# pretty_print cannot re-indent the document.
parser = etree.XMLParser(remove_blank_text=True)
# When parsing the input file, pass that parser in.
tree = etree.parse("path/to/ead.xml", parser=parser)
# [... making edits ...]
# When making the output string, add the pretty_print argument.
# NOTE: with an explicit encoding such as "utf-8", tostring returns bytes
# rather than str, so write the result to a file opened in binary mode.
ead_text = etree.tostring(tree, pretty_print=True, xml_declaration=True, encoding="utf-8")