This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def test_split_extent_with_commas(self):
    """Check that a comma-separated extent statement is split into its parts.

    Reads ``self.extent_with_commas_raw_text`` and
    ``self.extent_with_commas_target_output``, which are not assigned in
    this method (presumably they are set in ``setUp`` -- confirm against
    the enclosing test class).
    """
    split_extent_text = split_extents(self.extent_with_commas_raw_text)
    # assertEqual (assumes the enclosing class is a unittest.TestCase)
    # reports both lists on failure and, unlike a bare ``assert``, is not
    # stripped when Python runs with -O.
    self.assertEqual(split_extent_text, self.extent_with_commas_target_output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fixture for the comma-separated case: the parts of the extent statement
# are joined by ", " and a final ", and ".
self.extent_with_commas_raw_text = (
    "3 linear ft., 1 oversize volume, and 5 motion picture reels"
)
# The list the raw text above is expected to be split into.
self.extent_with_commas_target_output = [
    "3 linear ft.",
    "1 oversize volume",
    "5 motion picture reels",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest # Python's testing module | |
from extent_splitter import split_extents # import the function we're testing | |
class TestExtentSplitter(unittest.TestCase): | |
# "setUp" is a special reserved function that is used to define any | |
# variables you will be using throughout the tests. unittest runs it
# before each test method in the class.
def setUp(self): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_extents(extent_text):
    """Split a compound extent statement into a list of individual extents.

    The separators recognised are ", and ", " and " and ", ", so both
    "A and B" and "A, B, and C" are broken into their parts.

    Args:
        extent_text: The raw extent statement as a string.

    Returns:
        A list of extent strings in their original order. Text that
        contains no separator comes back as a one-item list.
    """
    # Rewrite every separator as ", " so a single split handles all of them.
    # ", and " is replaced first; otherwise the " and " pass would leave a
    # doubled comma behind.
    # Periods are left untouched: a sentence-final "." cannot be told apart
    # from an abbreviation such as "ft." by punctuation alone.
    normalized = extent_text.replace(", and ", ", ").replace(" and ", ", ")
    return normalized.split(", ")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Second example: two extents joined only by " and ", with no commas and
# no trailing period.
extent_2_raw_text = "1 oversize volume and 5 motion picture reels"
extent_2_target_output = ["1 oversize volume", "5 motion picture reels"]

# Run the splitter on the raw text and compare it with the expected list.
split_extent_text = split_extents(extent_2_raw_text)
assert split_extent_text == extent_2_target_output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_extents(extent_text):
    """Return a fixed two-item extent list, ignoring ``extent_text``.

    This version is a hard-coded stub: the argument is never read and the
    same list is returned on every call.
    """
    return ["4 linear feet", "1 oversize volume"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from extent_splitter import split_extents |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_extents(extent_text):
    """Placeholder for the extent splitter; not implemented yet.

    The body does nothing, so every call returns ``None``.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First, define the original extent statement, something we might find in
# an average EAD <extent> tag.
basic_extent_raw_text = "4 linear feet and 1 oversize volume."

# Then define the list of objects we want to transform that text into.
basic_extent_target_output = ["4 linear feet", "1 oversize volume"]

# Run the (currently unwritten) code to transform the input text, and store
# the result in a new variable.
split_extent_text = split_extents(basic_extent_raw_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a new parser object. remove_blank_text=True drops the
# whitespace-only text between tags, which lets pretty_print re-indent the
# tree on output.
parser = etree.XMLParser(remove_blank_text=True)

# When we parse the input file, pass the parser as an argument.
tree = etree.parse("path/to/ead.xml", parser=parser)

# [... making edits ...]

# When making the output string, add the pretty_print argument.
ead_text = etree.tostring(
    tree,
    pretty_print=True,
    xml_declaration=True,
    encoding="utf-8",
)