# Load USPTO .xml document.
# The file is decoded explicitly as UTF-8 (the encoding named in the XML
# declaration used as the split marker below) instead of relying on the
# platform default, and the context manager guarantees the handle is closed.
with open(filename, 'r', encoding='utf-8') as xml_file:
    # HTML/XML entities are unescaped once, before the text is split.
    xml_text = html.unescape(xml_file.read())

# Split out patent applications / grants: every chunk between two XML
# declarations is treated as one standalone patent document.
xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>'
for patent in xml_text.split(xml_declaration):
    # Skip empty chunks (str.split yields one when the text starts with,
    # ends with, or repeats the declaration back to back).
    if not patent:
        continue
    # Load patent text as HTML document.
    # NOTE(review): no parser is named, so BeautifulSoup selects whichever
    # one is installed; pin one explicitly if results must be reproducible
    # across environments.
    bs = BeautifulSoup(patent)
    # Search patent for application
    application = bs.find('us-patent-application')
    # If no application, search for grant
    if application is None:
        application = bs.find('us-patent-grant')
