Skip to content

Instantly share code, notes, and snippets.

@nonZero
Created November 4, 2015 09:52
Show Gist options
  • Save nonZero/c2067887f5bbf732d9ef to your computer and use it in GitHub Desktop.
Save nonZero/c2067887f5bbf732d9ef to your computer and use it in GitHub Desktop.
iter parse xml
from lxml import etree
def clear_element(e):
    """Release an already-processed element and everything that precedes it.

    The element itself is emptied with ``e.clear()``; afterwards the first
    child of its parent is deleted repeatedly until ``e`` no longer has a
    previous sibling. The (now empty) element stays attached to its parent.
    """
    e.clear()
    while True:
        if e.getprevious() is None:
            # Nothing left in front of ``e`` -- done.
            break
        siblings = e.getparent()
        del siblings[0]
def parse_file(filename, tag="{http://www.loc.gov/MARC21/slim}record"):
    """Stream an XML file and pass every matching element to ``parse_record``.

    The file is read incrementally through ``etree.iterparse`` and each
    element is released with ``clear_element`` right after it has been
    handled, so processed records are not kept around while parsing.

    Args:
        filename: Path of the XML file to read (opened in binary mode).
        tag: Fully qualified ``{namespace}name`` of the elements to process.
            Defaults to the MARC21 "slim" ``record`` element, which used to
            be hard-coded here.
    """
    with open(filename, "rb") as f:
        # No ``events`` argument is given, so lxml's default ("end") applies:
        # an element is yielded only once it has been parsed completely.
        for _event, elem in etree.iterparse(f, tag=tag):
            parse_record(elem)
            clear_element(elem)
def parse_record(elem):
    """Print the element's tag followed by each direct child, one per line.

    For a full dump of the record, ``etree.tostring(elem)`` can be printed
    instead while debugging.
    """
    # Iterating an element yields its direct children; unpacking them into a
    # single print call with a newline separator gives one line per item.
    print(elem.tag, *elem, sep="\n")
# Runs as soon as the module is executed: parse the hard-coded local XML file.
parse_file(filename="/home/udi/Downloads/nnl10all.xml")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment