Skip to content

Instantly share code, notes, and snippets.

@thigm85
Last active March 3, 2016 13:40
Show Gist options
  • Save thigm85/51b4998e9699c8683eff to your computer and use it in GitHub Desktop.
Save thigm85/51b4998e9699c8683eff to your computer and use it in GitHub Desktop.
import xml.etree.ElementTree as ET
# Parse XML from a file.
# (`file_path` is not defined in this snippet; the reader must supply it.)
tree = ET.parse(file_path) # parse the file and obtain an ElementTree object
root = tree.getroot() # get the root Element object of that tree
# Parse XML held in a string instead.
# (`country_data_as_string` is likewise not defined in this snippet.)
# NOTE: each assignment below rebinds `root`, so from here on `root` is no
# longer the element returned by tree.getroot() above.
root = ET.fromstring(country_data_as_string)
root = ET.XML(country_data_as_string) # equivalent to above
list(root) # list the root's children Element objects
# Children are nested, and we can access specific child nodes by index.
# These are bare expressions: their values are displayed only when typed
# into an interactive session; in a script they are evaluated and discarded.
root[0][1].tag # access the tag
root[0][1].attrib # access the attributes
root[0][1].text # access the text
# Iterating over an Element yields its direct children.
for child in root:
    # A single pre-formatted argument keeps the output identical to the
    # original `print child.tag, child.attrib` on both Python 2 and 3.
    print('%s %s' % (child.tag, child.attrib))
# Iterate over all elements with tag 'neighbor', at any depth below root.
for neighbor in root.iter('neighbor'):
    # print(x) with one argument behaves the same on Python 2 and 3.
    print(neighbor.attrib)
# - Element.findall() finds only elements with a tag which are
#   direct children of the current element.
# - Element.find() finds the first child with a particular tag.
# - Element.text accesses the element's text content.
# - Element.get() accesses the element's attributes.
for country in root.findall('country'):
    rank = country.find('rank').text
    name = country.get('name')
    # A single pre-formatted argument keeps the output identical to the
    # original `print name, rank` on both Python 2 and 3.
    print('%s %s' % (name, rank))
# More sophisticated specification of which elements to look for
# is possible by using XPath
# (https://docs.python.org/2/library/xml.etree.elementtree.html#elementtree-xpath).
# An Element object may be manipulated by
# - directly changing its fields (such as Element.text),
# - adding and modifying attributes (Element.set() method),
# - adding new children (for example with Element.append()).
# Here every 'rank' element has its numeric text incremented by one and
# gains the attribute updated="yes".
for rank in root.iter('rank'):
    new_rank = int(rank.text) + 1
    rank.text = str(new_rank)  # write the value back as a string
    rank.set('updated', 'yes')
# Remove elements using Element.remove().
# findall() returns a list of the matches, so removing children from `root`
# inside the loop does not disturb the iteration.
for country in root.findall('country'):
    rank = int(country.find('rank').text)
    if rank > 50:
        root.remove(country)
# ElementTree provides a simple way to build XML documents and write them to files.
# The ElementTree.write() method serves this purpose.
# `root` is wrapped in a fresh ElementTree so that the element currently held
# in `root` is what gets written, whether it came from ET.parse() or from a
# string. (`root` was rebound after tree.getroot(), so writing `tree` would
# not include changes made through `root`.)
ET.ElementTree(root).write('output.xml')
# SubElement() creates a new element and attaches it to the given parent in
# one call, which makes building a tree by hand convenient.
a = ET.Element('a')
# 'b' and 'c' become children of 'a', in that order.
b, c = (ET.SubElement(a, tag) for tag in ('b', 'c'))
# 'd' is nested one level deeper, under 'c'.
d = ET.SubElement(c, 'd')
ET.dump(a)  # writes the serialized tree to standard output
# output: <a><b /><c><d /></c></a>
# See (https://docs.python.org/2/library/xml.etree.elementtree.html#parsing-xml-with-namespaces)
# on how to work with XML namespaces
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment