Last active
March 3, 2016 13:40
-
-
Save thigm85/51b4998e9699c8683eff to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.ElementTree as ET

# Parse XML from a file.
# NOTE: `file_path` is a placeholder; bind it to the path of an XML document
# before running this snippet.
tree = ET.parse(file_path)  # parse the file and obtain an ElementTree object
root = tree.getroot()  # get the root Element object
# Parse XML held in a string.
# NOTE: `country_data_as_string` is a placeholder for the XML text.
# Each assignment rebinds `root` to a freshly parsed Element.
root = ET.fromstring(country_data_as_string)
root = ET.XML(country_data_as_string)  # equivalent to the line above

list(root)  # list the root's child Element objects

# Children are nested, and specific child nodes can be reached by index:
root[0][1].tag  # access the tag
root[0][1].attrib  # access the attribute dictionary
root[0][1].text  # access the text
# Iterate over the direct children of an element.
for child in root:
    # %-formatting prints "tag attrib" separated by one space, exactly like the
    # Python 2 print statement did, and also runs on Python 3.
    print("%s %s" % (child.tag, child.attrib))

# Iterate over all elements with tag 'neighbor'.
for neighbor in root.iter('neighbor'):
    print(neighbor.attrib)
# - Element.findall() finds only elements with a tag which are
#   direct children of the current element.
# - Element.find() finds the first child with a particular tag.
# - Element.text accesses the element's text content.
# - Element.get() accesses the element's attributes.
for country in root.findall('country'):
    rank = country.find('rank').text
    name = country.get('name')
    # Same "name rank" output on Python 2 and Python 3.
    print("%s %s" % (name, rank))

# More sophisticated specification of which elements to look for
# is possible by using XPath
# (https://docs.python.org/3/library/xml.etree.elementtree.html#elementtree-xpath).
# An Element object may be manipulated by
# - directly changing its fields (such as Element.text),
# - adding and modifying attributes (Element.set() method),
# - adding new children (for example with Element.append()).
for rank in root.iter('rank'):
    # Element.text is a string, so convert, increment, and store it back as text.
    new_rank = int(rank.text) + 1
    rank.text = str(new_rank)
    rank.set('updated', 'yes')  # mark the element as modified
# Remove elements using Element.remove().
# findall() returns a list, so removing children from `root` inside the loop
# does not disturb the sequence being iterated.
for country in root.findall('country'):
    rank = int(country.find('rank').text)
    if rank > 50:
        root.remove(country)
# ElementTree provides a simple way to build XML documents and write them to files.
# The ElementTree.write() method serves this purpose.
# NOTE(review): `tree` wraps the root parsed from the file; if `root` was later
# rebound to an Element parsed from a string, edits made through `root` are not
# part of `tree` and will not appear in the output -- confirm which is intended.
tree.write('output.xml')
# The SubElement() function also provides a convenient way
# to create new sub-elements for a given element:
a = ET.Element('a')
b = ET.SubElement(a, 'b')  # first child of <a>
c = ET.SubElement(a, 'c')  # second child of <a>
d = ET.SubElement(c, 'd')  # nested under <c>
ET.dump(a)  # writes the serialized element to standard output
# output: <a><b /><c><d /></c></a>

# See (https://docs.python.org/3/library/xml.etree.elementtree.html#parsing-xml-with-namespaces)
# on how to work with XML namespaces.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment