# thigm85/xml_etree_ElementTree_module.py

## xml_etree_ElementTree_module.py
import xml.etree.ElementTree as ET

# Parse XML from a file. ET.parse() returns an ElementTree object and
# getroot() returns the root Element of that tree.
# `file_path` is not defined in this snippet; supply your own path.
tree = ET.parse(file_path) # parse the file and obtain ElementTree obj
root = tree.getroot() # get the root Element obj

# Parse XML held in a string. The result is assigned straight to `root`
# (no ElementTree object is involved here).
# `country_data_as_string` is not defined in this snippet; supply your own.
# Note: each assignment below rebinds `root`, so after these lines `root`
# no longer refers to the root of `tree` obtained above.
root = ET.fromstring(country_data_as_string)
root = ET.XML(country_data_as_string) # equivalent to above

# Listing an Element gives its direct child Element objects.
list(root)

# Elements nest like sequences, so a specific descendant can be reached by
# chained indexing. Here: the second child of the root's first child.
nested_child = root[0][1]
nested_child.tag     # the element's tag
nested_child.attrib  # the element's attributes
nested_child.text    # the element's text

# Iterating over an Element visits its direct children.
for child in root:
	# A single-argument print(...) is valid on both Python 2 and Python 3
	# and emits the same "tag attrib" line as the old `print a, b` statement.
	print('%s %s' % (child.tag, child.attrib))

# Iterate over all elements with tag 'neighbor'
for neighbor in root.iter('neighbor'):
	# Parenthesized form so the line runs on Python 2 and Python 3 alike.
	print(neighbor.attrib)

# - Element.findall() finds only elements with a tag which are
#   direct children of the current element.
# - Element.find() finds the first child with a particular tag
#   (it returns None when no child matches, so `.text` on the result
#   raises AttributeError for a country without a <rank> child).
# - Element.text accesses the element's text content
# - Element.get() accesses the element's attributes
for country in root.findall('country'):
	rank = country.find('rank').text
	name = country.get('name')
	# Single-argument print(...) behaves identically on Python 2 and 3.
	print('%s %s' % (name, rank))

# More sophisticated specification of which elements to look for
# is possible by using XPath
# (https://docs.python.org/2/library/xml.etree.elementtree.html#elementtree-xpath).

# An Element object can be changed in place by
# - assigning to its fields directly (such as Element.text),
# - adding or modifying attributes with Element.set(),
# - attaching new children (for example with Element.append()).
# Here every 'rank' element gets its number incremented by one and is
# marked with updated="yes".
for rank in root.iter('rank'):
	rank.text = str(int(rank.text) + 1)
	rank.set('updated', 'yes')

# Element.remove() detaches a child from its parent element.
# findall() has already gathered the matching children into a list, so
# removing from `root` inside the loop does not disturb the iteration.
for country in root.findall('country'):
	if int(country.find('rank').text) > 50:
		root.remove(country)

# ElementTree provides a simple way to build XML documents and write them to files.
# The ElementTree.write() method serves this purpose; here the output goes
# to 'output.xml'.
# NOTE(review): `tree` is the object returned by ET.parse(file_path) earlier,
# while `root` was later rebound by ET.fromstring()/ET.XML(). If those lines
# were run, the edits made through `root` above are not part of `tree`.
# Confirm which tree you intend to write.
tree.write('output.xml')

# ET.SubElement(parent, tag) creates a new element and attaches it to
# `parent` in a single call, which keeps hand-built trees short:
a = ET.Element('a')
b, c = ET.SubElement(a, 'b'), ET.SubElement(a, 'c')
d = ET.SubElement(c, 'd')
ET.dump(a)  # print the serialized tree
# output: <a><b /><c><d /></c></a>

# See (https://docs.python.org/2/library/xml.etree.elementtree.html#parsing-xml-with-namespaces)
# on how to work with XML namespaces
	import xml.etree.ElementTree as ET

	# NOTE(review): this section repeats the walkthrough above. It is kept at
	# its original one-tab indentation, so it only parses when placed inside
	# an enclosing block (or after being dedented). The loop bodies, which had
	# lost their indentation, are restored below.

	# Parse XML from a file. `file_path` is not defined in this snippet.
	tree = ET.parse(file_path) # parse the file and obtain ElementTree obj
	root = tree.getroot() # get the root Element obj

	# Parse XML in a string format. `country_data_as_string` is not defined in
	# this snippet. Each assignment rebinds `root`.
	root = ET.fromstring(country_data_as_string)
	root = ET.XML(country_data_as_string) # equivalent to above

	list(root) # list the root's children Element objs

	# Children are nested, and we can access specific child nodes by index:
	root[0][1].tag # access the tag
	root[0][1].attrib # access the attribute
	root[0][1].text # access the text

	# Can iterate through children nodes.
	# Single-argument print(...) runs on both Python 2 and Python 3 and prints
	# the same line as the old `print a, b` statement.
	for child in root:
		print('%s %s' % (child.tag, child.attrib))

	# Iterate over all elements with tag 'neighbor'
	for neighbor in root.iter('neighbor'):
		print(neighbor.attrib)

	# - Element.findall() finds only elements with a tag which are
	#   direct children of the current element.
	# - Element.find() finds the first child with a particular tag
	# - Element.text accesses the element's text content
	# - Element.get() accesses the element's attributes
	for country in root.findall('country'):
		rank = country.find('rank').text
		name = country.get('name')
		print('%s %s' % (name, rank))

	# More sophisticated specification of which elements to look for
	# is possible by using XPath
	# (https://docs.python.org/2/library/xml.etree.elementtree.html#elementtree-xpath).

	# an Element object may be manipulated by
	# - directly changing its fields (such as Element.text)
	# - adding and modifying attributes (Element.set() method),
	# - adding new children (for example with Element.append())
	for rank in root.iter('rank'):
		new_rank = int(rank.text) + 1
		rank.text = str(new_rank)
		rank.set('updated', 'yes')

	# remove elements using Element.remove()
	# findall() returns a list, so removing from `root` inside the loop does
	# not disturb the iteration.
	for country in root.findall('country'):
		rank = int(country.find('rank').text)
		if rank > 50:
			root.remove(country)

	# ElementTree provides a simple way to build XML documents and write them to files.
	# The ElementTree.write() method serves this purpose.
	tree.write('output.xml')

	# The SubElement() function also provides a convenient way
	# to create new sub-elements for a given element:
	a = ET.Element('a')
	b = ET.SubElement(a, 'b')
	c = ET.SubElement(a, 'c')
	d = ET.SubElement(c, 'd')
	ET.dump(a)
	# output: <a><b /><c><d /></c></a>

	# See (https://docs.python.org/2/library/xml.etree.elementtree.html#parsing-xml-with-namespaces)
	# on how to work with XML namespaces