Skip to content

Instantly share code, notes, and snippets.

@yoki
Last active June 17, 2022 17:44
Show Gist options
  • Save yoki/fbff44f79e7f93b8d9c8b0bc11fd3d75 to your computer and use it in GitHub Desktop.
Python XML
# xml document
# edit.py
# encode.py
# extract.py
# namespace.py
# search.py

# edit.py -- add and remove elements of an XML document in place.
# http://chimera.labs.oreilly.com/books/1230000000393/ch06.html#_solution_99
from xml.etree.ElementTree import parse, Element

doc = parse('pred.xml')
root = doc.getroot()

# Remove a few elements
root.remove(root.find('sri'))

# Insert a new element after <nm>...</nm>.
# NOTE: Element.getchildren() was deprecated in Python 3.2 and REMOVED in
# Python 3.9 -- list the element directly to get its children instead.
pos = list(root).index(root.find('nm'))  # e.g. 1

e = Element('spam')
e.text = 'This is a test'
root.insert(pos + 1, e)  # insert right after <nm>

# Write back to a file
doc.write('newpred.xml', xml_declaration=True)
# encode.py -- escape and unescape XML/HTML entities in text.
# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_solution_37
import html

# Escape the special characters <, >, & (and quotes) as entities.
s = 'Elements are written as "<tag>text</tag>".'
escaped = html.escape(s)
# -> 'Elements are written as &quot;&lt;tag&gt;text&lt;/tag&gt;&quot;.'

# Emit non-ASCII characters as numeric character references.
s2 = 'Spicy Jalapeño'
encoded = s2.encode('ascii', errors='xmlcharrefreplace')
# -> b'Spicy Jalape&#241;o'

# Turn entities back into characters.
# NOTE: HTMLParser.unescape() was deprecated in Python 3.4 and REMOVED in
# Python 3.9 -- html.unescape() is the supported replacement.
s3 = 'Spicy &quot;Jalape&#241;o&quot.'
unescaped = html.unescape(s3)
# -> 'Spicy "Jalapeño".'
# extract.py -- parse an RSS feed and pull out the fields of interest.
# http://chimera.labs.oreilly.com/books/1230000000393/ch06.html#_solution_96
from urllib.request import urlopen
from xml.etree.ElementTree import parse

# Download the RSS feed and parse it (network I/O)
u = urlopen('http://planet.python.org/rss20.xml')
doc = parse(u)

# Extract and output tags of interest.
# NOTE: the loop body must be indented under the for statement
# (the original snippet had it at column 0, a SyntaxError).
for item in doc.iterfind('channel/item'):
    title = item.findtext('title')
    date = item.findtext('pubDate')
    link = item.findtext('link')
    print(title)
    print(date)
    print(link)
    print()

# --------------- Extract tag, text, attributes
e = doc.find('channel/title')
# e     -> <Element 'title' at 0x10135b310>
# e.tag -> 'title'
# e.text -> 'Planet Python'
# e.get('some_attribute') -> None when the attribute is absent
# XPath queries with ElementTree.
# http://chimera.labs.oreilly.com/books/1230000000393/ch06.html#_solution_100
# ---------XPATH--------------------
# https://docs.python.org/3.5/library/xml.etree.elementtree.html#supported-xpath-syntax
import xml.etree.ElementTree as ET

# Sample data from the ElementTree docs -- the original snippet referenced
# an undefined name `countrydata`, which made it unrunnable (NameError).
countrydata = """<data>
  <country name="Liechtenstein">
    <year>2008</year>
    <neighbor name="Austria" direction="E"/>
    <neighbor name="Switzerland" direction="W"/>
  </country>
  <country name="Singapore">
    <year>2011</year>
    <neighbor name="Malaysia" direction="N"/>
  </country>
</data>"""

root = ET.fromstring(countrydata)

# Top-level elements
top = root.findall(".")

# All 'neighbor' grand-children of 'country' children of the top-level
# elements
neighbors = root.findall("./country/neighbor")

# Nodes with name='Singapore' that have a 'year' child
singapore = root.findall(".//year/..[@name='Singapore']")

# 'year' nodes that are children of nodes with name='Singapore'
years = root.findall(".//*[@name='Singapore']/year")

# All 'neighbor' nodes that are the second child of their parent
second_neighbors = root.findall(".//neighbor[2]")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment