Skip to content

Instantly share code, notes, and snippets.

@roskakori
Created July 7, 2012 19:37
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save roskakori/3067859 to your computer and use it in GitHub Desktop.
Save roskakori/3067859 to your computer and use it in GitHub Desktop.
Different variants of writing XML in Python
# -*- coding: utf-8 -*-
'''
Example code to write large output in XML with Unicode and namespaces.
This code has been referenced in a lightning talk I gave at EuroPython 2012
in Florence.
'''
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
def testMinidom():
    '''
    Write the example document to ``test_minidom.xml`` using xml.dom.minidom.

    The document has a ``fin:stuff`` root element containing two
    ``fin:person`` elements. The ``fin:`` prefix is written literally into
    every element and attribute name, and the ``xmlns:fin`` declaration is
    set like an ordinary attribute.

    Known drawbacks of this variant:

    - createElementNS() and createAttributeNS() do not work as expected
    - esoteric discussion about XML serialization at bugs.python.org,
      issue 1621421
    '''
    import xml.dom.minidom

    # (key, first name, surname) of every person to write. The values stay
    # text here; they are encoded exactly once when the document is
    # serialized below.
    people = (
        ('123', 'Jane', 'Doe'),
        ('234', u'Svën', u'Höek'),
    )

    document = xml.dom.minidom.Document()
    stuff = document.createElement('fin:stuff')
    stuff.setAttribute('xmlns:fin', 'http://financialstuff.example.com/v1.3/')
    for key, firstName, surName in people:
        person = document.createElement('fin:person')
        person.setAttribute('fin:key', key)
        person.setAttribute('fin:firstname', firstName)
        person.setAttribute('fin:surname', surName)
        stuff.appendChild(person)
    document.appendChild(stuff)

    # toxml() with an explicit encoding returns the serialized document as
    # a byte string, which is what a file opened in binary mode expects.
    with open('test_minidom.xml', 'wb') as out:
        out.write(document.toxml(encoding='utf-8'))
def testEtree():
    '''
    Write the example document to ``test_etree.xml`` using xml.etree.

    The XML declaration and the DOCTYPE are written by hand, followed by
    the element tree serialized without a declaration of its own.

    Known drawback of this variant:

    - namespace uses Clark notation instead of XPath
    '''
    from xml.etree import ElementTree
    from xml.etree.ElementTree import Element
    from xml.etree.ElementTree import SubElement

    namespace = 'http://financialstuff.example.com/v1.3/'

    def clark(localName):
        # Qualified name in Clark notation: "{namespace-uri}local-name".
        return '{%s}%s' % (namespace, localName)

    ElementTree.register_namespace('fin', namespace)
    root = Element(clark('stuff'))
    SubElement(root, clark('person'), {
        clark('key'): '123',
        clark('firstname'): 'Jane',
        clark('surname'): 'Doe',
    })
    SubElement(root, clark('person'), {
        clark('key'): '234',
        clark('firstname'): u'Svën',
        clark('surname'): u'Höek',
    })
    tree = ElementTree.ElementTree(root)
    with open('test_etree.xml', 'wb') as out:
        # The file is opened in binary mode, so the hand-written header
        # has to be bytes as well.
        out.write(b'<?xml version="1.0" encoding="utf-8" ?>')
        # NOTE(review): the DOCTYPE names "finstuff" while the root element
        # is "fin:stuff" - confirm which one is intended.
        out.write(
            b'<!DOCTYPE finstuff SYSTEM '
            b'"http://financialstuff.example.com/v1.3/finstuff.dtd">'
        )
        tree.write(out, xml_declaration=False, encoding='utf-8')
def testSax():
    '''
    Write the example document to ``test_sax.xml`` using the SAX
    XMLGenerator.

    Known drawbacks of this variant:

    - no checks for well formedness
    - can only write <some></some> instead of <some/>
    '''
    from xml.sax.saxutils import XMLGenerator

    rootAttributes = {'xmlns:fin': 'http://financialstuff.example.com/v1.3/'}
    janeAttributes = {'fin:key': '123', 'fin:firstname': 'Jane', 'fin:surname': 'Doe'}
    svenAttributes = {'fin:key': '234', 'fin:firstname': u'Svën', 'fin:surname': u'Höek'}

    with open('test_sax.xml', 'wb') as out:
        generator = XMLGenerator(out, encoding='utf-8')
        generator.startDocument()
        generator.startElement('fin:stuff', rootAttributes)
        generator.startElement('fin:person', janeAttributes)
        generator.endElement('fin:person')
        generator.startElement('fin:person', svenAttributes)
        # NOTE(review): the two end tags below are emitted in the wrong
        # order (root before the still-open person). This looks like a
        # deliberate illustration of the missing well-formedness check
        # listed above - confirm before "fixing" it.
        generator.endElement('fin:stuff')
        generator.endElement('fin:person')
        generator.endDocument()
def testCodecs():
    '''
    Write the example document to ``test_codecs.xml`` by formatting the
    markup by hand and writing it through a UTF-8 ``codecs`` file.
    '''
    import codecs
    from xml.sax.saxutils import quoteattr

    personTemplate = u'<fin:person fin:key=%s fin:firstname=%s fin:surname=%s />'
    people = (
        (123, 'Jane', 'Doe'),
        (234, u'Svën', u'Höek'),
    )

    def writePerson(out, key, firstName, surName):
        # quoteattr() escapes each value and wraps it in quotes, so the
        # template itself contains no quote characters.
        quotedValues = tuple(
            quoteattr(value) for value in (str(key), firstName, surName)
        )
        out.write(personTemplate % quotedValues)

    with codecs.open('test_codecs.xml', 'w', 'utf-8') as out:
        out.write('<fin:stuff xmlns:fin="http://financialstuff.example.com/v1.3/">')
        for key, firstName, surName in people:
            writePerson(out, key, firstName, surName)
        out.write('</fin:stuff>')
def testLoxun():
    '''
    Write the example document to ``test_loxun.xml`` using loxun's
    XmlWriter.
    '''
    import loxun

    # Attributes of every ``fin:person`` element, in output order.
    people = (
        {'fin:key': '123', 'fin:firstname': 'Jane', 'fin:surname': 'Doe'},
        {'fin:key': '234', 'fin:firstname': u'Svën', 'fin:surname': u'Höek'},
    )

    with open('test_loxun.xml', 'wb') as out:
        with loxun.XmlWriter(out) as writer:
            writer.addNamespace('fin', 'http://financialstuff.example.com/v1.3/')
            writer.startTag('fin:stuff')
            for attributes in people:
                writer.tag('fin:person', attributes)
            # Presumably closes the most recently opened tag ('fin:stuff');
            # verify against the loxun documentation.
            writer.endTag()
if __name__ == '__main__':
    # Run every XML writing variant in turn, in the order they are defined.
    for writeVariant in (testMinidom, testEtree, testSax, testCodecs, testLoxun):
        writeVariant()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment