Different variants of writing XML in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
'''
Example code to write large output in XML with Unicode and namespaces.
This code has been referenced in a lightning talk I gave at EuroPython 2012
in Florence.
'''
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
def testMinidom():
    '''
    Write two namespaced ``fin:person`` elements to ``test_minidom.xml``
    using ``xml.dom.minidom``.

    Drawbacks of this variant:
    - createElementNS() and createAttributeNS() do not work as expected, so
      the ``fin:`` prefix and the ``xmlns:fin`` declaration are spelled out
      by hand as plain names/attributes
    - esoteric discussion about XML serialization at bugs.python.org,
      issue 1621421
    '''
    import xml.dom.minidom

    document = xml.dom.minidom.Document()
    stuff = document.createElement('fin:stuff')
    stuff.setAttribute('xmlns:fin', 'http://financialstuff.example.com/v1.3/')

    # Attribute values are kept as text; encoding to UTF-8 happens exactly
    # once, when the document is serialized below. Pre-encoded byte strings
    # cannot be serialized by minidom on Python 3.
    people = (
        ('123', 'Jane', 'Doe'),
        ('234', u'Svën', u'Höek'),
    )
    for key, firstname, surname in people:
        person = document.createElement('fin:person')
        person.setAttribute('fin:key', key)
        person.setAttribute('fin:firstname', firstname)
        person.setAttribute('fin:surname', surname)
        stuff.appendChild(person)
    document.appendChild(stuff)

    with open('test_minidom.xml', 'wb') as out:
        # toxml(encoding=...) returns the encoded document including the XML
        # declaration, which matches the binary mode of the output file.
        out.write(document.toxml(encoding='utf-8'))
def testEtree():
    '''
    Write two namespaced ``person`` elements to ``test_etree.xml`` using
    ``xml.etree.ElementTree``.

    Drawbacks of this variant:
    - namespace uses Clark notation ("{uri}name") instead of XPath
    '''
    from xml.etree import ElementTree
    from xml.etree.ElementTree import Element
    from xml.etree.ElementTree import SubElement

    namespace = 'http://financialstuff.example.com/v1.3/'

    def qualified(localName):
        # Build the Clark notation name "{namespace-uri}local-name".
        return '{%s}%s' % (namespace, localName)

    # Make the serializer emit the prefix "fin" for this namespace.
    ElementTree.register_namespace('fin', namespace)
    root = Element(qualified('stuff'))
    people = (
        ('123', 'Jane', 'Doe'),
        ('234', u'Svën', u'Höek'),
    )
    for key, firstname, surname in people:
        SubElement(root, qualified('person'), {
            qualified('key'): key,
            qualified('firstname'): firstname,
            qualified('surname'): surname,
        })
    tree = ElementTree.ElementTree(root)

    with open('test_etree.xml', 'wb') as out:
        # The file is opened in binary mode, so the hand-written prolog has
        # to be bytes; the tree itself is encoded by tree.write().
        out.write(b'<?xml version="1.0" encoding="utf-8" ?>')
        # NOTE(review): the DOCTYPE names "finstuff" while the root element
        # is "stuff" in the fin namespace - confirm this is intended.
        out.write(b'<!DOCTYPE finstuff SYSTEM "http://financialstuff.example.com/v1.3/finstuff.dtd">')
        # The declaration was already written above, so suppress a second one.
        tree.write(out, xml_declaration=False, encoding='utf-8')
def testSax():
    '''
    Write two ``fin:person`` elements to ``test_sax.xml`` using
    ``xml.sax.saxutils.XMLGenerator``.

    Drawbacks of this variant:
    - no checks for well formedness: the caller is responsible for emitting
      matching start/end events in the right order
    - can only write <some></some> instead of <some/>
    '''
    from xml.sax.saxutils import XMLGenerator

    with open('test_sax.xml', 'wb') as out:
        writer = XMLGenerator(out, encoding='utf-8')
        writer.startDocument()
        writer.startElement('fin:stuff', {'xmlns:fin': 'http://financialstuff.example.com/v1.3/'})
        people = (
            {'fin:key': '123', 'fin:firstname': 'Jane', 'fin:surname': 'Doe'},
            {'fin:key': '234', 'fin:firstname': u'Svën', 'fin:surname': u'Höek'},
        )
        for attributes in people:
            writer.startElement('fin:person', attributes)
            # Close each person before the next one starts; the generator
            # would not complain about a wrongly nested end tag.
            writer.endElement('fin:person')
        # The root element is closed last so the output is well-formed.
        writer.endElement('fin:stuff')
        writer.endDocument()
def testCodecs():
    '''
    Write the XML by hand to ``test_codecs.xml`` through a UTF-8 encoding
    stream from ``codecs``, using ``quoteattr`` to quote attribute values.
    '''
    import codecs
    from xml.sax.saxutils import quoteattr

    def writePerson(out, key, firstName, surName):
        # quoteattr() returns each value already wrapped in quotes and
        # escaped, so the template has no quotes of its own.
        quoted = tuple(quoteattr(value) for value in (str(key), firstName, surName))
        out.write(u'<fin:person fin:key=%s fin:firstname=%s fin:surname=%s />' % quoted)

    people = (
        (123, 'Jane', 'Doe'),
        (234, u'Svën', u'Höek'),
    )
    with codecs.open('test_codecs.xml', 'w', 'utf-8') as out:
        out.write('<fin:stuff xmlns:fin="http://financialstuff.example.com/v1.3/">')
        for key, firstName, surName in people:
            writePerson(out, key, firstName, surName)
        out.write('</fin:stuff>')
def testLoxun():
    '''
    Write two ``fin:person`` tags inside ``fin:stuff`` to ``test_loxun.xml``
    using the third-party ``loxun`` XmlWriter.
    '''
    import loxun

    people = (
        {'fin:key': '123', 'fin:firstname': 'Jane', 'fin:surname': 'Doe'},
        {'fin:key': '234', 'fin:firstname': u'Svën', 'fin:surname': u'Höek'},
    )
    with open('test_loxun.xml', 'wb') as out:
        with loxun.XmlWriter(out) as writer:
            writer.addNamespace('fin', 'http://financialstuff.example.com/v1.3/')
            writer.startTag('fin:stuff')
            for attributes in people:
                writer.tag('fin:person', attributes)
            # endTag() is called without a name; presumably it closes the
            # most recently opened tag (fin:stuff) - confirm in loxun docs.
            writer.endTag()
if __name__ == '__main__':
    # Run every variant in turn; each one writes its own test_*.xml file
    # into the current working directory.
    for variant in (testMinidom, testEtree, testSax, testCodecs, testLoxun):
        variant()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment