Skip to content

Instantly share code, notes, and snippets.

@provegard
Created November 21, 2011 07:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save provegard/1381912 to your computer and use it in GitHub Desktop.
Save provegard/1381912 to your computer and use it in GitHub Desktop.
ElementTree parsing for non-prefixed attributes with default namespace
#!/usr/bin/python
from xml.etree import ElementTree as ET
from cStringIO import StringIO
def parse_attrns(file):
    """Parse file into an ElementTree, namespace-qualifying bare attributes.

    Every attribute without a namespace that sits on a namespace-qualified
    element is renamed to ``{uri}name``, where ``uri`` is the namespace of
    the element it belongs to. Attributes that are already qualified, and
    all attributes of elements without a namespace, are left untouched.

    Args:
        file: XML source, passed unchanged to ``ET.iterparse``.

    Returns:
        An ``ET.ElementTree`` wrapping the first element reported by the
        parser (the document root).
    """
    root = None
    # Only "start" events are requested; the tag and attributes of an
    # element are already available at that point.
    for _event, elem in ET.iterparse(file, ("start",)):
        if root is None:
            root = elem
        if "}" not in elem.tag:
            # The element has no namespace, so there is nothing to inherit.
            continue
        # The tag looks like "{uri}local"; inherit the uri from the element.
        uri = elem.tag[1:].rsplit("}", 1)[0]
        # Iterate over a snapshot: the loop body deletes and inserts keys,
        # which is not allowed while iterating a live dict view (Python 3).
        for key, value in list(elem.attrib.items()):
            if key.startswith("{"):
                continue
            # Replace the old attribute with a namespace-qualified one.
            del elem.attrib[key]
            elem.attrib["{%s}%s" % (uri, key)] = value
    return ET.ElementTree(root)
class fileobj(object):
    """Minimal write-only file-like object that collects chunks in a list.

    The only attribute provided is ``write``, which is the ``append``
    method of the list handed to the constructor, so every chunk written
    ends up as a new item at the end of that list.
    """

    def __init__(self, l):
        # Expose the list's own append as the file "write" method.
        collect = l.append
        self.write = collect
def roundtrip(xml, dns=None):
    """Parse an XML string with ElementTree and serialize it back to a string.

    The text is parsed through parse_attrns and then written out with
    ``dns`` passed as the ``default_namespace`` option of the writer.

    >>> roundtrip('<a xmlns="foo"><b id="1" /></a>', dns='foo')
    '<a xmlns="foo"><b id="1" /></a>'
    >>> roundtrip('<a><b id="1" /></a>', dns='foo')
    Traceback (most recent call last):
    ...
    ValueError: cannot use non-qualified names with default_namespace option
    >>> roundtrip('<a><b id="1" /></a>')
    '<a><b id="1" /></a>'
    """
    chunks = []
    # parse_attrns is used in place of a plain ET.parse so that bare
    # attributes are namespace-qualified before the tree is written.
    tree = parse_attrns(StringIO(xml))
    tree.write(fileobj(chunks), default_namespace=dns)
    return "".join(chunks)
if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in the
    # docstrings of this module.
    from doctest import testmod
    testmod()
@mmastrac
Copy link

To make this work in Python 3:

            for k, v in elem.attrib.copy().items():

And

from io import StringIO

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment