Created
April 3, 2014 14:46
-
-
Save jcarbaugh/9955699 to your computer and use it in GitHub Desktop.
Ugh... XML.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from lxml import etree | |
def empty_generator():
    """Return a generator that finishes immediately without yielding anything."""
    # Delegating to an empty tuple keeps this a generator function while
    # producing no items.
    yield from ()
def parse(string_or_file):
    """Parse XML and return its root element wrapped in a GoddamnElement.

    ``str`` / ``bytes`` input is treated as XML markup and handed to
    ``etree.fromstring``; any other input is handed to ``etree.parse`` and
    the root of the resulting tree is used.
    """
    if not isinstance(string_or_file, (str, bytes)):
        tree = etree.parse(string_or_file)
        return GoddamnElement(tree.getroot())
    return GoddamnElement(etree.fromstring(string_or_file))
class GoddamnElement(object):
    """Thin convenience wrapper around an ElementTree-style element.

    The wrapped object is only required to expose ``tag``, ``text``,
    ``iter(tag)`` and iteration over its children; every element handed
    back by this class is itself wrapped in a ``GoddamnElement``.
    """

    # Captures the namespace URI from a "{uri}local" qualified tag.
    _NAMESPACE_RE = re.compile(r"\{(.*)\}")

    def __init__(self, element):
        """Store ``element`` as the wrapped element."""
        self._element = element

    def __iter__(self):
        """Yield each item produced by iterating the wrapped element, wrapped."""
        for child in self._element:
            yield GoddamnElement(child)

    def __repr__(self):
        return "<GoddamnElement %s>" % self.namespaced_name()

    def find(self, tag, namespace=None):
        """Yield every element matching ``tag``, wrapped.

        When ``namespace`` is truthy the lookup uses the qualified form
        ``{namespace}tag``. Matching is delegated to the wrapped element's
        ``iter(tag)``; when nothing matches the generator is simply empty.
        """
        if namespace:
            tag = "{%s}%s" % (namespace, tag)
        # ``iter`` always returns an iterator, so no None check is needed,
        # and a generator cannot usefully ``return`` another generator.
        for elem in self._element.iter(tag):
            yield GoddamnElement(elem)

    def first(self, tag, namespace=None):
        """Return the first element produced by ``find``, or None if there is none."""
        return next(self.find(tag, namespace), None)

    @property
    def text(self):
        """The wrapped element's ``text`` attribute, unchanged."""
        return self._element.text

    @property
    def name(self):
        """The tag with any leading ``{namespace}`` prefix removed."""
        # rpartition keeps everything after the last "}" and returns the
        # whole tag when no "}" is present.
        return self._element.tag.rpartition("}")[2]

    @property
    def namespace(self):
        """The namespace URI from a ``{uri}local`` tag, or None if unqualified."""
        match = self._NAMESPACE_RE.match(self._element.tag)
        return match.group(1) if match else None

    def namespaced_name(self):
        """Return ``{namespace}name``, or just the name when there is no namespace."""
        ns = self.namespace
        if ns:
            return "{%s}%s" % (ns, self.name)
        return self.name
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment