Last active
March 8, 2022 00:59
-
-
Save haxtibal/8bfc6d32915c7c434e1bc91d016de916 to your computer and use it in GitHub Desktop.
Experiments with lxml: Combine custom class lookup and objectify magic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Some experiments on how we can combine custom class lookup with lxml.objectify."""
from enum import Enum
from io import StringIO
from typing import Union

from lxml import etree, objectify
class MyEnum(Enum):
    """Pure-Python enumeration that is bound to the XML element ``<myEnum>``.

    Can we associate this pure Python type with a specific XML element?
    Yes, it works like this:

    - ``ElementNamespaceClassLookup`` associates ``MyEnumElement`` with the
      ``myEnum`` tag.
    - ``MyEnumElement`` (de-)serializes ``MyEnum`` from/to an XML string.
    """

    FOO = 0
    BAR = 1
class MyEnumElement(objectify.ObjectifiedDataElement):
    """Data binding that exposes the text of an XML element as a ``MyEnum``."""

    @staticmethod
    def parse(xml_text: str) -> MyEnum:
        """Convert XML text holding an integer into the matching ``MyEnum``.

        Raises:
            ValueError: If the text is not an integer literal, or the integer
                is not the value of any ``MyEnum`` member.
        """
        return MyEnum(int(xml_text))

    @staticmethod
    def format(myenum: MyEnum) -> str:
        """Return the XML text for *myenum*: its value rendered as a string."""
        return str(myenum.value)

    @property
    def pyval(self) -> MyEnum:
        """The ``MyEnum`` member parsed from this element's text.

        Parsing happens on every access, so an invalid text only raises here,
        not when the document is parsed.
        """
        return MyEnumElement.parse(self.text)
# Register a PyType named "MyEnum" with objectify. The arguments are, in order:
# the type name, the type-check callable (MyEnumElement.parse), the element
# class (MyEnumElement) and the stringify callable (MyEnumElement.format).
myenum_type = objectify.PyType(
    "MyEnum", MyEnumElement.parse, MyEnumElement, MyEnumElement.format
)
myenum_type.register()
class DecimalElement(objectify.FloatElement):
    """``FloatElement`` variant whose ``pyval`` distinguishes ints from floats.

    Can we tweak the behavior of FloatElement?
    XML strings with decimal places shall be float,
    XML strings with no decimal places shall be int.
    """

    @property
    def pyval(self) -> Union[int, float]:
        """Return the element text as ``int`` if it parses as one, else defer
        to the ``pyval`` of the ``FloatElement`` base class.
        """
        try:
            return int(self.text)
        except ValueError:
            # Not an integer literal: fall back to the inherited float parsing.
            return super().pyval
# This custom lookup scheme associates element classes with tag names.
# To not lose the objectify magic for tree nodes, ObjectifyElementClassLookup
# is set as the fallback.
lookup = etree.ElementNamespaceClassLookup(
    objectify.ObjectifyElementClassLookup())
parser = objectify.makeparser()
parser.set_element_class_lookup(lookup)

# The demo tags carry no XML namespace, so they are registered in the
# registry obtained for the empty namespace.
namespace = lookup.get_namespace('')
namespace['myBooleanValue'] = objectify.BoolElement
namespace['myDecimalValue'] = DecimalElement
namespace['myEnum'] = MyEnumElement
# Demo of how the above is used.
# XML elements are parsed to pyvals of the specified type, instead of going
# through the default "how types are mapped" fallback chain.
root = objectify.fromstring("""\
<root>
  <myBooleanValue>false</myBooleanValue>
  <myDecimalValue>42</myDecimalValue>
  <myDecimalValue>3.1415</myDecimalValue>
  <myEnum>1</myEnum>
</root>""", parser)

assert isinstance(root.myBooleanValue.pyval, bool)
assert root.myBooleanValue.pyval is False

# Text without decimal places becomes int, text with decimal places float.
assert isinstance(root.myDecimalValue[0].pyval, int)
assert root.myDecimalValue[0].pyval == 42
assert isinstance(root.myDecimalValue[1].pyval, float)
assert root.myDecimalValue[1].pyval == 3.1415

assert isinstance(root.myEnum.pyval, MyEnum)
assert root.myEnum.pyval is MyEnum.BAR
# Sadly, parsing an invalid value passes. Does anybody know how we could fail
# earlier? I'd expect it to fail right here in fromstring ...
root = objectify.fromstring('<root><myEnum>2</myEnum></root>', parser)
try:
    # ... but instead it fails only when the pyval is accessed later.
    root.myEnum.pyval
except ValueError:
    pass
else:
    # Make the demo verify its own claim instead of passing silently.
    raise AssertionError("expected ValueError for invalid <myEnum> value 2")
# Now go vice versa: build a tree from scratch and use the enforced types.
# We need to involve our custom parser, as it knows about the custom lookup
# scheme.
e = parser.makeelement("myBooleanValue")
assert isinstance(e, objectify.BoolElement)

# Simple trial: anything created as myBooleanValue should become a BoolElement.
E = objectify.ElementMaker(makeelement=parser.makeelement)
e1 = E.myBooleanValue(False)
e2 = E.myBooleanValue("False")
assert isinstance(e1, objectify.BoolElement)
assert e1.pyval is False
assert isinstance(e2, objectify.BoolElement)
try:
    # BoolElement can't convert "False" to bool; it fails at runtime with
    # ValueError: Invalid boolean value: 'False'
    e2.pyval
except ValueError:
    pass
else:
    # Make the demo verify its own claim instead of passing silently.
    raise AssertionError("expected ValueError for boolean text 'False'")
# We can even build a custom E-factory and mini-DSL that knows our parser to
# infer the types.
ROOT = E.root
MYBOOLEANVALUE = E.myBooleanValue
MYDECIMALVALUE = E.myDecimalValue
MYENUM = E.myEnum

root = ROOT(
    MYDECIMALVALUE(2.718),
    MYBOOLEANVALUE(True),
    # Uses our custom stringify function and writes 0 to the XML text.
    MYENUM(MyEnum.FOO),
)

# Observe the result as a dump. The expected text is whitespace-sensitive:
# children are indented by 4 spaces, attribute lines by 2 more.
assert objectify.dump(root) == """root = None [ObjectifiedElement]
    myDecimalValue = 2.718 [DecimalElement]
      * py:pytype = 'float'
    myBooleanValue = True [BoolElement]
      * py:pytype = 'bool'
    myEnum = 0 [MyEnumElement]
      * py:pytype = 'MyEnum'"""
# ... and as an XML string (with xsi:type annotations instead of py:pytype).
objectify.deannotate(root)
objectify.xsiannotate(root)
etree.cleanup_namespaces(root)

# The expected text is whitespace-sensitive: pretty_print indents children by
# 2 spaces and terminates the document with a newline.
assert etree.tostring(root, pretty_print=True).decode('ascii') == """<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <myDecimalValue xsi:type="xsd:double">2.718</myDecimalValue>
  <myBooleanValue xsi:type="xsd:boolean">true</myBooleanValue>
  <myEnum xsi:type="xsd:integer">0</myEnum>
</root>
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment