Skip to content

Instantly share code, notes, and snippets.

@haxtibal
Last active March 8, 2022 00:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save haxtibal/8bfc6d32915c7c434e1bc91d016de916 to your computer and use it in GitHub Desktop.
Save haxtibal/8bfc6d32915c7c434e1bc91d016de916 to your computer and use it in GitHub Desktop.
Experiments with lxml: Combine custom class lookup and objectify magic
"""Some experiments on how we can combine custom class lookup with lxml.objectify."""
from enum import Enum
from io import StringIO
from lxml import etree, objectify
from typing import Union
class MyEnum(Enum):
    """Pure Python enumeration that this module binds to the XML element <myEnum>.

    The binding is made of two parts defined in this module:
    - an ElementNamespaceClassLookup maps the tag to MyEnumElement;
    - MyEnumElement converts between MyEnum members and the XML text.
    """

    FOO = 0
    BAR = 1
class MyEnumElement(objectify.ObjectifiedDataElement):
    """Data binding between ``MyEnum`` members and the text of an XML element."""

    @staticmethod
    def parse(xml_text: str) -> MyEnum:
        """Convert XML text holding an integer into the matching ``MyEnum`` member.

        Raises:
            ValueError: if the text is not an integer literal, or if no
                ``MyEnum`` member has that integer value.
        """
        return MyEnum(int(xml_text))

    @staticmethod
    def format(myenum: MyEnum) -> str:
        """Return the XML text for ``myenum``: its integer value as a string."""
        return str(myenum.value)

    @property
    def pyval(self) -> MyEnum:
        """The ``MyEnum`` member parsed from this element's text.

        The conversion runs on every access, so an invalid text only raises
        ``ValueError`` here, not when the document is parsed.
        """
        return MyEnumElement.parse(self.text)
# Describe MyEnum to objectify as a PyType: its name, the function used to
# check/convert XML text, the element class, and the function that turns a
# MyEnum member back into XML text. Then add it to objectify's type registry.
myenum_type = objectify.PyType(
    "MyEnum",
    MyEnumElement.parse,
    MyEnumElement,
    MyEnumElement.format,
)
myenum_type.register()
class DecimalElement(objectify.FloatElement):
    """FloatElement variant whose ``pyval`` is an int whenever the text allows it.

    XML text that ``int()`` accepts yields an int; any other text is handed to
    ``FloatElement`` and yields whatever that class produces for it.
    """

    @property
    def pyval(self) -> Union[int, float]:
        """Return the element text as int if possible, otherwise as float."""
        try:
            return int(self.text)
        except ValueError:
            # Not an integer literal (e.g. "3.1415"): defer to FloatElement.
            return super().pyval
# Parser that will pick element classes through the custom lookup below.
parser = objectify.makeparser()

# The lookup associates element classes with tag names. To not lose the
# objectify magic for all other tree nodes, ObjectifyElementClassLookup is
# installed as the fallback.
lookup = etree.ElementNamespaceClassLookup(
    objectify.ObjectifyElementClassLookup()
)
parser.set_element_class_lookup(lookup)

# Explicit tag name -> element class bindings.
namespace = lookup.get_namespace('')
namespace['myEnum'] = MyEnumElement
namespace['myDecimalValue'] = DecimalElement
namespace['myBooleanValue'] = objectify.BoolElement
# Demo how the above is used.
# XML elements are parsed to pyvals of the type bound to their tag, instead of
# going through objectify's default "how types are mapped" fallback chain.
root = objectify.fromstring("""\
<root>
<myBooleanValue>false</myBooleanValue>
<myDecimalValue>42</myDecimalValue>
<myDecimalValue>3.1415</myDecimalValue>
<myEnum>1</myEnum>
</root>""", parser)

# `is False` checks both the type (bool) and the value in one step.
assert root.myBooleanValue.pyval is False
# Text without decimal places becomes an int ...
assert isinstance(root.myDecimalValue[0].pyval, int) and root.myDecimalValue[0].pyval == 42
# ... and text with decimal places becomes a float.
assert isinstance(root.myDecimalValue[1].pyval, float) and root.myDecimalValue[1].pyval == 3.1415
assert isinstance(root.myEnum.pyval, MyEnum) and root.myEnum.pyval == MyEnum.BAR
# Sadly, parsing an invalid value passes. Does anybody know how we could fail
# earlier? One would expect the failure right here in fromstring ...
root = objectify.fromstring('<root><myEnum>2</myEnum></root>', parser)
try:
    # ... but instead it fails only later, when the pyval is accessed.
    root.myEnum.pyval
except ValueError:
    pass
else:
    # Without this branch the demo would silently pass even if no error occurred.
    raise AssertionError("expected ValueError when reading pyval of <myEnum>2</myEnum>")
# Now go vice versa: build a tree from scratch and use the enforced types.
# The custom parser must be involved, as it knows about the custom lookup scheme.
e = parser.makeelement("myBooleanValue")
assert isinstance(e, objectify.BoolElement)

# Simple trial: any myBooleanValue element should become a BoolElement.
E = objectify.ElementMaker(makeelement=parser.makeelement)
e1 = E.myBooleanValue(False)
e2 = E.myBooleanValue("False")
assert isinstance(e1, objectify.BoolElement)
assert e1.pyval is False
assert isinstance(e2, objectify.BoolElement)
try:
    # BoolElement can't convert "False" to bool, so it fails at runtime with
    # ValueError: Invalid boolean value: 'False'
    e2.pyval
except ValueError:
    pass
else:
    # Without this branch the demo would silently pass even if no error occurred.
    raise AssertionError("expected ValueError when reading pyval of text 'False'")
# A small E-factory based DSL: each factory goes through our parser, so the
# element class (and thereby the type) is inferred from the tag name.
ROOT = E.root
MYBOOLEANVALUE = E.myBooleanValue
MYDECIMALVALUE = E.myDecimalValue
MYENUM = E.myEnum

root = ROOT(
    MYDECIMALVALUE(2.718),
    MYBOOLEANVALUE(True),
    # Goes through our custom stringify function and writes 0 to the XML text.
    MYENUM(MyEnum.FOO)
)
# Observe the result as a dump.
# objectify.dump indents each child level by four spaces and puts attribute
# lines two further spaces in, prefixed with "* "; the expected text must carry
# that whitespace to match.
assert objectify.dump(root) == """root = None [ObjectifiedElement]
    myDecimalValue = 2.718 [DecimalElement]
      * py:pytype = 'float'
    myBooleanValue = True [BoolElement]
      * py:pytype = 'bool'
    myEnum = 0 [MyEnumElement]
      * py:pytype = 'MyEnum'"""
# ...and as an XML string (with xsi:type annotations instead of py:pytype).
objectify.deannotate(root)
objectify.xsiannotate(root)
etree.cleanup_namespaces(root)
# pretty_print indents child elements by two spaces and ends the document with
# a newline; the expected text must carry that whitespace to match.
assert etree.tostring(root, pretty_print=True).decode('ascii') == """<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <myDecimalValue xsi:type="xsd:double">2.718</myDecimalValue>
  <myBooleanValue xsi:type="xsd:boolean">true</myBooleanValue>
  <myEnum xsi:type="xsd:integer">0</myEnum>
</root>
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment