Last active
August 31, 2022 21:08
-
-
Save pythononwheels/0568496595d0f79caafb1b4466c0e821 to your computer and use it in GitHub Desktop.
python cerberus dataclass support proposal / idea
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
""" | |
This module is a proposal to add python's dataclass support to cerberus. | |
See Issue: #397 | |
https://github.com/pyeve/cerberus/issues/397 | |
And road-map: | |
1.3 | |
Checklist: | |
The module dataclasses is implemented. This may get postponed 'til a following minor release. (#397) | |
""" | |
from cerberus.validator import Validator as Vd | |
from dataclasses import dataclass, field, fields | |
from typing import List | |
import random | |
import datetime | |
from typing import Dict, Tuple, List | |
import dataclasses | |
def set_datetime():
    """
    Return the current UTC time as a naive datetime.datetime
    (default factory for datetime fields).
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and strip the tzinfo so callers still get the same naive
    # value they always got.
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
def set_date():
    """
    Return the current UTC calendar date as a datetime.date
    (default factory for date fields).
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
    # timestamp yields the same calendar date.
    return datetime.datetime.now(datetime.timezone.utc).date()
def set_random():
    """
    Return a random float drawn uniformly between 1.0 and 1.9
    (default factory for float fields).
    """
    lower_bound, upper_bound = 1.0, 1.9
    return random.uniform(lower_bound, upper_bound)
def test():
    """
    Entry point used when this module is run from the commandline:
    builds a TestData instance and runs the verbose test validation on it.
    """
    sample = TestData(True)
    Validator().validate_test(sample)
# Exact-match lookup from python annotation types to cerberus type names.
_CERBERUS_TYPE_NAMES = {
    bool: "boolean",
    int: "integer",
    float: "float",
    list: "list",
    dict: "dict",
    datetime.date: "date",
    datetime.datetime: "datetime",
    set: "set",
    str: "string",
    bytes: "binary",
    bytearray: "binary",
}


def _resolve_field_type(annotation):
    """
    Return the runtime class behind a field annotation.

    Generic aliases (typing.Dict[str, str], typing.List, list[int], ...)
    carry the real class in __origin__; plain classes are returned as is.
    __origin__ is checked first because typing aliases also expose
    __name__ on Python 3.10+, so testing for __name__ would skip it.
    """
    return getattr(annotation, "__origin__", None) or annotation


def create_cerberus_schema(dataclass):
    """
    create a cerberus schema from the python dataclass fields and
    the optional metadata["cerberus"] rules of each field.

    dataclass may be a dataclass type or a dataclass instance (anything
    accepted by dataclasses.fields). Returns a dict that maps each field
    name to its cerberus rules dict.

    Field types without a cerberus counterpart fall back to "string"
    and a notice is printed.

    Example dataclass:
        @dataclass
        class TestData():
            title: str = field(default="",
                               metadata={"cerberus": {"maxlength": 30}})

    results in:
        {"title": {"type": "string", "maxlength": 30}}
    """
    cerberus_schema = {}
    for elem in fields(dataclass):
        current_type = _resolve_field_type(elem.type)
        try:
            type_name = _CERBERUS_TYPE_NAMES[current_type]
        except (KeyError, TypeError):
            # KeyError: no cerberus counterpart for this type.
            # TypeError: the annotation is not hashable, so it is not a type.
            type_name = "string"
            print("type unknown, setting string.")
        rules = {"type": type_name}
        # rules from the cerberus metadata are applied last, so they may
        # extend (or override) the derived type rule
        rules.update(elem.metadata.get("cerberus", {}))
        cerberus_schema[elem.name] = rules
    return cerberus_schema
@dataclass
class TestData:
    """
    Sample dataclass used to exercise this module.
    It has one field for each python type that the schema derivation maps
    to a cerberus type, plus one field carrying extra cerberus rules.
    See: http://docs.python-cerberus.org/en/stable/validation-rules.html
    """

    # the only field without a default, so it must be passed on creation
    active: bool
    bindat: bytes = b""
    bin_arr: bytearray = field(default_factory=bytearray)
    tdate: datetime.date = field(default_factory=set_date)
    tstamp: datetime.datetime = field(default_factory=set_datetime)
    props: dict = field(default_factory=dict)
    factor: float = field(default_factory=set_random)
    votes: int = 0
    tags: list = field(default_factory=list)
    # number ??
    unique_tags: set = field(default_factory=set)
    # annotated with a typing generic instead of a plain class
    connectionOptions: Dict[str, str] = field(default_factory=dict)
    # extra cerberus rules are attached below the "cerberus" metadata key
    title: str = field(
        default="",
        metadata={"cerberus": {"maxlength": 30}},
    )
    # Idea, not enabled yet: a check_multi field that should also validate
    # to true if its value is a list. A list of types can be used to allow
    # different values, e.g. a rule like {'type': ['string', 'list']}
    # passed through metadata={"cerberus": {...}}.
    # See: http://docs.python-cerberus.org/en/stable/validation-rules.html
class Validator(Vd):
    """
    takes a python dataclass instance and validates the actual values
    for correct types as defined in the dataclass
    """

    def _schema_for(self, dataclass):
        """
        return the cerberus schema to use for the given dataclass instance.
        A truthy "schema" attribute on the dataclass is used as is,
        otherwise the schema is derived from the dataclass fields.
        """
        cerberus_schema = getattr(dataclass, "schema", None)
        if not cerberus_schema:
            # no schema yet, make one:
            cerberus_schema = create_cerberus_schema(dataclass)
        return cerberus_schema

    def print_dataclass_attributes(self, dataclass):
        """
        print the dataclass attributes names and types as defined,
        followed by the cerberus metadata of a field if it has any
        """
        for elem in fields(dataclass):
            print(" Field name: {}, type is: {} ".format(elem.name, elem.type))
            # explicit membership test instead of a bare except:
            # a field without extra cerberus metadata is no problem
            if "cerberus" in elem.metadata:
                print(" ... cerberus schema metadata: {}".format(elem.metadata["cerberus"]))

    def validate(self, dataclass):
        """
        takes a python dataclass instance and validates the actual values
        for correct types as defined in the dataclass
        The schema is automatically derived from the dataclass fields and optional
        metadata["cerberus"] attribute per field.
        Returns the result of the cerberus validation of the instance values.
        Example: see TestData in this module.
        """
        cerberus_schema = self._schema_for(dataclass)
        #
        # create the value dict for this dataclass instance
        # (asdict lives in the dataclasses module, not on the instance)
        #
        value_dict = dataclasses.asdict(dataclass)
        #
        # validate using the value dict and the derived schema
        #
        return super().validate(value_dict, cerberus_schema)

    def validate_test(self, dataclass=None):
        """
        Test validation that uses the TestData dataclass from this module
        and heavily prints information to the console.
        Dataclass fields.
        Derives a Schema from dataclass Fields and metadata
        Creates a dict of the current values
        1st validation result should be: True
        -- changes the title to 32 a chars (so maxlength metadata constraint should be violated)
        2nd validation result should be False
        + will print the validation errors (self.errors)
        And finally return the validator

        Note: the title of the given dataclass instance is overwritten.
        """
        from pprint import PrettyPrinter

        line_length = 70
        pp = PrettyPrinter(indent=4)

        def print_banner(title):
            # section header: the title framed by two separator lines
            print(line_length * "-")
            print(title)
            print(line_length * "-")

        #
        # take a TestData instance if no dataclass instance was given
        # (TestData needs a value for its "active" field)
        #
        if dataclass is None:
            dataclass = TestData(True)
        cerberus_schema = self._schema_for(dataclass)
        #
        # create the value dict for this dataclass instance
        # and validate it using the derived schema
        #
        value_dict = dataclasses.asdict(dataclass)
        ret = super().validate(value_dict, cerberus_schema)
        #
        # Now print some info
        #
        print_banner("| dataclass fields ")
        self.print_dataclass_attributes(dataclass)

        print_banner("| cerberus schema ")
        for elem in cerberus_schema:
            print(" {:20} {}".format('"' + elem + '"', cerberus_schema[elem]))

        print_banner("| current values of dataclass instance ")
        pp.pprint(value_dict)

        print_banner("| cerberus validation result ")
        print(" {}".format(ret))

        # change the value of title to length 32 (so constraint will be violated)
        dataclass.title = 32 * "a"
        value_dict = dataclasses.asdict(dataclass)
        ret = super().validate(value_dict, cerberus_schema)

        print_banner("| changed title to lenght 32 => validation result ")
        print(" {}".format(ret))
        if not ret:
            print(" {}".format((self.errors)))
        return self
# run the verbose test validation only when executed as a script
if __name__ == "__main__":
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the Test Dataclass
This is the output of the
validate_test()
method, which intentionally prints a lot of info to stdout.