Skip to content

Instantly share code, notes, and snippets.

@pythononwheels
Last active August 31, 2022 21:08
Show Gist options
  • Save pythononwheels/0568496595d0f79caafb1b4466c0e821 to your computer and use it in GitHub Desktop.
Save pythononwheels/0568496595d0f79caafb1b4466c0e821 to your computer and use it in GitHub Desktop.
python cerberus dataclass support proposal / idea
# -*- coding: utf-8 -*-
"""
This module is a proposal to add python's dataclass support to cerberus.
See Issue: #397
https://github.com/pyeve/cerberus/issues/397
And road-map:
1.3
Checklist:
The module dataclasses is implemented. This may get postponed 'til a following minor release. (#397)
"""
from cerberus.validator import Validator as Vd
from dataclasses import dataclass, field, fields
from typing import List
import random
import datetime
from typing import Dict, Tuple, List
import dataclasses
def set_datetime():
    """
    Default factory for datetime fields: return the current UTC time.

    The result is a naive ``datetime.datetime`` (``tzinfo`` is ``None``),
    exactly like the ``datetime.datetime.utcnow()`` call it replaces.
    ``utcnow()`` is deprecated since Python 3.12, so the value is taken from
    a timezone-aware "now" in UTC and the tzinfo is stripped again.
    """
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
def set_date():
    """
    Default factory for date fields: return today's date in UTC.

    Returns a plain ``datetime.date``. The date is read from a
    timezone-aware "now" in UTC instead of the deprecated
    ``datetime.datetime.utcnow()`` (deprecated since Python 3.12); the
    resulting calendar date is the same.
    """
    return datetime.datetime.now(datetime.timezone.utc).date()
def set_random():
    """
    Default factory for float fields.

    Returns a random float drawn with ``random.uniform`` between 1.0 and 1.9.
    """
    lower_bound, upper_bound = 1.0, 1.9
    return random.uniform(lower_bound, upper_bound)
def test():
    """
    Entry point that is executed when this module is run from the
    command line.

    Creates a TestData instance (with ``active`` set to True) and passes it
    to Validator.validate_test().
    """
    sample = TestData(True)
    Validator().validate_test(sample)
# Python type -> cerberus type name, used by create_cerberus_schema().
_CERBERUS_TYPE_NAMES = {
    bool: "boolean",
    int: "integer",
    float: "float",
    list: "list",
    dict: "dict",
    datetime.date: "date",
    datetime.datetime: "datetime",
    set: "set",
    str: "string",
    bytes: "binary",
    bytearray: "binary",
}


def create_cerberus_schema(dataclass):
    """
    Create a cerberus schema from the python dataclass fields and the
    optional metadata["cerberus"] data attached to each field.

    dataclass may be a dataclass type or a dataclass instance (anything
    accepted by dataclasses.fields()).

    Returns a dict mapping every field name to a cerberus rule dict. The
    rule dict always contains "type"; all keys found in the field's
    metadata["cerberus"] are merged in afterwards (and may override "type").
    Field types that are not in the known type table are mapped to "string"
    and a notice is printed.

    Example dataclass:

        @dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False)
        class TestData():
            title: str = field (default="",
                metadata={"cerberus" :
                    {
                        "maxlength" : 30
                    }
                })
    """
    cerberus_schema = {}
    for elem in fields(dataclass):
        # Generic aliases (typing.Dict[str, str], list[int], ...) expose the
        # runtime class as __origin__; plain classes have no such attribute.
        # __origin__ has to be checked first: typing aliases also carry a
        # __name__ on Python 3.10+, so probing __name__ cannot tell an alias
        # from a plain class.
        current_type = getattr(elem.type, "__origin__", elem.type)
        try:
            type_name = _CERBERUS_TYPE_NAMES.get(current_type)
        except TypeError:
            # unhashable annotation object: cannot be one of the known types
            type_name = None
        if type_name is None:
            type_name = "string"
            print("type unknown, setting string.")
        rules = {"type": type_name}
        # element may have a cerberus metadata sub attribute: merge its rules
        rules.update(elem.metadata.get("cerberus", {}))
        cerberus_schema[elem.name] = rules
    return cerberus_schema
# All dataclass() options are left at their defaults
# (init, repr and eq enabled; order, unsafe_hash and frozen disabled).
@dataclass
class TestData:
    """
    Sample python dataclass used by the demo code in this module.

    It carries one field for each python type that the schema creation
    maps to a cerberus type.
    See: http://docs.python-cerberus.org/en/stable/validation-rules.html
    """
    active: bool
    bindat: bytes = b''
    bin_arr: bytearray = field(default_factory=bytearray)
    tdate: datetime.date = field(default_factory=set_date)
    tstamp: datetime.datetime = field(default_factory=set_datetime)
    props: dict = field(default_factory=dict)
    factor: float = field(default_factory=set_random)
    votes: int = 0
    tags: list = field(default_factory=list)
    # number ??
    unique_tags: set = field(default_factory=set)
    connectionOptions: Dict[str, str] = field(default_factory=dict)
    # the extra cerberus rule limits the title length
    title: str = field(
        default="",
        metadata={
            "cerberus": {
                "maxlength": 30,
            },
        },
    )
    # Idea, not enabled yet: should also validate to true if check_multi
    # is a list.
    # See: http://docs.python-cerberus.org/en/stable/validation-rules.html
    # check_multi: str = field ( default="", metadata={ "cerberus" :
    #     {
    #         # A list of types can be used to allow different values:
    #         'check:multi': {'type': ['string', 'list']}
    #     }
    #   }
    # )
class Validator(Vd):
    """
    cerberus Validator that takes a python dataclass instance and validates
    its actual values against the types defined in the dataclass.

    The cerberus schema is derived from the dataclass fields (see
    create_cerberus_schema) unless the object already carries a non-empty
    ``schema`` attribute.
    """

    def _dataclass_schema_and_values(self, dataclass):
        """
        Return the tuple (cerberus_schema, value_dict) for a dataclass
        instance.

        The schema is taken from the instance's ``schema`` attribute when
        that exists and is not empty, otherwise it is derived with
        create_cerberus_schema(). The value dict is built with
        dataclasses.asdict(), so ``dataclass`` has to be an instance,
        not the class itself.
        """
        cerberus_schema = getattr(dataclass, "schema", None)
        if not cerberus_schema:
            # no schema yet, make one:
            cerberus_schema = create_cerberus_schema(dataclass)
        return cerberus_schema, dataclasses.asdict(dataclass)

    def print_dataclass_attributes(self, dataclass):
        """
        Print the dataclass attribute names and types as defined, followed
        by the field's metadata["cerberus"] rules when the field has any.
        """
        for elem in fields(dataclass):
            print(" Field name: {}, type is: {} ".format(elem.name, elem.type))
            # no extra cerberus schema metadata for an attribute is no problem
            if "cerberus" in elem.metadata:
                print(" ... cerberus schema metadata: {}".format(elem.metadata["cerberus"]))

    def validate(self, dataclass):
        """
        Take a python dataclass instance and validate the actual values
        for correct types as defined in the dataclass.

        The schema is automatically derived from the dataclass fields and
        the optional metadata["cerberus"] attribute per field.
        Example: see TestData in this module.

        Returns the result of the cerberus validation (True or False).
        """
        cerberus_schema, value_dict = self._dataclass_schema_and_values(dataclass)
        # validate using the value dict and the derived schema
        return super().validate(value_dict, cerberus_schema)

    def validate_test(self, dataclass=None):
        """
        Test validation that uses the TestData dataclass from this module
        (when no dataclass instance is given) and heavily prints
        information to the console:

            the dataclass fields,
            the schema derived from the dataclass fields and metadata,
            a dict of the current values,
            the 1st validation result (should be: True).

        Then the title of the instance is changed to 32 "a" chars (so the
        maxlength metadata constraint should be violated). Note that this
        modifies the instance that was passed in.

            The 2nd validation result should be False
            and the validation errors (self.errors) are printed.

        Finally returns the validator (self).
        """
        from pprint import PrettyPrinter

        if dataclass is None:
            # TestData.active has no default, so it has to be passed here
            dataclass = TestData(True)

        cerberus_schema, value_dict = self._dataclass_schema_and_values(dataclass)
        ret = super().validate(value_dict, cerberus_schema)

        #
        # Now print some info
        #
        line_length = 70
        pp = PrettyPrinter(indent=4)

        def print_heading(text):
            # every section starts with its title framed by two rules
            print(line_length*"-")
            print(text)
            print(line_length*"-")

        # print the dataclass fields
        print_heading("| dataclass fields ")
        self.print_dataclass_attributes(dataclass)

        # print the schema
        print_heading("| cerberus schema ")
        for elem in cerberus_schema:
            print(" {:20} {}".format('"'+elem+'"', cerberus_schema[elem]))

        # print the current values dict
        print_heading("| current values of dataclass instance ")
        pp.pprint(value_dict)

        # print the 1st validation result
        print_heading("| cerberus validation result ")
        print(" {}".format(ret))

        # change the value of title to length 32 (so the constraint will be
        # violated) and validate the new values against the same schema
        dataclass.title = 32*"a"
        value_dict = dataclasses.asdict(dataclass)
        ret = super().validate(value_dict, cerberus_schema)

        # print the 2nd validation result
        print_heading("| changed title to lenght 32 => validation result ")
        print(" {}".format(ret))
        if not ret:
            print(" {}".format((self.errors)))
        return self
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    test()
@pythononwheels
Copy link
Author

pythononwheels commented Nov 13, 2018

This is the Test Dataclass

# All dataclass() options are left at their defaults
# (init, repr and eq enabled; order, unsafe_hash and frozen disabled).
@dataclass
class TestData:
    """
        Sample python dataclass used by the demo code of the module.
        It carries the fields needed for cerberus schemas and validation.
        See: http://docs.python-cerberus.org/en/stable/validation-rules.html
    """
    active: bool
    bindat: bytes = b''
    bin_arr: bytearray = field(default_factory=bytearray)
    tdate: datetime.date = field(default_factory=set_date)
    tstamp: datetime.datetime = field(default_factory=set_datetime)
    props: dict = field(default_factory=dict)
    factor: float = field(default_factory=set_random)
    votes: int = 0
    tags: list = field(default_factory=list)
    # number ??
    unique_tags: set = field(default_factory=set)
    connectionOptions: Dict[str, str] = field(default_factory=dict)
    # the extra cerberus rule limits the title length
    title: str = field(
        default="",
        metadata={
            "cerberus": {
                "maxlength": 30,
            },
        },
    )

This is the output of the validate_test() method, which intentionally prints a lot of info to stdout.

----------------------------------------------------------------------
|  dataclass fields
----------------------------------------------------------------------
    Field name: active, type is: <class 'bool'>
    Field name: bindat, type is: <class 'bytes'>
    Field name: bin_arr, type is: <class 'bytearray'>
    Field name: tdate, type is: <class 'datetime.date'>
    Field name: tstamp, type is: <class 'datetime.datetime'>
    Field name: props, type is: <class 'dict'>
    Field name: factor, type is: <class 'float'>
    Field name: votes, type is: <class 'int'>
    Field name: tags, type is: <class 'list'>
    Field name: unique_tags, type is: <class 'set'>
    Field name: connectionOptions, type is: typing.Dict[str, str]
    Field name: title, type is: <class 'str'>
     ... cerberus schema metadata: {'maxlength': 30}
----------------------------------------------------------------------
|  cerberus schema
----------------------------------------------------------------------
    "active"             {'type': 'boolean'}
    "bindat"             {'type': 'binary'}
    "bin_arr"            {'type': 'binary'}
    "tdate"              {'type': 'date'}
    "tstamp"             {'type': 'datetime'}
    "props"              {'type': 'dict'}
    "factor"             {'type': 'float'}
    "votes"              {'type': 'integer'}
    "tags"               {'type': 'list'}
    "unique_tags"        {'type': 'set'}
    "connectionOptions"  {'type': 'dict'}
    "title"              {'type': 'string', 'maxlength': 30}
----------------------------------------------------------------------
|  current values of dataclass instance
----------------------------------------------------------------------
{   'active': True,
    'bin_arr': bytearray(b''),
    'bindat': b'',
    'connectionOptions': {},
    'factor': 1.4276983175256361,
    'props': {},
    'tags': [],
    'tdate': datetime.date(2018, 11, 14),
    'title': '',
    'tstamp': datetime.datetime(2018, 11, 14, 20, 25, 24, 53223),
    'unique_tags': set(),
    'votes': 0}
----------------------------------------------------------------------
|  cerberus validation result
----------------------------------------------------------------------
     True
----------------------------------------------------------------------
|  changed title to lenght 32 => validation result
----------------------------------------------------------------------
     False
     {'title': ['max length is 30']}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment