# pythononwheels/data_classes.py

## data_classes.py
# -*- coding: utf-8 -*-
"""

    This module is a proposal to add  python's dataclass support to cerberus.
    See Issue: #397
        https://github.com/pyeve/cerberus/issues/397
    And road-map:
        1.3
            Checklist:
            The module dataclasses is implemented. This may get postponed 'til a following minor release. (#397)

"""
from cerberus.validator import Validator as Vd
from dataclasses import dataclass, field, fields
from typing import List
import random
import datetime
from typing import Dict, Tuple, List
import dataclasses

def set_datetime():
    """
        Return the current UTC time as a naive datetime.datetime.

        datetime.utcnow() is deprecated since Python 3.12, so the value is
        taken from a timezone aware "now" in UTC and the tzinfo is stripped
        afterwards. The result is the same kind of value as before:
        UTC based and without tzinfo.
    """
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

def set_date():
    """
        Return the current UTC calendar day as a datetime.date.

        Uses a timezone aware "now" in UTC instead of datetime.utcnow(),
        which is deprecated since Python 3.12. The returned date is unchanged.
    """
    return datetime.datetime.now(datetime.timezone.utc).date()

def set_random():
    """
        Return a random float drawn uniformly from the range 1.0 to 1.9.
    """
    lower, upper = 1.0, 1.9
    return random.uniform(lower, upper)

def test():
    """
        Console demo that is executed when this module is run from the
        commandline.

        Builds a TestData instance with active=True and hands it to
        Validator.validate_test(), which prints its report to stdout.
    """
    sample = TestData(True)
    Validator().validate_test(sample)

# Exact annotation type -> name of the cerberus "type" rule.
_CERBERUS_TYPE_NAMES = {
    bool: "boolean",
    int: "integer",
    float: "float",
    list: "list",
    dict: "dict",
    datetime.date: "date",
    datetime.datetime: "datetime",
    set: "set",
    str: "string",
    bytes: "binary",
    bytearray: "binary",
}


def create_cerberus_schema(dataclass):
    """
        Create a cerberus schema from the python dataclass fields and
        the optional metadata["cerberus"] entry of each field.

        Every field gets a {"type": ...} rule derived from its annotation.
        Parametrized generics (Dict[str, str], List[int], ...) are reduced to
        their runtime class first. Annotations that are not in the type table
        fall back to "string" and a note is printed.
        All keys of metadata["cerberus"] are then copied into the field's
        rule set (and may therefore also override "type").

        :param dataclass: a dataclass type or a dataclass instance.
        :return: dict mapping field name -> dict of cerberus rules.
        :raises TypeError: (from dataclasses.fields) if the argument is
            neither a dataclass nor an instance of one.

        Example dataclass:
        @dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False)
        class TestData():
            title: str = field (default="",
                metadata={"cerberus" :
                    {
                        "maxlength" : 30

                    }
                })
    """
    cerberus_schema = {}

    for elem in fields(dataclass):
        # Generic aliases keep their runtime class in __origin__, plain
        # classes do not have that attribute. Checking __origin__ first
        # (instead of __name__) matters on Python 3.10+, where aliases like
        # Dict[str, str] also expose a __name__.
        current_type = getattr(elem.type, "__origin__", elem.type)
        try:
            type_name = _CERBERUS_TYPE_NAMES.get(current_type)
        except TypeError:
            # unhashable annotation object: treat it like any unknown type
            type_name = None
        if type_name is None:
            print("type unknown, setting string.")
            type_name = "string"

        rules = {"type": type_name}
        # element may have a cerberus metadata sub attribute with extra rules
        rules.update(elem.metadata.get("cerberus", {}))
        cerberus_schema[elem.name] = rules
    return cerberus_schema


@dataclass
class TestData:
    """
        Sample dataclass used by the demo in this module.
        It carries one field for each annotation type that
        create_cerberus_schema() maps to a cerberus type, plus a title
        field with an additional cerberus rule in its metadata.
        See: http://docs.python-cerberus.org/en/stable/validation-rules.html
    """
    # the only field without a default, so it must be passed to __init__
    active: bool
    bindat: bytes = field(default=b"")
    bin_arr: bytearray = field(default_factory=bytearray)
    tdate: datetime.date = field(default_factory=set_date)
    tstamp: datetime.datetime = field(default_factory=set_datetime)
    props: dict = field(default_factory=dict)
    factor: float = field(default_factory=set_random)
    votes: int = field(default=0)
    tags: list = field(default_factory=list)
    #number ??
    unique_tags: set = field(default_factory=set)
    connectionOptions: Dict[str, str] = field(default_factory=dict)
    # extra cerberus rule: title may not be longer than 30 characters
    title: str = field(default="", metadata={"cerberus": {"maxlength": 30}})

    # should also validate to true if check_multi is a list.
    # See: http://docs.python-cerberus.org/en/stable/validation-rules.html
    #check_multi: str = field ( default="", metadata={ "cerberus" :
    #        {
    #            #A list of types can be used to allow different values:
    #            'check:multi': {'type': ['string', 'list']}
    #        }
    #    }
    #)


class Validator(Vd):
    """
        Cerberus validator that works directly on python dataclass instances.

        The schema is derived from the dataclass fields
        (see create_cerberus_schema) and the document that is validated is
        built with dataclasses.asdict().
    """

    def print_dataclass_attributes(self, dataclass):
        """
            Print the name and the declared type of every dataclass field,
            followed by its metadata["cerberus"] rules if it has any.

            :param dataclass: a dataclass type or a dataclass instance.
        """
        for elem in fields(dataclass):
            print("    Field name: {}, type is: {} ".format(elem.name, elem.type))
            # extra cerberus schema metadata is optional per attribute
            if "cerberus" in elem.metadata:
                print("     ... cerberus schema metadata: {}".format(elem.metadata["cerberus"]))

    def _dataclass_schema_and_values(self, dataclass):
        """
            Return the tuple (cerberus_schema, value_dict) for a dataclass instance.

            A truthy "schema" attribute on the instance is used as is,
            otherwise the schema is derived from the dataclass fields.
        """
        cerberus_schema = getattr(dataclass, "schema", None)
        if not cerberus_schema:
            # no schema yet, make one:
            cerberus_schema = create_cerberus_schema(dataclass)
        return cerberus_schema, dataclasses.asdict(dataclass)

    @staticmethod
    def _print_section_header(title, line_length=70):
        """
            Print title framed by two dashed lines of line_length characters.
        """
        rule = line_length * "-"
        print(rule)
        print(title)
        print(rule)

    def validate(self, dataclass):
        """
            Take a python dataclass instance and validate its actual values
            against the types (and extra rules) defined in the dataclass.

            The schema is automatically derived from the dataclass fields and
            the optional metadata["cerberus"] attribute per field.

            :param dataclass: the dataclass instance to validate.
            :return: the result of the cerberus validation (True if valid).
            :raises TypeError: (from the dataclasses helpers) if the argument
                is not a dataclass instance.

            Example: see TestData in this module.
        """
        # Note: the module function dataclasses.asdict() is needed here, the
        # instance itself has no asdict attribute.
        cerberus_schema, value_dict = self._dataclass_schema_and_values(dataclass)
        return super().validate(value_dict, cerberus_schema)

    def validate_test(self, dataclass=None):
        """
            Test validation that uses the TestData dataclass from this module
            and heavily prints information to the console:
            the dataclass fields, the schema derived from fields and metadata
            and a dict of the current values.

            1st validation result should be: True
            -- then the title of the instance is changed to 32 "a" chars
               (so the maxlength metadata constraint should be violated).
               This modifies the instance that was passed in.
            2nd validation result should be: False
            + the validation errors (self.errors) are printed.

            :param dataclass: instance to validate. Defaults to a fresh
                TestData instance with active=True.
            :return: the validator itself.
        """
        from pprint import PrettyPrinter

        if dataclass is None:
            # TestData has one required field (active), so it cannot be
            # created without arguments.
            dataclass = TestData(True)

        cerberus_schema, value_dict = self._dataclass_schema_and_values(dataclass)
        # validate using the value dict and the derived schema
        ret = super().validate(value_dict, cerberus_schema)

        # print the dataclass fields
        self._print_section_header("|  dataclass fields ")
        self.print_dataclass_attributes(dataclass)

        # print the schema
        self._print_section_header("|  cerberus schema ")
        for elem in cerberus_schema:
            print("    {:20} {}".format('"'+elem+'"', cerberus_schema[elem]))

        # print the current values dict
        self._print_section_header("|  current values of dataclass instance ")
        PrettyPrinter(indent=4).pprint(value_dict)

        # print the 1st validation result
        self._print_section_header("|  cerberus validation result ")
        print("     {}".format(ret))

        # change the value of title to length 32 (so the constraint will be violated)
        dataclass.title = 32*"a"
        value_dict = dataclasses.asdict(dataclass)
        ret = super().validate(value_dict, cerberus_schema)

        # print the 2nd validation result
        self._print_section_header("|  changed title to lenght 32 => validation result ")
        print("     {}".format(ret))
        if not ret:
            print("     {}".format((self.errors)))
        return self

if __name__ == "__main__":
    # Run the console demo only when the module is executed as a script.
    test()
	# -*- coding: utf-8 -*-
	"""

	This module is a proposal to add python's dataclass support to cerberus.
	See Issue: #397
	https://github.com/pyeve/cerberus/issues/397
	And road-map:
	1.3
	Checklist:
	The module dataclasses is implemented. This may get postponed 'til a following minor release. (#397)

	"""
	from cerberus.validator import Validator as Vd
	from dataclasses import dataclass, field, fields
	from typing import List
	import random
	import datetime
	from typing import Dict, Tuple, List
	import dataclasses

	def set_datetime():
	    """
	        Return the current UTC time as a naive datetime.datetime.

	        datetime.utcnow() is deprecated since Python 3.12, so the value is
	        taken from a timezone aware "now" in UTC and the tzinfo is stripped
	        afterwards. The result is the same kind of value as before:
	        UTC based and without tzinfo.
	    """
	    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

	def set_date():
	    """
	        Return the current UTC calendar day as a datetime.date.

	        Uses a timezone aware "now" in UTC instead of datetime.utcnow(),
	        which is deprecated since Python 3.12. The returned date is unchanged.
	    """
	    return datetime.datetime.now(datetime.timezone.utc).date()

	def set_random():
	    """
	        Return a random float drawn uniformly from the range 1.0 to 1.9.
	    """
	    return random.uniform(1.0, 1.9)

	def test():
	    """
	        Console demo that is executed when this module is run from the
	        commandline.

	        Builds a TestData instance with active=True and hands it to
	        Validator.validate_test(), which prints its report to stdout.
	    """
	    t = TestData(True)
	    v = Validator()
	    v.validate_test(t)

	# Exact annotation type -> name of the cerberus "type" rule.
	_CERBERUS_TYPE_NAMES = {
	    bool: "boolean",
	    int: "integer",
	    float: "float",
	    list: "list",
	    dict: "dict",
	    datetime.date: "date",
	    datetime.datetime: "datetime",
	    set: "set",
	    str: "string",
	    bytes: "binary",
	    bytearray: "binary",
	}


	def create_cerberus_schema(dataclass):
	    """
	        Create a cerberus schema from the python dataclass fields and
	        the optional metadata["cerberus"] entry of each field.

	        Every field gets a {"type": ...} rule derived from its annotation.
	        Parametrized generics (Dict[str, str], List[int], ...) are reduced to
	        their runtime class first. Annotations that are not in the type table
	        fall back to "string" and a note is printed.
	        All keys of metadata["cerberus"] are then copied into the field's
	        rule set (and may therefore also override "type").

	        :param dataclass: a dataclass type or a dataclass instance.
	        :return: dict mapping field name -> dict of cerberus rules.
	        :raises TypeError: (from dataclasses.fields) if the argument is
	            neither a dataclass nor an instance of one.

	        Example dataclass:
	        @dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False)
	        class TestData():
	            title: str = field (default="",
	                metadata={"cerberus" :
	                    {
	                        "maxlength" : 30

	                    }
	                })
	    """
	    cerberus_schema = {}

	    for elem in fields(dataclass):
	        # Generic aliases keep their runtime class in __origin__, plain
	        # classes do not have that attribute. Checking __origin__ first
	        # (instead of __name__) matters on Python 3.10+, where aliases like
	        # Dict[str, str] also expose a __name__.
	        current_type = getattr(elem.type, "__origin__", elem.type)
	        try:
	            type_name = _CERBERUS_TYPE_NAMES.get(current_type)
	        except TypeError:
	            # unhashable annotation object: treat it like any unknown type
	            type_name = None
	        if type_name is None:
	            print("type unknown, setting string.")
	            type_name = "string"

	        rules = {"type": type_name}
	        # element may have a cerberus metadata sub attribute with extra rules
	        rules.update(elem.metadata.get("cerberus", {}))
	        cerberus_schema[elem.name] = rules
	    return cerberus_schema




	@dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False)
	class TestData():
	    """
	        Test python dataclass for this module.
	        It carries one field for each annotation type that
	        create_cerberus_schema() maps to a cerberus type, plus a title
	        field with an additional cerberus rule in its metadata.
	        See: http://docs.python-cerberus.org/en/stable/validation-rules.html
	    """
	    # the only field without a default, so it must be passed to __init__
	    active: bool
	    bindat: bytes = b''
	    bin_arr: bytearray = field(default_factory=bytearray)
	    tdate: datetime.date = field(default_factory=set_date)
	    tstamp: datetime.datetime = field(default_factory=set_datetime)
	    props: dict = field(default_factory=dict)
	    factor: float = field(default_factory=set_random)
	    votes: int = 0
	    tags: list = field(default_factory=list)
	    #number ??
	    unique_tags: set = field(default_factory=set)
	    connectionOptions: Dict[str, str] = field(default_factory=dict)
	    # extra cerberus rule: title may not be longer than 30 characters
	    title: str = field(default="",
	        metadata={"cerberus":
	            {
	                "maxlength": 30
	            }
	        })

	# should also validate to true if check_multi is a list.
	# See: http://docs.python-cerberus.org/en/stable/validation-rules.html
	#check_multi: str = field ( default="", metadata={ "cerberus" :
	# {
	# #A list of types can be used to allow different values:
	# 'check:multi': {'type': ['string', 'list']}
	# }
	# }
	#)


	class Validator(Vd):
	    """
	        Cerberus validator that works directly on python dataclass instances.

	        The schema is derived from the dataclass fields
	        (see create_cerberus_schema) and the document that is validated is
	        built with dataclasses.asdict().
	    """

	    def print_dataclass_attributes(self, dataclass):
	        """
	            Print the name and the declared type of every dataclass field,
	            followed by its metadata["cerberus"] rules if it has any.

	            :param dataclass: a dataclass type or a dataclass instance.
	        """
	        for elem in fields(dataclass):
	            print("    Field name: {}, type is: {} ".format(elem.name, elem.type))
	            # extra cerberus schema metadata is optional per attribute
	            if "cerberus" in elem.metadata:
	                print("     ... cerberus schema metadata: {}".format(elem.metadata["cerberus"]))

	    def _dataclass_schema_and_values(self, dataclass):
	        """
	            Return the tuple (cerberus_schema, value_dict) for a dataclass instance.

	            A truthy "schema" attribute on the instance is used as is,
	            otherwise the schema is derived from the dataclass fields.
	        """
	        cerberus_schema = getattr(dataclass, "schema", None)
	        if not cerberus_schema:
	            # no schema yet, make one:
	            cerberus_schema = create_cerberus_schema(dataclass)
	        return cerberus_schema, dataclasses.asdict(dataclass)

	    @staticmethod
	    def _print_section_header(title, line_length=70):
	        """
	            Print title framed by two dashed lines of line_length characters.
	        """
	        rule = line_length * "-"
	        print(rule)
	        print(title)
	        print(rule)

	    def validate(self, dataclass):
	        """
	            Take a python dataclass instance and validate its actual values
	            against the types (and extra rules) defined in the dataclass.

	            The schema is automatically derived from the dataclass fields and
	            the optional metadata["cerberus"] attribute per field.

	            :param dataclass: the dataclass instance to validate.
	            :return: the result of the cerberus validation (True if valid).
	            :raises TypeError: (from the dataclasses helpers) if the argument
	                is not a dataclass instance.

	            Example: see TestData in this module.
	        """
	        # Note: the module function dataclasses.asdict() is needed here, the
	        # instance itself has no asdict attribute.
	        cerberus_schema, value_dict = self._dataclass_schema_and_values(dataclass)
	        return super().validate(value_dict, cerberus_schema)

	    def validate_test(self, dataclass=None):
	        """
	            Test validation that uses the TestData dataclass from this module
	            and heavily prints information to the console:
	            the dataclass fields, the schema derived from fields and metadata
	            and a dict of the current values.

	            1st validation result should be: True
	            -- then the title of the instance is changed to 32 "a" chars
	               (so the maxlength metadata constraint should be violated).
	               This modifies the instance that was passed in.
	            2nd validation result should be: False
	            + the validation errors (self.errors) are printed.

	            :param dataclass: instance to validate. Defaults to a fresh
	                TestData instance with active=True.
	            :return: the validator itself.
	        """
	        from pprint import PrettyPrinter

	        if dataclass is None:
	            # TestData has one required field (active), so it cannot be
	            # created without arguments.
	            dataclass = TestData(True)

	        cerberus_schema, value_dict = self._dataclass_schema_and_values(dataclass)
	        # validate using the value dict and the derived schema
	        ret = super().validate(value_dict, cerberus_schema)

	        # print the dataclass fields
	        self._print_section_header("|  dataclass fields ")
	        self.print_dataclass_attributes(dataclass)

	        # print the schema
	        self._print_section_header("|  cerberus schema ")
	        for elem in cerberus_schema:
	            print("    {:20} {}".format('"'+elem+'"', cerberus_schema[elem]))

	        # print the current values dict
	        self._print_section_header("|  current values of dataclass instance ")
	        PrettyPrinter(indent=4).pprint(value_dict)

	        # print the 1st validation result
	        self._print_section_header("|  cerberus validation result ")
	        print("     {}".format(ret))

	        # change the value of title to length 32 (so the constraint will be violated)
	        dataclass.title = 32*"a"
	        value_dict = dataclasses.asdict(dataclass)
	        ret = super().validate(value_dict, cerberus_schema)

	        # print the 2nd validation result
	        self._print_section_header("|  changed title to lenght 32 => validation result ")
	        print("     {}".format(ret))
	        if not ret:
	            print("     {}".format((self.errors)))
	        return self

	if __name__ == "__main__":
	    # Run the console demo only when the module is executed as a script.
	    test()