# bromzh/mongoengine_serializer.py

## mongoengine_serializer.py
# -*- coding: utf-8 -*-
import datetime
import calendar
import re
import base64
import bson
from bson import RE_TYPE
from bson.binary import Binary
from bson.code import Code
from bson.dbref import DBRef
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
import json
from mongoengine.queryset.base import BaseQuerySet
from mongoengine.document import BaseDocument, MapReduceDocument


class DocumentSerializer(object):
    """Serializer for MongoEngine documents and pymongo data.

        Usage:

        >>> from bson import ObjectId
        >>> from mongoengine import connect, fields, Document
        >>> client = connect('test')
        >>> db = client.test
        >>> db.user.drop()
        >>> db.post.drop()
        >>> db.comment.drop()
        >>>
        >>> class User(Document):
        ...     first_name = fields.StringField(max_length=100)
        ...     last_name = fields.StringField(max_length=100)
        ...     email = fields.EmailField()
        ...     @property
        ...     def name(self):
        ...         return ' '.join((self.first_name, self.last_name))
        ...
        >>> user = User(id=ObjectId('54052ba41591466157f5306a'), first_name='Philip',
        ...             last_name='Dick', email='dick@example.com')
        >>> user.save()
        <User: User object>
        >>> data = DocumentSerializer().serialize(user)
        >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
        ...          'id': {'$oid': '54052ba41591466157f5306a'}}
        True
        >>> data = DocumentSerializer(includes=['name']).serialize(user)
        >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
        ...          'id': {'$oid': '54052ba41591466157f5306a'}, 'name': 'Philip Dick'}
        True
        >>> data = DocumentSerializer(blacklist=['email']).serialize(user)
        >>> data == {'first_name': 'Philip', 'last_name': 'Dick',
        ...          'id': {'$oid': '54052ba41591466157f5306a'}}
        True
        >>> data = DocumentSerializer(whitelist=['first_name', 'email']).serialize(user)
        >>> data == {'first_name': 'Philip', 'email': 'dick@example.com'}
        True
        >>> data = DocumentSerializer(mapping={'last_name': 'surname'}).serialize(user)
        >>> data == {'first_name': 'Philip', 'surname': 'Dick', 'email': 'dick@example.com',
        ...          'id': {'$oid': '54052ba41591466157f5306a'}}
        True
        >>> data = DocumentSerializer(blacklist=['first_name', 'last_name', 'id'],
        ...                           includes=['name'],
        ...                           mapping={'name': 'full_name'}).serialize(user)
        >>> data == {'full_name': 'Philip Dick', 'email': 'dick@example.com'}
        True
        >>> class Post(Document):
        ...     title = fields.StringField(max_length=500)
        ...     text = fields.StringField()
        ...     author = fields.ReferenceField(User)
        ...
        >>> first_post = Post(id=ObjectId('54052ba41591466157f5306b'), title='foo',
        ...             text='lorem ipsum', author=user)
        >>> first_post.save()
        <Post: Post object>
        >>> data = DocumentSerializer(blacklist=['id', 'author.id']).serialize(first_post)
        >>> data == {'title': 'foo', 'text': 'lorem ipsum',
        ...          'author': {'last_name': 'Dick', 'first_name': 'Philip',
        ...                     'email': 'dick@example.com'}}
        True
        >>> data = DocumentSerializer(whitelist=['title', 'author'],
        ...                           includes=['author.name'],
        ...                           mapping={'author.name': 'full_name'},
        ...                           blacklist=['author.id',
        ...                                      'author.first_name',
        ...                                      'author.last_name']).serialize(first_post)
        >>> data == {'author': {'full_name': 'Philip Dick', 'email': 'dick@example.com'},
        ...          'title': 'foo'}
        True
        >>> another_post = Post(id=ObjectId('54052ba41591466157f5306c'), title='bar',
        ...                     text='lorem ipsum', author=user)
        >>> another_post.save()
        <Post: Post object>
        >>> posts = Post.objects.all()
        >>> data = DocumentSerializer(whitelist=['post.title', 'author.email'],
        ...                           includes=['author.name'],
        ...                           collection_name='posts').serialize(posts)
        >>> data == {'posts': [{'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}},
        ...                    {'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}}]}
        True

    :param list whitelist: List of dotted field names which will be output during serialization.
        Matching is done on whole dotted-path components: an entry allows the field itself,
        its ancestors (so the path can be reached) and its descendants.
    :param list blacklist: List of dotted field names which will not be output during serialization
    :param list includes: List of methods and/or properties of the class
        which will be output during serialization
    :param dict mapping: Dict with rules how to name fields during serialization.
        Key - dotted field name (full source name), value - name during serialization
    :param str collection_name: Name of the dict key used when serializing iterable data
        such as :obj:`list`, :obj:`generator`, :class:`mongoengine.QuerySet`, etc.

    """

    def __init__(self, whitelist=None, blacklist=None, includes=None,
                 mapping=None, collection_name='objects'):
        self.whitelist = whitelist
        self.blacklist = blacklist if blacklist is not None else list()
        self.includes = includes if includes is not None else list()
        self.mapping = mapping if mapping is not None else dict()
        self.collection_name = collection_name

    def _get_name(self, name):
        """Return the output name for a dotted field name.

        :param str name: Dotted field name
        :return: Name from `self.mapping` if present, otherwise the last
            dotted component of `name`
        :rtype: str
        """
        return self.mapping.get(name, name.rsplit('.', 1)[-1])

    def _is_allowed(self, full_name):
        """Tell whether a dotted field name passes the blacklist and whitelist.

        A blacklisted name is always rejected. With an empty or missing
        whitelist every other name is accepted. Otherwise the name must be
        equal to a whitelist entry, be an ancestor of one (so nested entries
        stay reachable) or be a descendant of one. Comparison is done on dot
        boundaries, so ``'title'`` does not admit ``'titles'``.

        :param str full_name: Dotted field name
        :rtype: bool
        """
        if full_name in self.blacklist:
            return False
        if not self.whitelist:
            return True
        nested_prefix = full_name + '.'
        return any(
            allowed_name == full_name
            or allowed_name.startswith(nested_prefix)
            or full_name.startswith(allowed_name + '.')
            for allowed_name in self.whitelist)

    def _add_includes(self, result, ctx, fetch, missing):
        """Add to `result` every entry of `self.includes` that targets this level.

        :param dict result: Output dict of the object being converted (mutated)
        :param str ctx: Conversion context of the object being converted
        :param fetch: Callable returning the raw value for an attribute/key name
        :param missing: Exception type raised by `fetch` when the name is absent
        """
        for name in self.includes:
            attr_name = name.rsplit('.', 1)[-1]
            full_name = ctx + attr_name
            if name != full_name:
                continue
            try:
                raw_value = fetch(attr_name)
            except missing:
                # A missing include must not prevent the remaining ones
                # from being processed.
                continue
            if callable(raw_value):
                raw_value = raw_value()
            result[self._get_name(full_name)] = self._to_primitive(
                raw_value, ctx=full_name + '.')

    def _to_primitive(self, o, ctx=''):
        """Recursive conversion from :class:`mongoengine.Document` or BSON objects to
        primitive types which can be serialized by the standard
        :class:`json.JSONEncoder` of :func:`json.dumps`.

        Based on :func:`bson.json_util.default` and :func:`bson.json_util._json_convert`.

        :param o: Object to convert
        :param str ctx: Conversion context
            So if `o` is value for dict key 'foo', ctx will be equal 'foo.'
            and all fields of `o` will be named like 'foo.name1', 'foo.name2', etc.
        :return: Conversion result.
            Type will be one of this: `int`, `str`, `list`, `dict`, `True`, `False`, `None`.
        """
        if isinstance(o, MapReduceDocument):
            res = {'_id': o.key}
            res.update(o.value)
            return self._to_primitive(res)
        if isinstance(o, BaseDocument):
            result = {}
            for field_name in o:
                full_name = ctx + field_name
                if not self._is_allowed(full_name):
                    continue
                # Only the attribute lookup is guarded, so errors raised while
                # converting nested values are not silently swallowed.
                try:
                    value = getattr(o, field_name)
                except AttributeError:
                    continue
                result[self._get_name(full_name)] = self._to_primitive(
                    value, ctx=full_name + '.')
            self._add_includes(result, ctx, lambda attr: getattr(o, attr), AttributeError)
            return result
        if hasattr(o, 'items'):
            result = {}
            for field_name, value in o.items():
                full_name = ctx + field_name
                if not self._is_allowed(full_name):
                    continue
                result[self._get_name(full_name)] = self._to_primitive(
                    value, ctx=full_name + '.')
            self._add_includes(result, ctx, lambda key: o[key], KeyError)
            return result
        if hasattr(o, '__iter__') and not isinstance(o, (str, bytes)):
            # Items of a sequence share the context of the sequence itself.
            return [self._to_primitive(v, ctx=ctx) for v in o]
        if isinstance(o, ObjectId):
            return {"$oid": str(o)}
        if isinstance(o, DBRef):
            return self._to_primitive(o.as_doc(), ctx=ctx)
        if isinstance(o, datetime.datetime):
            if o.utcoffset() is not None:
                o = o - o.utcoffset()
            # Floor division keeps the result an exact integer and rounds
            # pre-epoch values the same way as post-epoch ones.
            millis = calendar.timegm(o.timetuple()) * 1000 + o.microsecond // 1000
            return {"$date": millis}
        if isinstance(o, (RE_TYPE, Regex)):
            flag_letters = (
                (re.IGNORECASE, "i"),
                (re.LOCALE, "l"),
                (re.MULTILINE, "m"),
                (re.DOTALL, "s"),
                (re.UNICODE, "u"),
                (re.VERBOSE, "x"),
            )
            flags = "".join(letter for flag, letter in flag_letters if o.flags & flag)
            if isinstance(o.pattern, str):
                pattern = o.pattern
            else:
                pattern = o.pattern.decode('utf-8')
            return {"$regex": pattern, "$options": flags}
        if isinstance(o, MinKey):
            return {"$minKey": 1}
        if isinstance(o, MaxKey):
            return {"$maxKey": 1}
        if isinstance(o, Timestamp):
            return {"t": o.time, "i": o.inc}
        if isinstance(o, Code):
            return {'$code': str(o), '$scope': o.scope}
        if isinstance(o, Binary):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "%02x" % o.subtype}
        if isinstance(o, bytes):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "00"}
        if bson.has_uuid() and isinstance(o, bson.uuid.UUID):
            return {"$uuid": o.hex}
        return o

    def serialize(self, o):
        """Convert object to primitive types with rules passed in constructor

        Query sets and other iterables (except strings, bytes, dicts and
        documents) are returned as ``{collection_name: [items...]}``.

        :param o: Object to serialize
        :return: Serialized object
        """
        is_collection = isinstance(o, BaseQuerySet) or (
            hasattr(o, '__iter__')
            and not isinstance(o, (str, bytes, dict, BaseDocument)))
        if is_collection:
            return {self.collection_name: [self._to_primitive(item) for item in o]}
        return self._to_primitive(o)

    def to_json(self, o, **kwargs):
        """Serialize object to JSON string

        :param o: Object to serialize
        :param kwargs: Extra keyword arguments passed to :func:`json.dumps`
        :return: JSON string
        :rtype: str
        """
        return json.dumps(self.serialize(o), **kwargs)
	# -- coding: utf-8 --
	import datetime
	import calendar
	import re
	import base64
	import bson
	from bson import RE_TYPE
	from bson.binary import Binary
	from bson.code import Code
	from bson.dbref import DBRef
	from bson.max_key import MaxKey
	from bson.min_key import MinKey
	from bson.objectid import ObjectId
	from bson.regex import Regex
	from bson.timestamp import Timestamp
	import json
	from mongoengine.queryset.base import BaseQuerySet
	from mongoengine.document import BaseDocument, MapReduceDocument


	class DocumentSerializer(object):
	    """Serializer for MongoEngine documents and pymongo data.

	        Usage:

	        >>> from bson import ObjectId
	        >>> from mongoengine import connect, fields, Document
	        >>> client = connect('test')
	        >>> db = client.test
	        >>> db.user.drop()
	        >>> db.post.drop()
	        >>> db.comment.drop()
	        >>>
	        >>> class User(Document):
	        ...     first_name = fields.StringField(max_length=100)
	        ...     last_name = fields.StringField(max_length=100)
	        ...     email = fields.EmailField()
	        ...     @property
	        ...     def name(self):
	        ...         return ' '.join((self.first_name, self.last_name))
	        ...
	        >>> user = User(id=ObjectId('54052ba41591466157f5306a'), first_name='Philip',
	        ...             last_name='Dick', email='dick@example.com')
	        >>> user.save()
	        <User: User object>
	        >>> data = DocumentSerializer().serialize(user)
	        >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
	        ...          'id': {'$oid': '54052ba41591466157f5306a'}}
	        True
	        >>> data = DocumentSerializer(includes=['name']).serialize(user)
	        >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
	        ...          'id': {'$oid': '54052ba41591466157f5306a'}, 'name': 'Philip Dick'}
	        True
	        >>> data = DocumentSerializer(blacklist=['email']).serialize(user)
	        >>> data == {'first_name': 'Philip', 'last_name': 'Dick',
	        ...          'id': {'$oid': '54052ba41591466157f5306a'}}
	        True
	        >>> data = DocumentSerializer(whitelist=['first_name', 'email']).serialize(user)
	        >>> data == {'first_name': 'Philip', 'email': 'dick@example.com'}
	        True
	        >>> data = DocumentSerializer(mapping={'last_name': 'surname'}).serialize(user)
	        >>> data == {'first_name': 'Philip', 'surname': 'Dick', 'email': 'dick@example.com',
	        ...          'id': {'$oid': '54052ba41591466157f5306a'}}
	        True
	        >>> data = DocumentSerializer(blacklist=['first_name', 'last_name', 'id'],
	        ...                           includes=['name'],
	        ...                           mapping={'name': 'full_name'}).serialize(user)
	        >>> data == {'full_name': 'Philip Dick', 'email': 'dick@example.com'}
	        True
	        >>> class Post(Document):
	        ...     title = fields.StringField(max_length=500)
	        ...     text = fields.StringField()
	        ...     author = fields.ReferenceField(User)
	        ...
	        >>> first_post = Post(id=ObjectId('54052ba41591466157f5306b'), title='foo',
	        ...             text='lorem ipsum', author=user)
	        >>> first_post.save()
	        <Post: Post object>
	        >>> data = DocumentSerializer(blacklist=['id', 'author.id']).serialize(first_post)
	        >>> data == {'title': 'foo', 'text': 'lorem ipsum',
	        ...          'author': {'last_name': 'Dick', 'first_name': 'Philip',
	        ...                     'email': 'dick@example.com'}}
	        True
	        >>> data = DocumentSerializer(whitelist=['title', 'author'],
	        ...                           includes=['author.name'],
	        ...                           mapping={'author.name': 'full_name'},
	        ...                           blacklist=['author.id',
	        ...                                      'author.first_name',
	        ...                                      'author.last_name']).serialize(first_post)
	        >>> data == {'author': {'full_name': 'Philip Dick', 'email': 'dick@example.com'},
	        ...          'title': 'foo'}
	        True
	        >>> another_post = Post(id=ObjectId('54052ba41591466157f5306c'), title='bar',
	        ...                     text='lorem ipsum', author=user)
	        >>> another_post.save()
	        <Post: Post object>
	        >>> posts = Post.objects.all()
	        >>> data = DocumentSerializer(whitelist=['post.title', 'author.email'],
	        ...                           includes=['author.name'],
	        ...                           collection_name='posts').serialize(posts)
	        >>> data == {'posts': [{'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}},
	        ...                    {'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}}]}
	        True

	    :param list whitelist: List of dotted field names which will be output during serialization.
	        Matching is done on whole dotted-path components: an entry allows the field itself,
	        its ancestors (so the path can be reached) and its descendants.
	    :param list blacklist: List of dotted field names which will not be output during serialization
	    :param list includes: List of methods and/or properties of the class
	        which will be output during serialization
	    :param dict mapping: Dict with rules how to name fields during serialization.
	        Key - dotted field name (full source name), value - name during serialization
	    :param str collection_name: Name of the dict key used when serializing iterable data
	        such as :obj:`list`, :obj:`generator`, :class:`mongoengine.QuerySet`, etc.

	    """

	    def __init__(self, whitelist=None, blacklist=None, includes=None,
	                 mapping=None, collection_name='objects'):
	        self.whitelist = whitelist
	        self.blacklist = blacklist if blacklist is not None else list()
	        self.includes = includes if includes is not None else list()
	        self.mapping = mapping if mapping is not None else dict()
	        self.collection_name = collection_name

	    def _get_name(self, name):
	        """Return the output name for a dotted field name.

	        :param str name: Dotted field name
	        :return: Name from `self.mapping` if present, otherwise the last
	            dotted component of `name`
	        :rtype: str
	        """
	        return self.mapping.get(name, name.rsplit('.', 1)[-1])

	    def _is_allowed(self, full_name):
	        """Tell whether a dotted field name passes the blacklist and whitelist.

	        A blacklisted name is always rejected. With an empty or missing
	        whitelist every other name is accepted. Otherwise the name must be
	        equal to a whitelist entry, be an ancestor of one (so nested entries
	        stay reachable) or be a descendant of one. Comparison is done on dot
	        boundaries, so ``'title'`` does not admit ``'titles'``.

	        :param str full_name: Dotted field name
	        :rtype: bool
	        """
	        if full_name in self.blacklist:
	            return False
	        if not self.whitelist:
	            return True
	        nested_prefix = full_name + '.'
	        return any(
	            allowed_name == full_name
	            or allowed_name.startswith(nested_prefix)
	            or full_name.startswith(allowed_name + '.')
	            for allowed_name in self.whitelist)

	    def _add_includes(self, result, ctx, fetch, missing):
	        """Add to `result` every entry of `self.includes` that targets this level.

	        :param dict result: Output dict of the object being converted (mutated)
	        :param str ctx: Conversion context of the object being converted
	        :param fetch: Callable returning the raw value for an attribute/key name
	        :param missing: Exception type raised by `fetch` when the name is absent
	        """
	        for name in self.includes:
	            attr_name = name.rsplit('.', 1)[-1]
	            full_name = ctx + attr_name
	            if name != full_name:
	                continue
	            try:
	                raw_value = fetch(attr_name)
	            except missing:
	                # A missing include must not prevent the remaining ones
	                # from being processed.
	                continue
	            if callable(raw_value):
	                raw_value = raw_value()
	            result[self._get_name(full_name)] = self._to_primitive(
	                raw_value, ctx=full_name + '.')

	    def _to_primitive(self, o, ctx=''):
	        """Recursive conversion from :class:`mongoengine.Document` or BSON objects to
	        primitive types which can be serialized by the standard
	        :class:`json.JSONEncoder` of :func:`json.dumps`.

	        Based on :func:`bson.json_util.default` and :func:`bson.json_util._json_convert`.

	        :param o: Object to convert
	        :param str ctx: Conversion context
	            So if `o` is value for dict key 'foo', ctx will be equal 'foo.'
	            and all fields of `o` will be named like 'foo.name1', 'foo.name2', etc.
	        :return: Conversion result.
	            Type will be one of this: `int`, `str`, `list`, `dict`, `True`, `False`, `None`.
	        """
	        if isinstance(o, MapReduceDocument):
	            res = {'_id': o.key}
	            res.update(o.value)
	            return self._to_primitive(res)
	        if isinstance(o, BaseDocument):
	            result = {}
	            for field_name in o:
	                full_name = ctx + field_name
	                if not self._is_allowed(full_name):
	                    continue
	                # Only the attribute lookup is guarded, so errors raised while
	                # converting nested values are not silently swallowed.
	                try:
	                    value = getattr(o, field_name)
	                except AttributeError:
	                    continue
	                result[self._get_name(full_name)] = self._to_primitive(
	                    value, ctx=full_name + '.')
	            self._add_includes(result, ctx, lambda attr: getattr(o, attr), AttributeError)
	            return result
	        if hasattr(o, 'items'):
	            result = {}
	            for field_name, value in o.items():
	                full_name = ctx + field_name
	                if not self._is_allowed(full_name):
	                    continue
	                result[self._get_name(full_name)] = self._to_primitive(
	                    value, ctx=full_name + '.')
	            self._add_includes(result, ctx, lambda key: o[key], KeyError)
	            return result
	        if hasattr(o, '__iter__') and not isinstance(o, (str, bytes)):
	            # Items of a sequence share the context of the sequence itself.
	            return [self._to_primitive(v, ctx=ctx) for v in o]
	        if isinstance(o, ObjectId):
	            return {"$oid": str(o)}
	        if isinstance(o, DBRef):
	            return self._to_primitive(o.as_doc(), ctx=ctx)
	        if isinstance(o, datetime.datetime):
	            if o.utcoffset() is not None:
	                o = o - o.utcoffset()
	            # Floor division keeps the result an exact integer and rounds
	            # pre-epoch values the same way as post-epoch ones.
	            millis = calendar.timegm(o.timetuple()) * 1000 + o.microsecond // 1000
	            return {"$date": millis}
	        if isinstance(o, (RE_TYPE, Regex)):
	            flag_letters = (
	                (re.IGNORECASE, "i"),
	                (re.LOCALE, "l"),
	                (re.MULTILINE, "m"),
	                (re.DOTALL, "s"),
	                (re.UNICODE, "u"),
	                (re.VERBOSE, "x"),
	            )
	            flags = "".join(letter for flag, letter in flag_letters if o.flags & flag)
	            if isinstance(o.pattern, str):
	                pattern = o.pattern
	            else:
	                pattern = o.pattern.decode('utf-8')
	            return {"$regex": pattern, "$options": flags}
	        if isinstance(o, MinKey):
	            return {"$minKey": 1}
	        if isinstance(o, MaxKey):
	            return {"$maxKey": 1}
	        if isinstance(o, Timestamp):
	            return {"t": o.time, "i": o.inc}
	        if isinstance(o, Code):
	            return {'$code': str(o), '$scope': o.scope}
	        if isinstance(o, Binary):
	            return {'$binary': base64.b64encode(o).decode(),
	                    '$type': "%02x" % o.subtype}
	        if isinstance(o, bytes):
	            return {'$binary': base64.b64encode(o).decode(),
	                    '$type': "00"}
	        if bson.has_uuid() and isinstance(o, bson.uuid.UUID):
	            return {"$uuid": o.hex}
	        return o

	    def serialize(self, o):
	        """Convert object to primitive types with rules passed in constructor

	        Query sets and other iterables (except strings, bytes, dicts and
	        documents) are returned as ``{collection_name: [items...]}``.

	        :param o: Object to serialize
	        :return: Serialized object
	        """
	        is_collection = isinstance(o, BaseQuerySet) or (
	            hasattr(o, '__iter__')
	            and not isinstance(o, (str, bytes, dict, BaseDocument)))
	        if is_collection:
	            return {self.collection_name: [self._to_primitive(item) for item in o]}
	        return self._to_primitive(o)

	    def to_json(self, o, **kwargs):
	        """Serialize object to JSON string

	        :param o: Object to serialize
	        :param kwargs: Extra keyword arguments passed to :func:`json.dumps`
	        :return: JSON string
	        :rtype: str
	        """
	        return json.dumps(self.serialize(o), **kwargs)