Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bromzh/ca30fb7eb5983fe5b8d6 to your computer and use it in GitHub Desktop.
Save bromzh/ca30fb7eb5983fe5b8d6 to your computer and use it in GitHub Desktop.
This is a simple serializer for MongoEngine Documents that supports blacklists and whitelists, name mapping, and inclusion of methods/properties.
# -*- coding: utf-8 -*-
import datetime
import calendar
import re
import base64
import bson
from bson import RE_TYPE
from bson.binary import Binary
from bson.code import Code
from bson.dbref import DBRef
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
import json
from mongoengine.queryset.base import BaseQuerySet
from mongoengine.document import BaseDocument, MapReduceDocument
class DocumentSerializer(object):
    """Serializer for MongoEngine documents and pymongo data.

    NOTE(review): the examples below were restored from a damaged copy of
    this file. The ``.save()`` calls, the ``True`` results and the dotted
    names ``author.id`` / ``author.name`` / ``author.email`` are reconstructed
    from the expected outputs, and the original e-mail literal is lost (shown
    as ``''``). Verify them before running as doctests; they also talk to the
    database opened by ``connect('test')``.

    >>> from bson import ObjectId
    >>> from mongoengine import connect, fields, Document
    >>> client = connect('test')
    >>> db = client.test
    >>> db.user.drop()
    >>> db.comment.drop()

    >>> class User(Document):
    ...     first_name = fields.StringField(max_length=100)
    ...     last_name = fields.StringField(max_length=100)
    ...     email = fields.EmailField()
    ...     @property
    ...     def name(self):
    ...         return ' '.join((self.first_name, self.last_name))

    >>> user = User(id=ObjectId('54052ba41591466157f5306a'), first_name='Philip',
    ...             last_name='Dick', email='')
    >>> user.save()
    <User: User object>

    >>> data = DocumentSerializer().serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': '',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True

    >>> data = DocumentSerializer(includes=['name']).serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': '',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}, 'name': 'Philip Dick'}
    True

    >>> data = DocumentSerializer(blacklist=['email']).serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True

    >>> data = DocumentSerializer(whitelist=['first_name', 'email']).serialize(user)
    >>> data == {'first_name': 'Philip', 'email': ''}
    True

    >>> data = DocumentSerializer(mapping={'last_name': 'surname'}).serialize(user)
    >>> data == {'first_name': 'Philip', 'surname': 'Dick', 'email': '',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True

    >>> data = DocumentSerializer(blacklist=['first_name', 'last_name', 'id'],
    ...                           includes=['name'],
    ...                           mapping={'name': 'full_name'}).serialize(user)
    >>> data == {'full_name': 'Philip Dick', 'email': ''}
    True

    >>> class Post(Document):
    ...     title = fields.StringField(max_length=500)
    ...     text = fields.StringField()
    ...     author = fields.ReferenceField(User)

    >>> first_post = Post(id=ObjectId('54052ba41591466157f5306b'), title='foo',
    ...                   text='lorem ipsum', author=user)
    >>> first_post.save()
    <Post: Post object>

    >>> data = DocumentSerializer(blacklist=['id', 'author.id']).serialize(first_post)
    >>> data == {'title': 'foo', 'text': 'lorem ipsum',
    ...          'author': {'last_name': 'Dick', 'first_name': 'Philip',
    ...                     'email': ''}}
    True

    >>> data = DocumentSerializer(whitelist=['title', 'author'],
    ...                           includes=['author.name'],
    ...                           mapping={'author.name': 'full_name'},
    ...                           blacklist=['author.id',
    ...                                      'author.first_name',
    ...                                      'author.last_name']).serialize(first_post)
    >>> data == {'author': {'full_name': 'Philip Dick', 'email': ''},
    ...          'title': 'foo'}
    True

    >>> another_post = Post(id=ObjectId('54052ba41591466157f5306c'), title='bar',
    ...                     text='lorem ipsum', author=user)
    >>> another_post.save()
    <Post: Post object>

    >>> posts = Post.objects.all()
    >>> data = DocumentSerializer(whitelist=['post.title', 'author.email'],
    ...                           includes=['author.name'],
    ...                           collection_name='posts').serialize(posts)
    >>> data == {'posts': [{'author': {'name': 'Philip Dick', 'email': ''}},
    ...                    {'author': {'name': 'Philip Dick', 'email': ''}}]}
    True

    :param list whitelist: List of dotted field names which will be output
        during serialization
    :param list blacklist: List of dotted field names which will not be output
        during serialization
    :param list includes: List of methods and/or properties of the class
        which will be output during serialization
    :param dict mapping: Dict with rules for naming fields during serialization.
        Key - dotted field name (full source name), value - name during
        serialization
    :param str collection_name: Name of the dict key used when serializing
        iterable data such as :obj:`list`, :obj:`generator`,
        :class:`mongoengine.QuerySet`, etc.
    """

    def __init__(self, whitelist=None, blacklist=None, includes=None,
                 mapping=None, collection_name='objects'):
        # ``whitelist`` stays None when omitted: a falsy whitelist means
        # "no whitelist filtering" in ``_is_allowed``.
        self.whitelist = whitelist
        self.blacklist = blacklist if blacklist is not None else list()
        self.includes = includes if includes is not None else list()
        self.mapping = mapping if mapping is not None else dict()
        self.collection_name = collection_name

    def _get_name(self, name):
        """Return the output name for a dotted field name.

        :param str name: Dotted field name
        :return: The name registered in ``self.mapping`` for ``name``, or the
            last dotted component of ``name`` when there is no mapping
        :rtype: str
        """
        return self.mapping.get(name, name.rsplit('.', 1)[-1])

    def _is_allowed(self, full_name):
        """Tell whether the dotted field ``full_name`` passes the black/white lists.

        :param str full_name: Dotted field name (context prefix + field name)
        :return: ``False`` if the name is blacklisted, or if a non-empty
            whitelist is set and no entry is a prefix of the name or has the
            name as a prefix; ``True`` otherwise
        :rtype: bool
        """
        if full_name in self.blacklist:
            return False
        if self.whitelist:
            # Prefix match in both directions: a parent such as 'author' must
            # stay visible when only 'author.email' is whitelisted, and
            # whitelisting 'author' admits 'author.email'.
            return any(allowed_name.startswith(full_name)
                       or full_name.startswith(allowed_name)
                       for allowed_name in self.whitelist)
        return True

    def _convert_included(self, raw_value, full_name):
        """Convert the value of an ``includes`` entry, calling it first if callable.

        :param raw_value: Attribute/item value fetched for the included name
        :param str full_name: Dotted name of the included attribute
        :return: Primitive representation of the (called) value
        """
        value = raw_value() if callable(raw_value) else raw_value
        return self._to_primitive(value, ctx=full_name + '.')

    def _to_primitive(self, o, ctx=''):
        """Recursively convert a :class:`mongoengine.Document` or BSON object
        to primitive types which can be serialized by the standard
        :class:`json.JSONEncoder` or :func:`json.dumps`.

        Based on :func:`bson.json_util.default` and
        :func:`bson.json_util._json_convert`.

        :param o: Object to convert
        :param str ctx: Conversion context.
            So if `o` is the value for dict key 'foo', ctx will be equal to
            'foo.' and all fields of `o` will be named like 'foo.name1',
            'foo.name2', etc.
        :return: Conversion result.
            Type will be one of these: `int`, `str`, `list`, `dict`, `True`,
            `False`, `None`.
        """
        if isinstance(o, MapReduceDocument):
            return self._to_primitive({'_id': o.key})

        if isinstance(o, BaseDocument):
            result = dict()
            # Iterating a document yields its field names.
            for field_name in o:
                full_name = ctx + field_name
                if not self._is_allowed(full_name):
                    continue
                try:
                    result[self._get_name(full_name)] = self._to_primitive(
                        getattr(o, field_name), ctx=full_name + '.')
                except AttributeError:
                    # Field cannot be read/converted: leave it out.
                    continue
            for name in self.includes:
                attr_name = name.rsplit('.', 1)[-1]
                full_name = ctx + attr_name
                # Only includes addressed to exactly this nesting level apply.
                if name != full_name:
                    continue
                try:
                    raw_value = getattr(o, attr_name)
                except AttributeError:
                    continue
                result[self._get_name(full_name)] = self._convert_included(
                    raw_value, full_name)
            return result

        if hasattr(o, 'items'):
            result = dict()
            for field_name, value in o.items():
                full_name = ctx + field_name
                if not self._is_allowed(full_name):
                    continue
                result[self._get_name(full_name)] = self._to_primitive(
                    value, ctx=full_name + '.')
            for name in self.includes:
                if not name.startswith(ctx):
                    continue
                attr_name = name.rsplit('.', 1)[-1]
                full_name = ctx + attr_name
                try:
                    raw_value = o[attr_name]
                except KeyError:
                    continue
                result[self._get_name(full_name)] = self._convert_included(
                    raw_value, full_name)
            return result

        # str and bytes are iterable too but must be treated as scalars;
        # bytes are handled by the binary branch further down.
        if hasattr(o, '__iter__') and not isinstance(o, (str, bytes)):
            return [self._to_primitive(v, ctx=ctx) for v in o]

        if isinstance(o, ObjectId):
            return {"$oid": str(o)}
        if isinstance(o, DBRef):
            return self._to_primitive(o.as_doc(), ctx=ctx)
        if isinstance(o, datetime.datetime):
            # TODO share this code w/ bson.py?
            if o.utcoffset() is not None:
                # Normalize aware datetimes to UTC before taking the epoch.
                o = o - o.utcoffset()
            millis = int(calendar.timegm(o.timetuple()) * 1000 +
                         o.microsecond / 1000)
            return {"$date": millis}
        if isinstance(o, (RE_TYPE, Regex)):
            flags = ""
            if o.flags & re.IGNORECASE:
                flags += "i"
            if o.flags & re.LOCALE:
                flags += "l"
            if o.flags & re.MULTILINE:
                flags += "m"
            if o.flags & re.DOTALL:
                flags += "s"
            if o.flags & re.UNICODE:
                flags += "u"
            if o.flags & re.VERBOSE:
                flags += "x"
            if isinstance(o.pattern, str):
                pattern = o.pattern
            else:
                pattern = o.pattern.decode('utf-8')
            return {"$regex": pattern, "$options": flags}
        if isinstance(o, MinKey):
            return {"$minKey": 1}
        if isinstance(o, MaxKey):
            return {"$maxKey": 1}
        if isinstance(o, Timestamp):
            return {"t": o.time, "i": o.inc}
        if isinstance(o, Code):
            return {'$code': str(o), '$scope': o.scope}
        if isinstance(o, Binary):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "%02x" % o.subtype}
        if isinstance(o, bytes):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "00"}
        if bson.has_uuid() and isinstance(o, bson.uuid.UUID):
            return {"$uuid": o.hex}
        # Anything else is assumed to be JSON-serializable already.
        return o

    def serialize(self, o):
        """Convert an object to primitive types using the constructor's rules.

        Query sets and other non-string, non-dict, non-document iterables are
        wrapped as ``{collection_name: [item, ...]}``.

        :param o: Object to serialize
        :return: Serialized object
        """
        is_collection = isinstance(o, BaseQuerySet) or (
            hasattr(o, '__iter__')
            and not isinstance(o, str)
            and not isinstance(o, dict)
            and not isinstance(o, BaseDocument))
        if is_collection:
            return {self.collection_name: [self._to_primitive(item) for item in o]}
        return self._to_primitive(o)

    def to_json(self, o, **kwargs):
        """Serialize an object to a JSON string.

        :param o: Object to serialize
        :param kwargs: Extra keyword arguments passed to :func:`json.dumps`
        :return: JSON string
        :rtype: str
        """
        return json.dumps(self.serialize(o), **kwargs)
Copy link

1st commented Sep 2, 2014

Can you please show few examples of usage?

Copy link

kxepal commented Sep 2, 2014

Few suggestions:

  1. Split _to_primitive into multiple functions: you have duplicated logic there. The best way is one function per type, so that your code can be easily extended.
  2. The way the denied flag is set in the whitelist processing loop is awkward. Maybe you wanted to set it to True by default and just break out of the loop on False?
  3. Instead of foo.split('.')[-1], use foo.rsplit('.', 1)[-1], since you're not interested in splitting the whole string.
  4. bytes type implements __iter__, so if isinstance(o, bytes): will never get reached for them.

Copy link

bromzh commented Sep 9, 2014

Thanks for answer.

  1. Yes, this is a good idea. Of course, separating one function into many makes the code more extensible, but this is just an example. I think a good idea is to make a metaclass that collects all conversion functions from child classes into one list and calls them one by one. But that is a way of making yet another Python model system.
  2. I had some problems with the whitelist. I have now fixed this part, but I still think there is an easier way to do it (I just don't know how).
  3. Thanks, I didn't know this thing
  4. Fixed

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment