Skip to content

Instantly share code, notes, and snippets.

@bromzh
Last active August 29, 2015 14:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bromzh/ca30fb7eb5983fe5b8d6 to your computer and use it in GitHub Desktop.
Save bromzh/ca30fb7eb5983fe5b8d6 to your computer and use it in GitHub Desktop.
This is a simple serializer for MongoEngine documents. It supports blacklists and whitelists, name mapping, and including methods/properties in the output.
# -*- coding: utf-8 -*-
import base64
import calendar
import datetime
import json
import re
import uuid

import bson
from bson import RE_TYPE
from bson.binary import Binary
from bson.code import Code
from bson.dbref import DBRef
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from mongoengine.document import BaseDocument, MapReduceDocument
from mongoengine.queryset.base import BaseQuerySet
class DocumentSerializer(object):
    """Serializer for MongoEngine documents and pymongo data.

    Field names are addressed by their dotted path from the root object
    (``'author.email'``).  Whitelist entries match on whole path segments:
    an entry allows the field itself, every ancestor on the way to it and
    everything nested below it.

    Usage:

    >>> from bson import ObjectId
    >>> from mongoengine import connect, fields, Document
    >>> client = connect('test')
    >>> db = client.test
    >>> db.user.drop()
    >>> db.post.drop()
    >>> db.comment.drop()
    >>>
    >>> class User(Document):
    ...     first_name = fields.StringField(max_length=100)
    ...     last_name = fields.StringField(max_length=100)
    ...     email = fields.EmailField()
    ...     @property
    ...     def name(self):
    ...         return ' '.join((self.first_name, self.last_name))
    ...
    >>> user = User(id=ObjectId('54052ba41591466157f5306a'), first_name='Philip',
    ...             last_name='Dick', email='dick@example.com')
    >>> user.save()
    <User: User object>
    >>> data = DocumentSerializer().serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True
    >>> data = DocumentSerializer(includes=['name']).serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}, 'name': 'Philip Dick'}
    True
    >>> data = DocumentSerializer(blacklist=['email']).serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True
    >>> data = DocumentSerializer(whitelist=['first_name', 'email']).serialize(user)
    >>> data == {'first_name': 'Philip', 'email': 'dick@example.com'}
    True
    >>> data = DocumentSerializer(mapping={'last_name': 'surname'}).serialize(user)
    >>> data == {'first_name': 'Philip', 'surname': 'Dick', 'email': 'dick@example.com',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True
    >>> data = DocumentSerializer(blacklist=['first_name', 'last_name', 'id'],
    ...                           includes=['name'],
    ...                           mapping={'name': 'full_name'}).serialize(user)
    >>> data == {'full_name': 'Philip Dick', 'email': 'dick@example.com'}
    True
    >>> class Post(Document):
    ...     title = fields.StringField(max_length=500)
    ...     text = fields.StringField()
    ...     author = fields.ReferenceField(User)
    ...
    >>> first_post = Post(id=ObjectId('54052ba41591466157f5306b'), title='foo',
    ...                   text='lorem ipsum', author=user)
    >>> first_post.save()
    <Post: Post object>
    >>> data = DocumentSerializer(blacklist=['id', 'author.id']).serialize(first_post)
    >>> data == {'title': 'foo', 'text': 'lorem ipsum',
    ...          'author': {'last_name': 'Dick', 'first_name': 'Philip',
    ...                     'email': 'dick@example.com'}}
    True
    >>> data = DocumentSerializer(whitelist=['title', 'author'],
    ...                           includes=['author.name'],
    ...                           mapping={'author.name': 'full_name'},
    ...                           blacklist=['author.id',
    ...                                      'author.first_name',
    ...                                      'author.last_name']).serialize(first_post)
    >>> data == {'author': {'full_name': 'Philip Dick', 'email': 'dick@example.com'},
    ...          'title': 'foo'}
    True
    >>> another_post = Post(id=ObjectId('54052ba41591466157f5306c'), title='bar',
    ...                     text='lorem ipsum', author=user)
    >>> another_post.save()
    <Post: Post object>
    >>> posts = Post.objects.all()
    >>> data = DocumentSerializer(whitelist=['post.title', 'author.email'],
    ...                           includes=['author.name'],
    ...                           collection_name='posts').serialize(posts)
    >>> data == {'posts': [{'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}},
    ...                    {'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}}]}
    True

    :param list whitelist: List of dotted field names which will be output during serialization.
        ``None`` or an empty list disables whitelist filtering.
    :param list blacklist: List of dotted field names which will not be output during serialization
    :param list includes: List of dotted names of methods and/or properties of the class
        which will be output during serialization
    :param dict mapping: Dict with rules how to name fields during serialization.
        Key - dotted field name (full source name), value - name during serialization
    :param str collection_name: Name of the dict key used when serializing iterable data
        such as :obj:`list`, :obj:`generator`, :class:`mongoengine.QuerySet`, etc.
    """

    def __init__(self, whitelist=None, blacklist=None, includes=None,
                 mapping=None, collection_name='objects'):
        self.whitelist = whitelist
        # Fresh containers per instance: never share a mutable default.
        self.blacklist = blacklist if blacklist is not None else []
        self.includes = includes if includes is not None else []
        self.mapping = mapping if mapping is not None else {}
        self.collection_name = collection_name

    def _get_name(self, name):
        """Return the output key for a dotted field name.

        :param str name: Dotted field name
        :return: The name from `self.mapping` if one is registered for `name`,
            otherwise the last segment of `name`
        :rtype: str
        """
        return self.mapping.get(name, name.rsplit('.', 1)[-1])

    @staticmethod
    def _paths_overlap(first, second):
        """Tell whether two dotted names are equal or one is an ancestor of the other.

        The comparison is done on whole segments, so ``'a'`` does not overlap
        ``'author.email'`` and ``'title'`` does not overlap ``'title_extra'``.
        """
        return (first == second
                or first.startswith(second + '.')
                or second.startswith(first + '.'))

    def _is_allowed(self, full_name):
        """Apply the blacklist and whitelist to a dotted field name.

        :param str full_name: Dotted field name
        :return: ``False`` if the name is blacklisted, or if a non-empty whitelist
            is set and no entry of it overlaps the name; ``True`` otherwise
        :rtype: bool
        """
        if full_name in self.blacklist:
            return False
        if not self.whitelist:
            return True
        return any(self._paths_overlap(allowed_name, full_name)
                   for allowed_name in self.whitelist)

    def _add_includes(self, result, ctx, fetch, missing_error):
        """Add the entries of `self.includes` that belong to the current level.

        An include belongs to the current level when its dotted name equals
        `ctx` plus its last segment.  Callable values are called without
        arguments and their return value is serialized.

        :param dict result: Output dict of the current level, updated in place
        :param str ctx: Conversion context of the current level
        :param fetch: Callable returning the raw value for an attribute/key name
        :param missing_error: Exception type `fetch` raises for a missing name
        """
        for name in self.includes:
            attr_name = name.rsplit('.', 1)[-1]
            full_name = ctx + attr_name
            if name != full_name:
                continue
            try:
                raw_value = fetch(attr_name)
            except missing_error:
                # A missing include must not prevent the remaining ones.
                continue
            if callable(raw_value):
                raw_value = raw_value()
            result[self._get_name(full_name)] = self._to_primitive(
                raw_value, ctx=full_name + '.')

    def _to_primitive(self, o, ctx=''):
        """Recursive conversion from :class:`mongoengine.Document` or BSON objects to primitive
        types which can be serialized by standard :class:`json.JSONEncoder` of :func:`json.dumps`.

        Based on :func:`bson.json_util.default` and :func:`bson.json_util._json_convert`.

        :param o: Object to convert
        :param str ctx: Conversion context.
            So if `o` is value for dict key 'foo', ctx will be equal 'foo.'
            and all fields of `o` will be named like 'foo.name1', 'foo.name2', etc.
        :return: Conversion result; any value not recognised here is returned unchanged.
        """
        if isinstance(o, MapReduceDocument):
            res = {'_id': o.key}
            res.update(o.value)
            return self._to_primitive(res)

        if isinstance(o, BaseDocument):
            result = {}
            for field_name in o:
                full_name = ctx + field_name
                if not self._is_allowed(full_name):
                    continue
                # Only the attribute lookup is guarded, so that errors raised
                # while converting the value itself are not silently dropped.
                try:
                    value = getattr(o, field_name)
                except AttributeError:
                    continue
                result[self._get_name(full_name)] = self._to_primitive(
                    value, ctx=full_name + '.')
            self._add_includes(result, ctx,
                               lambda attr_name: getattr(o, attr_name),
                               AttributeError)
            return result

        if hasattr(o, 'items'):
            result = {}
            for field_name, value in o.items():
                full_name = ctx + field_name
                if not self._is_allowed(full_name):
                    continue
                result[self._get_name(full_name)] = self._to_primitive(
                    value, ctx=full_name + '.')
            self._add_includes(result, ctx, o.__getitem__, KeyError)
            return result

        # str and bytes are iterable but must be treated as scalars.
        if hasattr(o, '__iter__') and not isinstance(o, (str, bytes)):
            return [self._to_primitive(v, ctx=ctx) for v in o]

        if isinstance(o, ObjectId):
            return {"$oid": str(o)}
        if isinstance(o, DBRef):
            return self._to_primitive(o.as_doc(), ctx=ctx)
        if isinstance(o, datetime.datetime):
            # Shift aware datetimes by their UTC offset before taking the epoch.
            if o.utcoffset() is not None:
                o = o - o.utcoffset()
            millis = calendar.timegm(o.timetuple()) * 1000 + o.microsecond // 1000
            return {"$date": millis}
        if isinstance(o, (RE_TYPE, Regex)):
            flags = ""
            if o.flags & re.IGNORECASE:
                flags += "i"
            if o.flags & re.LOCALE:
                flags += "l"
            if o.flags & re.MULTILINE:
                flags += "m"
            if o.flags & re.DOTALL:
                flags += "s"
            if o.flags & re.UNICODE:
                flags += "u"
            if o.flags & re.VERBOSE:
                flags += "x"
            if isinstance(o.pattern, str):
                pattern = o.pattern
            else:
                pattern = o.pattern.decode('utf-8')
            return {"$regex": pattern, "$options": flags}
        if isinstance(o, MinKey):
            return {"$minKey": 1}
        if isinstance(o, MaxKey):
            return {"$maxKey": 1}
        if isinstance(o, Timestamp):
            return {"t": o.time, "i": o.inc}
        if isinstance(o, Code):
            return {'$code': str(o), '$scope': o.scope}
        # Binary is checked before plain bytes so that its subtype is reported.
        if isinstance(o, Binary):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "%02x" % o.subtype}
        if isinstance(o, bytes):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "00"}
        # Checked against the stdlib type directly rather than through bson.
        if isinstance(o, uuid.UUID):
            return {"$uuid": o.hex}
        return o

    def serialize(self, o):
        """Convert object to primitive types with rules passed in constructor.

        Querysets and other iterables are wrapped into a dict under
        `self.collection_name`; strings, bytes, mappings and documents are
        converted as single values.

        :param o: Object to serialize
        :return: Serialized object
        """
        is_collection = isinstance(o, BaseQuerySet) or (
            hasattr(o, '__iter__')
            and not isinstance(o, (str, bytes, BaseDocument))
            and not hasattr(o, 'items'))
        if is_collection:
            return {self.collection_name: [self._to_primitive(item) for item in o]}
        return self._to_primitive(o)

    def to_json(self, o, **kwargs):
        """Serialize object to JSON string.

        :param o: Object to serialize
        :param kwargs: Passed through to :func:`json.dumps`
        :return: JSON string
        :rtype: str
        """
        return json.dumps(self.serialize(o), **kwargs)
@bromzh
Copy link
Author

bromzh commented Sep 9, 2014

@kxepal
Thanks for answer.

  1. Yes, this is a good idea. Of course, splitting one function into many makes the code more extensible, but this is just an example. I think a good approach would be a metaclass that collects all the conversion functions from child classes into one list and calls them one by one. But that is a way to build yet another Python model system.
  2. I had a problem with the whitelist. I have fixed that part now, but I still think there is an easier way to do it (I just don't know how).
  3. Thanks, I didn't know this thing
  4. Fixed

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment