Last active
August 29, 2015 14:05
-
-
Save bromzh/ca30fb7eb5983fe5b8d6 to your computer and use it in GitHub Desktop.
This is a simple serializer for MongoEngine Documents that supports blacklists and whitelists, name mapping, and inclusion of methods/properties.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import base64
import calendar
import datetime
import json
import re
import uuid

import bson
from bson import RE_TYPE
from bson.binary import Binary
from bson.code import Code
from bson.dbref import DBRef
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from mongoengine.document import BaseDocument, MapReduceDocument
from mongoengine.queryset.base import BaseQuerySet
class DocumentSerializer(object):
    """Serializer for MongoEngine documents and pymongo data.

    Field selection is driven by dotted paths (``'author.email'``):

    * ``blacklist`` entries are dropped (exact path match);
    * when ``whitelist`` is non-empty, a field is kept only if its path is a
      whitelisted path, an ancestor of one, or a descendant of one;
    * ``includes`` adds methods/properties by exact path, regardless of the
      black and white lists;
    * ``mapping`` renames a path's output key.

    Usage:

    >>> from bson import ObjectId
    >>> from mongoengine import connect, fields, Document
    >>> client = connect('test')
    >>> db = client.test
    >>> db.user.drop()
    >>> db.post.drop()
    >>> db.comment.drop()
    >>>
    >>> class User(Document):
    ...     first_name = fields.StringField(max_length=100)
    ...     last_name = fields.StringField(max_length=100)
    ...     email = fields.EmailField()
    ...     @property
    ...     def name(self):
    ...         return ' '.join((self.first_name, self.last_name))
    ...
    >>> user = User(id=ObjectId('54052ba41591466157f5306a'), first_name='Philip',
    ...             last_name='Dick', email='dick@example.com')
    >>> user.save()
    <User: User object>
    >>> data = DocumentSerializer().serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True
    >>> data = DocumentSerializer(includes=['name']).serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick', 'email': 'dick@example.com',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}, 'name': 'Philip Dick'}
    True
    >>> data = DocumentSerializer(blacklist=['email']).serialize(user)
    >>> data == {'first_name': 'Philip', 'last_name': 'Dick',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True
    >>> data = DocumentSerializer(whitelist=['first_name', 'email']).serialize(user)
    >>> data == {'first_name': 'Philip', 'email': 'dick@example.com'}
    True
    >>> data = DocumentSerializer(mapping={'last_name': 'surname'}).serialize(user)
    >>> data == {'first_name': 'Philip', 'surname': 'Dick', 'email': 'dick@example.com',
    ...          'id': {'$oid': '54052ba41591466157f5306a'}}
    True
    >>> data = DocumentSerializer(blacklist=['first_name', 'last_name', 'id'],
    ...                           includes=['name'],
    ...                           mapping={'name': 'full_name'}).serialize(user)
    >>> data == {'full_name': 'Philip Dick', 'email': 'dick@example.com'}
    True
    >>> class Post(Document):
    ...     title = fields.StringField(max_length=500)
    ...     text = fields.StringField()
    ...     author = fields.ReferenceField(User)
    ...
    >>> first_post = Post(id=ObjectId('54052ba41591466157f5306b'), title='foo',
    ...                   text='lorem ipsum', author=user)
    >>> first_post.save()
    <Post: Post object>
    >>> data = DocumentSerializer(blacklist=['id', 'author.id']).serialize(first_post)
    >>> data == {'title': 'foo', 'text': 'lorem ipsum',
    ...          'author': {'last_name': 'Dick', 'first_name': 'Philip',
    ...                     'email': 'dick@example.com'}}
    True
    >>> data = DocumentSerializer(whitelist=['title', 'author'],
    ...                           includes=['author.name'],
    ...                           mapping={'author.name': 'full_name'},
    ...                           blacklist=['author.id',
    ...                                      'author.first_name',
    ...                                      'author.last_name']).serialize(first_post)
    >>> data == {'author': {'full_name': 'Philip Dick', 'email': 'dick@example.com'},
    ...          'title': 'foo'}
    True
    >>> another_post = Post(id=ObjectId('54052ba41591466157f5306c'), title='bar',
    ...                     text='lorem ipsum', author=user)
    >>> another_post.save()
    <Post: Post object>
    >>> posts = Post.objects.all()
    >>> data = DocumentSerializer(whitelist=['post.title', 'author.email'],
    ...                           includes=['author.name'],
    ...                           collection_name='posts').serialize(posts)
    >>> data == {'posts': [{'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}},
    ...                    {'author': {'name': 'Philip Dick', 'email': 'dick@example.com'}}]}
    True

    :param list whitelist: List of dotted field names which will be output during serialization
    :param list blacklist: List of dotted field names which will not be output during serialization
    :param list includes: List of methods and/or properties of the class
        which will be output during serialization
    :param dict mapping: Dict with rules for naming fields during serialization.
        Key - dotted field name (full source name), value - name during serialization
    :param str collection_name: Name of the dict key used when serializing iterable data
        such as :obj:`list`, :obj:`generator`, :class:`mongoengine.QuerySet`, etc.
    """

    # (flag, option letter) pairs, in the order the letters are emitted.
    _REGEX_FLAGS = (
        (re.IGNORECASE, "i"),
        (re.LOCALE, "l"),
        (re.MULTILINE, "m"),
        (re.DOTALL, "s"),
        (re.UNICODE, "u"),
        (re.VERBOSE, "x"),
    )

    def __init__(self, whitelist=None, blacklist=None, includes=None,
                 mapping=None, collection_name='objects'):
        # A falsy whitelist (None or empty) means "no whitelist filtering".
        self.whitelist = whitelist
        self.blacklist = blacklist if blacklist is not None else []
        self.includes = includes if includes is not None else []
        self.mapping = mapping if mapping is not None else {}
        self.collection_name = collection_name

    def _get_name(self, name):
        """Return the output key for a dotted field name.

        :param str name: Dotted field name
        :return: Mapped name from `self.mapping` if present, otherwise the
            last dotted segment of `name`
        :rtype: str
        """
        return self.mapping.get(name, name.rsplit('.', 1)[-1])

    def _is_allowed(self, full_name):
        """Tell whether the field at dotted path `full_name` passes the filters.

        Matching is done on dotted-path boundaries, so whitelisting
        ``'first_name'`` does not let a field called ``'first'`` through.

        :param str full_name: Dotted path of the field being considered
        :rtype: bool
        """
        if full_name in self.blacklist:
            return False
        if not self.whitelist:
            return True
        return any(
            allowed_name == full_name
            # field lies below a whitelisted path
            or full_name.startswith(allowed_name + '.')
            # field is an ancestor that must be kept to reach a whitelisted path
            or allowed_name.startswith(full_name + '.')
            for allowed_name in self.whitelist
        )

    def _add_includes(self, result, ctx, lookup, missing_error):
        """Add the `self.includes` entries that belong to the current level.

        :param dict result: Output dict of the current level, updated in place
        :param str ctx: Conversion context (dotted prefix ending with '.', or '')
        :param lookup: Callable returning the raw value for an attribute/key name
        :param missing_error: Exception type `lookup` raises when the name is absent
        """
        for name in self.includes:
            attr_name = name.rsplit('.', 1)[-1]
            full_name = ctx + attr_name
            if name != full_name:
                # This include targets another nesting level.
                continue
            try:
                raw_value = lookup(attr_name)
            except missing_error:
                # Skip only this entry; later includes must still be processed.
                continue
            if callable(raw_value):
                raw_value = raw_value()
            result[self._get_name(full_name)] = self._to_primitive(
                raw_value, ctx=full_name + '.')

    def _document_to_primitive(self, o, ctx):
        """Convert a MongoEngine document to a dict of primitives."""
        result = {}
        for field_name in o:
            full_name = ctx + field_name
            if not self._is_allowed(full_name):
                continue
            # Guard only the attribute lookup, so errors raised while
            # converting the value are not mistaken for a missing field.
            try:
                value = getattr(o, field_name)
            except AttributeError:
                continue
            result[self._get_name(full_name)] = self._to_primitive(
                value, ctx=full_name + '.')
        self._add_includes(result, ctx, lambda name: getattr(o, name),
                           AttributeError)
        return result

    def _mapping_to_primitive(self, o, ctx):
        """Convert a dict-like object (anything with `items`) to a dict of primitives."""
        result = {}
        for field_name, value in o.items():
            full_name = ctx + field_name
            if not self._is_allowed(full_name):
                continue
            result[self._get_name(full_name)] = self._to_primitive(
                value, ctx=full_name + '.')
        self._add_includes(result, ctx, lambda name: o[name], KeyError)
        return result

    def _scalar_to_primitive(self, o, ctx):
        """Convert a single BSON/Python value; unknown types are returned as is."""
        if isinstance(o, ObjectId):
            return {"$oid": str(o)}
        if isinstance(o, DBRef):
            return self._to_primitive(o.as_doc(), ctx=ctx)
        if isinstance(o, datetime.datetime):
            # Aware datetimes are shifted to UTC before taking the epoch value.
            if o.utcoffset() is not None:
                o = o - o.utcoffset()
            millis = calendar.timegm(o.timetuple()) * 1000 + o.microsecond // 1000
            return {"$date": millis}
        if isinstance(o, (RE_TYPE, Regex)):
            flags = "".join(letter for flag, letter in self._REGEX_FLAGS
                            if o.flags & flag)
            pattern = o.pattern
            if not isinstance(pattern, str):
                pattern = pattern.decode('utf-8')
            return {"$regex": pattern, "$options": flags}
        if isinstance(o, MinKey):
            return {"$minKey": 1}
        if isinstance(o, MaxKey):
            return {"$maxKey": 1}
        if isinstance(o, Timestamp):
            return {"t": o.time, "i": o.inc}
        if isinstance(o, Code):
            # The scope may itself hold BSON values, so convert it as well.
            return {'$code': str(o),
                    '$scope': self._to_primitive(o.scope, ctx=ctx)}
        if isinstance(o, Binary):
            return {'$binary': base64.b64encode(o).decode(),
                    '$type': "%02x" % o.subtype}
        if isinstance(o, bytes):
            return {'$binary': base64.b64encode(o).decode(), '$type': "00"}
        if isinstance(o, uuid.UUID):
            return {"$uuid": o.hex}
        return o

    def _to_primitive(self, o, ctx=''):
        """Recursively convert :class:`mongoengine.Document` or BSON objects to primitive
        types which can be serialized by the standard :class:`json.JSONEncoder`
        of :func:`json.dumps`.

        Based on :func:`bson.json_util.default` and :func:`bson.json_util._json_convert`.

        :param o: Object to convert
        :param str ctx: Conversion context.
            If `o` is the value for dict key 'foo', ctx will be 'foo.'
            and all fields of `o` will be named like 'foo.name1', 'foo.name2', etc.
        :return: Conversion result.
            Type will be one of: `int`, `str`, `list`, `dict`, `True`, `False`, `None`.
        """
        if isinstance(o, MapReduceDocument):
            res = {'_id': o.key}
            value = o.value
            if hasattr(value, 'items'):
                res.update(value)
            else:
                # A reduce step may emit a bare scalar; keep it under 'value'.
                res['value'] = value
            return self._to_primitive(res)
        if isinstance(o, BaseDocument):
            return self._document_to_primitive(o, ctx)
        if hasattr(o, 'items'):
            return self._mapping_to_primitive(o, ctx)
        # str and bytes are iterable but must be treated as scalars.
        if hasattr(o, '__iter__') and not isinstance(o, (str, bytes)):
            return [self._to_primitive(v, ctx=ctx) for v in o]
        return self._scalar_to_primitive(o, ctx)

    def serialize(self, o):
        """Convert an object to primitive types using the rules passed to the constructor.

        Iterables (query sets, lists, generators, ...) are returned as
        ``{collection_name: [item, ...]}``; strings, bytes, dicts and
        documents are converted as single values.

        :param o: Object to serialize
        :return: Serialized object
        """
        is_collection = isinstance(o, BaseQuerySet) or (
            hasattr(o, '__iter__')
            and not isinstance(o, (str, bytes, dict, BaseDocument)))
        if is_collection:
            return {self.collection_name: [self._to_primitive(item) for item in o]}
        return self._to_primitive(o)

    def to_json(self, o, **kwargs):
        """Serialize an object to a JSON string.

        :param o: Object to serialize
        :param kwargs: Extra keyword arguments passed to :func:`json.dumps`
        :return: JSON string
        :rtype: str
        """
        return json.dumps(self.serialize(o), **kwargs)
@kxepal
Thanks for answer.
- Yes, this is a good idea. Of course, splitting one function into many makes the code more extensible, but this is just an example. I think a good approach would be a metaclass that collects all conversion functions from child classes into one list and calls them one by one. But that is a way of building yet another Python model system.
- I had some problems with the whitelist. I have fixed that part now, but I still think there is an easier way to do it (I just don't know how).
- Thanks, I didn't know this thing
- Fixed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A few suggestions:
- Split `_to_primitive` into multiple functions: you have duplicated logic there. The best way is function-per-type, so that your code can be easily extended.
- The way the `denied` flag gets set in the whitelist processing loop is awkward. Maybe you wanted to set it to True by default and just break the loop on False?
- Instead of `foo.split('.')[-1]` use `foo.rsplit('.', 1)[-1]`, since you're not interested in splitting the whole string.
- The `bytes` type implements `__iter__`, so `if isinstance(o, bytes):` will never get reached for them.