Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Some mongoengine vs pymongo timing comparisons, based on https://gist.github.com/BeardedSteve/a1484adcf7475f62028e/ and the discussion at http://stackoverflow.com/questions/35257305/mongoengine-is-very-slow-on-large-documents-comapred-to-native-pymongo-usage — an example using embedded documents, with additional comparison against Marrow Mongo and PyModm.
import datetime
import itertools
import random
import sys
import timeit
from collections import defaultdict
from pymongo import version as pymongo_version
from distutils.version import StrictVersion
import mongoengine as db
from pycallgraph.output.graphviz import GraphvizOutput
from pycallgraph.pycallgraph import PyCallGraph
# Connect MongoEngine to the "test-dicts" database; the raw-pymongo benchmarks
# below index the same database by name on this connection.
db.connect("test-dicts")
class Data(db.EmbeddedDocument):
    """Embedded document with five integer-list fields, ``subf0`` .. ``subf4``.

    Used as the payload type of ``MyEmbedModel.data_dict_1``.
    """

    subf0 = db.ListField(field=db.IntField())
    subf1 = db.ListField(field=db.IntField())
    subf2 = db.ListField(field=db.IntField())
    subf3 = db.ListField(field=db.IntField())
    subf4 = db.ListField(field=db.IntField())
class MyDictModel(db.Document):
    """Benchmark document that keeps its payload in a free-form ``DictField``.

    The raw-pymongo benchmark in this file reads the ``my_dict_model``
    collection, presumably the name MongoEngine derives from this class name.
    """

    # Required timestamp; when omitted it is filled by ``datetime.date.today``.
    date = db.DateTimeField(default=datetime.date.today, required=True)
    # Unstructured payload.
    data_dict_1 = db.DictField()
class MyEmbedModel(db.Document):
    """Benchmark document that keeps its payload in an embedded ``Data`` document.

    The raw-pymongo benchmark in this file reads the ``my_embed_model``
    collection, presumably the name MongoEngine derives from this class name.
    """

    # Required timestamp; when omitted it is filled by ``datetime.date.today``.
    date = db.DateTimeField(default=datetime.date.today, required=True)
    # Structured payload described by the ``Data`` embedded document.
    data_dict_1 = db.EmbeddedDocumentField(document_type=Data)
# Start from empty collections so each model holds exactly one document.
MyDictModel.drop_collection()
MyEmbedModel.drop_collection()

# Payload keys "subf0" .. "subf4"; they match the field names declared on Data.
data = ["subf" + str(index) for index in range(5)]

# Dict-backed fixture: every key maps to 20000 distinct ints drawn from range(50000).
m_dict = MyDictModel()
my_dict = {name: random.sample(range(50000), 20000) for name in data}
m_dict.data_dict_1 = my_dict
m_dict.save()

# Embedded-document fixture with the same payload shape, filled field by field.
m_embed = MyEmbedModel()
my_data = Data()
for field_name in data:
    my_data[field_name] = random.sample(range(50000), 20000)
m_embed.data_dict_1 = my_data
m_embed.save()
def pymongo_dict_doc():
    """Fetch one document from ``my_dict_model`` via the underlying pymongo client.

    Prints a ``(type, len)`` tuple describing the fetched document and
    returns the document itself.
    """
    client = db.connection.get_connection()
    collection = client["test-dicts"]['my_dict_model']
    document = collection.find_one()
    print((type(document), len(document)))
    return document
def pymongo_embed_doc():
    """Fetch one document from ``my_embed_model`` via the underlying pymongo client.

    Prints a ``(type, len)`` tuple describing the fetched document and
    returns the document itself.
    """
    client = db.connection.get_connection()
    collection = client["test-dicts"]['my_embed_model']
    document = collection.find_one()
    print((type(document), len(document)))
    return document
def mongoengine_dict_doc():
    """Load the first ``MyDictModel`` document through the MongoEngine queryset.

    Prints the type and length of its ``data_dict_1`` field, then returns
    the loaded document.
    """
    document = MyDictModel.objects.first()
    summary = (type(document.data_dict_1), len(document.data_dict_1))
    print(summary)
    return document
def mongoengine_embed_doc():
    """Load the first ``MyEmbedModel`` document through the MongoEngine queryset.

    Prints the type and length of its ``data_dict_1`` field, then returns
    the loaded document.
    """
    document = MyEmbedModel.objects.first()
    summary = (type(document.data_dict_1), len(document.data_dict_1))
    print(summary)
    return document
def mongoengine_dict_docp():
    """Load the first ``MyDictModel`` record with the queryset in ``as_pymongo()`` mode.

    Prints a ``(type, len)`` tuple describing the result and returns it.
    """
    raw_queryset = MyDictModel.objects.as_pymongo()
    document = raw_queryset.first()
    print((type(document), len(document)))
    return document
def mongoengine_embed_docp():
    """Load the first ``MyEmbedModel`` record with the queryset in ``as_pymongo()`` mode.

    Prints a ``(type, len)`` tuple describing the result and returns it.
    """
    raw_queryset = MyEmbedModel.objects.as_pymongo()
    document = raw_queryset.first()
    print((type(document), len(document)))
    return document
def mongoengine_agg_doc():
    """Fetch one ``MyDictModel`` record through a ``$limit: 1`` aggregation.

    Prints a ``(type, len)`` tuple describing the first aggregation result
    and returns that result.
    """
    limit_stage = {"$limit": 1}
    results = list(MyDictModel.objects.aggregate(limit_stage))
    document = results[0]
    print((type(document), len(document)))
    return document
def mongoengine_agg_embed():
    """Fetch one ``MyEmbedModel`` record through a ``$limit: 1`` aggregation.

    Prints a ``(type, len)`` tuple describing the first aggregation result
    and returns that result.
    """
    limit_stage = {"$limit": 1}
    results = list(MyEmbedModel.objects.aggregate(limit_stage))
    document = results[0]
    print((type(document), len(document)))
    return document
if __name__ == '__main__':
    # Report template paired with the benchmark it times; each benchmark is
    # called 10 times and the total elapsed time is printed.
    timed_cases = [
        ("pymongo with dict took {:2.2f}s", pymongo_dict_doc),
        ("pymongo with embed took {:2.2f}s", pymongo_embed_doc),
        ("mongoengine with dict took {:2.2f}s", mongoengine_dict_doc),
        ("mongoengine with embed took {:2.2f}s", mongoengine_embed_doc),
        ("mongoengine with dict as_pymongo() took {:2.2f}s", mongoengine_dict_docp),
        ("mongoengine with embed as_pymongo() took {:2.2f}s", mongoengine_embed_docp),
    ]
    for template, benchmark in timed_cases:
        print(template.format(timeit.timeit(benchmark, number=10)))

    # The aggregation benchmarks are only run on pymongo 3.0.0 or newer.
    if StrictVersion(pymongo_version) >= StrictVersion('3.0.0'):
        aggregation_cases = [
            ("mongoengine aggregation with dict took {:2.2f}s", mongoengine_agg_doc),
            ("mongoengine aggregation with embed took {:2.2f}s", mongoengine_agg_embed),
        ]
        for template, benchmark in aggregation_cases:
            print(template.format(timeit.timeit(benchmark, number=10)))
    else:
        print("Skipping aggregation on pymongo < 3.x")

    # Render a call graph for one MongoEngine load of each model flavour.
    out1 = GraphvizOutput()
    out1.output_file = "viz_embed.png"
    out2 = GraphvizOutput()
    out2.output_file = "viz_dict.png"
    profiled_cases = ((out1, mongoengine_embed_doc), (out2, mongoengine_dict_doc))
    for graph_output, profiled in profiled_cases:
        with PyCallGraph(output=graph_output):
            profiled()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment