Skip to content

Instantly share code, notes, and snippets.

@hmarr
Created January 16, 2010 15:36
Show Gist options
  • Save hmarr/278859 to your computer and use it in GitHub Desktop.
Save hmarr/278859 to your computer and use it in GitHub Desktop.
import mongoengine
from mongoengine.document import Document
from mongoengine.fields import StringField, ListField
import time
from contextlib import contextmanager
from random import sample, randint
def create_data(count=5000, words_path='/usr/share/dict/words'):
    """Build random ``(title, tags)`` pairs to use as benchmark documents.

    Each title is 10 randomly sampled words joined by single spaces and
    cut to at most 110 characters; each tag list is 7 randomly sampled
    words.

    Args:
        count: Number of ``(title, tags)`` pairs to generate.
        words_path: Path to a UTF-8 encoded, whitespace-separated word list.

    Returns:
        A list of ``(title, tags)`` tuples, where ``title`` is a text string
        and ``tags`` is a list of text strings.

    Raises:
        IOError/OSError: If ``words_path`` cannot be opened.
        ValueError: If the word list holds fewer than 10 words (raised by
            ``random.sample``) and ``count`` is greater than zero.
    """
    # Read raw bytes and decode explicitly: this yields unicode text on both
    # Python 2 and Python 3, and the with-block guarantees the file is closed.
    with open(words_path, 'rb') as words_file:
        words = words_file.read().decode('utf8').split()

    data = []
    for _ in range(count):
        # Slicing is a no-op for titles that are already short enough.
        title = ' '.join(sample(words, 10))[:110]
        tags = sample(words, 7)
        data.append((title, tags))
    return data
@contextmanager
def stopwatch(name):
    """Time the body of a ``with`` block and print the elapsed seconds.

    Args:
        name: Label inserted at the start of the printed report line.

    Prints ``'<name> took <seconds> seconds to run'`` once the body finishes.
    If the body raises, the exception propagates and nothing is printed.
    """
    # default_timer selects the most suitable clock for benchmarking on the
    # running interpreter/platform, unlike time.time() which follows the
    # adjustable wall clock.
    from timeit import default_timer

    started = default_timer()
    yield
    time_taken = default_timer() - started
    # Single-argument print(...) produces identical output on Python 2 and 3.
    print('%s took %s seconds to run' % (name, time_taken))
def test_no_index(data, large_test_set, small_test_set):
    """Time title and tag lookups on a collection declared without indexes.

    Args:
        data: Iterable of ``(title, tags)`` pairs; every pair is saved as one
            document before timing starts.
        large_test_set: Iterable of ``(title, tags)`` pairs; one query by
            title is issued per pair (the tags are ignored).
        small_test_set: Iterable of ``(title, tags)`` pairs; one query is
            issued per tag of each pair (the title is ignored).

    Timings are reported through ``stopwatch`` under the labels
    ``'no-index-title'`` and ``'no-index-tags'``. The collection is dropped
    before loading and again afterwards, even if loading or querying fails.
    """
    class NonIndexedPost(Document):
        title = StringField()
        tags = ListField(StringField())

    # Drop first so documents from a previous run are not queried as well.
    NonIndexedPost.drop_collection()
    try:
        for title, tags in data:
            NonIndexedPost(title=title, tags=tags).save()

        with stopwatch('no-index-title'):
            for title, _ in large_test_set:
                # list() consumes the queryset so the results are fetched.
                list(NonIndexedPost.objects(title=title))

        with stopwatch('no-index-tags'):
            for _, tags in small_test_set:
                for tag in tags:
                    list(NonIndexedPost.objects(tags=tag))
    finally:
        # Always clean up so a failed run does not leave benchmark data behind.
        NonIndexedPost.drop_collection()
def test_index(data, large_test_set, small_test_set):
    """Time title and tag lookups on a collection declaring two indexes.

    The document class lists ``'title'`` and ``'tags'`` under
    ``meta['indexes']``.

    Args:
        data: Iterable of ``(title, tags)`` pairs; every pair is saved as one
            document before timing starts.
        large_test_set: Iterable of ``(title, tags)`` pairs; one query by
            title is issued per pair (the tags are ignored).
        small_test_set: Iterable of ``(title, tags)`` pairs; one query is
            issued per tag of each pair (the title is ignored).

    Timings are reported through ``stopwatch`` under the labels
    ``'index-title'`` and ``'index-tags'``. The collection is dropped before
    loading and again afterwards, even if loading or querying fails.
    """
    class IndexedPost(Document):
        title = StringField()
        tags = ListField(StringField())
        meta = {'indexes': ['title', 'tags']}

    # Drop first so documents from a previous run are not queried as well.
    IndexedPost.drop_collection()
    try:
        for title, tags in data:
            IndexedPost(title=title, tags=tags).save()

        with stopwatch('index-title'):
            for title, _ in large_test_set:
                # list() consumes the queryset so the results are fetched.
                list(IndexedPost.objects(title=title))

        with stopwatch('index-tags'):
            for _, tags in small_test_set:
                for tag in tags:
                    list(IndexedPost.objects(tags=tag))
    finally:
        # Always clean up so a failed run does not leave benchmark data behind.
        IndexedPost.drop_collection()
def main():
    """Run the indexed benchmark, then the non-indexed one, on shared data.

    Connects to the ``'test'`` database, generates the documents, and draws
    two query sets from them: a quarter of the documents for title lookups,
    and a fifteenth of them (each reduced to 3 of its tags) for tag lookups.
    """
    mongoengine.connect('test')
    data = create_data()

    # Floor division keeps the sample sizes integers on Python 3 as well;
    # random.sample rejects a float count.
    large_test_set = sample(data, len(data) // 4)
    small_test_set = [
        (title, sample(tags, 3))
        for title, tags in sample(data, len(data) // 15)
    ]

    test_index(data, large_test_set, small_test_set)
    test_no_index(data, large_test_set, small_test_set)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment