Created
March 10, 2014 15:56
-
-
Save jbeezley/9467772 to your computer and use it in GitHub Desktop.
pymongo insert benchmark with healthMap data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import json
import math
import sys
import time
import timeit

import pymongo
# healthMap data json (the file read by loadData)
fname = 'healthMapDump.json'

# mongo database to benchmark
host = 'localhost'
port = 27017

# database name (dropped every time connect() is called)
dbName = 'healthMapBenchmark'

# collection name (dropped every time getCollection() is called)
colName = 'benchmarkCollection'

# output stream that all progress and timing messages are written to
out = sys.stdout
class TimeIt(object):
    """Announce a task on an output stream and report how long it took.

    Constructing an instance writes ``'<descr>... '`` (without a newline) and
    starts the clock.  ``finish()`` completes that line with the elapsed time,
    and ``str(timer)`` gives the elapsed-time text without writing anything.
    """

    def __init__(self, descr, stream=None):
        """Write the task description and start timing.

        Args:
            descr: Text describing the task being timed.
            stream: Writable text stream for the messages.  Defaults to the
                module-level ``out`` stream.
        """
        self.stream = out if stream is None else stream
        self.stream.write('%s... ' % descr)
        # Flush so the description is visible while the task is running.
        self.stream.flush()
        self.descr = descr
        # timeit.default_timer is the highest-resolution clock the running
        # interpreter offers (perf_counter on Python 3.3+), which makes it a
        # better fit for benchmarking than the wall clock.
        self.start = timeit.default_timer()

    def finish(self):
        """Write the elapsed time followed by a newline, then flush."""
        self.stream.write(str(self) + '\n')
        self.stream.flush()

    def __str__(self):
        """Return the time elapsed since construction, e.g. '   1.234s elapsed.'."""
        return '%8.3fs elapsed.' % (timeit.default_timer() - self.start)
def loadData():
    """Load the records to insert from the JSON file named by ``fname``.

    Returns:
        The value stored under the top-level ``'features'`` key of the JSON
        document (``benchmark`` takes ``len()`` of it and slices it).

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the file does not contain valid JSON.
        KeyError: if the document has no ``'features'`` key.
    """
    t = TimeIt('loading data from %s' % fname)
    # io.open behaves the same on Python 2 and 3; the context manager closes
    # the handle, and JSON is decoded as UTF-8 rather than the locale default.
    with io.open(fname, 'r', encoding='utf-8') as f:
        data = json.load(f)['features']
    # Report through the timer (and therefore ``out``) like every other step.
    t.finish()
    return data
def connect(writeAck=True):
    """Connect to the server, drop the benchmark database and return it.

    Every call opens a new ``MongoClient`` for ``host``/``port`` and drops the
    database named ``dbName``, so the caller always starts from an empty
    database.

    Args:
        writeAck: When false, the client is created with write concern
            ``w=0`` (unacknowledged writes); otherwise the driver default
            is used.

    Returns:
        The database object ``client[dbName]``.
    """
    options = {}
    if not writeAck:
        # Pass the write concern to the constructor: the client's
        # ``write_concern`` attribute can only be mutated in place on
        # pymongo 2.x, whereas the ``w`` keyword is accepted by old and
        # new driver versions alike.
        options['w'] = 0
    client = pymongo.MongoClient(host, port, **options)
    client.drop_database(dbName)
    return client[dbName]
def getCollection(db):
    """Drop the collection named ``colName`` in ``db`` and return a handle to it.

    Args:
        db: Database object, as returned by ``connect``.

    Returns:
        ``db[colName]``, obtained after the drop.
    """
    db.drop_collection(colName)
    return db[colName]
def benchmark(data, Ngroup=None, writeAck=True, index=False):
    """Time inserting ``data`` into a freshly dropped collection in batches.

    Args:
        data: Sequence of documents to insert; must support ``len()`` and
            slicing.
        Ngroup: Number of documents per insert call.  ``None`` inserts
            everything in a single call.
        writeAck: Passed to ``connect``; false disables write
            acknowledgement.
        index: When true, ``create_index('date')`` is called on the
            collection after each full batch.

    Raises:
        ValueError: if ``Ngroup`` is given but is smaller than 1.
    """
    count = len(data)
    if Ngroup is None:
        # max() keeps the batch size non-zero so that empty data no longer
        # triggers a ZeroDivisionError below.
        Ngroup = max(count, 1)
    if Ngroup < 1:
        raise ValueError('Ngroup must be a positive integer, got %r' % (Ngroup,))

    collection = getCollection(connect(writeAck))

    # Number of full batches; integer division gives the same result on
    # Python 2 and Python 3.
    N = count // Ngroup
    t = TimeIt('Insert %i records in %5i batches of %5i' % (count, N, Ngroup))
    for i in range(N):
        group = data[Ngroup * i:Ngroup * (i + 1)]
        # NOTE(review): ``insert`` is the legacy pymongo bulk-insert call
        # (superseded by insert_one/insert_many in pymongo 3) -- confirm the
        # driver version this benchmark targets.
        collection.insert(group)
        if index:
            # NOTE(review): assumed to run once per batch; confirm whether
            # the index was meant to be created inside the loop.
            collection.create_index('date')
    if count > N * Ngroup:
        # Trailing partial batch; it is not included in the batch count
        # reported above.
        collection.insert(data[Ngroup * N:])
    t.finish()
if __name__ == '__main__':
    # Load the documents once; every benchmark run re-inserts the same data.
    data = loadData()

    # Run each batch size for all four combinations of indexing and
    # write acknowledgement.
    for index in (False, True):
        indexStr = 'with' if index else 'without'
        for writeAck in (True, False):
            ackStr = 'with' if writeAck else 'without'
            out.write('\n')
            out.write('Testing %s write acknowledgement, %s indexing\n'
                      % (ackStr, indexStr))
            out.write('=' * 40 + '\n')

            # One document at a time, then 10, 100 and 1000 per insert call,
            # and finally everything in a single call (None).
            for batchSize in (1, 10, 100, 1000, None):
                benchmark(data, batchSize, writeAck, index)

    # clean up: connect() drops the benchmark database
    connect()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment