Skip to content

Instantly share code, notes, and snippets.

@jbeezley
Created March 10, 2014 15:56
Show Gist options
  • Save jbeezley/9467772 to your computer and use it in GitHub Desktop.
Save jbeezley/9467772 to your computer and use it in GitHub Desktop.
pymongo insert benchmark with healthMap data
import json
import math
import sys
import time
import timeit

import pymongo
# ---- benchmark configuration ----
# path of the healthMap JSON dump that supplies the records to insert
fname = 'healthMapDump.json'
# address of the MongoDB server being benchmarked
host, port = 'localhost', 27017
# database and collection used for the benchmark
dbName, colName = 'healthMapBenchmark', 'benchmarkCollection'
# stream that receives all progress and timing output
out = sys.stdout
class TimeIt(object):
    """Announce a task on ``out`` and report how long it has been running.

    Creating an instance writes ``'<descr>... '`` (no newline) to the
    module-level ``out`` stream and records the start time.  ``str()`` of
    the instance gives the time elapsed since construction, and
    ``finish()`` writes that string to ``out`` as a complete line.

    ``start`` holds a ``timeit.default_timer()`` reading; it is only
    meaningful as a difference against another reading of the same clock.
    """

    def __init__(self, descr):
        """Write the task description and start the clock.

        descr -- short text describing the task being timed.
        """
        out.write('%s... ' % descr)
        out.flush()
        self.descr = descr
        # default_timer is the clock the standard library itself uses for
        # benchmarking; time.time() is a wall clock and can jump when the
        # system time is adjusted.
        self.start = timeit.default_timer()

    def finish(self):
        """Write the elapsed time to ``out`` followed by a newline."""
        out.write(str(self) + '\n')
        out.flush()

    def __str__(self):
        """Return the seconds elapsed so far, formatted as '%8.3fs elapsed.'."""
        return '%8.3fs elapsed.' % (timeit.default_timer() - self.start)
def loadData():
    """Load the benchmark records from the JSON file named by ``fname``.

    Returns the value stored under the top-level ``'features'`` key of the
    parsed document.  The time taken to read and parse the file is
    reported on ``out`` through a ``TimeIt`` instance.
    """
    t = TimeIt('loading data from %s' % fname)
    # the context manager closes the file even when parsing fails
    with open(fname, 'r') as f:
        data = json.load(f)['features']
    # report through finish() so the timing goes to ``out`` like every
    # other measurement instead of straight to stdout
    t.finish()
    return data
def connect(writeAck = True):
    """Connect to the benchmark server and return a freshly dropped database.

    writeAck -- when false, the client is created with write concern
                ``w=0`` so that writes are not acknowledged; when true the
                driver's default write concern is used.

    Returns ``client[dbName]``.  Any existing database named ``dbName`` is
    dropped first, so calling this function also serves as clean-up.
    """
    options = {}
    if not writeAck:
        # Set the write concern at construction time: newer PyMongo
        # releases expose ``client.write_concern`` as a read-only object,
        # so it cannot be modified after the client exists.
        options['w'] = 0
    client = pymongo.MongoClient(host, port, **options)
    client.drop_database(dbName)
    return client[dbName]
def getCollection(db):
    """Return the benchmark collection of *db* after dropping it.

    db -- database object providing ``drop_collection`` and item access.
    """
    name = colName
    # drop first so every run starts without the previous run's documents
    db.drop_collection(name)
    return db[name]
def benchmark(data, Ngroup = None, writeAck = True, index=False):
    """Time the insertion of *data* into a fresh collection, batch by batch.

    data     -- sequence of documents to insert (must support ``len`` and
                slicing).
    Ngroup   -- number of documents per insert call; ``None`` inserts all
                of *data* in a single call.
    writeAck -- forwarded to ``connect``; false disables write
                acknowledgement.
    index    -- when true, an index on ``'date'`` is created as part of
                the timed section.

    Raises ValueError when *Ngroup* is given but smaller than 1.  The
    elapsed time is reported on ``out`` through ``TimeIt``.
    """
    count = len(data)
    if Ngroup is None:
        Ngroup = count
    elif Ngroup < 1:
        raise ValueError('Ngroup must be a positive integer, got %r' % (Ngroup,))
    collection = getCollection(connect(writeAck))
    # Ngroup can only be 0 here when data is empty; avoid dividing by zero
    N = count // Ngroup if Ngroup else 0
    t = TimeIt('Insert %i records in %5i batches of %5i' % (count, N, Ngroup))
    # NOTE(review): Collection.insert is the legacy PyMongo insert API --
    # confirm it is available in the driver version being targeted.
    for i in range(N):
        collection.insert(data[Ngroup * i : Ngroup * (i + 1)])
    # NOTE(review): the original indentation was lost; the index is assumed
    # to be created once after the full batches rather than per batch --
    # confirm against the author's intent.
    if index:
        collection.create_index('date')
    # insert whatever is left over when count is not a multiple of Ngroup
    if count > N * Ngroup:
        collection.insert(data[Ngroup * N:])
    t.finish()
if __name__ == '__main__':
    # load data once and reuse it for every configuration
    data = loadData()
    # batch sizes to compare; None inserts every record in a single call
    batchSizes = (1, 10, 100, 1000, None)
    # run every combination of indexing and write acknowledgement
    for index in (False, True):
        for writeAck in (True, False):
            out.write('\n')
            indexStr = 'with' if index else 'without'
            ackStr = 'with' if writeAck else 'without'
            out.write('Testing %s write acknowledgement, %s indexing\n'
                      % (ackStr, indexStr))
            out.write('=' * 40 + '\n')
            for Ngroup in batchSizes:
                benchmark(data, Ngroup, writeAck, index)
    # clean up: connect() drops the benchmark database as a side effect
    connect()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment