Skip to content

Instantly share code, notes, and snippets.

@mpobrien
Created November 14, 2011 20:45
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mpobrien/1365096 to your computer and use it in GitHub Desktop.
Save mpobrien/1365096 to your computer and use it in GitHub Desktop.
mongo test
import pymongo
import random
from collections import defaultdict
import time
import sys
from pymongo import Connection
# Module-level benchmark setup. These statements run at import time and
# talk to a live MongoDB server.

# Connect using pymongo's defaults (no host/port is passed here).
connection = Connection()
# Drop 'test_db' so each run starts without data from a previous run.
connection.drop_database('test_db')
# Collection targeted by the insert/update calls in the functions below.
collection = connection['test_db']['testing']
# 100-character payload string used for padding and for every $push.
filler = "x" * 100
def get_doctemplate(tree=False, leaf=None):
    """Build the initial value stored under a document's ``logs`` field.

    Args:
        tree: When true, return a three-level dict keyed by 4-hour group
            label (``"0-3"`` ... ``"20-23"``), then hour within the group
            (``'0'``-``'3'``), then quarter hour (``'0'``-``'3'``).  When
            false, return an empty list.
        leaf: Value placed at every leaf of the tree.  Defaults to an empty
            list.  Every leaf receives its own deep copy, so mutating one
            leaf never affects another (or the caller's object).

    Returns:
        A nested ``dict`` when ``tree`` is true, otherwise a new ``[]``.
    """
    if not tree:
        return []

    # Local import keeps this function self-contained; the module's import
    # block does not bring in ``copy``.
    import copy

    if leaf is None:
        leaf = []

    slots = ('0', '1', '2', '3')
    logs = {}
    for hour in range(24):
        # %d renders the bucket bounds as plain integers.
        hourgroup = "%d-%d" % getbucket(hour, 6, 24)
        if hourgroup in logs:
            # Four consecutive hours share one group label; build it once.
            continue
        logs[hourgroup] = dict(
            (hour_slot,
             dict((quarter, copy.deepcopy(leaf)) for quarter in slots))
            for hour_slot in slots
        )
    return logs
def get_treekey(hour, minute):
    """Locate the tree leaf that corresponds to a time of day.

    Args:
        hour: Hour of the day (0-23).
        minute: Minute within the hour (0-59).

    Returns:
        A tuple ``(hourgroup, hourbucket, quarter)``: the 4-hour group label
        such as ``"4-7"``, the hour's offset from the start of that group,
        and the quarter-hour index computed from ``minute``.
    """
    hourrange = getbucket(hour, 6, 24)
    # %d / int() keep the key components integral regardless of the numeric
    # type of the bounds returned by getbucket.
    hourgroup = "%d-%d" % hourrange
    hourbucket = int(hour - hourrange[0])
    quarter = int(4 * float(minute) / 60)
    return hourgroup, hourbucket, quarter
def getbucket(x, numbuckets, range):
    """Return the inclusive ``(low, high)`` bounds of the bucket holding ``x``.

    The interval ``[0, range)`` is split into ``numbuckets`` equal-width
    buckets; for example ``getbucket(5, 6, 24)`` is ``(4, 7)``.

    Args:
        x: Integer value to classify.
        numbuckets: Number of buckets the interval is divided into.
        range: Size of the whole interval.  The name shadows the builtin
            inside this function; it is kept so keyword callers still work.

    Returns:
        A tuple of two integers, both bounds inclusive.

    Raises:
        ZeroDivisionError: If ``numbuckets`` is 0 or larger than ``range``
            (the bucket width would be 0).
    """
    # Floor division keeps the width and bounds integral under true
    # division too; plain ``/`` would yield floats and labels such as
    # "4.0-7.0" when the bounds are formatted.
    bucket_size = range // numbuckets
    low = int(x // bucket_size) * bucket_size
    return (low, low + (bucket_size - 1))
def generate_uuids(num, length=6):
    """Return ``num`` random identifiers as lowercase hex strings.

    Args:
        num: How many identifiers to generate.
        length: Size of each identifier's value in bytes; every value is in
            ``[0, 2 ** (length * 8))``.

    Returns:
        A list of ``num`` hex strings.  Leading zeros are not emitted, so
        strings may be shorter than ``2 * length`` characters.  Values are
        independent random draws; uniqueness is not enforced.
    """
    # randrange excludes its upper bound.  randint(0, 2 ** bits) could
    # return 2 ** bits itself, which needs one more bit than ``length``
    # bytes can hold.
    limit = 2 ** (length * 8)
    return ["%x" % random.randrange(limit) for _ in range(num)]
def generate_pad(size):
    """Return a list containing the module-level ``filler`` string ``size`` times.

    A ``size`` of zero or less yields an empty list.
    """
    return [filler for _ in range(size)]
def main(args):
    """Run one benchmark configuration selected by command-line arguments.

    Args:
        args: List of strings ``[arraysize, tree, pad]``.
            ``arraysize`` is the number of passes, and also the padding
            length when padding is enabled.  The tree benchmark runs when
            the second item is exactly ``"tree"``; otherwise the array
            benchmark runs.  Padding is enabled when the third item is
            exactly ``"pad"``.  A first argument of ``"x"`` makes the
            function return immediately without doing anything.

    Returns:
        None.  When the argument count is wrong or ``arraysize`` is not an
        integer, the usage line is printed and no benchmark is run.
    """
    usage = "usage: test.py <arraysize> <tree> <pad>"

    # Guard against an empty list before peeking at the first argument.
    if args and args[0] == "x":
        return
    if len(args) != 3:
        print(usage)
        # Stop here: continuing would index past the end of ``args``.
        return
    try:
        array_size = int(args[0])
    except ValueError:
        print(usage)
        return

    tree = (args[1] == "tree")
    use_padding = (args[2] == "pad")

    uuids = generate_uuids(100)
    pad = generate_pad(array_size) if use_padding else None

    # Printed both before and after the run so the configuration is visible
    # at either end of the (long) progress output.
    summary = "Tree:  %s padding: %s size %s" % (tree, use_padding, array_size)
    print(summary)
    if tree:
        insert_blanks(uuids, pad=pad, tree=True)
        test_tree(uuids, array_size)
    else:
        print("padding %s array" % array_size)
        insert_blanks(uuids, pad=pad, tree=False)
        test_array(uuids, array_size)
    print(summary)
def insert_blanks(uuids, pad=None, tree=False):
    """Insert one starter document per uuid and index the collection by uuid.

    Every document holds ``uuid``, the ``logs`` template produced by
    ``get_doctemplate(tree=tree)``, ``day`` set to 1 and, when ``pad`` is
    truthy, a ``padding`` field.  After all inserts the ``padding`` field is
    removed again from every ``day == 1`` document.
    NOTE(review): presumably the insert-then-unset sequence is meant to leave
    the documents stored with spare room for growth -- confirm.

    Args:
        uuids: Iterable of identifier strings, one document is inserted each.
        pad: Optional padding value stored temporarily in each document.
        tree: Passed through to ``get_doctemplate``.
    """
    template = {'logs': get_doctemplate(tree=tree), 'day': 1}
    if pad:
        template['padding'] = pad

    for count, uuid in enumerate(uuids, 1):
        # One progress dot for every 20 inserted documents.
        if count % 20 == 0:
            sys.stdout.write(".")
        # Fresh top-level dict per insert, with ``uuid`` as the first key.
        record = {'uuid': uuid}
        record.update(template)
        collection.insert(record, safe=False)

    if pad:
        collection.update({"day": 1}, {"$unset": {"padding": 1}}, multi=True)
    collection.ensure_index("uuid")
def test_array(uuids, array_size=2500):
    """Benchmark repeated ``$push`` of ``filler`` onto each document's ``logs``.

    Runs ``array_size`` passes; each pass issues one acknowledged upsert per
    uuid.  Prints the wall-clock time of every pass, then the summed time
    and the total number of updates.

    Args:
        uuids: Iterable of identifier strings to update on every pass.
        array_size: Number of passes.
    """
    elapsed_total = 0
    updates = 0
    for pass_no in range(array_size):
        started = time.time()
        print("pass %s" % pass_no)
        for uuid in uuids:
            selector = {"uuid": uuid}
            change = {"$push": {"logs": filler}}
            collection.update(selector, change, upsert=True, safe=True)
            updates += 1
            # Progress dot every 20 updates, flushed so it shows right away.
            if not updates % 20:
                sys.stdout.write(".")
                sys.stdout.flush()
        elapsed = time.time() - started
        print("%s took %s" % (pass_no, elapsed))
        elapsed_total += elapsed
    print("total time: %s" % elapsed_total)
    print("%s updates processed" % updates)
def test_tree(uuids, array_size=2500):
    """Benchmark repeated ``$push`` of ``filler`` into the ``logs`` tree.

    The ``array_size`` passes are spread evenly over one simulated day:
    pass ``i`` maps to the time of day at fraction ``i / array_size`` of 24
    hours, and ``get_treekey`` turns that time into the leaf to push to.
    Each pass issues one acknowledged upsert per uuid.  Prints the
    wall-clock time of every pass, then the summed time and the total
    number of updates.

    Args:
        uuids: Iterable of identifier strings to update on every pass.
        array_size: Number of passes.
    """
    total_time = 0
    j = 0
    for i in range(array_size):
        t1 = time.time()
        seconds = (float(i) / array_size) * (24 * 60 * 60)
        hour = int(seconds / 3600)
        minutes = int((seconds / 60) % 60)
        # The leaf depends only on the pass, not on the uuid, so build the
        # key once per pass.  The "logs." prefix makes the push land in the
        # tree stored under ``logs``; without it MongoDB would create a new
        # top-level field named after the hour group instead.
        field_key = "logs." + ".".join(
            str(part) for part in get_treekey(hour, minutes))
        for uuid in uuids:
            collection.update({"uuid": uuid}, {"$push": {field_key: filler}},
                              upsert=True, safe=True)
            j += 1
            # Progress dot every 20 updates, flushed so it shows right away.
            if j % 20 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()
        diff = time.time() - t1
        print("%s took %s" % (i, diff))
        total_time += diff
    print("total time: %s" % total_time)
    print("%s updates processed" % j)
# Script entry point: hand the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment