Skip to content

Instantly share code, notes, and snippets.

@vincentbernat
Forked from anonymous/bench.py
Created December 14, 2012 21:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vincentbernat/4288844 to your computer and use it in GitHub Desktop.
Save vincentbernat/4288844 to your computer and use it in GitHub Desktop.
Trying to get efficient indexes with MongoDB
#!/usr/bin/env python
from pymongo import Connection, ASCENDING, DESCENDING
from datetime import datetime
# Open a connection using pymongo's defaults (no host or port is passed).
c = Connection()
# Database whose `tasks` collection is queried by every benchmark below.
db = c['asynctask']
# Will select about 7/10 of the logs
# NOTE(review): it is not clear from this file whether the remark above is
# about `now` or about `half` -- confirm against the generated data set.
# `now` is the "$lt" bound on "created" (query 2) and "reserved" (queries 13, 14).
now = datetime.now()
# `half` is the "$gt" bound on "finished" in query 12.
half = datetime(2012, 12, 13, 20, 30, 30, 0)
def explain(r, what):
    """Print a one-line summary of the query plan reported for `what`.

    r    -- query number; printed zero-padded to two digits as the row label.
    what -- object exposing ``explain()`` (in this script, a pymongo cursor).
            The mapping it returns must contain the keys "millis",
            "scanAndOrder", "cursor", "n" and "nscannedObjects".

    If ``explain()`` raises, the exception text is printed in the row instead
    of the statistics, so one failing query does not abort the whole run.
    Returns None.
    """
    try:
        plan = what.explain()
    except Exception as err:
        # Deliberately broad: any failure is reported inline, best-effort.
        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 (print statement) and Python 3 (print function).
        print("| %02d | %s" % (r, err))
    else:
        print("| %02d | %.2fs SAO=%d =%s= n=%d s=%d" % (
            r,
            plan["millis"] / 1000.,      # milliseconds -> seconds
            int(plan["scanAndOrder"]),   # boolean shown as 0/1
            plan["cursor"],
            plan["n"],
            plan["nscannedObjects"]))
def stats(coll):
    """Print the index size and data size of collection `coll`, in MB.

    Runs the "collstats" command on the module-level `db` and reads the
    "totalIndexSize" and "size" fields of the reply; both are scaled down by
    1024 twice and printed with %d.
    """
    report = db.command("collstats", coll)
    index_mb = report["totalIndexSize"] / 1024 / 1024
    data_mb = report["size"] / 1024 / 1024
    print("| | Total index size: %d MB (out of %d MB)" % (index_mb, data_mb))
def _benchmark_cursors():
    """Yield (number, cursor) pairs for the fourteen benchmark queries.

    Written as a generator so that each cursor is only built right before it
    is handed to explain(), exactly as in a straight-line sequence of calls.
    """
    tasks = db.tasks
    common = "most-common-type"

    def newest_first(spec):
        # Filter on `spec`, newest _id first.
        return tasks.find(spec).sort("_id", -1)

    def next_task(status):
        # "Pick the next task to run" query; only the status filter varies.
        cursor = tasks.find({"status": status,
                             "reserved": {"$lt": now},
                             "type": {"$in": [common]},
                             "tried": {"$lt": 5},
                             "ns": "default"})
        ordering = [("priority", DESCENDING), ("_id", ASCENDING)]
        return cursor.sort(ordering).limit(1)

    yield 1, tasks.find({"depends": "unknown"}).limit(1)
    yield 2, tasks.find({"created": {"$lt": now},
                         "status": "finished",
                         "tried": {"$lt": 5}})
    yield 3, tasks.find().sort("_id", -1)
    yield 4, newest_first({"tried": {"$gt": 3}})
    yield 5, newest_first({"type": common})
    yield 6, newest_first({"rtask_id": 18})
    yield 7, newest_first({"tried": {"$gt": 3},
                           "type": common})
    yield 8, newest_first({"status": "finished",
                           "type": common})
    yield 9, newest_first({"status": "queued",
                           "type": common})
    yield 10, tasks.find({"status": "queued",
                          "type": common,
                          "tried": {"$lt": 5}})
    yield 11, newest_first({"status": {"$in": ["finished", "running"]},
                            "type": common})
    yield 12, tasks.find({"status": "finished",
                          "type": common,
                          "finished": {"$gt": half}})
    yield 13, next_task({"$in": ["queued", "running"]})
    yield 14, next_task("queued")


# Run every query through explain(), then report index vs. data size.
for _number, _cursor in _benchmark_cursors():
    explain(_number, _cursor)
stats("tasks")
import random
import string
import time
from datetime import datetime
from pymongo import Connection
# Connect with pymongo's defaults (no host or port is passed) and select the
# database whose `tasks` collection receives the generated documents.
c = Connection()
db = c["asynctask"]
# Pool of task types: 40 of the 50 entries are "most-common-type", so a
# uniform random.choice() picks it 80% of the time; the other ten entries are
# random 20-letter names.  string.ascii_letters is used because
# string.letters is locale-dependent and does not exist in Python 3.
types = ["most-common-type"] * 40 + [
    "".join(random.choice(string.ascii_letters) for _ in range(20))
    for _ in range(10)
]
# 150 worker host names, each a random 10-digit label under dailymotion.com.
hosts = [
    "%s.dailymotion.com" % "".join(random.choice(string.digits) for _ in range(10))
    for _ in range(150)
]
# Timestamp (seconds, local time) at which the first task is created.
start = time.mktime(datetime(2012, 12, 13, 20, 0, 30, 0).timetuple())
# Namespace pool: "default" fills 10 of the 12 slots.
namespaces = ["default"] * 10 + ["other-ns1", "other-ns2"]
# Tasks.
# Documents are generated in creation order and split into three bands by
# their position n:
#   * first half   -- finished, except that one in ten is failed;
#   * next tenth   -- running, except that one in ten is already finished;
#   * the rest     -- queued, except that one in ten is already running.
count = 5000000

# Loop-invariant pool for the retry counter: 1 is ten times as likely as each
# of 2, 3, 4 and 5.
_tried_pool = [1] * 10 + [2, 3, 4, 5]

# xrange keeps the iteration lazy on Python 2, which this script targets.
for n in xrange(count):
    # randrange(10) has exactly ten outcomes, so "== 0" is a true one-in-ten
    # event.  randint(0, 10) includes both end points and would give one in
    # eleven instead.
    one_in_ten = random.randrange(10) == 0
    if n < count // 2:
        status = "failed" if one_in_ten else "finished"
    elif n < 6 * count // 10:
        status = "finished" if one_in_ten else "running"
    else:
        status = "running" if one_in_ten else "queued"

    # Creation times are spread over one hour (3600 s) after `start`; the
    # floor division keeps the offset a whole number of seconds.
    created = start + n * 3600 // count

    # Timeline of the task.  `reserved` is the creation time for queued
    # tasks and 30 s after the latest event otherwise.
    if status == "queued":
        started = None
        finished = None
        reserved = created
    elif status == "running":
        started = created + random.randrange(10, 100)
        finished = None
        reserved = started + 30
    else:  # "finished" or "failed"
        started = created + random.randrange(10, 100)
        finished = started + random.randrange(10, 200)
        reserved = finished + 30

    tried = random.choice(_tried_pool)
    if status == "queued":
        # Queued tasks get one attempt fewer than the drawn value.
        tried -= 1

    is_queued = status == "queued"
    db.tasks.insert({
        "status": status,
        "created": datetime.fromtimestamp(created),
        "started": datetime.fromtimestamp(started) if started is not None else None,
        "finished": datetime.fromtimestamp(finished) if finished is not None else None,
        "reserved": datetime.fromtimestamp(reserved),
        "depends": [],
        "callback_id": "not important",
        "callback_parameters": {"param1": 66, "param2": 8457},
        # Worker fields are only set once a task has left the queue.
        "worker_host": None if is_queued else random.choice(hosts),
        "worker_name": None if is_queued else "tictac-16",
        "result": "failed logs" if status == "failed" else None,
        "ns": random.choice(namespaces),
        "tried": tried,
        "type": random.choice(types),
    })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment