Skip to content

Instantly share code, notes, and snippets.

@hotsyk
Forked from sergray/mongolyze.py
Created February 22, 2012 13:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hotsyk/1885210 to your computer and use it in GitHub Desktop.
Save hotsyk/1885210 to your computer and use it in GitHub Desktop.
Python script for automated analysis of slow queries in MongoDB
"""
Script for automated analysis of profiling data in MongoDB,
gathered by Mongo with db.setProfilingLevel(1).
See <http://www.mongodb.org/display/DOCS/Database+Profiler>
TODO: pass collection and database with profiling data in arguments
TODO: make thread-safe
"""
from collections import defaultdict
# Database that is inspected and the collection inside it that holds the
# profiling records.
MONGO_DB = 'test'
PROFILE_COLLECTION = 'system.profile'  # default name of collection with profiling data


def _new_stats():
    """Return a fresh, zeroed statistics record for one stats key."""
    return dict(
        count=0,
        millis_sum=0, millis_min=None, millis_max=None,
        nscanned_sum=0, nscanned_min=None, nscanned_max=None,
    )


# global mapping of (collection, query_fields) to their statistics data;
# a missing key is created on first access with an independent record
QSTATS = defaultdict(_new_stats)
def get_profile_collection():
    """Return mongo collection containing profiling records.

    Opens a client with pymongo's default connection settings and returns
    the ``PROFILE_COLLECTION`` collection of the ``MONGO_DB`` database.
    """
    # Function-local import: pymongo is only needed once data is fetched.
    try:
        # ``Connection`` was removed in PyMongo 3.0; ``MongoClient`` is its
        # replacement and is available since PyMongo 2.4.
        from pymongo import MongoClient
    except ImportError:
        # Older PyMongo releases only ship ``Connection``.
        from pymongo import Connection as MongoClient
    client = MongoClient()
    return client[MONGO_DB][PROFILE_COLLECTION]
def extract_collection_query(prof_rec):
    """Returns tuple of collection name and list of query fields.

    ``prof_rec`` is one profiling record (a mapping).  Its ``ns`` entry is
    required; ``command`` is required for ``$cmd`` namespaces.  The record
    is only read, never modified.

    Query fields are returned as dotted paths (``u'a.$gt'``).  Fields used
    for sorting via ``$orderby`` are reported with a trailing
    ``.$orderby`` component.
    """
    ns = prof_rec[u'ns']
    # Everything after the database name; collection names may themselves
    # contain dots (e.g. ``system.profile``), so split only once.
    ns_collection = ns.split(u'.', 1)[-1]

    if ns.endswith(u'$cmd'):
        cmd_info = prof_rec[u'command']
        qry_fields = extract_fields(cmd_info.get(u'query') or {})
        # The first entry that is neither the query nor the projection is
        # taken as ``{<command name>: <collection>}``.  This relies on the
        # command document keeping its key order -- NOTE(review): confirm
        # for the mapping type the driver returns.
        collection = next(
            (cmd_info[key] for key in cmd_info
             if key not in (u'query', u'fields')),
            ns_collection,
        )
    else:
        collection = ns_collection
        # Not every profiled operation carries a query document.
        query = prof_rec.get(u'query') or {}
        if u'$query' in query:
            # Wrapped form: {$query: {...}, $orderby: {...}}
            qry_fields = extract_fields(query[u'$query'])
        else:
            qry_fields = extract_fields(query)
        if u'$orderby' in query:
            qry_fields.extend(
                f + [u'$orderby'] for f in extract_fields(query[u'$orderby'])
            )
    return (collection, [u'.'.join(f) for f in qry_fields])
def extract_fields(query, parent_fields=None):
    """Recursively descend query prototype and return list of field names.

    Each returned item is a list of path components leading to one leaf of
    ``query``, prefixed with ``parent_fields``.  A value counts as a leaf
    unless it is a ``dict``, in which case it is descended into; an empty
    nested dict therefore contributes no field.

    ``query`` may be ``None`` or empty, which yields an empty list.
    ``parent_fields`` is never modified.
    """
    if not query:
        return []
    prefix = list(parent_fields) if parent_fields else []
    fields = []
    for key, value in query.items():
        path = prefix + [key]
        if isinstance(value, dict):
            fields.extend(extract_fields(value, path))
        else:
            fields.append(path)
    return fields
def _update_stats(col, qry_fields, prof_rec):
    """Fold one profiling record into the global ``QSTATS`` mapping.

    The entry is keyed by ``(col, tuple(qry_fields))``.  Its ``count`` is
    always incremented; the sum/min/max of ``millis`` and ``nscanned`` are
    updated only when the record carries that value.
    """
    stats = QSTATS[(col, tuple(qry_fields))]
    stats['count'] += 1
    for metric in ('millis', 'nscanned'):
        value = prof_rec.get(metric)
        # Compare against None explicitly: 0 is a legitimate measurement
        # and must be able to become the recorded minimum.
        if value is None:
            continue
        stats[metric + '_sum'] += value
        min_key, max_key = metric + '_min', metric + '_max'
        if stats[min_key] is None or value < stats[min_key]:
            stats[min_key] = value
        if stats[max_key] is None or value > stats[max_key]:
            stats[max_key] = value
def show_stats():
    """Print one summary line per entry of the global ``QSTATS`` mapping.

    Each line shows the collection, the tuple of query fields, the number
    of records and the average ``millis`` / ``nscanned`` per record.  The
    averages are floats, or ``None`` for an entry whose count is zero.
    """
    for (col, fields), stats in QSTATS.items():
        count = stats['count']
        if count:
            # float() keeps the fractional part under Python 2 as well,
            # where int / int would truncate.
            avg_millis = float(stats['millis_sum']) / count
            avg_nscanned = float(stats['nscanned_sum']) / count
        else:
            avg_millis = avg_nscanned = None
        # Single parenthesised argument: valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("%s %s count=%d avg_millis=%r avg_nscanned=%r" % (
            col, fields, count, avg_millis, avg_nscanned))
def analyze_profiling_data():
    """Walk every record of the profiling collection and accumulate its statistics."""
    records = get_profile_collection().find()
    for record in records:
        # Reduce the record to its (collection, query fields) pair, then
        # account for it in the global statistics.
        collection, fields = extract_collection_query(record)
        _update_stats(collection, fields, record)
if __name__ == '__main__':
    # Script entry point: gather statistics from the profiling collection
    # first, then print the per-query summary.
    analyze_profiling_data()
    show_stats()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment