Skip to content

Instantly share code, notes, and snippets.

@MattFaus
Created June 7, 2013 23:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MattFaus/5733209 to your computer and use it in GitHub Desktop.
Save MattFaus/5733209 to your computer and use it in GitHub Desktop.
import logging
import os
import sys
import time
from google.appengine.ext import db
import instance_cache
import user_util
# Capture the db entry points as they are at import time.  The instrumented
# wrappers below delegate to these, and get_db_index_stats() compares
# db.Query.run against _orig_db_query_run to detect that stats collection
# has not been enabled.
_orig_db_query_run = db.Query.run
_orig_db_put_async = db.put_async
_orig_db_model_put = db.Model.put
# time.time() of the last stats dump logged by _new_db_query_run.  Starting
# at 0 means the first instrumented query logs right away.
_LAST_LOG = 0
def _new_db_query_run(self, *args, **kwargs):
    """A version of db.Query.run that stores used indexes in instance-cache.

    Runs the real query first, then bumps one instance-cache counter whose
    key is the call site plus every index (and index property) the query
    reported using.  At most once every five minutes the accumulated stats
    are also written to the log.

    Arguments:
        self: the query object db.Query.run is being invoked on.
        *args, **kwargs: passed through untouched to the original
            db.Query.run.

    Returns:
        Whatever the original db.Query.run returned.
    """
    global _LAST_LOG

    retval = _orig_db_query_run(self, *args, **kwargs)

    # NOTE: _get_caller() looks a fixed number of frames up the stack, so
    # it must be called directly from this function, not from a helper.
    msg = ['query: %s' % _get_caller()]
    try:
        index_list = self.index_list()
    except AssertionError as why:
        # Appengine raises an AssertionError if there's no index-list
        # because the query doesn't support it (an IN query, eg).  Not
        # the exception I would have chosen, but oh well.
        index_list = []
        msg.append(str(why))

    for ix in index_list:
        kind = ix.kind()
        if ix.has_ancestor():
            msg.append('%s.(ancestor)' % kind)
        # sorted() works on a copy, so the sequence handed back by the
        # index object is never reordered (and may be a list or a tuple).
        for (name, direction) in sorted(ix.properties()):
            dir_str = (' (descending)' if direction == db.Index.DESCENDING
                       else '')    # ascending is the default
            msg.append('%s.%s%s' % (kind, name, dir_str))

    # We don't need to document that we're using the same indices
    # multiple times -- that just takes up space.  So we keep a
    # per-key count in the instance cache instead.
    instance_cache.increment(' -- '.join(msg))

    # Log every 5 minutes.  That way, even if the instance dies, we
    # don't lose its data.  The '5 minutes' is arbitrary.
    if time.time() - _LAST_LOG > 5 * 60:
        logging.info(get_db_index_stats())
        _LAST_LOG = time.time()

    return retval
def _new_db_put_async(models, *args, **kwargs):
    """Instrumented version of db.put_async (which db.put also calls).

    Issues the real put, then bumps an instance-cache counter keyed by the
    call site and the class name of every model that was passed in.

    Arguments:
        models: either a single model instance, or a list of them.
        *args, **kwargs: forwarded unchanged to the original db.put_async.

    Returns:
        Whatever the original db.put_async returned.
    """
    result = _orig_db_put_async(models, *args, **kwargs)

    # _get_caller() counts stack frames, so call it from this frame only.
    parts = ['query: %s' % _get_caller()]
    suffix = '.<db.put>'
    try:
        # The iterable case: one entry per model.
        parts.extend(m.__class__.__name__ + suffix for m in models)
    except TypeError:
        # Not iterable, so 'models' is a single model instance.
        parts.append(models.__class__.__name__ + suffix)

    instance_cache.increment(' -- '.join(parts))
    # No periodic logging here; _new_db_query_run takes care of that.
    return result
def _new_db_model_put(self, *args, **kwargs):
    """Like entity.put() but stores put-stats in the instance cache.

    Performs the original put, then bumps an instance-cache counter keyed
    by the call site and the entity's class name.

    Returns:
        Whatever the original db.Model.put returned.
    """
    result = _orig_db_model_put(self, *args, **kwargs)

    # _get_caller() counts stack frames, so call it from this frame only.
    where = 'query: %s' % _get_caller()
    what = self.__class__.__name__ + '.<put>'
    instance_cache.increment(' -- '.join((where, what)))

    # No periodic logging here; _new_db_query_run takes care of that.
    return result
def _get_caller():
    """Return filename:linenumber of our caller's caller.

    Starts two frames up the stack and keeps walking outward while the
    frame's filename contains 'ext/db', so that the reported location is
    the code that invoked the db library rather than the library itself.

    Returns:
        '<filename>:<lineno>', with the filename made relative to the
        root directory named below.  If anything goes wrong the result is
        '<unknown>:0:<reason>' instead; this function never raises.
    """
    try:
        frame = sys._getframe(2)
        # If we walk off the top of the stack, frame becomes None and the
        # resulting AttributeError is handled by the except clause below.
        while 'ext/db' in frame.f_code.co_filename:
            frame = frame.f_back
        filename = frame.f_code.co_filename
        lineno = frame.f_lineno
        # NOTE(review): 'your-root-directory-here' looks like a placeholder
        # to be replaced with the project root -- confirm before relying
        # on the relative paths this produces.
        filename = os.path.relpath(filename, 'your-root-directory-here')
        return '%s:%s' % (filename, lineno)
    except Exception as why:
        # filename/linenum isn't essential.  Don't die if we can't get it.
        return '<unknown>:0:%s' % why
def get_db_index_stats():
    """Return a string summarizing db-index use on this instance.

    The summary is a header naming the instance (taken from the
    INSTANCE_ID and DEFAULT_VERSION_HOSTNAME environment variables),
    followed by one '<count>: <where> -- <indices>' line per cached entry.
    When there is no data and db.Query.run is still the original function,
    a hint on how to enable collection is emitted instead.
    """
    data = _get_db_index_information_from_cache()

    # TODO(csilvers): say what instance this data is coming from.
    instance_id = os.environ.get('INSTANCE_ID', '<unknown>')
    hostname = os.environ.get('DEFAULT_VERSION_HOSTNAME', '<unknown>')
    lines = [
        'db-index use information for http://%s.%s' % (instance_id, hostname),
        '(<count>: <where> -- <indices>):',
        '',
    ]

    # Nothing collected and the query hook is not installed.
    if not data and db.Query.run == _orig_db_query_run:
        lines.append('None. Pass ?enable=1 to enable stats collection')
        return '\n'.join(lines)

    lines.extend('%s: %s' % (use_count, db_index_info)
                 for (db_index_info, use_count) in data)
    return '\n'.join(lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment