# MattFaus/appengine_stats.py

## appengine_stats.py
import logging
import os
import sys
import time

from google.appengine.ext import db

import instance_cache
import user_util

# References to the unpatched datastore entry points.  The instrumented
# wrappers below delegate to these, and get_db_index_stats() compares
# db.Query.run against _orig_db_query_run to tell whether it was replaced.
_orig_db_query_run = db.Query.run
_orig_db_put_async = db.put_async
_orig_db_model_put = db.Model.put

# time.time() value recorded the last time _new_db_query_run logged the
# stats; starts at 0, i.e. nothing logged since this module was loaded.
_LAST_LOG = 0

def _new_db_query_run(self, *args, **kwargs):
    """A version of db.Query.run that stores used indexes in instance-cache.

    Runs the original db.Query.run, then increments one instance-cache
    counter whose key names the call site and every index the query
    reports via index_list() (kind, ancestor flag, and each property
    with its direction).  When more than five minutes have passed since
    the last dump, the accumulated stats are also written to the log.

    Arguments:
        *args, **kwargs: forwarded unchanged to the original db.Query.run.

    Returns:
        Whatever the original db.Query.run returned.
    """
    retval = _orig_db_query_run(self, *args, **kwargs)
    msg = ['query: %s' % _get_caller()]

    try:
        index_list = self.index_list()
    except AssertionError as why:
        # Appengine raises an AssertionError if there's no index-list
        # because the query doesn't support it (an IN query, eg).  Not
        # the exception I would have chosen, but oh well.
        index_list = []
        msg.append(str(why))

    for ix in index_list:
        kind = ix.kind()
        if ix.has_ancestor():
            msg.append('%s.(ancestor)' % kind)
        # Use sorted() rather than list.sort(): properties() may hand
        # back the index's own list, and sorting that in place would
        # reorder it for anyone who calls index_list() on this query later.
        for (name, direction) in sorted(ix.properties()):
            dir_str = (' (descending)' if direction == db.Index.DESCENDING
                       else '')    # ascending is the default
            msg.append('%s.%s%s' % (kind, name, dir_str))

    # Identical call-site/index combinations share one counter, so
    # repeated use of the same indices doesn't take up extra space.
    instance_cache.increment(' -- '.join(msg))

    # Log every 5 minutes.  That way, even if the instance dies, we
    # don't lose its data.  The '5 minutes' is arbitrary.
    global _LAST_LOG
    if time.time() - _LAST_LOG > 5 * 60:
        logging.info(get_db_index_stats())
        _LAST_LOG = time.time()

    return retval


def _new_db_put_async(models, *args, **kwargs):
    """Wrap db.put_async (which db.put also calls) to tally puts.

    Delegates to the original db.put_async, then increments an
    instance-cache counter keyed by the call site and the class name of
    each model passed in.  Returns the original call's result.
    """
    result = _orig_db_put_async(models, *args, **kwargs)
    parts = ['query: %s' % _get_caller()]
    suffix = '.<db.put>'
    try:
        # Usual case: 'models' is an iterable of model instances.
        for entity in models:
            parts.append(entity.__class__.__name__ + suffix)
    except TypeError:
        # Not iterable, so treat it as a single model instance.
        parts.append(models.__class__.__name__ + suffix)
    cache_key = ' -- '.join(parts)
    instance_cache.increment(cache_key)
    # Periodic logging is left to _new_db_query_run.
    return result


def _new_db_model_put(self, *args, **kwargs):
    """Wrap entity.put() so that each put is tallied in the instance cache.

    Calls the original db.Model.put, then increments a counter keyed by
    the call site and this entity's class name.  Returns the original
    call's result.
    """
    result = _orig_db_model_put(self, *args, **kwargs)
    cache_key = ' -- '.join([
        'query: %s' % _get_caller(),
        '%s.<put>' % self.__class__.__name__,
    ])
    instance_cache.increment(cache_key)
    # Periodic logging is left to _new_db_query_run.
    return result


def _get_caller():
    """Return filename:linenumber of our caller's caller."""
    try:
        caller_frame = sys._getframe(2)
        while 'ext/db' in caller_frame.f_code.co_filename:
            caller_frame = caller_frame.f_back
        filename = caller_frame.f_code.co_filename
        lineno = caller_frame.f_lineno
        filename = os.path.relpath(filename, 'your-root-directory-here')
        return '%s:%s' % (filename, lineno)
    except Exception, why:
        # filename/linenum isn't essential.  Don't die if we can't get it.
        return '<unknown>:0:%s' % why


def get_db_index_stats():
    """Return a newline-joined summary of db-index use on this instance.

    The report starts with a header built from the INSTANCE_ID and
    DEFAULT_VERSION_HOSTNAME environment variables, followed by one
    '<count>: <where> -- <indices>' line per cached entry.  If there is
    no data and db.Query.run is still the original, a hint on how to
    enable collection is emitted instead.
    """
    usage = _get_db_index_information_from_cache()
    # TODO(csilvers): say what instance this data is coming from.
    instance_id = os.environ.get('INSTANCE_ID', '<unknown>')
    hostname = os.environ.get('DEFAULT_VERSION_HOSTNAME', '<unknown>')
    lines = [
        'db-index use information for http://%s.%s' % (instance_id, hostname),
        '(<count>: <where> -- <indices>):',
        '',
    ]
    if usage or not (db.Query.run == _orig_db_query_run):
        lines.extend('%s: %s' % (count, info) for (info, count) in usage)
    else:
        lines.append('None.  Pass ?enable=1 to enable stats collection')
    return '\n'.join(lines)
	import logging
	import os
	import sys
	import time

	from google.appengine.ext import db

	import instance_cache
	import user_util

	# References to the unpatched datastore entry points, which the
	# instrumented wrappers below delegate to.
	_orig_db_query_run = db.Query.run
	_orig_db_put_async = db.put_async
	_orig_db_model_put = db.Model.put

	# time.time() value recorded the last time the stats were logged;
	# starts at 0.
	_LAST_LOG = 0

	def _new_db_query_run(self, *args, **kwargs):
	    """A version of db.Query.run that stores used indexes in instance-cache.

	    Runs the original db.Query.run, then increments one instance-cache
	    counter whose key names the call site and every index the query
	    reports via index_list().  When more than five minutes have passed
	    since the last dump, the accumulated stats are also logged.

	    Arguments:
	        *args, **kwargs: forwarded unchanged to the original db.Query.run.

	    Returns:
	        Whatever the original db.Query.run returned.
	    """
	    retval = _orig_db_query_run(self, *args, **kwargs)
	    msg = ['query: %s' % _get_caller()]

	    try:
	        index_list = self.index_list()
	    except AssertionError as why:
	        # Appengine raises an AssertionError if there's no index-list
	        # because the query doesn't support it (an IN query, eg). Not
	        # the exception I would have chosen, but oh well.
	        index_list = []
	        msg.append(str(why))

	    for ix in index_list:
	        kind = ix.kind()
	        if ix.has_ancestor():
	            msg.append('%s.(ancestor)' % kind)
	        # Use sorted() rather than list.sort(): properties() may hand
	        # back the index's own list, and sorting that in place would
	        # reorder it for later callers of index_list() on this query.
	        for (name, direction) in sorted(ix.properties()):
	            dir_str = (' (descending)' if direction == db.Index.DESCENDING
	                       else '')  # ascending is the default
	            msg.append('%s.%s%s' % (kind, name, dir_str))

	    # Identical call-site/index combinations share one counter, so
	    # repeated use of the same indices doesn't take up extra space.
	    instance_cache.increment(' -- '.join(msg))

	    # Log every 5 minutes. That way, even if the instance dies, we
	    # don't lose its data. The '5 minutes' is arbitrary.
	    global _LAST_LOG
	    if time.time() - _LAST_LOG > 5 * 60:
	        logging.info(get_db_index_stats())
	        _LAST_LOG = time.time()

	    return retval


	def _new_db_put_async(models, *args, **kwargs):
	    """Instrumented version of db.put_async (which db.put also calls).

	    Delegates to the original db.put_async, then increments an
	    instance-cache counter keyed by the call site and the class name
	    of each model passed in.

	    Arguments:
	        models: a single model instance, or an iterable of them.
	        *args, **kwargs: forwarded unchanged to the original db.put_async.

	    Returns:
	        Whatever the original db.put_async returned.
	    """
	    retval = _orig_db_put_async(models, *args, **kwargs)
	    msg = ['query: %s' % _get_caller()]
	    # 'models' can be either a single model instance, or a list of them.
	    try:
	        for model in models:
	            msg.append(model.__class__.__name__ + '.<db.put>')
	    except TypeError:
	        # Not iterable, so treat it as a single model instance.
	        msg.append(models.__class__.__name__ + '.<db.put>')
	    instance_cache.increment(' -- '.join(msg))
	    # We could do the log-every-5-minutes thing, but we'll let
	    # _new_db_query_run do it.
	    return retval


	def _new_db_model_put(self, *args, **kwargs):
	    """Like entity.put() but stores put-stats in the instance cache.

	    Arguments:
	        *args, **kwargs: forwarded unchanged to the original db.Model.put.

	    Returns:
	        Whatever the original db.Model.put returned.
	    """
	    retval = _orig_db_model_put(self, *args, **kwargs)
	    msg = ['query: %s' % _get_caller()]
	    msg.append(self.__class__.__name__ + '.<put>')
	    instance_cache.increment(' -- '.join(msg))
	    # We could do the log-every-5-minutes thing, but we'll let
	    # _new_db_query_run do it.
	    return retval


	def _get_caller():
	"""Return filename:linenumber of our caller's caller."""
	try:
	caller_frame = sys._getframe(2)
	while 'ext/db' in caller_frame.f_code.co_filename:
	caller_frame = caller_frame.f_back
	filename = caller_frame.f_code.co_filename
	lineno = caller_frame.f_lineno
	filename = os.path.relpath(filename, 'your-root-directory-here')
	return '%s:%s' % (filename, lineno)
	except Exception, why:
	# filename/linenum isn't essential. Don't die if we can't get it.
	return '<unknown>:0:%s' % why


	def get_db_index_stats():
	    """Return a string summarizing db-index use on this instance.

	    The report starts with a header built from the INSTANCE_ID and
	    DEFAULT_VERSION_HOSTNAME environment variables, followed by one
	    '<count>: <where> -- <indices>' line per cached entry.  If there
	    is no data and db.Query.run is still the original, a hint on how
	    to enable collection is emitted instead.
	    """
	    data = _get_db_index_information_from_cache()
	    # TODO(csilvers): say what instance this data is coming from.
	    output = ['db-index use information for http://%s.%s'
	              % (os.environ.get('INSTANCE_ID', '<unknown>'),
	                 os.environ.get('DEFAULT_VERSION_HOSTNAME', '<unknown>')),
	              '(<count>: <where> -- <indices>):',
	              '']
	    if not data and db.Query.run == _orig_db_query_run:
	        output.append('None. Pass ?enable=1 to enable stats collection')
	    else:
	        for (db_index_info, use_count) in data:
	            output.append('%s: %s' % (use_count, db_index_info))
	    return '\n'.join(output)