Skip to content

Instantly share code, notes, and snippets.

@chdorner
Last active June 23, 2023 20:13
Show Gist options
  • Save chdorner/4660b3069c69a211ea733fb5beaf0ba7 to your computer and use it in GitHub Desktop.
Save chdorner/4660b3069c69a211ea733fb5beaf0ba7 to your computer and use it in GitHub Desktop.
SQLAlchemy scan large table in batches

My database had 72k annotations at the time I ran these benchmarks; here are the results:

$ python scripts/batch_bench.py conf/development-app.ini dumb
Memory summary: start
      types |   # objects |   total size
=========== | =========== | ============
       dict |       13852 |     12.46 MB
  frozenset |         349 |     11.85 MB
VM: 327.29Mb
Memory summary: after dumb query
      types |   # objects |   total size
=========== | =========== | ============
       dict |       14201 |     15.79 MB
  frozenset |         350 |     11.85 MB
VM: 575.88Mb

$ python scripts/batch_bench.py conf/development-app.ini stream
Memory summary: start
      types |   # objects |   total size
=========== | =========== | ============
       dict |       13852 |     12.46 MB
  frozenset |         349 |     11.85 MB
VM: 327.29Mb
Memory summary: after streaming
      types |   # objects |   total size
=========== | =========== | ============
       dict |       14201 |     15.79 MB
  frozenset |         350 |     11.85 MB
VM: 495.90Mb

$ python scripts/batch_bench.py conf/development-app.ini yield_per
Memory summary: start
      types |   # objects |   total size
=========== | =========== | ============
       dict |       13852 |     12.46 MB
  frozenset |         349 |     11.85 MB
VM: 327.29Mb
Memory summary: after yield_per
      types |   # objects |   total size
=========== | =========== | ============
       dict |       14198 |     12.81 MB
  frozenset |         350 |     11.85 MB
VM: 326.30Mb
# -*- coding: utf-8 -*-
import argparse
import itertools
from pympler import summary, muppy
import psutil
from pyramid import paster
from pyramid.request import Request
from sqlalchemy.orm import scoped_session, sessionmaker
from h import db
from h.api import models
# Command-line interface: a paster configuration URI plus the benchmark
# variant to run ('dumb', 'stream', or 'yield_per' — see main()).
parser = argparse.ArgumentParser('batch_bench')
parser.add_argument('config_uri', help='paster configuration URI')
parser.add_argument('type')
# Thread-local session registry; bound to a real engine in main() once the
# app settings have been bootstrapped.
Session = scoped_session(sessionmaker())
def main():
    """Bootstrap the app, then run one batching strategy and report memory.

    The strategy is selected by the ``type`` CLI argument:

    * ``dumb`` — plain query; the full result set is buffered client-side.
    * ``stream`` — server-side cursor via ``stream_results=True``.
    * ``yield_per`` — SQLAlchemy's ``Query.yield_per`` partial loading.

    Each branch drains the query in batches of 100 and prints a memory
    summary before and after.
    """
    args = parser.parse_args()

    # Bootstrap a pyramid environment so registry settings are available.
    request = Request.blank('/')
    env = paster.bootstrap(args.config_uri, request=request)
    request.root = env['root']

    engine = db.make_engine(request.registry.settings)
    Session.configure(bind=engine)

    batchsize = 100
    memory_usage('start')

    if args.type == 'dumb':
        query = Session.query(models.Annotation)
        for _ in _batch_iter(batchsize, query):
            pass
        memory_usage('after dumb query')
    elif args.type == 'stream':
        query = Session.query(models.Annotation).execution_options(stream_results=True)
        for _ in _batch_iter(batchsize, query):
            pass
        memory_usage('after streaming')
    elif args.type == 'yield_per':
        query = Session.query(models.Annotation).yield_per(batchsize)
        for _ in _batch_iter(batchsize, query):
            pass
        memory_usage('after yield_per')
def get_virtual_memory_usage_kb():
    """Return the process's current virtual memory size in KiB, as a float.

    Uses ``psutil.Process().memory_info()`` rather than the deprecated
    ``memory_info_ex()`` (removed in psutil 4.x); both expose the same
    ``vms`` field, so the reported value is unchanged.
    """
    return float(psutil.Process().memory_info().vms) / 1024.0
def memory_usage(where):
"""
Print out a basic summary of memory usage.
Taken from: https://hyp.is/AVQ-y75KH9ZO4OKSl23K/www.mobify.com/blog/sqlalchemy-memory-magic/
"""
mem_summary = summary.summarize(muppy.get_objects())
print "Memory summary:", where
summary.print_(mem_summary, limit=2)
print "VM: %.2fMb" % (get_virtual_memory_usage_kb() / 1024.0)
def _batch_iter(n, iterable):
it = iter(iterable)
while True:
batch = list(itertools.islice(it, n))
if not batch:
return
yield batch
# Run the benchmark only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment