@aliles
Created July 12, 2011 12:51
Benchmark decompression performance in Python
"Benchmark decompression performance"
from collections import namedtuple
import argparse
import bz2
import functools
import gzip
import hashlib
import itertools
import logging
import sys
import time
import zipfile

# Timing, byte count, and MD5 digest for a single benchmark run.
Result = namedtuple('Result', 'clock_cpu clock_wall bytes digest')

def gzip_reader(filename):
    source = gzip.GzipFile(filename, 'r')
    return source

def zipfile_reader(filename):
    # Benchmark only the first member of the zip archive.
    zip = zipfile.ZipFile(filename, 'r')
    info = zip.infolist()[0]
    source = zip.open(info)
    return source

def bzip_reader(filename):
    source = bz2.BZ2File(filename, 'r')
    return source

def text_reader(filename):
    source = open(filename, 'r')
    return source

def open_source(algorithm, filename):
    """Open filename with the reader for algorithm.

    If algorithm is None it is guessed from the file extension.
    """
    readers = {
        'gz': gzip_reader,
        'gzip': gzip_reader,
        'zip': zipfile_reader,
        'bz': bzip_reader,
        'bz2': bzip_reader,
        'bzip': bzip_reader,
        'plain': text_reader,
        'txt': text_reader,
    }
    if algorithm is None:
        algorithm = filename.rsplit('.', 1)[1]
    opener = readers[algorithm]
    return opener(filename)

def hash_content(algorithm, filename, limit):
    """Read filename line by line, timing decompression and hashing.

    Returns a Result with CPU time, wall time, bytes read and MD5 digest.
    """
    reader = open_source(algorithm, filename)
    if limit is not None:
        reader = itertools.islice(reader, limit)
    hash = hashlib.md5()
    accumulator = 0
    start_cpu = time.clock()
    start_wall = time.time()
    for line in reader:
        accumulator += len(line)
        hash.update(line)
    stop_cpu = time.clock()
    stop_wall = time.time()
    duration_cpu = stop_cpu - start_cpu
    duration_wall = stop_wall - start_wall
    return Result(duration_cpu, duration_wall, accumulator, hash.hexdigest())

def main():
    args = parse_cmdline()
    hash_run = functools.partial(hash_content, args.algorithm, args.file, args.limit)
    set_logging(args.quiet)
    if args.warmup:
        logging.debug('warmup run started')
        result = hash_run()
        logging.debug('warmup run completed in %d seconds' % result.clock_wall)
    results = []
    for run_num in xrange(1, args.runs + 1):
        logging.debug('real run %d started' % run_num)
        results.append(hash_run())
        logging.debug('real run %d completed in %d seconds' %
                      (run_num, results[-1].clock_wall))
    # Use float division so throughput is not truncated to whole megabytes.
    total_bytes = reduce(lambda a, r: a + r.bytes, results, 0) / 1024.0 / 1024.0
    total_wall = reduce(lambda a, r: a + r.clock_wall, results, 0.0)
    total_cpu = reduce(lambda a, r: a + r.clock_cpu, results, 0.0)
    logging.info('Average throughput  : %.8f (MB/s)' % (total_bytes / total_cpu))
    logging.info('Wall time average   : %.8f (s)' % (total_wall / args.runs))
    logging.info('CPU time average    : %.8f (s)' % (total_cpu / args.runs))

def parse_cmdline():
    parser = argparse.ArgumentParser(description='Benchmark decompression of file.')
    parser.add_argument('-c', '--compression', dest='algorithm',
                        choices=['plain', 'zip', 'gzip', 'bzip'],
                        help='explicitly choose compression algorithm')
    parser.add_argument('-l', '--limit', dest='limit', default=None, type=int,
                        help='limit runs to first N lines')
    parser.add_argument('-r', '--runs', dest='runs', default=3, type=int,
                        help='number of times to run decompression test')
    parser.add_argument('-q', '--quiet', dest='quiet', default=False,
                        action='store_true', help='suppress display of progress information')
    parser.add_argument('-w', '--warmup', dest='warmup', default=False,
                        action='store_true', help='run an initial, untimed, decompression test')
    parser.add_argument('file', help='test file for decompression tests')
    return parser.parse_args()

def set_logging(quiet=False):
    format = "%(message)s"
    level = logging.DEBUG if not quiet else logging.INFO
    logging.basicConfig(format=format, level=level)


if __name__ == '__main__':
    main()
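
A minimal sketch of driving the benchmark directly from Python rather than the command line, assuming the gist is saved as a module you can import from; the filename data.txt.gz is only a placeholder, and passing None as the algorithm would trigger the extension-based detection in open_source:

    # data.txt.gz is a placeholder path; substitute any supported test file.
    result = hash_content('gzip', 'data.txt.gz', None)
    print 'read %d bytes in %.2f wall seconds (md5 %s)' % (
        result.bytes, result.clock_wall, result.digest)

From the shell, a timed run with one untimed warm-up pass and five repetitions would look like python benchmark_decompression.py -w -r 5 data.txt.gz, where the script name is whatever you saved the gist as.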