Created
July 12, 2011 12:51
-
-
Save aliles/1077915 to your computer and use it in GitHub Desktop.
Benchmark decompression performance in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"Benchmark decompression performance" | |
from collections import namedtuple | |
import argparse | |
import bz2 | |
import functools | |
import gzip | |
import hashlib | |
import itertools | |
import logging | |
import sys | |
import time | |
import zipfile | |
# Outcome of one benchmark pass: CPU seconds, wall-clock seconds,
# number of bytes read, and the hex digest of the content.
Result = namedtuple('Result', ['clock_cpu', 'clock_wall', 'bytes', 'digest'])
def gzip_reader(filename):
    """Return a gzip file object opened for reading `filename`."""
    return gzip.GzipFile(filename=filename, mode='r')
def zipfile_reader(filename):
    """Return a readable stream for the first member of the zip archive.

    Only the first entry listed in the archive is opened; any further
    members are ignored.
    """
    archive = zipfile.ZipFile(filename, 'r')
    first_member = archive.infolist()[0]
    return archive.open(first_member)
def bzip_reader(filename):
    """Return a bzip2 file object opened for reading `filename`."""
    return bz2.BZ2File(filename, 'r')
def text_reader(filename):
    """Return a file object for reading the uncompressed file `filename`.

    The file is opened in binary mode so that iterating it yields byte
    strings, like the compressed readers do, and the lines can be fed
    straight to a hashlib digest.
    """
    # The `file` builtin exists only on Python 2; `open` works everywhere.
    return open(filename, 'rb')
def open_source(algorithm, filename):
    """Open `filename` with the reader registered for `algorithm`.

    When `algorithm` is None, the text after the last '.' in `filename`
    is used as the lookup key instead.

    Raises:
        IndexError: `algorithm` is None and `filename` contains no '.'.
        KeyError: no reader is registered under the lookup key.
    """
    # Several spellings map onto the same reader function.
    dispatch = {}
    dispatch.update(dict.fromkeys(('gz', 'gzip'), gzip_reader))
    dispatch['zip'] = zipfile_reader
    dispatch.update(dict.fromkeys(('bz', 'bz2', 'bzip'), bzip_reader))
    dispatch.update(dict.fromkeys(('plain', 'txt'), text_reader))

    key = filename.rsplit('.', 1)[1] if algorithm is None else algorithm
    return dispatch[key](filename)
def hash_content(algorithm, filename, limit):
    """Read a source line by line, hashing it with MD5 and timing the pass.

    Args:
        algorithm: reader key handed to open_source, or None.
        filename: path of the file to read.
        limit: maximum number of lines to consume, or None for all lines.

    Returns:
        A Result holding the CPU seconds and wall-clock seconds spent in
        the read loop, the summed length of the lines read, and the MD5
        hex digest of those lines.
    """
    # time.clock() was removed in Python 3.8; use process_time() where it
    # exists and fall back to clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock

    # Opening the source happens before the timers start, so only the
    # read/decompress loop is measured.
    source = open_source(algorithm, filename)
    try:
        lines = source if limit is None else itertools.islice(source, limit)
        digest = hashlib.md5()
        total_bytes = 0
        start_cpu = cpu_clock()
        start_wall = time.time()
        for line in lines:
            total_bytes += len(line)
            digest.update(line)
        stop_cpu = cpu_clock()
        stop_wall = time.time()
    finally:
        # Close the underlying handle (not the islice wrapper) so repeated
        # runs do not accumulate open files.
        source.close()
    return Result(stop_cpu - start_cpu, stop_wall - start_wall,
                  total_bytes, digest.hexdigest())
def main():
    """Run the benchmark described by the command line and log the results.

    Performs an optional warmup pass, then `args.runs` measured passes,
    logging each pass at DEBUG level and the averages at INFO level.
    """
    args = parse_cmdline()
    hash_run = functools.partial(hash_content, args.algorithm, args.file, args.limit)
    set_logging(args.quiet)

    if args.warmup:
        logging.debug('warmup run started')
        warmup = hash_run()
        logging.debug('warmup run completed in %d seconds', warmup.clock_wall)

    results = []
    for run_num in range(1, args.runs + 1):
        logging.debug('real run %d started', run_num)
        # Keep this run's own result so the log line reports its time
        # rather than a stale (or unbound) warmup result.
        result = hash_run()
        results.append(result)
        logging.debug('real run %d complete in %d seconds', run_num, result.clock_wall)

    if not results:
        # Nothing was measured (runs <= 0); averages would divide by zero.
        logging.info('no benchmark runs were performed')
        return

    # Float division keeps fractional megabytes on Python 2 as well.
    total_mb = sum(r.bytes for r in results) / 1024.0 / 1024.0
    total_wall = sum(r.clock_wall for r in results)
    total_cpu = sum(r.clock_cpu for r in results)

    # Throughput is measured against CPU time; a clock reading of zero
    # (input too small for the clock's resolution) is reported as inf.
    throughput = total_mb / total_cpu if total_cpu else float('inf')
    run_count = len(results)
    logging.info('Average through put : %.8f (MB/s)', throughput)
    logging.info('Wall time average : %.8f (s)', total_wall / run_count)
    logging.info('CPU time average : %.8f (s)', total_cpu / run_count)
def parse_cmdline():
    """Parse the process command line and return the argparse namespace.

    The namespace carries `algorithm`, `limit`, `runs`, `quiet`, `warmup`
    and the positional `file`.
    """
    cli = argparse.ArgumentParser(description='Benchmark decompression of file.')
    # (flags, keyword settings) for each argument, in declaration order.
    arguments = (
        (('-c', '--compression'),
         dict(dest='algorithm',
              choices=['plain', 'zip', 'gzip', 'bzip'],
              help='explicitly choose compression algorithm')),
        (('-l', '--limit'),
         dict(dest='limit', default=None, type=int,
              help='limit runs to first N lines')),
        (('-r', '--runs'),
         dict(dest='runs', default=3, type=int,
              help='number of times to run decompression test')),
        (('-q', '--quiet'),
         dict(dest='quiet', default=False, action='store_true',
              help='suppress display of progress information')),
        (('-w', '--warmup'),
         dict(dest='warmup', default=False, action='store_true',
              help='run an initial, untimed, decompression test')),
        (('file',),
         dict(help='test file for decompression tests')),
    )
    for flags, settings in arguments:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
def set_logging(quiet=False):
    """Configure root logging to emit bare messages.

    With `quiet` set the threshold is INFO; otherwise it is DEBUG, so
    per-run progress messages are shown as well.
    """
    if quiet:
        threshold = logging.INFO
    else:
        threshold = logging.DEBUG
    logging.basicConfig(level=threshold, format="%(message)s")
if __name__ == "__main__":
    # Run the benchmark only when executed as a script, not on import.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment