Skip to content

Instantly share code, notes, and snippets.

@jart
Created August 11, 2016 22:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jart/a262eea89d52f5697d8d301cee21ea2c to your computer and use it in GitHub Desktop.
Save jart/a262eea89d52f5697d8d301cee21ea2c to your computer and use it in GitHub Desktop.
Python GZip Benchmark
#!/usr/bin/env python
#
# tl;dr: compresslevel=1 is 4x faster than the default and nearly as good
#
# https://docs.python.org/2/library/gzip.html
#
# http11 jart@compy://third_party/tensorflow/tensorboard$ python ~/doodle.py
# 1 5% 151ms
# 2 4% 149ms
# 3 4% 143ms
# 4 3% 240ms
# 5 3% 245ms
# 6 2% 288ms
# 7 2% 309ms
# 8 2% 448ms
# 9 2% 608ms
# http11 jart@compy://third_party/tensorflow/tensorboard$ ls -hal ~/doodle
# -rw-r----- 1 jart eng 38M Aug 11 15:30 /usr/local/google/home/jart/doodle
import datetime
import gzip
import six
from six import BytesIO
def as_bytes(bytes_or_text):
    """Return the argument as a byte string.

    Args:
      bytes_or_text: A byte string or a unicode (text) string.

    Returns:
      The argument itself when it is already `bytes`; otherwise the text
      encoded as UTF-8.

    Raises:
      TypeError: If the argument is neither bytes nor text.
    """
    # Byte strings pass through untouched.
    if isinstance(bytes_or_text, bytes):
        return bytes_or_text
    # six.text_type is the text string type of the running interpreter.
    if isinstance(bytes_or_text, six.text_type):
        return bytes_or_text.encode('utf-8')
    raise TypeError('Expected binary or unicode string, got %r' %
                    (bytes_or_text,))
# Benchmark: gzip the same payload at every compression level (1-9) and print
# one line per level in the form "<level> <compressed size as % of input> <ms>".

# Read the sample file in binary mode; the `with` block guarantees the handle
# is closed even if the read fails.
with open('/usr/local/google/home/jart/doodle', 'rb') as doodle:
    content = doodle.read()

# The conversion to bytes does not depend on the compression level, so do it
# once instead of on every timed iteration.
payload = as_bytes(content)

for n in range(1, 10):
    t1 = datetime.datetime.now()
    out = BytesIO()
    # Leaving the `with` block closes the GzipFile, which flushes the gzip
    # trailer into `out`; this must happen before getvalue() is called.
    with gzip.GzipFile(fileobj=out, mode='wb', compresslevel=n) as f:
        f.write(payload)
    gzip_content = out.getvalue()
    t2 = datetime.datetime.now()
    e = t2 - t1
    # Whole elapsed milliseconds using integer arithmetic, including the
    # `days` component of the timedelta.
    elapsed_ms = (e.days * 86400 + e.seconds) * 1000 + e.microseconds // 1000
    ms = '%dms' % elapsed_ms
    # Compressed size as a truncated percentage of the input size; an empty
    # input is reported as 0% rather than dividing by zero.
    percent = 100 * len(gzip_content) // len(content) if content else 0
    r = '%d%%' % percent
    # A single pre-formatted argument prints identically whether `print` is
    # the Python 2 statement or the Python 3 function.
    print('%d %s %s' % (n, r, ms))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment