Created
August 11, 2016 22:43
-
-
Save jart/a262eea89d52f5697d8d301cee21ea2c to your computer and use it in GitHub Desktop.
Python GZip Benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# tl;dr: compresslevel=1 is 4x faster than the default and nearly as good
#
# https://docs.python.org/2/library/gzip.html
#
# http11 jart@compy://third_party/tensorflow/tensorboard$ python ~/doodle.py
# 1 5% 151ms
# 2 4% 149ms
# 3 4% 143ms
# 4 3% 240ms
# 5 3% 245ms
# 6 2% 288ms
# 7 2% 309ms
# 8 2% 448ms
# 9 2% 608ms
# http11 jart@compy://third_party/tensorflow/tensorboard$ ls -hal ~/doodle
# -rw-r----- 1 jart eng 38M Aug 11 15:30 /usr/local/google/home/jart/doodle
import datetime | |
import gzip | |
import six | |
from six import BytesIO | |
def as_bytes(bytes_or_text):
    """Return `bytes_or_text` as a byte string.

    Args:
      bytes_or_text: A text string (`six.text_type`) or a `bytes` object.

    Returns:
      The UTF-8 encoding of the input when it is text; the input itself,
      unchanged, when it is already `bytes`.

    Raises:
      TypeError: If the input is neither text nor `bytes`.
    """
    # Text is encoded; everything else must already be binary.
    if isinstance(bytes_or_text, six.text_type):
        return bytes_or_text.encode('utf-8')
    if isinstance(bytes_or_text, bytes):
        return bytes_or_text
    message = 'Expected binary or unicode string, got %r' % (bytes_or_text,)
    raise TypeError(message)
# Benchmark: gzip the same payload in memory at every compression level
# (1 through 9) and print one line per level: the level, the compressed size
# as a percentage of the original size, and the elapsed wall-clock time in
# milliseconds.

# Read the payload once, and close the file handle deterministically.
with open('/usr/local/google/home/jart/doodle', 'rb') as doodle:
    content = doodle.read()

# The size ratio below divides by len(content); fail with a clear message
# instead of a bare ZeroDivisionError when the input file is empty.
if not content:
    raise SystemExit('Input file is empty; nothing to benchmark')

for n in range(1, 10):
    t1 = datetime.datetime.now()
    out = BytesIO()
    # Leaving the `with` block closes the GzipFile, which finishes the gzip
    # stream in `out` before its size is measured, and also closes it if
    # write() raises.
    with gzip.GzipFile(fileobj=out, mode='wb', compresslevel=n) as f:
        f.write(as_bytes(content))
    gzip_content = out.getvalue()
    t2 = datetime.datetime.now()
    e = t2 - t1
    # Whole milliseconds using integer arithmetic; includes the `days`
    # component of the timedelta so no part of the duration is dropped.
    ms = '%dms' % ((e.days * 86400 + e.seconds) * 1000 + e.microseconds // 1000)
    # Integer percentage (floored). Integer math avoids float rounding that
    # can truncate an exact percentage down by one (e.g. 29/100 -> 28).
    r = '%d%%' % (len(gzip_content) * 100 // len(content))
    # A single pre-formatted string prints identically under Python 2 and 3.
    print('%d %s %s' % (n, r, ms))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment