Skip to content

Instantly share code, notes, and snippets.

@solidpple
Created December 8, 2017 05:40
import os
import sys
if sys.version.startswith("3"):
import io
io_method = io.BytesIO
else:
import cStringIO
io_method = cStringIO.StringIO
import gzip
import subprocess
import time
dirname = "test"
fl = os.listdir(dirname)
fl.sort()
ttime = [0, 0]
runs = 5
for i in range(2 * runs):
st = time.time()
for fn in fl:
if not fn.endswith(".gz"):
continue
cc = 0
lc = 0
sz = 0
fullfn = os.path.join(dirname, fn)
sz += os.stat(fullfn)[6]
if i % 2 == 0:
fh = gzip.GzipFile(fullfn, "r")
else:
p = subprocess.Popen(["zcat", fullfn], stdout = subprocess.PIPE)
fh = io_method(p.communicate()[0])
assert p.returncode == 0
for line in fh:
lc += 1
cc += len(line)
et = time.time()
dt = et - st
ttime[i % 2] += dt
print("time-taken = %0.2f seconds, 1000 characters per second = %0.0f, file size per second = %0.0f, character count=%s, line count=%s file size = %s" % (dt, 0.001 * cc / dt, 0.001 * sz / dt, cc, lc, sz))
print("\nAverages")
print(" gzip.open - %0.1f seconds" % (ttime[0] / runs))
print(" zcat and pipe - %0.1f seconds" % (ttime[1] / runs))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment