Skip to content

Instantly share code, notes, and snippets.

@jedie
Last active August 29, 2015 14:22
Show Gist options
  • Save jedie/95225b0ecb89f23688f3 to your computer and use it in GitHub Desktop.
Save jedie/95225b0ecb89f23688f3 to your computer and use it in GitHub Desktop.
import os
import sys
import time
# New in Python 3.3: https://docs.python.org/3/library/lzma.html
import lzma
def test_compress_file(filepath, preset, chunk_size=1024 * 1024):
    """Compress *filepath* with LZMA at *preset* and report time and ratio.

    The file is read in chunks of *chunk_size* bytes and fed through a single
    ``lzma.LZMACompressor``. Progress lines and a summary are printed to
    stdout.

    Args:
        filepath: Path of the file to benchmark.
        preset: LZMA preset handed to ``lzma.LZMACompressor``.
        chunk_size: Number of bytes requested per read (default 1 MiB).

    Returns:
        A ``(duration, percent)`` tuple: elapsed seconds, and the compressed
        size expressed as a percentage of the uncompressed size.

    Raises:
        RuntimeError: If the file is empty, or if it ends before the size
            reported by ``os.stat`` has been read.
    """
    uncompressed_size = os.stat(filepath).st_size

    print("_" * 79)
    print(
        "Compress with preset=%i - File: '%s' (%s Bytes uncompressed)\n" % (
            preset, filepath, uncompressed_size
        )
    )

    if uncompressed_size == 0:
        # A ratio relative to zero bytes is undefined; fail with a clear
        # message instead of an anonymous error further down.
        raise RuntimeError("Cannot benchmark empty file: %r" % filepath)

    lzc = lzma.LZMACompressor(preset=preset)

    # Only the length of the output is reported, so count bytes instead of
    # concatenating every compressed chunk into one ever-growing object.
    compressed_size = 0
    read_bytes = 0

    with open(filepath, "rb") as source_file:
        start_time = time.perf_counter()
        next_update = start_time + 0.5
        while True:
            chunk_data = source_file.read(chunk_size)
            if not chunk_data:
                raise RuntimeError(
                    "Unexpected end of file %r after %i of %i Bytes" % (
                        filepath, read_bytes, uncompressed_size
                    )
                )

            read_bytes += len(chunk_data)
            compressed_size += len(lzc.compress(chunk_data))

            done = read_bytes >= uncompressed_size
            if done or time.perf_counter() > next_update:
                percent = (100.0 / uncompressed_size) * read_bytes
                print("\tread %i Bytes - %.1f%%" % (read_bytes, percent))
                next_update = time.perf_counter() + 1
            if done:
                break

        # The compressor buffers data internally; without flush() the tail
        # of the stream is never emitted and the measured size is too small.
        compressed_size += len(lzc.flush())
        duration = time.perf_counter() - start_time

    print("\ncompression time...: %6.2f sec." % duration)
    print("uncompressed.......: %6.2f MBytes" % (uncompressed_size / 1024 / 1024))
    print("compressed.........: %6.2f MBytes" % (compressed_size / 1024 / 1024))

    percent = (100.0 / uncompressed_size) * compressed_size
    print("compression ratio..: %6.2f %%" % percent)

    return duration, percent


# The name starts with "test_", so test runners such as pytest would try to
# collect this benchmark helper as a test case; opt out explicitly.
test_compress_file.__test__ = False
class StatLzma(object):
    """Run the LZMA benchmark over a range of presets and print a summary.

    Results are kept in ``self.stats``, a dict mapping each filename to a
    list of ``(preset, duration, percent)`` tuples.
    """

    def __init__(self, preset_range=10):
        """Store the number of presets to test (``range(preset_range)``)."""
        self.preset_range = preset_range
        self.stats = {}

    def compress(self, filename):
        """Benchmark *filename* once per preset and record each result.

        Any earlier results stored for the same filename are replaced.
        """
        self.stats[filename] = []
        for preset in range(self.preset_range):
            duration, percent = test_compress_file(filename, preset)
            self.stats[filename].append(
                (preset, duration, percent)
            )

    def print_stats(self):
        """Print one table per benchmarked file, sorted by filename.

        The "efficiency" column is the saved percentage divided by the
        duration in seconds; it is shown as ``n/a`` when the measured
        duration is zero, because the quotient is undefined then.
        """
        print("\n\n# LZMA Stats:")
        for filename, data in sorted(self.stats.items()):
            print("\n## file '%s':\n" % filename)
            print("preset | duration | compression percent | efficiency")
            print("------ | -------- | ------------------- | ----------")
            for preset, duration, percent in data:
                if duration > 0:
                    efficiency = "%i" % int(round((100 - percent) / duration, 0))
                else:
                    # Very small inputs or a coarse clock can yield 0.0 sec;
                    # avoid a ZeroDivisionError while printing the report.
                    efficiency = "n/a"
                print(" %i | %.2f sec | %.2f %% | %s" % (
                    preset, duration, percent, efficiency
                ))
if __name__ == "__main__":
    # Benchmark the files named on the command line; without arguments fall
    # back to the two files this script was originally written for.
    filenames = sys.argv[1:] or ["lib/pypy.vm.js", "lib/pypy.vm.js.mem"]

    stat_lzma = StatLzma(preset_range=10)
    for filename in filenames:
        stat_lzma.compress(filename)
    stat_lzma.print_stats()
import os
import sys
import time
import zlib
def test_compress_file(filepath, level, chunk_size=1024 * 1024):
    """Compress *filepath* with zlib at *level* and report time and ratio.

    The file is read in chunks of *chunk_size* bytes and fed through a single
    ``zlib.compressobj`` so the result is one continuous stream. Progress
    lines and a summary are printed to stdout.

    Args:
        filepath: Path of the file to benchmark.
        level: zlib compression level handed to ``zlib.compressobj``.
        chunk_size: Number of bytes requested per read (default 1 MiB).

    Returns:
        A ``(duration, percent)`` tuple: elapsed seconds, and the compressed
        size expressed as a percentage of the uncompressed size.

    Raises:
        RuntimeError: If the file is empty, or if it ends before the size
            reported by ``os.stat`` has been read.
    """
    uncompressed_size = os.stat(filepath).st_size

    print("_" * 79)
    print(
        "Compress with level=%i - File: '%s' (%s Bytes uncompressed)\n" % (
            level, filepath, uncompressed_size
        )
    )

    if uncompressed_size == 0:
        # A ratio relative to zero bytes is undefined; fail with a clear
        # message instead of an anonymous error further down.
        raise RuntimeError("Cannot benchmark empty file: %r" % filepath)

    # One compressor for the whole file: calling zlib.compress() per chunk
    # would emit a separate stream (own header, no shared history) for every
    # chunk and distort the measured ratio.
    compressor = zlib.compressobj(level)

    # Only the length of the output is reported, so count bytes instead of
    # concatenating every compressed chunk into one ever-growing object.
    compressed_size = 0
    read_bytes = 0

    with open(filepath, "rb") as source_file:
        start_time = time.perf_counter()
        next_update = start_time + 0.5
        while True:
            chunk_data = source_file.read(chunk_size)
            if not chunk_data:
                raise RuntimeError(
                    "Unexpected end of file %r after %i of %i Bytes" % (
                        filepath, read_bytes, uncompressed_size
                    )
                )

            read_bytes += len(chunk_data)
            compressed_size += len(compressor.compress(chunk_data))

            done = read_bytes >= uncompressed_size
            if done or time.perf_counter() > next_update:
                percent = (100.0 / uncompressed_size) * read_bytes
                print("\tread %i Bytes - %.1f%%" % (read_bytes, percent))
                next_update = time.perf_counter() + 1
            if done:
                break

        # Emit whatever the compressor still holds so the size is complete.
        compressed_size += len(compressor.flush())
        duration = time.perf_counter() - start_time

    print("\ncompression time...: %6.2f sec." % duration)
    print("uncompressed.......: %6.2f MBytes" % (uncompressed_size / 1024 / 1024))
    print("compressed.........: %6.2f MBytes" % (compressed_size / 1024 / 1024))

    percent = (100.0 / uncompressed_size) * compressed_size
    print("compression ratio..: %6.2f %%" % percent)

    return duration, percent


# The name starts with "test_", so test runners such as pytest would try to
# collect this benchmark helper as a test case; opt out explicitly.
test_compress_file.__test__ = False
class CompressionStats(object):
    """Run the zlib benchmark over a range of levels and print a summary.

    Results are kept in ``self.stats``, a dict mapping each filename to a
    list of ``(level, duration, percent)`` tuples.
    """

    def __init__(self, level_range=10):
        """Store the number of levels to test (``range(level_range)``)."""
        self.level_range = level_range
        self.stats = {}

    def compress(self, filename):
        """Benchmark *filename* once per level and record each result.

        Any earlier results stored for the same filename are replaced.
        """
        self.stats[filename] = []
        for level in range(self.level_range):
            duration, percent = test_compress_file(filename, level)
            self.stats[filename].append(
                (level, duration, percent)
            )

    def print_stats(self):
        """Print one table per benchmarked file, sorted by filename.

        The "efficiency" column is the saved percentage divided by the
        duration in seconds; it is shown as ``n/a`` when the measured
        duration is zero, because the quotient is undefined then.
        """
        print("\n\n# zlib stats:")
        for filename, data in sorted(self.stats.items()):
            print("\n## file '%s':\n" % filename)
            print("level | duration | compression percent | efficiency")
            print("----- | -------- | ------------------- | ----------")
            for level, duration, percent in data:
                if duration > 0:
                    efficiency = "%i" % int(round((100 - percent) / duration, 0))
                else:
                    # Very small inputs or a coarse clock can yield 0.0 sec;
                    # avoid a ZeroDivisionError while printing the report.
                    efficiency = "n/a"
                print(" %i | %.2f sec | %.2f %% | %s" % (
                    level, duration, percent, efficiency
                ))
if __name__ == "__main__":
    # Benchmark the files named on the command line; without arguments fall
    # back to the two files this script was originally written for.
    filenames = sys.argv[1:] or ["lib/pypy.vm.js", "lib/pypy.vm.js.mem"]

    zlib_stats = CompressionStats(level_range=10)
    for filename in filenames:
        zlib_stats.compress(filename)
    zlib_stats.print_stats()

LZMA Stats:

file 'lib/pypy.vm.js':

preset duration compression percent efficiency
0 0.43 sec 12.96 % 204
1 0.51 sec 11.12 % 174
2 0.70 sec 11.24 % 127
3 0.91 sec 10.20 % 99
4 2.11 sec 9.77 % 43
5 3.48 sec 8.81 % 26
6 4.72 sec 8.34 % 19
7 4.59 sec 7.87 % 20
8 4.53 sec 7.87 % 20
9 4.59 sec 7.87 % 20

file 'lib/pypy.vm.js.mem':

preset duration compression percent efficiency
0 0.38 sec 22.36 % 203
1 0.45 sec 20.60 % 177
2 0.55 sec 21.72 % 142
3 0.70 sec 21.56 % 112
4 1.56 sec 19.84 % 51
5 1.97 sec 18.08 % 41
6 2.29 sec 18.08 % 36
7 2.29 sec 18.08 % 36
8 2.28 sec 18.08 % 36
9 2.28 sec 18.08 % 36

zlib stats:

file 'lib/pypy.vm.js':

level duration compression percent efficiency
0 0.10 sec 100.02 % 0
1 0.13 sec 21.15 % 594
2 0.12 sec 19.77 % 646
3 0.14 sec 18.75 % 585
4 0.18 sec 17.30 % 453
5 0.25 sec 16.00 % 337
6 0.37 sec 15.31 % 226
7 0.48 sec 15.11 % 178
8 0.91 sec 14.90 % 94
9 0.95 sec 14.90 % 89

file 'lib/pypy.vm.js.mem':

level duration compression percent efficiency
0 0.04 sec 100.02 % 0
1 0.10 sec 32.55 % 704
2 0.11 sec 31.86 % 648
3 0.14 sec 31.25 % 484
4 0.15 sec 30.71 % 449
5 0.17 sec 30.02 % 415
6 0.29 sec 29.69 % 241
7 0.41 sec 29.58 % 173
8 0.83 sec 29.45 % 85
9 1.67 sec 29.39 % 42
@jedie
Copy link
Author

jedie commented Jun 11, 2015

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment