|
import os |
|
import sys |
|
import time |
|
|
|
|
|
# New in Python 3.3: https://docs.python.org/3/library/lzma.html |
|
import lzma |
|
|
|
|
|
|
|
def test_compress_file(filepath, preset): |
|
statinfo = os.stat(filepath) |
|
uncompressed_size = statinfo.st_size |
|
|
|
print("_"*79) |
|
print( |
|
"Compress with preset=%i - File: '%s' (%s Bytes uncompressed)\n" % ( |
|
preset, filepath, uncompressed_size |
|
) |
|
) |
|
|
|
lzc = lzma.LZMACompressor(preset=preset) |
|
|
|
chunk_size=1 + 1024 * 1024 |
|
compressed = bytes() |
|
|
|
with open(filepath, "rb") as source_file: |
|
next_update = time.time() + 0.5 |
|
read_bytes = 0 |
|
start_time=time.time() |
|
while True: |
|
chunk_data = source_file.read(chunk_size) |
|
if not chunk_data: |
|
raise RuntimeError |
|
|
|
read_bytes += len(chunk_data) |
|
compressed += lzc.compress(chunk_data) |
|
|
|
done = read_bytes >= uncompressed_size |
|
|
|
if done or time.time() > next_update: |
|
percent = (100.0 / uncompressed_size) * read_bytes |
|
print("\tread %i Bytes - %.1f%%" % (read_bytes, percent)) |
|
next_update = time.time() + 1 |
|
|
|
if done: |
|
break |
|
|
|
duration = time.time()-start_time |
|
print("\ncompression time...: %6.2f sec." % duration) |
|
|
|
compressed_size = len(compressed) |
|
print("uncompressed.......: %6.2f MBytes" % (uncompressed_size/1024/1024)) |
|
print("compressed.........: %6.2f MBytes" % (compressed_size/1024/1024)) |
|
|
|
percent = (100.0 / uncompressed_size) * compressed_size |
|
print("compression ratio..: %6.2f %%" % percent) |
|
|
|
return duration, percent |
|
|
|
|
|
class StatLzma(object):
    """Collect LZMA compression results per file and preset, and print them."""

    def __init__(self, preset_range=10):
        # Number of presets to try: presets 0 .. preset_range - 1 are used.
        self.preset_range = preset_range
        # Maps filename -> list of (preset, duration, percent) tuples.
        self.stats = {}

    def compress(self, filename):
        """Run ``test_compress_file`` on *filename* once per preset.

        Any results previously stored for *filename* are replaced. Results
        are appended as they complete, so earlier presets stay recorded if
        a later one raises.
        """
        self.stats[filename] = []
        for preset in range(self.preset_range):
            duration, percent = test_compress_file(filename, preset)
            self.stats[filename].append(
                (preset, duration, percent)
            )

    def print_stats(self):
        """Print one Markdown-style table per file, ordered by filename.

        The "efficiency" column is the percentage saved (``100 - percent``)
        divided by the duration in seconds, rounded to an integer. When the
        duration is zero, the column shows ``n/a``.
        """
        print("\n\n# LZMA Stats:")
        for filename, data in sorted(self.stats.items()):
            print("\n## file '%s':\n" % filename)
            print("preset | duration | compression percent | efficiency")
            print("------ | -------- | ------------------- | ----------")
            for preset, duration, percent in data:
                if duration > 0:
                    efficiency = "%i" % int(
                        round((100-percent) / duration, 0)
                    )
                else:
                    # A zero duration would otherwise raise
                    # ZeroDivisionError and abort the whole report.
                    efficiency = "n/a"
                print(" %i | %.2f sec | %.2f %% | %s" % (
                    preset, duration, percent, efficiency
                ))
|
|
|
if __name__ == "__main__":
    # Benchmark presets 0-9 on each of the two files under lib/, then print
    # the collected tables.
    benchmark = StatLzma(preset_range=10)
    for target in ("lib/pypy.vm.js", "lib/pypy.vm.js.mem"):
        benchmark.compress(target)
    benchmark.print_stats()
# See: pypyjs/pypyjs.github.io#4 (comment)