Skip to content

Instantly share code, notes, and snippets.

@pganssle
Created August 19, 2014 20:35
Show Gist options
  • Save pganssle/c4cf1a40efb75448248d to your computer and use it in GitHub Desktop.
Save pganssle/c4cf1a40efb75448248d to your computer and use it in GitHub Desktop.
Compression of the same data, represented two ways, using BZ2 (similar results with LZMA)
from __future__ import division
import struct
import bz2
# Generate data: 10**7 doubles built as round(ii * 1e11) / (1e11 * N), i.e.
# roughly ii / N.  The original author's intent is that the 12 decimal places
# written to the ASCII file below lose no information.
xx = range(10**7)
scale = 1e11 * len(xx)  # loop-invariant divisor, hoisted out of the comprehension
x = [round(ii * 1e11) / scale for ii in xx]
# A repeat count ('<N>d') packs the same bytes as 'd' * N without building a
# format string with one character per value.
sx = struct.pack('{0}d'.format(len(x)), *x)

# Write it to file as a text and binary file
with open('test_ascii.tdata', 'w') as ft:  # Full size, 10**7 entries: 152 MB
    for val in x:
        ft.write('{:01.12f}\n'.format(val))

with open('test_binary.bdata', 'wb') as fb:  # Full size, 10**7 entries: 76.2 MB
    fb.write(sx)

# Compress both files using bz2 compression.
# buff_size is the number of characters/bytes read per chunk while streaming
# each file into the compressor.  It is deliberately NOT passed to BZ2File:
# the `buffering` argument is ignored on Python 3 and was removed in 3.9,
# where passing it positionally raises TypeError.
buff_size = 4096

# The text file is still read in text mode (as in the original), then encoded
# to ASCII because BZ2File.write() requires bytes on Python 3.
with open('test_ascii.tdata', 'r') as ft:
    with bz2.BZ2File('test_ascii.bz2', 'w') as bz2f:
        for chunk in iter(lambda: ft.read(buff_size), ''):
            bz2f.write(chunk.encode('ascii'))
# Compressed size: 11.3 MB (exiting the `with` block closes the archive)

# Stream the binary file in chunks instead of loading all of it into memory.
with open('test_binary.bdata', 'rb') as fb:
    with bz2.BZ2File('test_binary.bz2', 'w') as bz2f:
        for chunk in iter(lambda: fb.read(buff_size), b''):
            bz2f.write(chunk)
# Compressed size: 39.5 MB
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment