Skip to content

Instantly share code, notes, and snippets.

@bradmontgomery
Last active June 13, 2022 20:00
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bradmontgomery/fe39b5a48e4bc17c2a037693b8bc9a44 to your computer and use it in GitHub Desktop.
Save bradmontgomery/fe39b5a48e4bc17c2a037693b8bc9a44 to your computer and use it in GitHub Desktop.
Hack to get the uncompressed size of a gzip file without reading the whole thing.
#!/usr/bin/env python
"""
Test if we can reliably figure out the uncompressed size of .gz file...
"""
import gzip
import os
import subprocess
NUM_FILES = 10

# 1. Generate some sample text files.
# File number i holds 1000 * i copies of 'a', so data_0.txt is empty and
# each following file is 1000 bytes larger than the previous one.
files = []
for i in range(NUM_FILES):
    filename = f"data_{i}.txt"
    files.append(filename)
    # Build the payload in one step; appending one character at a time
    # with `+=` is quadratic in the worst case.
    data = 'a' * (1000 * i)
    with open(filename, "w") as f:
        f.write(data)
    st = os.stat(filename)
    print(f"{filename} is {st.st_size} bytes uncompressed")
# 2. Use the OS's gzip command to compress the files.
# gzip replaces each input file with "<name>.gz" in the same directory.
for filename in files:
    # check=True raises CalledProcessError when gzip exits non-zero, so a
    # failed compression is reported here instead of surfacing later as a
    # missing .gz file. -f overwrites a .gz left over from a previous run,
    # which gzip would otherwise refuse to replace.
    subprocess.run(['gzip', '-f', filename], check=True)
print(f"Compressed {len(files)} files...")

# Rename our list of files so it points at the compressed outputs.
files = [f'{name}.gz' for name in files]
# 3. Look at current file size, & report on uncompressed size
for filename in files:
    # st_size of the .gz file on disk is the compressed size.
    st = os.stat(filename)
    # The context manager closes the archive even if seek() raises.
    with gzip.open(filename, 'rb') as f:
        # Seeking to the end returns the new position in the decompressed
        # stream, i.e. the uncompressed size in bytes.
        # NOTE(review): CPython's gzip reader appears to locate the end by
        # decompressing the stream, so this likely still reads the whole
        # file -- confirm before relying on it for large archives.
        x = f.seek(0, os.SEEK_END)
    print(f"{filename} is {st.st_size} bytes compressed, was {x} bytes uncompressed.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment