Last active
June 13, 2022 20:00
-
-
Save bradmontgomery/fe39b5a48e4bc17c2a037693b8bc9a44 to your computer and use it in GitHub Desktop.
Hack to get the uncompressed size of a gzip file without reading the whole thing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Test if we can reliably figure out the uncompressed size of .gz file... | |
""" | |
import gzip | |
import os | |
import subprocess | |
NUM_FILES = 10

# 1. Generate some sample text files.
# File number i holds 1000 * i copies of the letter 'a', so data_0.txt is
# empty and each later file is 1000 bytes larger than the previous one.
files = []
for i in range(NUM_FILES):
    filename = f"data_{i}.txt"
    files.append(filename)

    # Build the payload in a single step; appending one character at a time
    # with `+=` is needlessly slow (quadratic in the worst case).
    data = 'a' * (1000 * i)

    with open(filename, "w") as f:
        f.write(data)

    # Report the on-disk size so it can be compared with the size recovered
    # from the compressed file later in the script.
    st = os.stat(filename)
    print(f"{filename} is {st.st_size} bytes uncompressed")
# 2. Use the OS's gzip command to compress the files.
# gzip replaces each `name` with `name.gz` in the same directory.
for filename in files:
    # --force: overwrite a stale .gz left behind by a previous run instead of
    #          refusing (or prompting), which would leave old data in place.
    # check=True: raise CalledProcessError if gzip fails, rather than going on
    #             to report sizes for archives that were never (re)written.
    subprocess.run(['gzip', '--force', filename], check=True)
print(f"Compressed {len(files)} files...")

# Rename our list of files so it refers to the compressed archives.
files = [f'{name}.gz' for name in files]
# 3. Look at current file size, & report on uncompressed size
for filename in files:
    st = os.stat(filename)  # size of the compressed archive on disk

    # The context manager guarantees the handle is closed even if seek() or
    # print() raises.
    with gzip.open(filename, 'rb') as f:
        # NOTE: Seek to the end of the file to get the uncompressed size.
        # seek() returns the new absolute position in the *decompressed*
        # stream, which at EOF equals the uncompressed length.
        # NOTE(review): the gzip module appears to emulate this seek by
        # decompressing through to EOF internally, so the data is still read
        # (just not returned to us) -- confirm if performance matters.
        uncompressed_size = f.seek(0, os.SEEK_END)

    print(f"{filename} is {st.st_size} bytes compressed, was {uncompressed_size} bytes uncompressed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment