Created
March 27, 2023 11:20
-
-
Save zklaus/1310993342d7784b9ace071f4d10b468 to your computer and use it in GitHub Desktop.
Test script for compression with netcdf4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import sys
import traceback
from time import perf_counter as timer

import numpy as np
import pandas as pd
from humanfriendly import format_size
from netCDF4 import Dataset
# Lower the root logger threshold to INFO so logging.info() progress
# messages are emitted (the default threshold is WARNING).
logging.basicConfig(level=logging.INFO)
def create_data(shape, seed=None):
    """Return synthetic, normally distributed test data.

    Samples are drawn from a normal distribution with mean 300.0 and
    standard deviation 10.0.

    Args:
        shape: Shape of the array to generate, e.g. ``(1000, 100, 100)``.
        seed: Optional seed forwarded to ``numpy.random.default_rng``.
            With the default ``None`` the generator is seeded
            unpredictably, so every call yields different data; pass an
            int to make benchmark input reproducible.

    Returns:
        A floating-point ``numpy.ndarray`` of the requested shape.
    """
    rng = np.random.default_rng(seed)
    return rng.normal(300.0, 10.0, shape)
def load_data(path):
    """Read the ``ta`` variable from the first file listed in *path*.

    Args:
        path: Sequence of file names. Only the first entry is opened; any
            further entries are ignored.

    Returns:
        The complete contents of the ``ta`` variable.
    """
    # Slicing with [:] reads the values into memory, so the dataset can be
    # closed before returning instead of leaking the open file handle.
    with Dataset(path[0]) as ds:
        return ds["ta"][:]
def store(data, compression, complevel):
    """Write *data* to a NetCDF4 file and return the size of that file.

    The file is named ``uncompressed.nc`` when *compression* is ``None`` and
    ``{compression}_{complevel}.nc`` otherwise. One dimension ``dim_<i>`` is
    created per axis of *data*, and the values are written to a single
    ``f4`` variable called ``var`` that is quantized to 5 significant digits
    with the ``GranularBitRound`` mode.

    Args:
        data: Array to write; must provide ``ndim`` and ``shape``.
        compression: Name of the compression method handed to
            ``createVariable``, or ``None`` for no compression.
        complevel: Compression level handed to ``createVariable``.

    Returns:
        The size of the written file in bytes, or ``None`` if creating or
        writing the variable raised ``RuntimeError`` (the traceback is
        printed in that case).
    """
    if compression is None:
        filename = "uncompressed.nc"
    else:
        filename = f"{compression}_{complevel}.nc"

    dim_names = tuple(f"dim_{i}" for i in range(data.ndim))

    # Preferred chunk layout: a dedicated template for 4-D data, and the
    # 3-D template for everything else.
    chunksizes = (10, 1, 100, 100) if data.ndim == 4 else (10, 100, 100)
    if len(chunksizes) == data.ndim:
        # A chunk must not be larger than a fixed-size dimension, so clamp
        # the template to the actual extents. A zero-length dimension is
        # created as unlimited and therefore keeps the preferred chunk size.
        chunksizes = tuple(
            min(chunk, extent) if extent else chunk
            for chunk, extent in zip(chunksizes, data.shape)
        )

    with Dataset(filename, "w", format="NETCDF4") as root:
        for dim_name, extent in zip(dim_names, data.shape):
            root.createDimension(dim_name, extent)

        logging.info("Trying compression %s", compression)
        try:
            var = root.createVariable(
                "var",
                "f4",
                dim_names,
                fill_value=1.0e20,
                significant_digits=5,
                # "BitGroom" is an alternative quantize mode.
                quantize_mode="GranularBitRound",
                compression=compression,
                complevel=complevel,
                chunksizes=chunksizes,
            )
            var[:] = data
        except RuntimeError:
            # print_exc() reports the exception being handled and, unlike
            # the single-argument print_exception(e), also works on Python
            # versions older than 3.10.
            traceback.print_exc()
            return None

    # Measure only after the dataset has been closed, so everything has
    # been written to disk.
    return os.stat(filename).st_size
def main():
    """Benchmark several compression methods and print a summary table.

    When command-line arguments are given they are passed to ``load_data``
    to obtain the input; otherwise a random ``(1000, 100, 100)`` array is
    generated. Every configured method is timed while ``store`` writes the
    data, and the resulting file size, elapsed time (seconds) and
    compression ratio relative to the first (uncompressed) entry are
    printed.
    """
    if len(sys.argv) > 1:
        data = load_data(sys.argv[1:])
    else:
        data = create_data((1000, 100, 100))

    # (compression, complevel) pairs. The uncompressed run must stay first:
    # it is the baseline for the compression ratio below.
    # "bzip2" is deliberately left out because it is too slow.
    methods = (
        (None, 0),
        ("zlib", 1),
        ("szip", 4),
        ("zstd", -4),
        ("zstd", 12),
        ("blosc_zstd", 4),
        ("blosc_zlib", 4),
    )

    results = {}
    for compression, complevel in methods:
        start = timer()
        size = store(data, compression, complevel)
        elapsed = timer() - start
        results[f"{compression} {complevel}"] = (size, elapsed)

    table = pd.DataFrame.from_dict(
        results, orient="index", columns=["Filesize", "Time"]
    )
    # The index holds string labels, so the baseline row has to be selected
    # by position with .iloc; plain [0] on such a Series is deprecated.
    baseline = table["Filesize"].iloc[0]
    table["Compression ratio"] = baseline / table["Filesize"]
    print(table.to_string(formatters={"Filesize": format_size}))
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment