Skip to content

Instantly share code, notes, and snippets.

@CharlZKP
Created August 10, 2023 04:41
Show Gist options
  • Save CharlZKP/5eb3d12d282dffa6b3af59e760d3ae1b to your computer and use it in GitHub Desktop.
Save CharlZKP/5eb3d12d282dffa6b3af59e760d3ae1b to your computer and use it in GitHub Desktop.
Faster NumPy array compression and decompression, with slightly smaller file sizes, using Zstandard.
# This code is licensed under the terms of the MIT license
import numpy as np
import zstandard as zstd
import json
def np_save_compressed(filename, array, compression_level=5):
    """
    Save a NumPy array as a compressed file using Zstandard compression.

    The file is laid out as three consecutive parts:

    1. a 4-byte little-endian signed integer holding the metadata length,
    2. the metadata as UTF-8 encoded JSON with the keys "shape" and "dtype",
    3. the Zstandard-compressed array bytes (C / row-major order).

    Args:
        filename (str): The name of the output compressed file.
        array (numpy.ndarray): The NumPy array to be compressed and saved.
            Array-likes (e.g. nested lists) are converted with ``np.asarray``.
        compression_level (int, optional): The compression level (1 to 22).
            Higher values provide better compression. Defaults to 5.

    Raises:
        ValueError: If the array's dtype contains Python objects; their raw
            bytes are memory addresses and cannot be restored from a file.
    """
    array = np.asarray(array)
    dtype = array.dtype

    # Reject object arrays up front, before any compression or file I/O:
    # tobytes() would only dump pointers, yielding a file that cannot be loaded.
    if dtype.hasobject:
        raise ValueError(
            "Object arrays cannot be saved: dtype %s contains Python objects" % dtype
        )

    # dtype.str carries an explicit byte-order marker (e.g. '<f4'), so the
    # payload is interpreted correctly regardless of the reading host.
    # Structured dtypes keep the original str() form, since dtype.str would
    # collapse them to an opaque void type and drop the field layout.
    dtype_name = dtype.str if dtype.names is None else str(dtype)

    metadata = {
        "shape": array.shape,
        "dtype": dtype_name,
    }
    metadata_bytes = json.dumps(metadata).encode('utf-8')
    # Fixed-width, explicitly little-endian length prefix so the header does
    # not depend on the byte order of the machine writing the file.
    length_prefix = len(metadata_bytes).to_bytes(4, 'little', signed=True)

    # Compress before opening the file so a compression failure does not
    # leave a partially written file behind.
    cctx = zstd.ZstdCompressor(level=compression_level)
    compressed_data = cctx.compress(array.tobytes())

    with open(filename, 'wb') as f:
        f.write(length_prefix)
        f.write(metadata_bytes)
        f.write(compressed_data)
def np_load_compressed(filename):
    """
    Load a compressed NumPy array using Zstandard decompression.

    Expects the layout written by ``np_save_compressed``: a 4-byte
    little-endian signed metadata length, UTF-8 JSON metadata with the keys
    "shape" and "dtype", then the Zstandard-compressed array bytes.

    Args:
        filename (str): The name of the compressed file to load.

    Returns:
        numpy.ndarray: The decompressed NumPy array. It is created with
        ``np.frombuffer`` as a view over the decompressed buffer, so it is
        typically read-only; call ``.copy()`` if a writable array is needed.

    Raises:
        ValueError: If the file is too short to contain the length prefix,
            the length is negative, or the metadata section is truncated.
    """
    prefix_size = 4
    with open(filename, 'rb') as f:
        length_prefix = f.read(prefix_size)
        if len(length_prefix) != prefix_size:
            raise ValueError(
                "%r is too short to contain a metadata length header" % (filename,)
            )

        # Explicit little-endian parse: independent of the host byte order.
        metadata_size = int.from_bytes(length_prefix, 'little', signed=True)
        if metadata_size < 0:
            # A negative size would make read() consume the rest of the file.
            raise ValueError(
                "%r has an invalid metadata length: %d" % (filename, metadata_size)
            )

        metadata_bytes = f.read(metadata_size)
        if len(metadata_bytes) != metadata_size:
            raise ValueError(
                "%r is truncated: expected %d bytes of metadata, found %d"
                % (filename, metadata_size, len(metadata_bytes))
            )
        compressed_data = f.read()

    metadata_dict = json.loads(metadata_bytes.decode("utf-8"))
    dtype = np.dtype(metadata_dict["dtype"])
    # JSON stores the shape as a list; convert it back to a tuple.
    shape = tuple(metadata_dict["shape"])

    # Create the decompressor only once the header has been validated.
    dctx = zstd.ZstdDecompressor()
    decompressed_data = dctx.decompress(compressed_data)
    return np.frombuffer(decompressed_data, dtype=dtype).reshape(shape)
# np_save_compressed('array1.np.zstd', array1)
# np_save_compressed('array2.np.zstd', array2)
# loaded_array1 = np_load_compressed('array1.np.zstd')
# loaded_array2 = np_load_compressed('array2.np.zstd')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment