Skip to content

Instantly share code, notes, and snippets.

@nschloe
Last active January 28, 2023 20:08
Show Gist options
  • Save nschloe/3d3b1adb9ce9e2d68d1c2d1a23ffa06d to your computer and use it in GitHub Desktop.
Save nschloe/3d3b1adb9ce9e2d68d1c2d1a23ffa06d to your computer and use it in GitHub Desktop.
Python array I/O comparison
from sys import version_info
import matplotlib.pyplot as plt
import perfplot
import pickle
import netCDF4
import numpy as np
import h5py
import tables
import zarr
def write_numpy(data):
    """Store ``data`` as ``out.npy`` (NumPy's native format) in the working directory."""
    np.save(file="out.npy", arr=data)
def write_hdf5(data):
    """Write ``data`` to ``out.h5`` via h5py as a dataset named ``"data"``.

    The file is opened in ``"w"`` mode, so an existing ``out.h5`` is replaced.
    """
    with h5py.File("out.h5", "w") as h5_file:
        h5_file.create_dataset("data", data=data)
def write_netcdf(data):
    """Write ``data`` to ``out.nc`` as the netCDF variable ``"mydata"``.

    A single dimension ``"len_data"`` of size ``len(data)`` is created and the
    variable is declared as ``float64`` over that dimension.
    """
    with netCDF4.Dataset("out.nc", "w") as dataset:
        dataset.createDimension("len_data", len(data))
        variable = dataset.createVariable("mydata", "float64", ("len_data",))
        # Slice assignment copies the array contents into the file variable.
        variable[:] = data
def write_pickle(data):
    """Pickle ``data`` into ``out.pkl`` using the default pickle protocol."""
    with open("out.pkl", "wb") as sink:
        pickle.dump(data, sink)
def write_pytables(data):
    """Write ``data`` to ``out-pytables.h5`` with PyTables.

    The array is stored as node ``data`` inside a group ``columns`` under the
    file root, i.e. at ``/columns/data``.
    """
    with tables.open_file("out-pytables.h5", mode="w") as h5_file:
        # The trailing "data" strings are passed in the title position.
        columns_group = h5_file.create_group(h5_file.root, "columns", "data")
        h5_file.create_array(columns_group, "data", data, "data")
def write_zarr(data):
    """Save ``data`` with zarr in four variants.

    Two targets (``.zip`` and ``.zarr``) are each written once with zarr's
    default compressor and once with ``compressor=None``.
    """
    # default compressor
    for target in ("out.zip", "out.zarr"):
        zarr.save_array(target, data)
    # uncompressed
    for target in ("out-uncompressed.zip", "out-uncompressed.zarr"):
        zarr.save_array(target, data, compressor=None)
def setup(n):
    """Generate ``n`` random floats and write them out in every benchmarked format.

    Nothing is returned; the read kernels load the files written here rather
    than using an in-memory argument.
    """
    data = np.random.rand(n)
    writers = (
        write_netcdf,
        write_numpy,
        write_hdf5,
        write_pickle,
        write_pytables,
        write_zarr,
    )
    for write in writers:
        write(data)
    # adapt n: overwrite it in place with the payload size in bytes.
    # NOTE(review): this requires perfplot to pass ``n`` as a writable 0-d
    # array; a plain int would not support item assignment -- confirm against
    # the perfplot version in use.
    n[...] = data.nbytes
def numpy_read(data):
    """Benchmark kernel: load and return the array stored in ``out.npy``.

    ``data`` is accepted for the kernel call signature but is not used.
    """
    loaded = np.load(file="out.npy")
    return loaded
def hdf5_read(data):
    """Benchmark kernel: read the ``"data"`` dataset from ``out.h5`` with h5py.

    ``data`` is accepted for the kernel call signature but is not used.
    """
    with h5py.File("out.h5", "r") as h5_file:
        dataset = h5_file["data"]
        # Indexing with an empty tuple reads the dataset while the file is open.
        return dataset[()]
def netcdf_read(data):
    """Benchmark kernel: read the ``"mydata"`` variable from ``out.nc``.

    ``data`` is accepted for the kernel call signature but is not used.
    """
    with netCDF4.Dataset("out.nc") as dataset:
        variable = dataset.variables["mydata"]
        # Full slice pulls all values out before the dataset is closed.
        return variable[:]
def pickle_read(data):
    """Benchmark kernel: unpickle and return the object stored in ``out.pkl``.

    ``data`` is accepted for the kernel call signature but is not used.
    """
    with open("out.pkl", "rb") as source:
        restored = pickle.load(source)
    return restored
def pytables_read(data):
    """Benchmark kernel: read the ``/columns/data`` array from ``out-pytables.h5``.

    ``data`` is accepted for the kernel call signature but is not used.
    """
    with tables.open_file("out-pytables.h5", mode="r") as h5_file:
        array_node = h5_file.root.columns.data
        # Read the node's contents while the file is still open.
        return array_node[()]
def zarr_zarr_read(data):
    """Benchmark kernel: load ``out.zarr`` (written with the default compressor).

    ``data`` is accepted for the kernel call signature but is not used.
    """
    loaded = zarr.load("out.zarr")
    return loaded
def zarr_zip_read(data):
    """Benchmark kernel: load ``out.zip`` (written with the default compressor).

    ``data`` is accepted for the kernel call signature but is not used.
    """
    loaded = zarr.load("out.zip")
    return loaded
def zarr_zarr_read_uncompressed(data):
    """Benchmark kernel: load ``out-uncompressed.zarr`` (written with ``compressor=None``).

    ``data`` is accepted for the kernel call signature but is not used.
    """
    loaded = zarr.load("out-uncompressed.zarr")
    return loaded
def zarr_zip_read_uncompressed(data):
    """Benchmark kernel: load ``out-uncompressed.zip`` (written with ``compressor=None``).

    ``data`` is accepted for the kernel call signature but is not used.
    """
    loaded = zarr.load("out-uncompressed.zip")
    return loaded
# Time every reader over array lengths 1, 2, 4, ..., 2**27. ``setup`` writes
# the files for each size before the kernels are run.
b = perfplot.bench(
    title="read comparison",
    xlabel="data.nbytes",
    n_range=[1 << k for k in range(28)],
    setup=setup,
    kernels=[
        numpy_read,
        hdf5_read,
        netcdf_read,
        pickle_read,
        pytables_read,
        zarr_zarr_read,
        zarr_zip_read,
        zarr_zarr_read_uncompressed,
        zarr_zip_read_uncompressed,
    ],
)
# Annotate the current axes with the interpreter and library versions used.
# Coordinates are in axes units (``transAxes``); y=-0.3 places the text below
# the plotting area.
plt.text(
    x=0.0,
    y=-0.3,
    s=", ".join(
        (
            f"Python {version_info.major}.{version_info.minor}.{version_info.micro}",
            f"h5py {h5py.__version__}",
            f"netCDF4 {netCDF4.__version__}",
            f"NumPy {np.__version__}",
            f"PyTables {tables.__version__}",
            f"Zarr {zarr.__version__}",
        )
    ),
    transform=plt.gca().transAxes,
    fontsize="x-small",
    verticalalignment="top",
)
# Write the benchmark plot to out.png, then display it.
b.save("out.png")
b.show()
@nschloe
Copy link
Author

nschloe commented Jan 27, 2023

out

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment