Skip to content

Instantly share code, notes, and snippets.

@ajelenak
Created May 19, 2023 00:00
Show Gist options
  • Save ajelenak/1e0403b2cbf28f88a0115eacc252af81 to your computer and use it in GitHub Desktop.
Save ajelenak/1e0403b2cbf28f88a0115eacc252af81 to your computer and use it in GitHub Desktop.
Create a copy of an HDF5 file with a user block of the specified size. The user block is filled with zero bytes.
import argparse
import os
import shutil
from warnings import warn

import h5py
# Smallest user block size accepted by the HDF5 file format (bytes).
MIN_UBLOCK_SIZE = 512
# A user block of this size (10 MiB) or larger triggers a warning.
WARN_UBLOCK_SIZE = 10 * 1024 * 1024
# Number of bytes read per chunk when copying the input file (10 MiB).
COPY_BLOCK_SIZE = 10 * 1024 * 1024

# Command-line interface: input file, output file, user block size, and a
# switch that disables the post-copy verification of the new file.
parser = argparse.ArgumentParser(
    description='Add empty user block to an HDF5 file.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('h5in', help='Input HDF5 file name.', type=str)
parser.add_argument('h5out', help='Output HDF5 file name to create.', type=str)
parser.add_argument('--ublock', '-b',
                    help='User block size (power of two, at least 512)',
                    type=int, default=1024 * 1024)
parser.add_argument('--nocheck', action='store_true',
                    help='Do not open new HDF5 file to check user block size')
cli = parser.parse_args()

# HDF5 only accepts user block sizes that are powers of two and at least
# 512 bytes; a smaller power of two (1, 2, ..., 256) would yield a file
# that HDF5 cannot open. `n & (n - 1)` is zero only for powers of two.
if cli.ublock < MIN_UBLOCK_SIZE or cli.ublock & (cli.ublock - 1) != 0:
    raise ValueError(
        f'User block size must be a power of two and at least '
        f'{MIN_UBLOCK_SIZE}: {cli.ublock}')

# Valid but unusually large: let the user know without aborting.
if cli.ublock >= WARN_UBLOCK_SIZE:
    warn(f'Should user block be this large {cli.ublock}?', stacklevel=2)
# Opening the output with mode='wb' truncates it. If both names refer to
# the same file the input would be wiped before it is read, so refuse to
# continue in that case.
if os.path.exists(cli.h5out) and os.path.samefile(cli.h5in, cli.h5out):
    raise ValueError(f'Input and output are the same file: {cli.h5in}')

with open(cli.h5in, mode='rb') as in_, open(cli.h5out, mode='wb') as out:
    # Write the requested number of user block bytes first (all zeros)...
    out.write(bytes(cli.ublock))
    # ...then copy the input HDF5 file verbatim, reading at most
    # COPY_BLOCK_SIZE bytes at a time.
    shutil.copyfileobj(in_, out, COPY_BLOCK_SIZE)
# Unless verification was disabled on the command line, reopen the new file
# with h5py and compare the user block size it reports with the requested one.
if not cli.nocheck:
    with h5py.File(cli.h5out, mode='r') as new_file:
        actual_size = new_file.userblock_size
    if actual_size != cli.ublock:
        raise ValueError(
            f'Actual user block size different: {cli.ublock} != {actual_size}')

# Tell the user where the new copy is.
print(f'New copy with a user block of {cli.ublock} bytes is at {cli.h5out}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment