Skip to content

Instantly share code, notes, and snippets.

@h-mayorquin
Last active June 30, 2025 19:16
Show Gist options
  • Save h-mayorquin/f478ee18267028529a78463d595468d9 to your computer and use it in GitHub Desktop.
Save h-mayorquin/f478ee18267028529a78463d595468d9 to your computer and use it in GitHub Desktop.
Stub hdf5 femtonics
from pathlib import Path
import h5py
import numpy as np
# ────────────────────────────────────────────────────────────────────────────
def copy_group(
    src_group,
    dst_group,
    stub_frames: int,
    *,
    omit_groups=None,
):
    """
    Recursively copy *src_group* into *dst_group*, keeping only a stub of the data.

    While copying, this function

    * truncates every dataset that has at least one dimension to its first
      `stub_frames` entries along axis 0, and
    * skips **any** HDF5 group whose name appears in `omit_groups`,
      no matter where it occurs in the hierarchy.

    Parameters
    ----------
    src_group, dst_group : h5py.Group
        Source group to read from and (already existing) destination group
        to populate. `h5py.File` objects are accepted as well.
    stub_frames : int
        Max number of leading frames to copy from each dataset.
        Must be non-negative.
    omit_groups : Iterable[str] | None
        Names of groups to omit (e.g. {"MUnit_96"}). If None/empty, nothing
        is omitted.

    Raises
    ------
    ValueError
        If `stub_frames` is negative.

    Notes
    -----
    * Matching is done by **group name only** (not full path).
      If the same name appears at multiple depths, every occurrence is skipped.
    * Datasets are *never* matched—only groups.
    * Scalar datasets and datasets with an empty dataspace have no leading
      axis to truncate, so they are copied unchanged.
    * Dataset creation properties (compression, shuffle, fletcher32,
      scaleoffset, chunks) are carried over; the chunk shape is clipped so it
      never exceeds the truncated dataset shape.
    """
    if stub_frames < 0:
        # A negative value would make `item[:take]` slice from the end
        # instead of keeping leading frames.
        raise ValueError(f"stub_frames must be non-negative, got {stub_frames}.")

    omit_groups = set(omit_groups or [])

    for key, item in src_group.items():
        if isinstance(item, h5py.Group):
            # Omitted groups are dropped together with their whole subtree.
            if key in omit_groups:
                continue
            sub_dst = dst_group.create_group(key)
            copy_group(item, sub_dst, stub_frames, omit_groups=omit_groups)
            continue

        # `shape` is `()` for scalar datasets and `None` for empty dataspaces;
        # neither can be sliced along axis 0, so copy those objects verbatim
        # (attributes included). Non-dataset objects take the same path.
        if not isinstance(item, h5py.Dataset) or not item.shape:
            src_group.copy(item, dst_group, name=key)
            continue

        take = min(stub_frames, item.shape[0])
        stub_shape = (take,) + tuple(item.shape[1:])

        chunks = item.chunks
        if chunks is not None:
            # h5py rejects a chunk shape larger than the dataset shape, which
            # happens as soon as the source chunks span more than `take` frames.
            chunks = tuple(
                min(chunk_dim, dim) for chunk_dim, dim in zip(chunks, stub_shape)
            )
            if 0 in chunks:
                # Zero-length chunks are invalid; let h5py pick a chunk shape.
                chunks = True

        dset = dst_group.create_dataset(
            key,
            data=item[:take],
            dtype=item.dtype,
            compression=item.compression,
            compression_opts=item.compression_opts,
            shuffle=item.shuffle,
            fletcher32=item.fletcher32,
            scaleoffset=item.scaleoffset,
            chunks=chunks,
        )
        dset.attrs.update(item.attrs)

    dst_group.attrs.update(src_group.attrs)
    if "ZDim" in dst_group.attrs:
        # NOTE(review): presumably "ZDim" records the number of frames of the
        # group's data; it is set to `stub_frames` even when the source holds
        # fewer frames than that — confirm this is the intended value.
        dst_group.attrs.modify("ZDim", np.uint64(stub_frames))
# ────────────────────────────────────────────────────────────────────────────
# Location of the Femtonics sample data; adjust to the local machine.
folder_path = Path("/home/heberto/data/femtonics")
if not folder_path.exists():
    # Explicit raise instead of `assert`, which is stripped when Python runs with -O.
    raise FileNotFoundError(f"Folder {folder_path} does not exist.")

file_path = folder_path / "mesc_demo_sample.mesc"

# Stub 1: keep the first 2 frames of every dataset and drop every group
# named "MUnit_96" anywhere in the tree.
output_path = folder_path / "single_m_unit_index.mesc"
with h5py.File(file_path, "r") as src, h5py.File(output_path, "w") as dst:
    copy_group(src, dst, stub_frames=2, omit_groups={"MUnit_96"})
print("Stub file saved to:", output_path)

# Stub 2: keep the first 2 frames of every dataset; no group is omitted.
output_path = folder_path / "single_channel.mesc"
with h5py.File(file_path, "r") as src, h5py.File(output_path, "w") as dst:
    copy_group(src, dst, stub_frames=2)
print("Stub file saved to:", output_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment