# h-mayorquin/stub_femtonics.py

## stub_femtonics.py
from pathlib import Path
import h5py
import numpy as np


# ────────────────────────────────────────────────────────────────────────────
def copy_group(
    src_group,
    dst_group,
    stub_frames: int,
    *,
    omit_groups=None,
):
    """
    Recursively copy *src_group* → *dst_group*, producing a truncated stub.

    While copying:

      • every dataset that has at least one axis is cut down to its first
        `stub_frames` entries along axis 0,
      • scalar / empty-dataspace datasets (nothing to slice) are copied whole,
      • **any** HDF5 group whose name appears in `omit_groups` is skipped,
        no matter where it occurs in the hierarchy, and
      • attributes of every copied group and dataset are carried over.

    Parameters
    ----------
    src_group, dst_group : h5py.Group
        Group to read from and group to fill.
    stub_frames : int
        Max number of leading frames to copy from each dataset.
    omit_groups : Iterable[str] | None
        Names of groups to omit (e.g. {"MUnit_96"}).  If None/empty, nothing
        is omitted.

    Notes
    -----
    * Matching is done by **group name only** (not full path).
      If the same name appears at multiple depths, every occurrence is skipped.
    * Datasets are *never* matched—only groups.
    * Compression/filter settings of each source dataset are reused; the chunk
      shape is shrunk along axis 0 when it would exceed the truncated length.
    * If the destination group ends up with a ``ZDim`` attribute, it is
      overwritten with `stub_frames`.
      NOTE(review): this happens even when the source holds fewer than
      `stub_frames` frames — confirm that is the intended value.
    """
    omit_groups = set(omit_groups or [])

    for key, item in src_group.items():
        if isinstance(item, h5py.Group):
            # Name-based pruning: drop the whole subtree.
            if key in omit_groups:
                continue
            sub_dst = dst_group.create_group(key)
            copy_group(item, sub_dst, stub_frames, omit_groups=omit_groups)
            continue

        # Anything that is not a group is handled as a dataset.
        if not item.shape:
            # shape is () for scalars and None for empty dataspaces: there is
            # no leading axis to truncate, and chunk/filter options cannot be
            # applied, so the value is copied as-is.
            dset = dst_group.create_dataset(key, data=item[()], dtype=item.dtype)
        else:
            take = min(stub_frames, item.shape[0])
            chunks = item.chunks
            if chunks is not None and 0 < take < chunks[0]:
                # Only axis 0 gets shorter; a chunk larger than the truncated
                # data along that axis is rejected by h5py.
                chunks = (take,) + tuple(chunks[1:])
            dset = dst_group.create_dataset(
                key,
                data=item[:take],
                dtype=item.dtype,
                compression=item.compression,
                compression_opts=item.compression_opts,
                shuffle=item.shuffle,
                fletcher32=item.fletcher32,
                scaleoffset=item.scaleoffset,
                chunks=chunks,
            )
        dset.attrs.update(item.attrs)

    dst_group.attrs.update(src_group.attrs)
    if "ZDim" in dst_group.attrs:
        # modify() keeps the attribute's existing on-disk type.
        dst_group.attrs.modify("ZDim", np.uint64(stub_frames))


# ────────────────────────────────────────────────────────────────────────────

folder_path = Path("/home/heberto/data/femtonics")
assert folder_path.exists(), f"Folder {folder_path} does not exist."
file_path = folder_path / "mesc_demo_sample.mesc"

# One stub file per entry: (output file name, group names left out of it).
# The first stub drops every group named "MUnit_96" anywhere in the tree;
# the second keeps every group.
stub_specs = (
    ("single_m_unit_index.mesc", {"MUnit_96"}),
    ("single_channel.mesc", None),
)

for stub_name, omitted in stub_specs:
    output_path = folder_path / stub_name
    # Both stubs keep only the first two frames of every dataset.
    with h5py.File(file_path, "r") as src, h5py.File(output_path, "w") as dst:
        copy_group(src, dst, stub_frames=2, omit_groups=omitted)
    print("Stub file saved to:", output_path)
	from pathlib import Path
	import h5py
	import numpy as np


	# ────────────────────────────────────────────────────────────────────────────
	def copy_group(
	    src_group,
	    dst_group,
	    stub_frames: int,
	    *,
	    omit_groups=None,
	):
	    """
	    Recursively copy src_group → dst_group, producing a truncated stub.

	    While copying:

	      • every dataset that has at least one axis is cut down to its first
	        `stub_frames` entries along axis 0,
	      • scalar / empty-dataspace datasets (nothing to slice) are copied whole,
	      • any HDF5 group whose name appears in `omit_groups` is skipped,
	        no matter where it occurs in the hierarchy, and
	      • attributes of every copied group and dataset are carried over.

	    Parameters
	    ----------
	    src_group, dst_group : h5py.Group
	        Group to read from and group to fill.
	    stub_frames : int
	        Max number of leading frames to copy from each dataset.
	    omit_groups : Iterable[str] | None
	        Names of groups to omit (e.g. {"MUnit_96"}). If None/empty, nothing
	        is omitted.

	    Notes
	    -----
	    * Matching is done by group name only (not full path).
	      If the same name appears at multiple depths, every occurrence is skipped.
	    * Datasets are never matched—only groups.
	    * Compression/filter settings of each source dataset are reused; the chunk
	      shape is shrunk along axis 0 when it would exceed the truncated length.
	    * If the destination group ends up with a ``ZDim`` attribute, it is
	      overwritten with `stub_frames`.
	      NOTE(review): this happens even when the source holds fewer than
	      `stub_frames` frames — confirm that is the intended value.
	    """
	    omit_groups = set(omit_groups or [])

	    for key, item in src_group.items():
	        if isinstance(item, h5py.Group):
	            # Name-based pruning: drop the whole subtree.
	            if key in omit_groups:
	                continue
	            sub_dst = dst_group.create_group(key)
	            copy_group(item, sub_dst, stub_frames, omit_groups=omit_groups)
	            continue

	        # Anything that is not a group is handled as a dataset.
	        if not item.shape:
	            # shape is () for scalars and None for empty dataspaces: there is
	            # no leading axis to truncate, and chunk/filter options cannot be
	            # applied, so the value is copied as-is.
	            dset = dst_group.create_dataset(key, data=item[()], dtype=item.dtype)
	        else:
	            take = min(stub_frames, item.shape[0])
	            chunks = item.chunks
	            if chunks is not None and 0 < take < chunks[0]:
	                # Only axis 0 gets shorter; a chunk larger than the truncated
	                # data along that axis is rejected by h5py.
	                chunks = (take,) + tuple(chunks[1:])
	            dset = dst_group.create_dataset(
	                key,
	                data=item[:take],
	                dtype=item.dtype,
	                compression=item.compression,
	                compression_opts=item.compression_opts,
	                shuffle=item.shuffle,
	                fletcher32=item.fletcher32,
	                scaleoffset=item.scaleoffset,
	                chunks=chunks,
	            )
	        dset.attrs.update(item.attrs)

	    dst_group.attrs.update(src_group.attrs)
	    if "ZDim" in dst_group.attrs:
	        # modify() keeps the attribute's existing on-disk type.
	        dst_group.attrs.modify("ZDim", np.uint64(stub_frames))


	# ────────────────────────────────────────────────────────────────────────────

	folder_path = Path("/home/heberto/data/femtonics")
	assert folder_path.exists(), f"Folder {folder_path} does not exist."
	file_path = folder_path / "mesc_demo_sample.mesc"

	# One stub file per entry: (output file name, group names left out of it).
	# The first stub drops every group named "MUnit_96" anywhere in the tree;
	# the second keeps every group.
	stub_specs = (
	    ("single_m_unit_index.mesc", {"MUnit_96"}),
	    ("single_channel.mesc", None),
	)

	for stub_name, omitted in stub_specs:
	    output_path = folder_path / stub_name
	    # Both stubs keep only the first two frames of every dataset.
	    with h5py.File(file_path, "r") as src, h5py.File(output_path, "w") as dst:
	        copy_group(src, dst, stub_frames=2, omit_groups=omitted)
	    print("Stub file saved to:", output_path)