Skip to content

Instantly share code, notes, and snippets.

@rly
Last active February 24, 2023 23:31
Show Gist options
  • Save rly/5c21903d892674fde8bd11a3e7800373 to your computer and use it in GitHub Desktop.
Save rly/5c21903d892674fde8bd11a3e7800373 to your computer and use it in GitHub Desktop.
Tailored Python script to replace particular values in an NWB file to conform with DANDI upload requirements
import glob
import h5py
import numpy as np
import argparse
import pynwb
# Variable-length UTF-8 string dtype used for every string dataset written below.
STR_DTYPE = h5py.special_dtype(vlen=str)


def _as_text(value):
    """Return *value* as ``str``, decoding UTF-8 if h5py handed back bytes."""
    return value.decode("utf-8") if isinstance(value, bytes) else str(value)


def _replace_scalar_text(h5file, dataset_path, label, old_value, new_value):
    """Overwrite the scalar string dataset at *dataset_path* if it holds *old_value*.

    The dataset must contain either *old_value* or *new_value*; anything else
    is treated as an unexpected file and aborts with ``AssertionError``.
    """
    current = _as_text(h5file[dataset_path][()])
    assert current in (old_value, new_value), (
        f"Unexpected {label} value {current!r} in {dataset_path}"
    )
    if current == old_value:
        print(f"Adjusting {label} from '{current}' to '{new_value}'.")
        h5file[dataset_path][()] = new_value


def _verify_adjustments(filepath):
    """Re-read *filepath* with PyNWB, print the adjusted fields and assert them."""
    # Verification only reads, so open read-only instead of append mode.
    with pynwb.NWBHDF5IO(filepath, "r", load_namespaces=True) as io:
        nwbfile = io.read()

        print("Sex:", nwbfile.subject.sex)
        assert nwbfile.subject.sex == "M"

        print("Species:", nwbfile.subject.species)
        assert nwbfile.subject.species == "Rattus norvegicus"

        print("Age:", nwbfile.subject.age)
        assert nwbfile.subject.age == "P4M/P8M"

        print("Experimenter:", nwbfile.experimenter[:])
        # Compare as a plain list; comparing against a NumPy array only works
        # by accident when there is exactly one element.
        assert list(nwbfile.experimenter) == ["Joshi, Abhilasha"]

        # NOTE: keywords are not added by adjust(), so they are not checked.

        print(
            "'camera_sample_frame_counts' processing module found:",
            "camera_sample_frame_counts" in nwbfile.processing,
        )
        assert "camera_sample_frame_counts" not in nwbfile.processing

        print(
            "'video_files' processing module found:",
            "video_files" in nwbfile.processing,
        )
        assert "video_files" not in nwbfile.processing

        print(
            "'analog' processing module found:",
            "analog" in nwbfile.processing,
        )
        # The analog module may remain only if its time series has data.
        assert (
            "analog" not in nwbfile.processing
            or len(nwbfile.processing["analog"]["analog"]["analog"].data) > 0
        )


def adjust(filepath, print_changes):
    """Edit one NWB file in place so it conforms with DANDI upload requirements.

    The file is opened with h5py in append mode and the following changes are
    applied. Each step is skipped when it has already been applied, so the
    function can safely be run again on an adjusted file:

    * subject sex ``"Male"`` -> ``"M"``
    * subject species ``"Rat"`` -> ``"Rattus norvegicus"``
    * subject age added as ``"P4M/P8M"`` when missing
    * experimenter ``["Abhilasha Joshi"]`` -> ``["Joshi, Abhilasha"]``
    * ``camera_sample_frame_counts`` and ``video_files`` processing modules
      removed
    * ``analog`` processing module removed when its time series has no data

    Args:
        filepath: Path to the NWB (HDF5) file to modify.
        print_changes: When true, re-open the file with PyNWB afterwards,
            print the adjusted values and assert that they are as expected.

    Raises:
        AssertionError: If sex, species or experimenter hold a value other
            than the known old/new ones, or if verification fails.
        KeyError: If a required dataset (sex, species, experimenter) is absent.
    """
    print("-------------------------------------------------------------------")
    print("Adjusting NWB file:", filepath)

    with h5py.File(filepath, "a") as f:
        # replace subject sex value "Male" with "M"
        _replace_scalar_text(f, "/general/subject/sex", "subject sex", "Male", "M")

        # replace subject species value "Rat" with "Rattus norvegicus"
        _replace_scalar_text(
            f,
            "/general/subject/species",
            "subject species",
            "Rat",
            "Rattus norvegicus",
        )

        # add subject age dataset with value "P4M/P8M"
        subject = f["/general/subject"]
        if "age" not in subject:
            new_age_value = "P4M/P8M"
            print(f"Adding missing subject age, set to '{new_age_value}'.")
            subject.create_dataset(name="age", data=new_age_value, dtype=STR_DTYPE)

        # replace experimenter list value ["Abhilasha Joshi"] with ["Joshi, Abhilasha"]
        # Work with a plain list of str so membership/equality tests are
        # well-defined for any number of experimenters.
        experimenter_value = [_as_text(v) for v in f["/general/experimenter"][:]]
        assert experimenter_value in (
            ["Abhilasha Joshi"],
            ["Joshi, Abhilasha"],
        ), f"Unexpected experimenter value {experimenter_value}"
        if experimenter_value == ["Abhilasha Joshi"]:
            new_experimenter_value = np.array(["Joshi, Abhilasha"], dtype=STR_DTYPE)
            print(
                f"Adjusting experimenter from {experimenter_value} to {new_experimenter_value}."
            )
            f["/general/experimenter"][:] = new_experimenter_value

        # NOTE: a "keywords" dataset is intentionally not added.

        # remove the "camera_sample_frame_counts" and "video_files" processing
        # modules if present
        # NOTE: this does not shrink the file
        # NOTE: decision on 2023-02-21 to remove video files from the NWB file
        # instead of rewriting their external_file paths to relative paths
        processing = f["/processing"]
        for module_name in ("camera_sample_frame_counts", "video_files"):
            if module_name in processing:
                print(f"Removing {module_name} processing module.")
                del processing[module_name]

        # if the analog time series has no data, remove the "analog" processing module
        # NOTE: this does not shrink the file
        # .get() returns None when the module was already removed.
        analog_data = processing.get("analog/analog/analog/data")
        if analog_data is not None and len(analog_data) == 0:
            print("Analog time series has no data. Removing analog processing module.")
            del processing["analog"]

    # verify changes (after the h5py handle has been closed)
    if print_changes:
        _verify_adjustments(filepath)
def main():
    """Parse command-line arguments and adjust the requested NWB file(s).

    The positional ``path`` may be:

    * a single file ending in ``.nwb``;
    * a directory (with or without a trailing separator), in which case every
      ``*.nwb`` file directly inside it is adjusted;
    * any other string, which is treated as a filename prefix and expanded
      with ``<path>*.nwb``.

    ``--print`` additionally prints and verifies the adjusted values.
    """
    import os  # local import: only needed here for directory handling

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path", help="path to the NWB file or directory of NWB files to adjust"
    )
    parser.add_argument(
        "--print",
        action="store_true",
        help="whether to print the adjusted values from the NWB file",
    )
    args = parser.parse_args()

    path = args.path
    if path.endswith(".nwb"):
        filepaths = [path]
    elif os.path.isdir(path):
        # Join with a separator so "dir" and "dir/" both work; escape the
        # directory name so glob metacharacters in it are taken literally.
        filepaths = sorted(glob.glob(os.path.join(glob.escape(path), "*.nwb")))
    else:
        # Not a directory: keep treating the argument as a path prefix.
        filepaths = sorted(glob.glob(path + "*.nwb"))

    print("Adjusting these NWB files:", filepaths, sep="\n")
    for filepath in filepaths:
        adjust(filepath=filepath, print_changes=args.print)


if __name__ == "__main__":
    main()
@rly
Copy link
Author

rly commented Feb 18, 2023

This code does the following without rewriting the whole file:

  • replace subject sex value "Male" with "M"
  • replace subject species value "Rat" with "Rattus norvegicus"
  • add subject age dataset with value "P4M/P8M"
  • replace experimenter list value ["Abhilasha Joshi"] with ["Joshi, Abhilasha"]
  • remove the "camera_sample_frame_counts" processing module
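  • change the external file path of all videos from a specific absolute path to a specific relative path (disabled as of 2023-02-21: the video files are removed from the NWB file instead, see the next item)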
  • remove the "video_files" processing module
  • remove the "analog" processing module only if the "analog" time series within it has no data

To run:

  1. Install the latest version of nwbinspector or load an environment with the latest version of nwbinspector installed.
  2. Copy one of your NWB files but keep it in the same directory as the others that you will adjust
  3. Run python adjust_nwb.py [path to the NWB file]
  4. Run nwbinspector --config dandi [path to the NWB file]

If the critical errors go away, then delete the copy of the NWB file. Then:

  1. Run python adjust_nwb.py [path to the directory containing all your NWB files that you want to adjust]
  2. Run nwbinspector --config dandi [path to the same directory]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment