Script to trim the Frank Lab beans20190718.nwb file to 2 GB
import pynwb
import ndx_franklab_novela  # registers the Frank Lab extension types used by the file

with pynwb.NWBHDF5IO('beans20190718.nwb', 'r') as io:
    nwbfile = io.read()

    # Replace the raw ephys acquisition with its first n_timestamps samples.
    orig_eseries = nwbfile.acquisition['e-series']
    n_timestamps = 4000000  # / 20000 Hz sampling rate = 200 seconds
    data = orig_eseries.data[0:n_timestamps, :]
    ts = orig_eseries.timestamps[0:n_timestamps]
    electrodes = nwbfile.create_electrode_table_region(
        region=orig_eseries.electrodes.data[:].tolist(),
        name=orig_eseries.electrodes.name,
        description=orig_eseries.electrodes.description
    )
    new_eseries = pynwb.ecephys.ElectricalSeries(
        name=orig_eseries.name,
        description=orig_eseries.description,
        data=data,
        timestamps=ts,
        electrodes=electrodes
    )
    nwbfile.acquisition.pop('e-series')
    nwbfile.add_acquisition(new_eseries)

    # Trim the analog TimeSeries in the 'analog' processing module the same way.
    orig_analog = nwbfile.processing['analog']['analog']['analog']
    data = orig_analog.data[0:n_timestamps, :]
    ts = orig_analog.timestamps[0:n_timestamps]
    new_analog = pynwb.TimeSeries(
        name=orig_analog.name,
        description=orig_analog.description,
        data=data,
        timestamps=ts,
        unit=orig_analog.unit
    )
    nwbfile.processing['analog']['analog'].time_series.pop('analog')
    nwbfile.processing['analog']['analog'].add_timeseries(new_analog)

    # Export the modified file while the source file is still open.
    with pynwb.NWBHDF5IO('beans20190718-trim.nwb', 'w') as export_io:
        export_io.export(src_io=io, nwbfile=nwbfile)
# Note: I manually updated the stop time of the first time interval to be the
# timestamp of the e-series 100 samples before the end. A programmatic sketch
# of the same fix follows.
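# The sketch below is an assumption, not what was actually run: it edits the
# exported file directly with h5py. '/intervals/epochs' is the standard NWB 2.x
# location for the epochs table; verify both paths in your file before writing.
import h5py

with h5py.File('beans20190718-trim.nwb', 'r+') as trimmed:
    new_stop = trimmed['/acquisition/e-series/timestamps'][n_timestamps - 100]
    trimmed['/intervals/epochs/stop_time'][0] = new_stop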
# If you run out of RAM, try subsetting the large HDF5 datasets directly and
# writing them to another file (not sure if it will work):
# https://stackoverflow.com/questions/38443230/how-to-subset-a-very-large-hdf5-dataset-from-file-and-write-to-another-file
# or use a DataChunkIterator, as in the sketch below.
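# Sketch of the DataChunkIterator strategy (illustrative and untested here;
# trimmed_iterator and the buffer size are my own choices, not from the gist).
# It streams rows from the still-open source dataset so the full slice never
# sits in RAM at once.
from hdmf.data_utils import DataChunkIterator

def trimmed_iterator(dataset, n_rows, buffer_size=10000):
    # Wrap the first n_rows of an open 2-D h5py dataset in a DataChunkIterator
    # so pynwb streams them into the new file instead of loading them at once.
    def iter_rows():
        for i in range(n_rows):
            yield dataset[i, :]
    return DataChunkIterator(
        data=iter_rows(),
        maxshape=(n_rows, dataset.shape[1]),
        dtype=dataset.dtype,
        buffer_size=buffer_size  # rows gathered per chunk written
    )

# Usage would replace the slicing lines inside the with block above, e.g.:
# data = trimmed_iterator(orig_eseries.data, n_timestamps)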