@pp-mo, created October 7, 2021 15:08
Example: Append Iris cubes to a netCDF file.
# Example showing how to append ~similar cubes onto an existing netCDF file.
import numpy as np
import iris
from iris.cube import Cube
from iris.coords import DimCoord, AuxCoord
#
# Generate a really simple timestepped data sequence
#
# All in one cube, to start with.
nt, ny, nx = 10, 5, 3
timeco = DimCoord(np.arange(nt), standard_name='time',
                  units='days since 2021-01-01')
yco = DimCoord(np.linspace(-90., 90., ny, endpoint=True),
               standard_name='latitude', units='degrees')
xco = DimCoord(np.linspace(-180., 180., nx, endpoint=True),
               standard_name='longitude', units='degrees')
data = np.arange(np.prod((nt, ny, nx))).reshape((nt, ny, nx))
testdata_cube = Cube(data, long_name='odd_data')
testdata_cube.add_dim_coord(timeco, 0)
testdata_cube.add_dim_coord(yco, 1)
testdata_cube.add_dim_coord(xco, 2)
# Just for interest, add an arbitrary attribute.
testdata_cube.attributes['extra'] = 'special'
# Save all this as intermediate data in *separate* timestep files.
for it in range(nt):
    filename = f'timestep_{it}.nc'
    iris.save(testdata_cube[it], filename)
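# Each file now holds a single 2d (y, x) field, with 'time' as a scalar coord.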
#
# Load the separate timesteps as a list of Iris cubes
#
cubes = iris.load_raw('timestep_*.nc')
# N.B. raw-load order is not guaranteed, so sort the cubes by time first.
cubes = sorted(cubes, key=lambda cube: cube.coord('time').points[0])
first_cube, rest_cubes = cubes[0], cubes[1:]
#
# Save the first_cube to make a new file with an unlimited dimension.
#
# First ensure it has an actual time dimension instead of a scalar time coord.
from iris.util import new_axis
first_cube = new_axis(first_cube, 'time')
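# new_axis promotes the scalar 'time' coord to a new length-1 leading
# dimension, i.e. first_cube.shape is now (1, ny, nx) rather than (ny, nx).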
combined_filename = 'recombined_timesteps.nc'
iris.save(first_cube, combined_filename, unlimited_dimensions=['time'])
#
# Take the other cubes in turn, and append the time coord and main variable data.
#
# For this we must use the low-level netCDF4 interface.
import netCDF4 as nc
ds = nc.Dataset(combined_filename, 'r+')
time_var = ds.variables['time']
data_var = ds.variables['odd_data']
# Just check some essential facts about the file..
assert ds.dimensions['time'].isunlimited()
assert time_var.dimensions == ('time',)
assert data_var.dimensions[0] == 'time'
for var in ds.variables.values():
    if var not in (time_var, data_var):
        assert 'time' not in var.dimensions
# Now add in the data from extra timesteps.
# N.B. one could also open and close the dataset for each update
# (a sketch of that approach follows, after the loop).
for i_cube, timestep_cube in enumerate(rest_cubes):
    it = i_cube + 1
    time_point = timestep_cube.coord('time').points  # always a single point
    time_var[it:it+1] = time_point
    data_var[it:it+1] = timestep_cube.data
# Close the datafile again.
ds.close()
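# As noted above, each update could instead use its own open/close cycle.
# A minimal sketch of such a helper (hypothetical name 'append_timestep';
# assumes the same variable names and file layout checked above):
def append_timestep(path, cube):
    with nc.Dataset(path, 'r+') as dataset:
        # Next free index along the unlimited 'time' dimension.
        i_next = len(dataset.dimensions['time'])
        # Writing at that index grows the unlimited dimension by one.
        dataset.variables['time'][i_next] = cube.coord('time').points[0]
        dataset.variables['odd_data'][i_next] = cube.data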
#
# Check results
#
# Load back the 'combined' file.
result = iris.load_cube(combined_filename)
print(result)
# Try to compare this with the original ...
# For this, we must remove additional info added by the save process.
# Remove all the var_names
result.var_name = None
for coord in result.coords():
    coord.var_name = None
# Remove the extra Conventions attribute.
del result.attributes['Conventions']
# It now matches the original test data *precisely* (including all data)
print('')
print('Split + re-concatenated == original ? ', result == testdata_cube)
# QED !
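# For comparison: when all the timestep files already exist, Iris can
# recombine the raw cubes directly with a merge, with no low-level file
# editing needed (the append approach above suits timesteps arriving
# incrementally). A sketch:
merged_cube = iris.load_raw('timestep_*.nc').merge_cube()
print('Merged shape (expected (10, 5, 3)): ', merged_cube.shape)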