@pp-mo, created October 7, 2021 15:08
Example: Append Iris cubes to a netCDF file.
# Example showing how to append ~similar cubes onto an existing netCDF file.
import numpy as np
import iris
from iris.cube import Cube
from iris.coords import DimCoord, AuxCoord
#
# Generate a really simple timestepped data sequence
#
# All in one cube, to start with.
nt, ny, nx = 10, 5, 3
timeco = DimCoord(np.arange(nt), standard_name='time',
                  units='days since 2021-01-01')
yco = DimCoord(np.linspace(-90., 90., ny, endpoint=True),
               standard_name='latitude', units='degrees')
xco = DimCoord(np.linspace(-180., 180., nx, endpoint=True),
               standard_name='longitude', units='degrees')
data = np.arange(np.prod((nt, ny, nx))).reshape((nt, ny, nx))
testdata_cube = Cube(data, long_name='odd_data')
testdata_cube.add_dim_coord(timeco, 0)
testdata_cube.add_dim_coord(yco, 1)
testdata_cube.add_dim_coord(xco, 2)
# Just for interest, add an arbitrary attribute.
testdata_cube.attributes['extra'] = 'special'
# Save all this as intermediate data in *separate* timestep files.
for it in range(nt):
    filename = f'timestep_{it}.nc'
    iris.save(testdata_cube[it], filename)
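# Each file now holds a single 2d (y, x) field, with 'time' as a scalar coord.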
#
# Load the separate timesteps as a list of Iris cubes
#
cubes = iris.load_raw('timestep_*.nc')
# N.B. raw-load order is not guaranteed, so sort the cubes by time first.
cubes = sorted(cubes, key=lambda cube: cube.coord('time').points[0])
first_cube, rest_cubes = cubes[0], cubes[1:]
#
# Save the first_cube to make a new file with an unlimited dimension.
#
# First ensure it has an actual time dimension instead of a scalar time coord.
from iris.util import new_axis
first_cube = new_axis(first_cube, 'time')
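# new_axis promotes the scalar 'time' coord to a new length-1 leading
# dimension, i.e. first_cube.shape is now (1, ny, nx) rather than (ny, nx).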
combined_filename = 'recombined_timesteps.nc'
iris.save(first_cube, combined_filename, unlimited_dimensions=['time'])
#
# Take the other cubes in turn, and append the time coord and main variable data.
#
# For this we must use the low-level netCDF4 interface.
import netCDF4 as nc
ds = nc.Dataset(combined_filename, 'r+')
time_var = ds.variables['time']
data_var = ds.variables['odd_data']
# Just check some essential facts about the file..
assert ds.dimensions['time'].isunlimited()
assert time_var.dimensions == ('time',)
assert data_var.dimensions[0] == 'time'
for var in ds.variables.values():
    if var not in (time_var, data_var):
        assert 'time' not in var.dimensions
# Now add in the data from extra timesteps.
# N.B. one could also open and close the dataset for each update
# (a sketch of that approach follows, after the loop).
for i_cube, timestep_cube in enumerate(rest_cubes):
    it = i_cube + 1
    time_point = timestep_cube.coord('time').points  # always a single point
    time_var[it:it+1] = time_point
    data_var[it:it+1] = timestep_cube.data
# Close the datafile again.
ds.close()
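# As noted above, each update could instead use its own open/close cycle.
# A minimal sketch of such a helper (hypothetical name 'append_timestep';
# assumes the same variable names and file layout checked above):
def append_timestep(path, cube):
    with nc.Dataset(path, 'r+') as dataset:
        # Next free index along the unlimited 'time' dimension.
        i_next = len(dataset.dimensions['time'])
        # Writing at that index grows the unlimited dimension by one.
        dataset.variables['time'][i_next] = cube.coord('time').points[0]
        dataset.variables['odd_data'][i_next] = cube.data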
#
# Check results
#
# Load back the 'combined' file.
result = iris.load_cube(combined_filename)
print(result)
# Try to compare this with the original ...
# For this, we must remove additional info added by the save process.
# Remove all the var_names
result.var_name = None
for coord in result.coords():
    coord.var_name = None
# Remove the extra Conventions attribute.
del result.attributes['Conventions']
# It now matches the original test data *precisely* (including all data)
print('')
print('Split + re-concatenated == original ? ', result == testdata_cube)
# QED !
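# For comparison: when all the timestep files already exist, Iris can
# recombine the raw cubes directly with a merge, with no low-level file
# editing needed (the append approach above suits timesteps arriving
# incrementally). A sketch:
merged_cube = iris.load_raw('timestep_*.nc').merge_cube()
print('Merged shape (expected (10, 5, 3)): ', merged_cube.shape)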