Skip to content

Instantly share code, notes, and snippets.

@rsignell-usgs
Last active January 3, 2018 21:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rsignell-usgs/1009ac7fd4193a56ddd581cf11701688 to your computer and use it in GitHub Desktop.
Save rsignell-usgs/1009ac7fd4193a56ddd581cf11701688 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convert NetCDF4 file to HSDS, with custom chunking"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the NetCDF4 file with xarray so that we can read its variables and their attributes."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import xarray as xr\n",
"import numpy as np\n",
"import h5pyd as h5py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"infile = '/notebooks/rsignell/data/CFSR/tmp2m_2months.nc'\n",
"outfile = '/home/rsignell/tmp2m_2months_rechunked.nc'\n",
"#outfile = '/notebooks/rsignell/data/CFSR/foo.nc'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"ds = xr.open_dataset(infile, decode_cf=False)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Frozen(OrderedDict([('latitude', <xarray.IndexVariable 'latitude' (latitude: 880)>\n",
"array([-89.843515, -89.640798, -89.436886, ..., 89.436886, 89.640798,\n",
" 89.843515])\n",
"Attributes:\n",
" units: degrees_north\n",
" long_name: latitude), ('longitude', <xarray.IndexVariable 'longitude' (longitude: 1760)>\n",
"array([ 0.000000e+00, 2.045452e-01, 4.090904e-01, ..., 3.593859e+02,\n",
" 3.595905e+02, 3.597950e+02])\n",
"Attributes:\n",
" units: degrees_east\n",
" long_name: longitude), ('time', <xarray.IndexVariable 'time' (time: 1416)>\n",
"array([ 1.483232e+09, 1.483236e+09, 1.483240e+09, ..., 1.488319e+09,\n",
" 1.488323e+09, 1.488326e+09])\n",
"Attributes:\n",
" units: seconds since 1970-01-01 00:00:00.0 0:00\n",
" long_name: verification time generated by wgrib2 functi...\n",
" reference_time: 1483228800.0\n",
" reference_time_type: 0\n",
" reference_date: 2017.01.01 00:00:00 UTC\n",
" reference_time_description: kind of product unclear, reference date is v...\n",
" time_step_setting: auto\n",
" time_step: 3600.0), ('TMP_2maboveground', <xarray.Variable (time: 1416, latitude: 880, longitude: 1760)>\n",
"[2193100800 values with dtype=float32]\n",
"Attributes:\n",
" _FillValue: 9.999e+20\n",
" short_name: TMP_2maboveground\n",
" long_name: Temperature\n",
" level: 2 m above ground\n",
" units: K)]))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.variables"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"f = h5py.File(outfile, 'w')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Copy the global (file-level) attributes from the input dataset to the output file.\n",
"for key, val in ds.attrs.items():\n",
"    if isinstance(val, str):\n",
"        f.attrs[key] = val\n",
"    else:\n",
"        # np.asarray supplies both shape and dtype, so numpy scalars, arrays\n",
"        # and plain Python numbers are all handled (same approach as the\n",
"        # per-variable attribute copy further down).\n",
"        f.attrs.create(key, np.asarray(val))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'COARDS'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.attrs['Conventions']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(880,)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds['latitude'].data.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds['latitude'].data.dtype"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"latitude (880,) None\n",
"longitude (1760,) None\n",
"time (1416,) None\n",
"TMP_2maboveground (1416, 880, 1760) None\n"
]
}
],
"source": [
"for key, val in ds.variables.items():\n",
" print(key, val.shape, val.chunks)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Just specify the chunk sizes for those vars that need rechunking"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds['TMP_2maboveground'].attrs['chunks'] = (4, 220, 440)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"latitude\n",
"units degrees_north\n",
"long_name latitude\n",
"longitude\n",
"units degrees_east\n",
"long_name longitude\n",
"time\n",
"units seconds since 1970-01-01 00:00:00.0 0:00\n",
"long_name verification time generated by wgrib2 function verftime()\n",
"reference_time 1483228800.0\n",
"reference_time_type 0\n",
"reference_date 2017.01.01 00:00:00 UTC\n",
"reference_time_description kind of product unclear, reference date is variable, min found reference date is given\n",
"time_step_setting auto\n",
"time_step 3600.0\n",
"TMP_2maboveground\n"
]
}
],
"source": [
"# Copy every variable (data + attributes) into the output file.\n",
"for key, val in ds.variables.items():\n",
"    print(key)\n",
"    # The requested chunk shape was stored in attrs['chunks'] above.\n",
"    # val.chunks is None for every variable here (see the printout earlier),\n",
"    # so it cannot carry the custom chunking; it is kept only as the fallback.\n",
"    # Work on a copy so 'chunks' is not written out as a dataset attribute.\n",
"    var_attrs = dict(val.attrs)\n",
"    chunk_shape = var_attrs.pop('chunks', val.chunks)\n",
"    dset = f.create_dataset(key, data=val.data, chunks=chunk_shape)\n",
"    for k, v in var_attrs.items():\n",
"        print(k, v)\n",
"        if isinstance(v, str):\n",
"            dset.attrs[k] = v\n",
"        else:\n",
"            dset.attrs.create(k, np.array(v))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Look the data variable up by name instead of relying on `dset`,\n",
"# the loop variable left over from the variable-copy cell.\n",
"temp_dset = f['/TMP_2maboveground']\n",
"\n",
"# Creating dimension scales\n",
"# (the h5py-style DimensionManager method is create_scale)\n",
"temp_dset.dims.create_scale(f['/latitude'], 'latitude')\n",
"temp_dset.dims.create_scale(f['/time'], 'time')\n",
"temp_dset.dims.create_scale(f['/longitude'], 'longitude')\n",
"\n",
"# Attaching dimension scales to dataset: /TMP_2maboveground\n",
"# Axis order is (time, latitude, longitude), matching the variable's shape.\n",
"temp_dset.dims[0].attach_scale(f['/time'])\n",
"temp_dset.dims[1].attach_scale(f['/latitude'])\n",
"temp_dset.dims[2].attach_scale(f['/longitude'])\n",
"\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:h5pyd]",
"language": "python",
"name": "conda-env-h5pyd-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment