Skip to content

Instantly share code, notes, and snippets.

@rsignell-usgs
Created April 14, 2018 12:32
Show Gist options
  • Save rsignell-usgs/dce09aae4f7cd174a141247a56ddea2c to your computer and use it in GitHub Desktop.
Save rsignell-usgs/dce09aae4f7cd174a141247a56ddea2c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Write National Water Model (NWM) model data to Zarr"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from dask.distributed import Client, progress, LocalCluster\n",
"import pandas as pd\n",
"import xarray as xr\n",
"import s3fs"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://127.0.0.1:42571\n",
" <li><b>Dashboard: </b><a href='http://127.0.0.1:46734/status' target='_blank'>http://127.0.0.1:46734/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>2</li>\n",
" <li><b>Cores: </b>2</li>\n",
" <li><b>Memory: </b>8.37 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://127.0.0.1:42571' processes=2 cores=2>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# depends on the machine you are using\n",
"cluster = LocalCluster()\n",
"client = Client(cluster)\n",
"\n",
"client "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"dates = pd.date_range(start='2018-04-07T18:00', end='2018-04-07T20:00', freq='H')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"urls = ['{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(root,a.strftime('%Y%m%d'),a.strftime('%H')) for a in dates]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 156 ms, sys: 28 ms, total: 184 ms\n",
"Wall time: 2.15 s\n"
]
}
],
"source": [
"%%time \n",
"ds = xr.open_mfdataset(urls,concat_dim='time')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (nv: 2, reference_time: 3, time: 3, x: 4608, y: 3840)\n",
"Coordinates:\n",
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n",
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 ...\n",
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 ...\n",
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n",
"Dimensions without coordinates: nv\n",
"Data variables:\n",
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(3, 2), chunksize=(1, 2)>\n",
" ProjectionCoordinateSystem (time) |S64 b'' b'' b''\n",
" T2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" LWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" Q2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" U2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" V2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" PSFC (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" RAINRATE (time, y, x) float32 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" SWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
"Attributes:\n",
" model_initialization_time: 2018-04-07_18:00:00\n",
" model_output_valid_time: 2018-04-07_19:00:00\n",
" DODS.strlen: 0\n",
" DODS_EXTRA.Unlimited_Dimension: time"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('S64')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds['ProjectionCoordinateSystem'].dtype"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#ds = ds.drop(['ProjectionCoordinateSystem'])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (nv: 2, reference_time: 3, time: 3, x: 4608, y: 3840)\n",
"Coordinates:\n",
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n",
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 ...\n",
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 ...\n",
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n",
"Dimensions without coordinates: nv\n",
"Data variables:\n",
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(3, 2), chunksize=(1, 2)>\n",
" ProjectionCoordinateSystem (time) |S64 b'' b'' b''\n",
" T2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" LWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" Q2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" U2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" V2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" PSFC (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" RAINRATE (time, y, x) float32 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
" SWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n",
"Attributes:\n",
" model_initialization_time: 2018-04-07_18:00:00\n",
" model_output_valid_time: 2018-04-07_19:00:00\n",
" DODS.strlen: 0\n",
" DODS_EXTRA.Unlimited_Dimension: time"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"fs = s3fs.S3FileSystem(anon=False)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"f_zarr = 'rsignell/nwm/test02'"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"d = s3fs.S3Map(f_zarr, s3=fs)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 8.84 s, sys: 1.2 s, total: 10 s\n",
"Wall time: 3min 35s\n"
]
},
{
"data": {
"text/plain": [
"<xarray.backends.zarr.ZarrStore at 0x7fcbb9fa85f8>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time ds.to_zarr(store=d, mode='w')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test to see if we can read what we wrote"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Chunks and shape must be of the same length/dimension. Got chunks=(3, 64), shape=(3,)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-15-8198db1c8578>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mds2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_zarr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/backends/zarr.py\u001b[0m in \u001b[0;36mopen_zarr\u001b[0;34m(store, group, synchronizer, auto_chunk, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 477\u001b[0m variables = OrderedDict([(k, maybe_chunk(k, v))\n\u001b[0;32m--> 478\u001b[0;31m for k, v in ds.variables.items()])\n\u001b[0m\u001b[1;32m 479\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_replace_vars_and_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 480\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/backends/zarr.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 477\u001b[0m variables = OrderedDict([(k, maybe_chunk(k, v))\n\u001b[0;32m--> 478\u001b[0;31m for k, v in ds.variables.items()])\n\u001b[0m\u001b[1;32m 479\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_replace_vars_and_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 480\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/backends/zarr.py\u001b[0m in \u001b[0;36mmaybe_chunk\u001b[0;34m(name, var)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[0mtoken2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[0mname2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'zarr-%s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mtoken2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 473\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 474\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 475\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvar\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/core/variable.py\u001b[0m in \u001b[0;36mchunk\u001b[0;34m(self, chunks, name, lock)\u001b[0m\n\u001b[1;32m 820\u001b[0m data = indexing.ImplicitToExplicitIndexingAdapter(\n\u001b[1;32m 821\u001b[0m data, indexing.OuterIndexer)\n\u001b[0;32m--> 822\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlock\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 823\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 824\u001b[0m return type(self)(self.dims, data, self._attrs, self._encoding,\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/dask/array/core.py\u001b[0m in \u001b[0;36mfrom_array\u001b[0;34m(x, chunks, name, lock, asarray, fancy, getitem)\u001b[0m\n\u001b[1;32m 1988\u001b[0m \u001b[0;34m>>\u001b[0m\u001b[0;34m>\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# doctest: +SKIP\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1989\u001b[0m \"\"\"\n\u001b[0;32m-> 1990\u001b[0;31m \u001b[0mchunks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnormalize_chunks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1991\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1992\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/dask/array/core.py\u001b[0m in \u001b[0;36mnormalize_chunks\u001b[0;34m(chunks, shape)\u001b[0m\n\u001b[1;32m 1918\u001b[0m raise ValueError(\n\u001b[1;32m 1919\u001b[0m \u001b[0;34m\"Chunks and shape must be of the same length/dimension. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1920\u001b[0;31m \"Got chunks=%s, shape=%s\" % (chunks, shape))\n\u001b[0m\u001b[1;32m 1921\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1922\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Chunks and shape must be of the same length/dimension. Got chunks=(3, 64), shape=(3,)"
]
}
],
"source": [
"ds2 = xr.open_zarr(d)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"isub=4\n",
"ds2['T2D'][0,::isub,::isub].plot.imshow()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds2['T2D'][:,1000,1000].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment