Created
April 14, 2018 12:32
-
-
Save rsignell-usgs/dce09aae4f7cd174a141247a56ddea2c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Write National Water Model (NWM) model data to Zarr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client, progress, LocalCluster\n", | |
"import pandas as pd\n", | |
"import xarray as xr\n", | |
"import s3fs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table style=\"border: 2px solid white;\">\n", | |
"<tr>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3>Client</h3>\n", | |
"<ul>\n", | |
" <li><b>Scheduler: </b>tcp://127.0.0.1:42571\n", | |
" <li><b>Dashboard: </b><a href='http://127.0.0.1:46734/status' target='_blank'>http://127.0.0.1:46734/status</a>\n", | |
"</ul>\n", | |
"</td>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3>Cluster</h3>\n", | |
"<ul>\n", | |
" <li><b>Workers: </b>2</li>\n", | |
" <li><b>Cores: </b>2</li>\n", | |
" <li><b>Memory: </b>8.37 GB</li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Client: scheduler='tcp://127.0.0.1:42571' processes=2 cores=2>" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# depends on the machine you are using\n", | |
"cluster = LocalCluster()\n", | |
"client = Client(cluster)\n", | |
"\n", | |
"client " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dates = pd.date_range(start='2018-04-07T18:00', end='2018-04-07T20:00', freq='H')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"urls = ['{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(root,a.strftime('%Y%m%d'),a.strftime('%H')) for a in dates]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 156 ms, sys: 28 ms, total: 184 ms\n", | |
"Wall time: 2.15 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time \n", | |
"ds = xr.open_mfdataset(urls,concat_dim='time')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (nv: 2, reference_time: 3, time: 3, x: 4608, y: 3840)\n", | |
"Coordinates:\n", | |
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n", | |
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 ...\n", | |
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 ...\n", | |
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n", | |
"Dimensions without coordinates: nv\n", | |
"Data variables:\n", | |
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(3, 2), chunksize=(1, 2)>\n", | |
" ProjectionCoordinateSystem (time) |S64 b'' b'' b''\n", | |
" T2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" LWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" Q2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" U2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" V2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" PSFC (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" RAINRATE (time, y, x) float32 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" SWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
"Attributes:\n", | |
" model_initialization_time: 2018-04-07_18:00:00\n", | |
" model_output_valid_time: 2018-04-07_19:00:00\n", | |
" DODS.strlen: 0\n", | |
" DODS_EXTRA.Unlimited_Dimension: time" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"dtype('S64')" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds['ProjectionCoordinateSystem'].dtype" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#ds = ds.drop(['ProjectionCoordinateSystem'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.Dataset>\n", | |
"Dimensions: (nv: 2, reference_time: 3, time: 3, x: 4608, y: 3840)\n", | |
"Coordinates:\n", | |
" * reference_time (reference_time) datetime64[ns] 2018-04-07T18:00:00 ...\n", | |
" * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 ...\n", | |
" * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 ...\n", | |
" * time (time) datetime64[ns] 2018-04-07T19:00:00 ...\n", | |
"Dimensions without coordinates: nv\n", | |
"Data variables:\n", | |
" time_bounds (time, nv) datetime64[ns] dask.array<shape=(3, 2), chunksize=(1, 2)>\n", | |
" ProjectionCoordinateSystem (time) |S64 b'' b'' b''\n", | |
" T2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" LWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" Q2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" U2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" V2D (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" PSFC (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" RAINRATE (time, y, x) float32 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
" SWDOWN (time, y, x) float64 dask.array<shape=(3, 3840, 4608), chunksize=(1, 3840, 4608)>\n", | |
"Attributes:\n", | |
" model_initialization_time: 2018-04-07_18:00:00\n", | |
" model_output_valid_time: 2018-04-07_19:00:00\n", | |
" DODS.strlen: 0\n", | |
" DODS_EXTRA.Unlimited_Dimension: time" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fs = s3fs.S3FileSystem(anon=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"f_zarr = 'rsignell/nwm/test02'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d = s3fs.S3Map(f_zarr, s3=fs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 8.84 s, sys: 1.2 s, total: 10 s\n", | |
"Wall time: 3min 35s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.backends.zarr.ZarrStore at 0x7fcbb9fa85f8>" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%time ds.to_zarr(store=d, mode='w')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Test to see if we can read what we wrote" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "Chunks and shape must be of the same length/dimension. Got chunks=(3, 64), shape=(3,)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-15-8198db1c8578>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mds2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_zarr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/backends/zarr.py\u001b[0m in \u001b[0;36mopen_zarr\u001b[0;34m(store, group, synchronizer, auto_chunk, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 477\u001b[0m variables = OrderedDict([(k, maybe_chunk(k, v))\n\u001b[0;32m--> 478\u001b[0;31m for k, v in ds.variables.items()])\n\u001b[0m\u001b[1;32m 479\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_replace_vars_and_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 480\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/backends/zarr.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 477\u001b[0m variables = OrderedDict([(k, maybe_chunk(k, v))\n\u001b[0;32m--> 478\u001b[0;31m for k, v in ds.variables.items()])\n\u001b[0m\u001b[1;32m 479\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_replace_vars_and_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 480\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/backends/zarr.py\u001b[0m in \u001b[0;36mmaybe_chunk\u001b[0;34m(name, var)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[0mtoken2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[0mname2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'zarr-%s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mtoken2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 473\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 474\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 475\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mvar\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/xarray/core/variable.py\u001b[0m in \u001b[0;36mchunk\u001b[0;34m(self, chunks, name, lock)\u001b[0m\n\u001b[1;32m 820\u001b[0m data = indexing.ImplicitToExplicitIndexingAdapter(\n\u001b[1;32m 821\u001b[0m data, indexing.OuterIndexer)\n\u001b[0;32m--> 822\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlock\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 823\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 824\u001b[0m return type(self)(self.dims, data, self._attrs, self._encoding,\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/dask/array/core.py\u001b[0m in \u001b[0;36mfrom_array\u001b[0;34m(x, chunks, name, lock, asarray, fancy, getitem)\u001b[0m\n\u001b[1;32m 1988\u001b[0m \u001b[0;34m>>\u001b[0m\u001b[0;34m>\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# doctest: +SKIP\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1989\u001b[0m \"\"\"\n\u001b[0;32m-> 1990\u001b[0;31m \u001b[0mchunks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnormalize_chunks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1991\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1992\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/dask/array/core.py\u001b[0m in \u001b[0;36mnormalize_chunks\u001b[0;34m(chunks, shape)\u001b[0m\n\u001b[1;32m 1918\u001b[0m raise ValueError(\n\u001b[1;32m 1919\u001b[0m \u001b[0;34m\"Chunks and shape must be of the same length/dimension. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1920\u001b[0;31m \"Got chunks=%s, shape=%s\" % (chunks, shape))\n\u001b[0m\u001b[1;32m 1921\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1922\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: Chunks and shape must be of the same length/dimension. Got chunks=(3, 64), shape=(3,)" | |
] | |
} | |
], | |
"source": [ | |
"ds2 = xr.open_zarr(d)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"isub=4\n", | |
"ds2['T2D'][0,::isub,::isub].plot.imshow()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds2['T2D'][:,1000,1000].plot()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment