Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save suvarchal/3b42bdd706f5a40a66cc76213166e364 to your computer and use it in GitHub Desktop.
Save suvarchal/3b42bdd706f5a40a66cc76213166e364 to your computer and use it in GitHub Desktop.
quick demo of chunking arrays and storing
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "fdc1b0d8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Until now we have seen how any object can be stored in S3, and data in particular as blobs,\n",
"# but to get the most out of scientific data we need to introduce the idea of chunking arrays."
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "723e2b24-3fa7-4f6e-851e-7497e80fa153",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import dask.array as da\n",
"import zarr"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "cb82ff69",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body[data-theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block !important;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-index-preview {\n",
" grid-column: 2 / 5;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data,\n",
".xr-index-data-in:checked ~ .xr-index-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-index-name div,\n",
".xr-index-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt,\n",
".xr-attrs dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2,\n",
".xr-no-icon {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.Dataset&gt;\n",
"Dimensions: (lat: 25, time: 2920, lon: 53)\n",
"Coordinates:\n",
" * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
" * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
" * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
"Data variables:\n",
" air (time, lat, lon) float32 241.2 242.5 243.5 ... 296.5 296.2 295.7\n",
"Attributes:\n",
" Conventions: COARDS\n",
" title: 4x daily NMC reanalysis (1948)\n",
" description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n",
" platform: Model\n",
" references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-974d706e-411c-4cde-a8fe-0d02f712bf7c' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-974d706e-411c-4cde-a8fe-0d02f712bf7c' class='xr-section-summary' title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span class='xr-has-index'>lat</span>: 25</li><li><span class='xr-has-index'>time</span>: 2920</li><li><span class='xr-has-index'>lon</span>: 53</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-c6dce3a0-9f41-4d97-a311-84eb882b3f33' class='xr-section-summary-in' type='checkbox' checked><label for='section-c6dce3a0-9f41-4d97-a311-84eb882b3f33' class='xr-section-summary' >Coordinates: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>lat</span></div><div class='xr-var-dims'>(lat)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>75.0 72.5 70.0 ... 
20.0 17.5 15.0</div><input id='attrs-75f7b2f9-f1f3-435c-9a08-84293d5301bc' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-75f7b2f9-f1f3-435c-9a08-84293d5301bc' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-6ec53786-b0fa-4a8b-9455-981bb52efe0a' class='xr-var-data-in' type='checkbox'><label for='data-6ec53786-b0fa-4a8b-9455-981bb52efe0a' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>standard_name :</span></dt><dd>latitude</dd><dt><span>long_name :</span></dt><dd>Latitude</dd><dt><span>units :</span></dt><dd>degrees_north</dd><dt><span>axis :</span></dt><dd>Y</dd></dl></div><div class='xr-var-data'><pre>array([75. , 72.5, 70. , 67.5, 65. , 62.5, 60. , 57.5, 55. , 52.5, 50. , 47.5,\n",
" 45. , 42.5, 40. , 37.5, 35. , 32.5, 30. , 27.5, 25. , 22.5, 20. , 17.5,\n",
" 15. ], dtype=float32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>lon</span></div><div class='xr-var-dims'>(lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>200.0 202.5 205.0 ... 327.5 330.0</div><input id='attrs-3aed7203-8836-4f81-8437-13ea8239e7e0' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-3aed7203-8836-4f81-8437-13ea8239e7e0' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-1f12423e-bb3d-488e-8a4c-6fb9ae06293d' class='xr-var-data-in' type='checkbox'><label for='data-1f12423e-bb3d-488e-8a4c-6fb9ae06293d' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>standard_name :</span></dt><dd>longitude</dd><dt><span>long_name :</span></dt><dd>Longitude</dd><dt><span>units :</span></dt><dd>degrees_east</dd><dt><span>axis :</span></dt><dd>X</dd></dl></div><div class='xr-var-data'><pre>array([200. , 202.5, 205. , 207.5, 210. , 212.5, 215. , 217.5, 220. , 222.5,\n",
" 225. , 227.5, 230. , 232.5, 235. , 237.5, 240. , 242.5, 245. , 247.5,\n",
" 250. , 252.5, 255. , 257.5, 260. , 262.5, 265. , 267.5, 270. , 272.5,\n",
" 275. , 277.5, 280. , 282.5, 285. , 287.5, 290. , 292.5, 295. , 297.5,\n",
" 300. , 302.5, 305. , 307.5, 310. , 312.5, 315. , 317.5, 320. , 322.5,\n",
" 325. , 327.5, 330. ], dtype=float32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>time</span></div><div class='xr-var-dims'>(time)</div><div class='xr-var-dtype'>datetime64[ns]</div><div class='xr-var-preview xr-preview'>2013-01-01 ... 2014-12-31T18:00:00</div><input id='attrs-2dad0c4b-2a4a-40c7-8f35-ab80a9c0391d' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-2dad0c4b-2a4a-40c7-8f35-ab80a9c0391d' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-b008735a-e87c-40e9-bd5e-cb83fc6bc2aa' class='xr-var-data-in' type='checkbox'><label for='data-b008735a-e87c-40e9-bd5e-cb83fc6bc2aa' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>standard_name :</span></dt><dd>time</dd><dt><span>long_name :</span></dt><dd>Time</dd></dl></div><div class='xr-var-data'><pre>array([&#x27;2013-01-01T00:00:00.000000000&#x27;, &#x27;2013-01-01T06:00:00.000000000&#x27;,\n",
" &#x27;2013-01-01T12:00:00.000000000&#x27;, ..., &#x27;2014-12-31T06:00:00.000000000&#x27;,\n",
" &#x27;2014-12-31T12:00:00.000000000&#x27;, &#x27;2014-12-31T18:00:00.000000000&#x27;],\n",
" dtype=&#x27;datetime64[ns]&#x27;)</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-afbe5ce9-0068-401b-8421-f83c34d8499b' class='xr-section-summary-in' type='checkbox' checked><label for='section-afbe5ce9-0068-401b-8421-f83c34d8499b' class='xr-section-summary' >Data variables: <span>(1)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>air</span></div><div class='xr-var-dims'>(time, lat, lon)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>241.2 242.5 243.5 ... 296.2 295.7</div><input id='attrs-71c8ee9c-0897-4068-b414-339c2511a8ec' class='xr-var-attrs-in' type='checkbox' ><label for='attrs-71c8ee9c-0897-4068-b414-339c2511a8ec' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-af44cdcc-b8e0-4ead-91a2-e6a72d69898f' class='xr-var-data-in' type='checkbox'><label for='data-af44cdcc-b8e0-4ead-91a2-e6a72d69898f' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'><dt><span>long_name :</span></dt><dd>4xDaily Air temperature at sigma level 995</dd><dt><span>units :</span></dt><dd>degK</dd><dt><span>precision :</span></dt><dd>2</dd><dt><span>GRIB_id :</span></dt><dd>11</dd><dt><span>GRIB_name :</span></dt><dd>TMP</dd><dt><span>var_desc :</span></dt><dd>Air temperature</dd><dt><span>dataset :</span></dt><dd>NMC Reanalysis</dd><dt><span>level_desc :</span></dt><dd>Surface</dd><dt><span>statistic :</span></dt><dd>Individual Obs</dd><dt><span>parent_stat :</span></dt><dd>Other</dd><dt><span>actual_range :</span></dt><dd>[185.16 322.1 ]</dd></dl></div><div class='xr-var-data'><pre>array([[[241.2 , 242.5 , 243.5 , ..., 232.79999, 235.5 ,\n",
" 238.59999],\n",
" [243.79999, 244.5 , 244.7 , ..., 232.79999, 235.29999,\n",
" 239.29999],\n",
" [250. , 249.79999, 248.89 , ..., 233.2 , 236.39 ,\n",
" 241.7 ],\n",
" ...,\n",
" [296.6 , 296.19998, 296.4 , ..., 295.4 , 295.1 ,\n",
" 294.69998],\n",
" [295.9 , 296.19998, 296.79 , ..., 295.9 , 295.9 ,\n",
" 295.19998],\n",
" [296.29 , 296.79 , 297.1 , ..., 296.9 , 296.79 ,\n",
" 296.6 ]],\n",
"\n",
" [[242.09999, 242.7 , 243.09999, ..., 232. , 233.59999,\n",
" 235.79999],\n",
" [243.59999, 244.09999, 244.2 , ..., 231. , 232.5 ,\n",
" 235.7 ],\n",
" [253.2 , 252.89 , 252.09999, ..., 230.79999, 233.39 ,\n",
" 238.5 ],\n",
"...\n",
" [293.69 , 293.88998, 295.38998, ..., 295.09 , 294.69 ,\n",
" 294.29 ],\n",
" [296.29 , 297.19 , 297.59 , ..., 295.29 , 295.09 ,\n",
" 294.38998],\n",
" [297.79 , 298.38998, 298.49 , ..., 295.69 , 295.49 ,\n",
" 295.19 ]],\n",
"\n",
" [[245.09 , 244.29 , 243.29 , ..., 241.68999, 241.48999,\n",
" 241.79 ],\n",
" [249.89 , 249.29 , 248.39 , ..., 239.59 , 240.29 ,\n",
" 241.68999],\n",
" [262.99 , 262.19 , 261.38998, ..., 239.89 , 242.59 ,\n",
" 246.29 ],\n",
" ...,\n",
" [293.79 , 293.69 , 295.09 , ..., 295.29 , 295.09 ,\n",
" 294.69 ],\n",
" [296.09 , 296.88998, 297.19 , ..., 295.69 , 295.69 ,\n",
" 295.19 ],\n",
" [297.69 , 298.09 , 298.09 , ..., 296.49 , 296.19 ,\n",
" 295.69 ]]], dtype=float32)</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-ae6bebf7-9bbc-4ae3-9994-3a6b0d9d86a9' class='xr-section-summary-in' type='checkbox' ><label for='section-ae6bebf7-9bbc-4ae3-9994-3a6b0d9d86a9' class='xr-section-summary' >Indexes: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>lat</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-84aef837-f7fc-4128-be85-739cf1d2cd69' class='xr-index-data-in' type='checkbox'/><label for='index-84aef837-f7fc-4128-be85-739cf1d2cd69' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Float64Index([75.0, 72.5, 70.0, 67.5, 65.0, 62.5, 60.0, 57.5, 55.0, 52.5, 50.0,\n",
" 47.5, 45.0, 42.5, 40.0, 37.5, 35.0, 32.5, 30.0, 27.5, 25.0, 22.5,\n",
" 20.0, 17.5, 15.0],\n",
" dtype=&#x27;float64&#x27;, name=&#x27;lat&#x27;))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>lon</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-ae7792f4-3096-4fa0-8d7e-34a69d3a0c93' class='xr-index-data-in' type='checkbox'/><label for='index-ae7792f4-3096-4fa0-8d7e-34a69d3a0c93' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Float64Index([200.0, 202.5, 205.0, 207.5, 210.0, 212.5, 215.0, 217.5, 220.0,\n",
" 222.5, 225.0, 227.5, 230.0, 232.5, 235.0, 237.5, 240.0, 242.5,\n",
" 245.0, 247.5, 250.0, 252.5, 255.0, 257.5, 260.0, 262.5, 265.0,\n",
" 267.5, 270.0, 272.5, 275.0, 277.5, 280.0, 282.5, 285.0, 287.5,\n",
" 290.0, 292.5, 295.0, 297.5, 300.0, 302.5, 305.0, 307.5, 310.0,\n",
" 312.5, 315.0, 317.5, 320.0, 322.5, 325.0, 327.5, 330.0],\n",
" dtype=&#x27;float64&#x27;, name=&#x27;lon&#x27;))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>time</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-8483ed2a-6156-4c04-9b8a-741152ba77df' class='xr-index-data-in' type='checkbox'/><label for='index-8483ed2a-6156-4c04-9b8a-741152ba77df' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(DatetimeIndex([&#x27;2013-01-01 00:00:00&#x27;, &#x27;2013-01-01 06:00:00&#x27;,\n",
" &#x27;2013-01-01 12:00:00&#x27;, &#x27;2013-01-01 18:00:00&#x27;,\n",
" &#x27;2013-01-02 00:00:00&#x27;, &#x27;2013-01-02 06:00:00&#x27;,\n",
" &#x27;2013-01-02 12:00:00&#x27;, &#x27;2013-01-02 18:00:00&#x27;,\n",
" &#x27;2013-01-03 00:00:00&#x27;, &#x27;2013-01-03 06:00:00&#x27;,\n",
" ...\n",
" &#x27;2014-12-29 12:00:00&#x27;, &#x27;2014-12-29 18:00:00&#x27;,\n",
" &#x27;2014-12-30 00:00:00&#x27;, &#x27;2014-12-30 06:00:00&#x27;,\n",
" &#x27;2014-12-30 12:00:00&#x27;, &#x27;2014-12-30 18:00:00&#x27;,\n",
" &#x27;2014-12-31 00:00:00&#x27;, &#x27;2014-12-31 06:00:00&#x27;,\n",
" &#x27;2014-12-31 12:00:00&#x27;, &#x27;2014-12-31 18:00:00&#x27;],\n",
" dtype=&#x27;datetime64[ns]&#x27;, name=&#x27;time&#x27;, length=2920, freq=None))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-4b9f8d21-0cf9-4427-8ffd-3c6cb02c0aa5' class='xr-section-summary-in' type='checkbox' checked><label for='section-4b9f8d21-0cf9-4427-8ffd-3c6cb02c0aa5' class='xr-section-summary' >Attributes: <span>(5)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'><dt><span>Conventions :</span></dt><dd>COARDS</dd><dt><span>title :</span></dt><dd>4x daily NMC reanalysis (1948)</dd><dt><span>description :</span></dt><dd>Data is from NMC initialized reanalysis\n",
"(4x/day). These are the 0.9950 sigma level values.</dd><dt><span>platform :</span></dt><dd>Model</dd><dt><span>references :</span></dt><dd>http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html</dd></dl></div></li></ul></div></div>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (lat: 25, time: 2920, lon: 53)\n",
"Coordinates:\n",
" * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
" * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
" * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
"Data variables:\n",
" air (time, lat, lon) float32 241.2 242.5 243.5 ... 296.5 296.2 295.7\n",
"Attributes:\n",
" Conventions: COARDS\n",
" title: 4x daily NMC reanalysis (1948)\n",
" description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n",
" platform: Model\n",
" references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..."
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This cell can optionally be hidden; it serves to make the point of how to work with numpy arrays\n",
"import xarray as xr\n",
"airds=xr.tutorial.load_dataset(\"air_temperature\")\n",
"airds"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "9f51c31d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(2920, 25, 53)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Assume a numpy array of temperature with this shape\n",
"air_ndarray = airds.air.values \n",
"air_ndarray.shape"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "83de0fa7",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"281.25513"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# now for instance we can compute mean using standard numpy methods\n",
"air_ndarray.mean() # all dims, all values, serial computation"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "0f3dd587-1fd4-4caa-8a6b-a8e5faf399e3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Now let's chunk the data, save it in S3, retrieve it, and compute the mean again"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "982e8f39",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Bytes </th>\n",
" <td> 14.76 MiB </td>\n",
" <td> 2.53 MiB </td>\n",
" </tr>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> (2920, 25, 53) </td>\n",
" <td> (500, 25, 53) </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 6 chunks in 1 graph layer </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> float32 numpy.ndarray </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" <svg width=\"159\" height=\"146\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"25\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"25\" style=\"stroke-width:2\" />\n",
" <line x1=\"22\" y1=\"12\" x2=\"22\" y2=\"37\" />\n",
" <line x1=\"34\" y1=\"24\" x2=\"34\" y2=\"49\" />\n",
" <line x1=\"46\" y1=\"36\" x2=\"46\" y2=\"61\" />\n",
" <line x1=\"58\" y1=\"48\" x2=\"58\" y2=\"73\" />\n",
" <line x1=\"70\" y1=\"60\" x2=\"70\" y2=\"85\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.0,0.0 80.58823529411765,70.58823529411765 80.58823529411765,96.00085180870013 10.0,25.41261651458249\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"38\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"22\" y1=\"12\" x2=\"50\" y2=\"12\" />\n",
" <line x1=\"34\" y1=\"24\" x2=\"62\" y2=\"24\" />\n",
" <line x1=\"46\" y1=\"36\" x2=\"74\" y2=\"36\" />\n",
" <line x1=\"58\" y1=\"48\" x2=\"86\" y2=\"48\" />\n",
" <line x1=\"70\" y1=\"60\" x2=\"98\" y2=\"60\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"109\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"38\" y1=\"0\" x2=\"109\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.0,0.0 38.48973265594604,0.0 109.0779679500637,70.58823529411765 80.58823529411765,70.58823529411765\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"109\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"96\" x2=\"109\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n",
" <line x1=\"109\" y1=\"70\" x2=\"109\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.58823529411765,70.58823529411765 109.0779679500637,70.58823529411765 109.0779679500637,96.00085180870013 80.58823529411765,96.00085180870013\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"94.833102\" y=\"116.000852\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >53</text>\n",
" <text x=\"129.077968\" y=\"83.294544\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,129.077968,83.294544)\">25</text>\n",
" <text x=\"35.294118\" y=\"80.706734\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,80.706734)\">2920</text>\n",
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<array, shape=(2920, 25, 53), dtype=float32, chunksize=(500, 25, 53), chunktype=numpy.ndarray>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Why use a dask array? It gives users the flexibility to chunk n-d arrays the way they like; if we store a numpy array as a single blob, it is hard to control the layout of parts of the array.\n",
"# This layout can be advantageous for parallel loading of data and computation.\n",
"chunked_array=da.from_array(air_ndarray,chunks=(500,25,53)) # generic size just for demo, in reality how and what is chunk size should be optimized\n",
"chunked_array # describe what below schematic shows, chunks etc"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "98489893",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Now, why zarr? Chunking arrays is one part of the story: once we store each chunk as an object (or as a local file), how do we reconstruct the data array?\n",
"# We also need to store some metadata describing the shape of the data and the chunks used. Zarr does that: in its simplest form it stores a metadata file stating the shape, chunks and\n",
"# file naming convention (see below). In addition, zarr can also compress chunks, which is effective for retrieving data over the network.\n",
"\n",
"chunked_array.to_zarr('air_chunked') # i am using this but use below or some thing else if you want or this if you want to intro zarr locally\n",
"# import fsspec\n",
"# mapper = fsspec.get_mapper(\"s3://s3store/bucket/new.zarr\",\n",
"# key=, secret=, endpoint=)\n",
"# chunked_array.to_zarr(mapper) \n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "38ef1e2f",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0.0 1.0.0 2.0.0 3.0.0 4.0.0 5.0.0\n"
]
}
],
"source": [
"!ls air_chunked/ # see: no chunks along the second and third dimensions"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "4969720d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Bytes </th>\n",
" <td> 14.76 MiB </td>\n",
" <td> 2.53 MiB </td>\n",
" </tr>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> (2920, 25, 53) </td>\n",
" <td> (500, 25, 53) </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 6 chunks in 2 graph layers </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> float32 numpy.ndarray </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" <svg width=\"159\" height=\"146\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"25\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"25\" style=\"stroke-width:2\" />\n",
" <line x1=\"22\" y1=\"12\" x2=\"22\" y2=\"37\" />\n",
" <line x1=\"34\" y1=\"24\" x2=\"34\" y2=\"49\" />\n",
" <line x1=\"46\" y1=\"36\" x2=\"46\" y2=\"61\" />\n",
" <line x1=\"58\" y1=\"48\" x2=\"58\" y2=\"73\" />\n",
" <line x1=\"70\" y1=\"60\" x2=\"70\" y2=\"85\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.0,0.0 80.58823529411765,70.58823529411765 80.58823529411765,96.00085180870013 10.0,25.41261651458249\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"38\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"22\" y1=\"12\" x2=\"50\" y2=\"12\" />\n",
" <line x1=\"34\" y1=\"24\" x2=\"62\" y2=\"24\" />\n",
" <line x1=\"46\" y1=\"36\" x2=\"74\" y2=\"36\" />\n",
" <line x1=\"58\" y1=\"48\" x2=\"86\" y2=\"48\" />\n",
" <line x1=\"70\" y1=\"60\" x2=\"98\" y2=\"60\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"109\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"38\" y1=\"0\" x2=\"109\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.0,0.0 38.48973265594604,0.0 109.0779679500637,70.58823529411765 80.58823529411765,70.58823529411765\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"109\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"96\" x2=\"109\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n",
" <line x1=\"109\" y1=\"70\" x2=\"109\" y2=\"96\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.58823529411765,70.58823529411765 109.0779679500637,70.58823529411765 109.0779679500637,96.00085180870013 80.58823529411765,96.00085180870013\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"94.833102\" y=\"116.000852\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >53</text>\n",
" <text x=\"129.077968\" y=\"83.294544\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,129.077968,83.294544)\">25</text>\n",
" <text x=\"35.294118\" y=\"80.706734\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,80.706734)\">2920</text>\n",
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<from-zarr, shape=(2920, 25, 53), dtype=float32, chunksize=(500, 25, 53), chunktype=numpy.ndarray>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reload the data as \n",
"#air_data=zarr.open_array(\"air_chunked\")\n",
"#air_data.chunks # nothing read in\n",
"chunked_air_data = da.from_zarr(\"air_chunked\")\n",
"chunked_air_data # btw, until now no data has been read; only the metadata is read to infer shapes and chunks"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "a4eb82ec",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Bytes </th>\n",
" <td> 4 B </td>\n",
" <td> 4 B </td>\n",
" </tr>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> () </td>\n",
" <td> () </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 1 chunks in 6 graph layers </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> float32 numpy.ndarray </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" \n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<mean_agg-aggregate, shape=(), dtype=float32, chunksize=(), chunktype=numpy.ndarray>"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# take the mean as before, but nothing is computed yet (lazy)\n",
"mean_chunked_air_data = chunked_air_data.mean() \n",
"mean_chunked_air_data # preview what the result will look like when computed"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "d299dafd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5d5b153167fb43b286c4651fff759851",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mean_chunked_air_data.visualize() # to see how computation will be done, the DAG\n",
" # the point: chunking and storing arrays also enables parallel (and lazy) reading of large datasets for such operations"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3d4d3ce3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"281.25504"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# now compute happens\n",
"mean_chunked_air_data.compute() # parallel reading and compute (by default the parallelism follows the number of cores; dask customization is another topic)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4080e703",
"metadata": {},
"outputs": [],
"source": [
"# Optional, if you want:\n",
"# I showed how to do this for numpy arrays; there are specialized, higher-level, domain-specific libraries like xarray that expose a lot of the chunking and\n",
"# exporting-to-zarr functionality. You just need to add S3 storage options to store and retrieve data with those libraries.\n",
"# For instance: chunk the air temperature data above, store it in S3 from xarray, and reload it using xarray's open functions."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment