Skip to content

Instantly share code, notes, and snippets.

@tinaok
Last active October 1, 2019 14:27
Show Gist options
  • Save tinaok/c2ef193e94508a5ba426979d01e99307 to your computer and use it in GitHub Desktop.
Save tinaok/c2ef193e94508a5ba426979d01e99307 to your computer and use it in GitHub Desktop.
automatic chunk size
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da\n",
"import numpy as np\n",
"import dask\n",
"import xarray as xr\n",
"import math\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def climatology(ds):\n",
" seasonal_clim = ds.groupby(\"time.season\").mean(dim=\"time\")\n",
" return seasonal_clim\n",
"def anomaly(ds):\n",
" seasonal_clim = climatology(ds)\n",
" seasonal_anom = ds.groupby(\"time.season\") - seasonal_clim\n",
" return seasonal_anom"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"dask.config.set({\"array.chunk-size\": '256MB'})\n",
"timesteps=20834\n",
"lat=320\n",
"lon=384\n",
"random_data =da.random.RandomState(0).standard_normal(shape, chunks='auto' )\n",
"lats = xr.DataArray(np.linspace(start=-90, stop=90, num=lat), dims=[\"lat\"])\n",
"lons = xr.DataArray(np.linspace(start=-180, stop=180, num=lon), dims=[\"lon\"])\n",
"times = xr.DataArray(pd.date_range(start=\"1980-01-01\", freq=\"1D\", periods=timesteps), dims=[\"time\"])\n",
"ds = xr.DataArray(\n",
" random_data,\n",
" dims=[\"time\", \"lon\", \"lat\"],\n",
" coords={\"time\": times, \"lon\": lons, \"lat\": lats},\n",
" name=\"sst\",\n",
").to_dataset()\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (lat: 320, lon: 384, time: 20834)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 1980-01-01 1980-01-02 ... 2037-01-14\n",
" * lon (lon) float64 -180.0 -179.1 -178.1 -177.2 ... 178.1 179.1 180.0\n",
" * lat (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0\n",
"Data variables:\n",
" sst (time, lon, lat) float64 dask.array<shape=(20834, 384, 320), chunksize=(317, 192, 160)>"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When specifying chunk size as 370MB in dask, dask array creates chunksize 187MB."
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (lat: 320, lon: 384, time: 20834)\n",
"Coordinates:\n",
" * lon (lon) float64 -180.0 -179.1 -178.1 -177.2 ... 178.1 179.1 180.0\n",
" * lat (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0\n",
" * time (time) datetime64[ns] 1980-01-01 1980-01-02 ... 2037-01-14\n",
" season (time) <U3 'DJF' 'DJF' 'DJF' 'DJF' ... 'DJF' 'DJF' 'DJF' 'DJF'\n",
"Data variables:\n",
" sst (time, lon, lat) float64 dask.array<shape=(20834, 384, 320), chunksize=(60, 192, 160)>"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"anomaly(ds)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pangeobench",
"language": "python",
"name": "pangeobench"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment