Skip to content

Instantly share code, notes, and snippets.

@aaronspring
Created December 29, 2019 15:54
Show Gist options
  • Save aaronspring/4750d2b1eb0468ac59a034ed5a5f136c to your computer and use it in GitHub Desktop.
Save aaronspring/4750d2b1eb0468ac59a034ed5a5f136c to your computer and use it in GitHub Desktop.
bootstrap skill in parallel mockup
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Bootstrap threshold in parallel\n",
"\n",
"This notebook tries to bootstrap a threshold for a function on resampled input fields in parallel.\n",
"\n",
"Function used: https://climpred.readthedocs.io/en/stable/api/climpred.stats.varweighted_mean_period.html\n",
"\n",
"This function looks for low-frequency variations on a global map.\n",
"\n",
"Data used: https://climpred.readthedocs.io/en/stable/api/climpred.tutorial.load_dataset.html#climpred.tutorial.load_dataset\n",
"\n",
"From there we load a pre-industrial (trend-free) control simulation."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import climpred\n",
"\n",
"import dask\n",
"import numpy as np\n",
"import pandas as pd\n",
"import xarray as xr\n",
"from tqdm import trange"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from dask.distributed import Client\n",
"import multiprocessing\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"# number of logical cpus: https://www.dkrz.de/up/systems/mistral/configuration\n",
"ncpu = multiprocessing.cpu_count()\n",
"import timeit\n",
"import matplotlib.pyplot as plt\n",
"import xskillscore as xs"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# xr.show_versions()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"%load_ext lab_black"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# climpred.bootstrap"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of CPUs: 48, number of threads: 6, number of workers: 8, processes: False\n"
]
}
],
"source": [
"processes = False\n",
"nworker = 8\n",
"threads = ncpu // nworker\n",
"print(\n",
" f\"Number of CPUs: {ncpu}, number of threads: {threads}, number of workers: {nworker}, processes: {processes}\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# client.close()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"client = Client(\n",
" processes=processes,\n",
" threads_per_worker=threads,\n",
" n_workers=nworker,\n",
" memory_limit=\"64GB\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>inproc://10.50.36.138/35196/1</li>\n",
" <li><b>Dashboard: </b><a href='http://localhost:8888/proxy/8787/status' target='_blank'>http://localhost:8888/proxy/8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>8</li>\n",
" <li><b>Cores: </b>48</li>\n",
" <li><b>Memory: </b>512.00 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'inproc://10.50.36.138/35196/1' processes=8 threads=48, memory=512.00 GB>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## small data"
]
},
{
"cell_type": "code",
"execution_count": 307,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 67.81 MB </td> <td> 67.81 MB </td></tr>\n",
" <tr><th> Shape </th><td> (301, 220, 256) </td> <td> (301, 220, 256) </td></tr>\n",
" <tr><th> Count </th><td> 1 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"232\" height=\"208\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"87\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"87\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,158.295876 10.000000,87.707641\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"112\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"112\" y1=\"0\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 112.059801,0.000000 182.648036,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"158\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
" <line x1=\"182\" y1=\"70\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.588235,70.588235 182.648036,70.588235 182.648036,158.295876 80.588235,158.295876\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"131.618136\" y=\"178.295876\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"202.648036\" y=\"114.442056\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,202.648036,114.442056)\">220</text>\n",
" <text x=\"35.294118\" y=\"143.001759\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,143.001759)\">301</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(301, 220, 256), dtype=float32, chunksize=(301, 220, 256), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 567.71 MB </td> <td> 135.17 MB </td></tr>\n",
" <tr><th> Shape </th><td> (21, 12, 10, 220, 256) </td> <td> (5, 12, 10, 220, 256) </td></tr>\n",
" <tr><th> Count </th><td> 5 Tasks </td><td> 5 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"397\" height=\"172\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"34\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"8\" x2=\"34\" y2=\"8\" />\n",
" <line x1=\"0\" y1=\"17\" x2=\"34\" y2=\"17\" />\n",
" <line x1=\"0\" y1=\"26\" x2=\"34\" y2=\"26\" />\n",
" <line x1=\"0\" y1=\"35\" x2=\"34\" y2=\"35\" />\n",
" <line x1=\"0\" y1=\"37\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"37\" style=\"stroke-width:2\" />\n",
" <line x1=\"34\" y1=\"0\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 34.105574,0.000000 34.105574,37.472373 0.000000,37.472373\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"17.052787\" y=\"57.472373\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >12</text>\n",
" <text x=\"54.105574\" y=\"18.736187\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,54.105574,18.736187)\">21</text>\n",
"\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"104\" y1=\"103\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"104\" y2=\"103\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 123.405241,19.405241 123.405241,122.530241 104.000000,103.125000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"224\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"224\" y1=\"0\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 224.000000,0.000000 243.405241,19.405241 123.405241,19.405241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"122\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
" <line x1=\"243\" y1=\"19\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"123.405241,19.405241 243.405241,19.405241 243.405241,122.530241 123.405241,122.530241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"183.405241\" y=\"142.530241\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"263.405241\" y=\"70.967741\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,263.405241,70.967741)\">220</text>\n",
" <text x=\"103.702620\" y=\"132.827620\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,103.702620,132.827620)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(21, 12, 10, 220, 256), dtype=float32, chunksize=(5, 12, 10, 220, 256), chunktype=numpy.ndarray>"
]
},
"execution_count": 307,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_input(\n",
"    nworker,\n",
"    no_time_chunks=True,\n",
"    chunk_dim=\"x\",\n",
"    persist=True,\n",
"    D=\"3\",\n",
"    rm_coords=True,\n",
"    use_climpred_data=False,\n",
"    var=\"tos\",\n",
"    only_one_control_chunk=True,\n",
"):\n",
"    \"\"\"Load the ensemble and the control run as chunked DataArrays.\n",
"\n",
"    Args:\n",
"        nworker (int): number of chunks to aim for along ``chunk_dim``; the\n",
"            chunk length is ``ds3d[chunk_dim].size // nworker`` (at least 1).\n",
"        no_time_chunks (bool): currently unused, kept for compatibility.\n",
"        chunk_dim (str): dimension of the ensemble to chunk along.\n",
"        persist (bool): call ``.persist()`` on both arrays before returning.\n",
"        D (str): inserted into the climpred tutorial dataset names\n",
"            ``MPI-PM-DP-{D}D`` and ``MPI-control-{D}D``.\n",
"        rm_coords (bool): delete the ``lon``/``lat`` coords if present.\n",
"        use_climpred_data (bool): load the climpred tutorial datasets instead\n",
"            of the local NetCDF files.\n",
"        var (str): variable selected from the datasets (also part of the\n",
"            local file names).\n",
"        only_one_control_chunk (bool): keep the control in a single chunk\n",
"            along ``time``. Otherwise the control is chunked like the\n",
"            ensemble when ``chunk_dim`` is not lead/member/init, else along\n",
"            ``time`` with the same chunk length.\n",
"\n",
"    Returns:\n",
"        tuple: ``(ds3d, control3d)``, the ensemble and the control.\n",
"    \"\"\"\n",
"    # Open each dataset exactly once; chunking is applied to that object below.\n",
"    if use_climpred_data:\n",
"        ds3d = climpred.tutorial.load_dataset(f\"MPI-PM-DP-{D}D\")[var]\n",
"        control3d = climpred.tutorial.load_dataset(f\"MPI-control-{D}D\")[var]\n",
"    else:\n",
"        control_path = (\n",
"            f\"/work/mh0727/m300524/experiments/postprocessed/control_{var}_ym.nc\"\n",
"        )\n",
"        ds_path = f\"/work/mh0727/m300524/experiments/postprocessed/ds_{var}_ym.nc\"\n",
"        # the local ensemble file uses ensemble/time instead of init/lead\n",
"        ds3d = xr.open_dataset(ds_path).rename({\"ensemble\": \"init\", \"time\": \"lead\"})[\n",
"            var\n",
"        ]\n",
"        control3d = xr.open_dataset(control_path)[var]\n",
"\n",
"    chunk_size = max(ds3d[chunk_dim].size // nworker, 1)\n",
"    chunks = {chunk_dim: chunk_size}\n",
"    if only_one_control_chunk:\n",
"        control_chunks = {\"time\": -1}\n",
"    elif chunk_dim not in [\"lead\", \"member\", \"init\"]:\n",
"        control_chunks = chunks\n",
"    else:\n",
"        control_chunks = {\"time\": chunk_size}\n",
"\n",
"    ds3d = ds3d.chunk(chunks)\n",
"    control3d = control3d.chunk(control_chunks)\n",
"\n",
"    if rm_coords:\n",
"        for arr in (ds3d, control3d):\n",
"            for coord in (\"lon\", \"lat\"):  # drop coords to avoid a chunk error\n",
"                if coord in arr.coords:\n",
"                    del arr[coord]\n",
"    if persist:\n",
"        ds3d = ds3d.persist()\n",
"        control3d = control3d.persist()\n",
"    return ds3d, control3d\n",
"\n",
"\n",
"ds3d, control3d = get_input(4, chunk_dim=\"lead\")\n",
"display(control3d.data)\n",
"ds3d.data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compute"
]
},
{
"cell_type": "code",
"execution_count": 308,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16"
]
},
"execution_count": 308,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bootstrap = 8 * nworker\n",
"bootstrap = 16\n",
"# bootstrap = 1000\n",
"bootstrap"
]
},
{
"cell_type": "code",
"execution_count": 309,
"metadata": {},
"outputs": [],
"source": [
"from climpred.stats import varweighted_mean_period\n",
"from climpred.bootstrap import (\n",
" bootstrap_perfect_model as climpred_bootstrap_perfect_model,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 317,
"metadata": {},
"outputs": [],
"source": [
"kw = {\"metric\": \"mse\", \"comparison\": \"m2c\"}"
]
},
{
"cell_type": "code",
"execution_count": 318,
"metadata": {},
"outputs": [],
"source": [
"ds3d, control3d = get_input(nworker, chunk_dim=\"x\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"ds3d, control3d = get_input(nworker, chunk_dim=\"lead\")"
]
},
{
"cell_type": "code",
"execution_count": 319,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 67.81 MB </td> <td> 67.81 MB </td></tr>\n",
" <tr><th> Shape </th><td> (301, 220, 256) </td> <td> (301, 220, 256) </td></tr>\n",
" <tr><th> Count </th><td> 1 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"232\" height=\"208\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"87\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"87\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,158.295876 10.000000,87.707641\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"112\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"112\" y1=\"0\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 112.059801,0.000000 182.648036,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"158\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
" <line x1=\"182\" y1=\"70\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.588235,70.588235 182.648036,70.588235 182.648036,158.295876 80.588235,158.295876\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"131.618136\" y=\"178.295876\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"202.648036\" y=\"114.442056\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,202.648036,114.442056)\">220</text>\n",
" <text x=\"35.294118\" y=\"143.001759\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,143.001759)\">301</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(301, 220, 256), dtype=float32, chunksize=(301, 220, 256), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 567.71 MB </td> <td> 70.96 MB </td></tr>\n",
" <tr><th> Shape </th><td> (21, 12, 10, 220, 256) </td> <td> (21, 12, 10, 220, 32) </td></tr>\n",
" <tr><th> Count </th><td> 8 Tasks </td><td> 8 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"397\" height=\"172\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"34\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"37\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"37\" style=\"stroke-width:2\" />\n",
" <line x1=\"34\" y1=\"0\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 34.105574,0.000000 34.105574,37.472373 0.000000,37.472373\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"17.052787\" y=\"57.472373\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >12</text>\n",
" <text x=\"54.105574\" y=\"18.736187\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,54.105574,18.736187)\">21</text>\n",
"\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"104\" y1=\"103\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"104\" y2=\"103\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 123.405241,19.405241 123.405241,122.530241 104.000000,103.125000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"224\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"119\" y1=\"0\" x2=\"138\" y2=\"19\" />\n",
" <line x1=\"134\" y1=\"0\" x2=\"153\" y2=\"19\" />\n",
" <line x1=\"149\" y1=\"0\" x2=\"168\" y2=\"19\" />\n",
" <line x1=\"164\" y1=\"0\" x2=\"183\" y2=\"19\" />\n",
" <line x1=\"179\" y1=\"0\" x2=\"198\" y2=\"19\" />\n",
" <line x1=\"194\" y1=\"0\" x2=\"213\" y2=\"19\" />\n",
" <line x1=\"209\" y1=\"0\" x2=\"228\" y2=\"19\" />\n",
" <line x1=\"224\" y1=\"0\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 224.000000,0.000000 243.405241,19.405241 123.405241,19.405241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"122\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
" <line x1=\"138\" y1=\"19\" x2=\"138\" y2=\"122\" />\n",
" <line x1=\"153\" y1=\"19\" x2=\"153\" y2=\"122\" />\n",
" <line x1=\"168\" y1=\"19\" x2=\"168\" y2=\"122\" />\n",
" <line x1=\"183\" y1=\"19\" x2=\"183\" y2=\"122\" />\n",
" <line x1=\"198\" y1=\"19\" x2=\"198\" y2=\"122\" />\n",
" <line x1=\"213\" y1=\"19\" x2=\"213\" y2=\"122\" />\n",
" <line x1=\"228\" y1=\"19\" x2=\"228\" y2=\"122\" />\n",
" <line x1=\"243\" y1=\"19\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"123.405241,19.405241 243.405241,19.405241 243.405241,122.530241 123.405241,122.530241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"183.405241\" y=\"142.530241\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"263.405241\" y=\"70.967741\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,263.405241,70.967741)\">220</text>\n",
" <text x=\"103.702620\" y=\"132.827620\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,103.702620,132.827620)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(21, 12, 10, 220, 256), dtype=float32, chunksize=(21, 12, 10, 220, 32), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.38 s, sys: 2.88 s, total: 4.26 s\n",
"Wall time: 682 ms\n"
]
}
],
"source": [
"display(control3d.data)\n",
"display(ds3d.data)\n",
"\n",
"%time _ = climpred.prediction.compute_perfect_model(ds3d,control3d,**kw).compute()"
]
},
{
"cell_type": "code",
"execution_count": 320,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 618 ms, sys: 800 ms, total: 1.42 s\n",
"Wall time: 1.3 s\n"
]
}
],
"source": [
"ds3d = ds3d.load()\n",
"control = control3d.load()\n",
"%time _ = climpred.prediction.compute_perfect_model(ds3d,control3d,**kw)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> After chunking, the wall time is reduced by about a factor of 2."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## init skill"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"bootstrap = 8"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"to_be_shuffled = ds3d.member.values\n",
"\n",
"\n",
"def resample(hind, shuffle_dim, to_be_shuffled):\n",
"    \"\"\"Draw a bootstrap sample of ``hind`` along ``shuffle_dim``.\n",
"\n",
"    Args:\n",
"        hind (xarray object): data to resample.\n",
"        shuffle_dim (str): dimension whose labels are resampled.\n",
"        to_be_shuffled (array-like): labels to draw from with replacement;\n",
"            as many labels are drawn as are given.\n",
"\n",
"    Returns:\n",
"        xarray object: ``hind`` selected at the drawn labels. When\n",
"        ``shuffle_dim`` is ``member`` the member coordinate is relabelled\n",
"        to 1..N.\n",
"    \"\"\"\n",
"    n_draws = len(to_be_shuffled)\n",
"    drawn_labels = np.random.choice(to_be_shuffled, n_draws)\n",
"    resampled = hind.sel({shuffle_dim: drawn_labels})\n",
"    if shuffle_dim != \"member\":\n",
"        return resampled\n",
"    # give the resampled members consecutive labels again\n",
"    resampled[\"member\"] = np.arange(1, 1 + resampled.member.size)\n",
"    return resampled"
]
},
{
"cell_type": "code",
"execution_count": 321,
"metadata": {},
"outputs": [],
"source": [
"# resample(ds3d, \"member\", to_be_shuffled)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 22.7 s, sys: 1min 4s, total: 1min 27s\n",
"Wall time: 4.52 s\n"
]
}
],
"source": [
"%%time\n",
"res = []\n",
"for b in range(bootstrap):\n",
" ds3dr = resample(ds3d, 'member', to_be_shuffled)\n",
" res.append(climpred.prediction.compute_perfect_model(ds3dr,control3d,**kw))\n",
"result = xr.concat(res, 'bootstrap').compute()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## uninit skill"
]
},
{
"cell_type": "code",
"execution_count": 275,
"metadata": {},
"outputs": [],
"source": [
"def bootstrap_uninit_pm_ensemble_from_control(ds, control):\n",
"    \"\"\"\n",
"    Create a pseudo-ensemble from control run.\n",
"\n",
"    Note:\n",
"        Needed for block bootstrapping confidence intervals of a metric in\n",
"        perfect model framework. Randomly takes segments of ``ds.lead.size``\n",
"        consecutive time steps from control and rearranges them into init\n",
"        and member dimensions.\n",
"\n",
"    Args:\n",
"        ds (xarray object): ensemble simulation with init, member and lead\n",
"            dimensions.\n",
"        control (xarray object): control simulation with a time dimension.\n",
"\n",
"    Returns:\n",
"        uninit (xarray object): pseudo-ensemble generated from control run,\n",
"            with the dimension order and init/member/lead coords of ``ds``.\n",
"    \"\"\"\n",
"    nens = ds.init.size\n",
"    nmember = ds.member.size\n",
"    length = ds.lead.size\n",
"    c_start = 0\n",
"    c_end = control[\"time\"].size\n",
"    lead_time = ds[\"lead\"]\n",
"\n",
"    def isel_years(control, year_s, length):\n",
"        # cut one segment and relabel its time axis as the lead axis of ds\n",
"        new = control.isel(time=slice(year_s, year_s + length))\n",
"        new = new.rename({\"time\": \"lead\"})\n",
"        new[\"lead\"] = lead_time\n",
"        return new\n",
"\n",
"    def create_pseudo_members(control):\n",
"        # np.random.randint excludes the upper bound: ``c_end - length`` is the\n",
"        # last start that still yields a full-length segment, hence ``+ 1``.\n",
"        startlist = np.random.randint(c_start, c_end - length + 1, nmember)\n",
"        return xr.concat(\n",
"            (isel_years(control, start, length) for start in startlist), \"member\"\n",
"        )\n",
"\n",
"    uninit = xr.concat((create_pseudo_members(control) for _ in range(nens)), \"init\")\n",
"    # transpose to same dimensions as ds\n",
"    uninit = uninit.transpose(*ds.dims)\n",
"    # set member init coords\n",
"    for c in [\"init\", \"member\"]:\n",
"        uninit[c] = ds[c].values\n",
"    # chunk to same dims\n",
"    if dask.is_dask_collection(uninit) and uninit.chunks != ds.chunks:\n",
"        # NOTE(review): _transpose_and_rechunk_to is not defined in the visible\n",
"        # part of this notebook; confirm it exists before this branch runs.\n",
"        uninit = _transpose_and_rechunk_to(uninit, ds)\n",
"    return uninit"
]
},
{
"cell_type": "code",
"execution_count": 276,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 244 ms, sys: 17 ms, total: 261 ms\n",
"Wall time: 240 ms\n"
]
}
],
"source": [
"%time r = bootstrap_uninit_pm_ensemble_from_control(ds3d, control3d)"
]
},
{
"cell_type": "code",
"execution_count": 277,
"metadata": {},
"outputs": [],
"source": [
"for c in r.coords:\n",
" assert (r[c] == ds3d[c]).all()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 67.81 MB </td> <td> 8.48 MB </td></tr>\n",
" <tr><th> Shape </th><td> (301, 220, 256) </td> <td> (301, 220, 32) </td></tr>\n",
" <tr><th> Count </th><td> 8 Tasks </td><td> 8 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"232\" height=\"208\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"87\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"87\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,158.295876 10.000000,87.707641\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"112\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"22\" y1=\"0\" x2=\"93\" y2=\"70\" />\n",
" <line x1=\"35\" y1=\"0\" x2=\"106\" y2=\"70\" />\n",
" <line x1=\"48\" y1=\"0\" x2=\"118\" y2=\"70\" />\n",
" <line x1=\"61\" y1=\"0\" x2=\"131\" y2=\"70\" />\n",
" <line x1=\"73\" y1=\"0\" x2=\"144\" y2=\"70\" />\n",
" <line x1=\"86\" y1=\"0\" x2=\"157\" y2=\"70\" />\n",
" <line x1=\"99\" y1=\"0\" x2=\"169\" y2=\"70\" />\n",
" <line x1=\"112\" y1=\"0\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 112.059801,0.000000 182.648036,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"158\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
" <line x1=\"93\" y1=\"70\" x2=\"93\" y2=\"158\" />\n",
" <line x1=\"106\" y1=\"70\" x2=\"106\" y2=\"158\" />\n",
" <line x1=\"118\" y1=\"70\" x2=\"118\" y2=\"158\" />\n",
" <line x1=\"131\" y1=\"70\" x2=\"131\" y2=\"158\" />\n",
" <line x1=\"144\" y1=\"70\" x2=\"144\" y2=\"158\" />\n",
" <line x1=\"157\" y1=\"70\" x2=\"157\" y2=\"158\" />\n",
" <line x1=\"169\" y1=\"70\" x2=\"169\" y2=\"158\" />\n",
" <line x1=\"182\" y1=\"70\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.588235,70.588235 182.648036,70.588235 182.648036,158.295876 80.588235,158.295876\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"131.618136\" y=\"178.295876\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"202.648036\" y=\"114.442056\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,202.648036,114.442056)\">220</text>\n",
" <text x=\"35.294118\" y=\"143.001759\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,143.001759)\">301</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(301, 220, 256), dtype=float32, chunksize=(301, 220, 32), chunktype=numpy.ndarray>"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds3d, control3d = get_input(nworker, chunk_dim=\"x\", only_one_control_chunk=False)\n",
"control3d.data"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1min 3s, sys: 24.1 s, total: 1min 27s\n",
"Wall time: 50.7 s\n"
]
}
],
"source": [
"%%time\n",
"res = []\n",
"for b in range(bootstrap):\n",
" ds3dr = bootstrap_uninit_pm_ensemble_from_control(ds3d, control3d)\n",
" res.append(climpred.prediction.compute_perfect_model(ds3dr,control3d,**kw))\n",
"result = xr.concat(res, 'bootstrap').compute()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> uninit takes the most time"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## pers skill"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.79 s, sys: 367 ms, total: 2.15 s\n",
"Wall time: 1.54 s\n"
]
}
],
"source": [
"%%time\n",
"kwp = kw.copy()\n",
"del kwp['comparison']\n",
"res = []\n",
"for b in range(bootstrap):\n",
" ds3dr = resample(ds3d, 'member', to_be_shuffled)\n",
" res.append(climpred.prediction.compute_persistence(ds3dr,control3d,**kwp))\n",
"result = xr.concat(res, 'bootstrap').compute()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## speedup of quantile"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"def dask_percentile(arr, axis=0, q=95):\n",
"    \"\"\"Apply ``np.percentile`` to every block of a dask array.\n",
"\n",
"    Args:\n",
"        arr (dask.array.Array): input; must be a single chunk along ``axis``.\n",
"        axis (int): axis to reduce. Defaults to 0.\n",
"        q (float): percentile handed to ``np.percentile``. Defaults to 95.\n",
"\n",
"    Returns:\n",
"        dask.array.Array: result of ``map_blocks`` with ``axis`` dropped.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``arr`` has more than one chunk along ``axis``.\n",
"    \"\"\"\n",
"    n_chunks_along_axis = len(arr.chunks[axis])\n",
"    if n_chunks_along_axis > 1:\n",
"        raise ValueError(\n",
"            \"Input array cannot be chunked along the percentile dimension.\"\n",
"        )\n",
"    return dask.array.map_blocks(\n",
"        np.percentile, arr, axis=axis, q=q, drop_axis=axis\n",
"    )\n",
"\n",
"\n",
"def percentile(arr, axis=0, q=95):\n",
"    \"\"\"Percentile along ``axis`` for dask arrays and for anything numpy accepts.\n",
"\n",
"    Args:\n",
"        arr: ``dask.array.Array`` or input accepted by ``np.percentile``.\n",
"        axis (int): axis to reduce. Defaults to 0.\n",
"        q (float): percentile to compute. Defaults to 95.\n",
"\n",
"    Returns:\n",
"        Result of ``dask_percentile`` for dask arrays, otherwise of\n",
"        ``np.percentile``.\n",
"    \"\"\"\n",
"    if not isinstance(arr, dask.array.Array):\n",
"        return np.percentile(arr, axis=axis, q=q)\n",
"    return dask_percentile(arr, axis=axis, q=q)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): the only assignment to `a` visible in this part of the notebook is the later\n",
"# `a, b = climpred.comparisons.__m2e.function(ds3d)` cell; presumably `a` comes from an\n",
"# earlier cell and holds a distribution with a 'bootstrap' dimension -- TODO confirm.\n",
"A = a"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 207 ms, sys: 59 ms, total: 266 ms\n",
"Wall time: 243 ms\n"
]
}
],
"source": [
"%%time \n",
"# Time the percentile() reduction over 'bootstrap' as an alternative to xr quantile.\n",
"# NOTE(review): this uses q=5 (5th percentile) while the xr.quantile timing cell uses .95,\n",
"# so the two timings are not for the same quantile.\n",
"# rechunk so that 'x' is a single chunk; this rebinds A\n",
"A = A.chunk({\"x\": -1}).persist()\n",
"Aq = A.reduce(percentile, dim=\"bootstrap\", q=5, allow_lazy=True)\n",
"Aq = Aq.compute()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# rebind A to `a`: the percentile timing cell replaced A with a rechunked, persisted copy\n",
"A = a"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1min 47s, sys: 18.2 s, total: 2min 5s\n",
"Wall time: 1min 48s\n"
]
}
],
"source": [
"# baseline timing: xarray's own quantile over 'bootstrap'\n",
"%time Aq = A.quantile(.95,'bootstrap')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### compute parallel"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [],
"source": [
"import inspect\n",
"\n",
"import numpy as np\n",
"import xarray as xr\n",
"from tqdm.auto import tqdm\n",
"\n",
"from climpred.prediction import (\n",
" compute_hindcast,\n",
" compute_perfect_model,\n",
" compute_persistence,\n",
")\n",
"from climpred.checks import has_dims\n",
"from climpred.constants import ALL_COMPARISONS, ALL_METRICS, METRIC_ALIASES\n",
"from climpred.stats import dpp, varweighted_mean_period\n",
"from climpred.utils import assign_attrs, get_comparison_class, get_metric_class\n",
"from climpred.bootstrap import (\n",
" bootstrap_uninitialized_ensemble,\n",
" # bootstrap_uninit_pm_ensemble_from_control,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 306,
"metadata": {},
"outputs": [],
"source": [
"def _ensure_loaded(res):\n",
"    \"\"\"Return ``res`` computed if it is a dask collection, else unchanged.\n",
"\n",
"    Args:\n",
"        res: object to check with ``dask.is_dask_collection``.\n",
"\n",
"    Returns:\n",
"        ``res.compute()`` for dask collections, otherwise ``res`` itself.\n",
"    \"\"\"\n",
"    return res.compute() if dask.is_dask_collection(res) else res"
]
},
{
"cell_type": "code",
"execution_count": 322,
"metadata": {},
"outputs": [],
"source": [
"def _transpose_and_rechunk_to(a, ds):\n",
"    \"\"\"Give ``a`` the dimension order and chunking of ``ds``.\n",
"\n",
"    Args:\n",
"        a (xarray object): object to transpose and rechunk.\n",
"        ds (xarray object): template providing ``dims`` and ``chunks``.\n",
"\n",
"    Returns:\n",
"        xarray object: ``a`` transposed to ``ds.dims``, chunked with ``ds.chunks``.\n",
"    \"\"\"\n",
"    # todo: add to m2e comparisons\n",
"    reordered = a.transpose(*ds.dims)\n",
"    return reordered.chunk(ds.chunks)\n",
"\n",
"\n",
"def _distribution_to_ci(ds, ci_low, ci_high, dim=\"bootstrap\", old=False):\n",
"    \"\"\"Get confidence intervals from bootstrapped distribution.\n",
"\n",
"    Needed for bootstrapping confidence intervals and p_values of a metric.\n",
"\n",
"    Args:\n",
"        ds (xarray object): distribution.\n",
"        ci_low (float): lower bound as a quantile fraction, e.g. 0.05.\n",
"        ci_high (float): upper bound as a quantile fraction, e.g. 0.95.\n",
"        dim (str): dimension to take the quantiles over. Default: 'bootstrap'\n",
"        old (bool): if True use ``ds.quantile``; otherwise reduce with the\n",
"            dask-aware ``percentile`` helper. Default: False\n",
"\n",
"    Returns:\n",
"        ds_ci (xarray object): ``ds`` reduced over ``dim`` with a ``quantile``\n",
"            dimension labelled ``[ci_low, ci_high]``.\n",
"    \"\"\"\n",
"    if old:\n",
"        return ds.quantile(q=[ci_low, ci_high], dim=dim)\n",
"    # xr.quantile is slow, use percentile from dask\n",
"    # https://stackoverflow.com/questions/54938180/get-95-percentile-of-the-variables-for-son-djf-mam-over-multiple-years-data\n",
"    # dask_percentile rejects arrays split along the reduced axis, so merge all\n",
"    # chunks along `dim` (not a hard-coded dimension) before reducing\n",
"    ds = ds.chunk({dim: -1}).persist()\n",
"    # percentile expects q in percent, the arguments are fractions\n",
"    ds_ci_low = ds.reduce(percentile, dim=dim, q=ci_low * 100, allow_lazy=True)\n",
"    ds_ci_high = ds.reduce(percentile, dim=dim, q=ci_high * 100, allow_lazy=True)\n",
"    ds_ci = xr.concat([ds_ci_low, ds_ci_high], \"quantile\").compute()\n",
"    ds_ci[\"quantile\"] = [ci_low, ci_high]\n",
"    return ds_ci\n",
"\n",
"\n",
"def _pvalue_from_distributions(simple_fct, init, metric=\"pearson_r\"):\n",
"    \"\"\"Get probability that skill of a simple forecast (e.g., persistence or\n",
"    uninitialized skill) is larger than initialized skill.\n",
"\n",
"    Needed for bootstrapping confidence intervals and p_values of a metric in\n",
"    the hindcast framework. Checks whether a simple forecast like persistence\n",
"    or uninitialized performs better than initialized forecast. Need to keep in\n",
"    mind the orientation of metric (whether larger values are better or worse\n",
"    than smaller ones.)\n",
"\n",
"    Args:\n",
"        simple_fct (xarray object): persistence or uninit skill.\n",
"        init (xarray object): hindcast skill.\n",
"        metric (Metric or str): metric class Metric, or a metric name/alias\n",
"            that is resolved to its Metric class. Defaults to 'pearson_r'.\n",
"\n",
"    Returns:\n",
"        pv (xarray object): probability that simple forecast performs better\n",
"                            than initialized forecast.\n",
"    \"\"\"\n",
"    # a metric given by name (including the default) has no `.positive`\n",
"    # attribute; resolve it the same way bootstrap_compute does\n",
"    if isinstance(metric, str):\n",
"        metric = get_metric_class(METRIC_ALIASES.get(metric, metric), ALL_METRICS)\n",
"    # fraction of bootstrap iterations in which the simple forecast scores higher\n",
"    pv = ((simple_fct - init) > 0).sum(\"bootstrap\") / init.bootstrap.size\n",
"    # for metrics where `positive` is False take the complement\n",
"    if not metric.positive:\n",
"        pv = 1 - pv\n",
"    return pv"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
"# confidence bounds from the percentile path (rn) and from the xr.quantile path (ro),\n",
"# both for the 0.05 / 0.95 quantiles, to be compared in the next cell\n",
"ci_low, ci_high = 0.05, 0.95\n",
"rn = _distribution_to_ci(A, 0.05, 0.95, old=False)\n",
"ro = _distribution_to_ci(A.load(), ci_low, ci_high, old=True)"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [],
"source": [
"xr.testing.assert_equal(rn, ro)"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"def bootstrap_compute(\n",
"    hind,\n",
"    reference,\n",
"    hist=None,\n",
"    metric=\"pearson_r\",\n",
"    comparison=\"m2e\",\n",
"    dim=\"init\",\n",
"    sig=95,\n",
"    bootstrap=500,\n",
"    pers_sig=None,\n",
"    compute=compute_hindcast,\n",
"    resample_uninit=bootstrap_uninitialized_ensemble,\n",
"    **metric_kwargs,\n",
"):\n",
"    \"\"\"Bootstrap compute with replacement.\n",
"\n",
"    Args:\n",
"        hind (xr.Dataset): prediction ensemble.\n",
"        reference (xr.Dataset): reference simulation.\n",
"        hist (xr.Dataset): historical/uninitialized simulation. If None,\n",
"            `reference` is used instead (perfect-model path).\n",
"        metric (str): `metric`. Defaults to 'pearson_r'.\n",
"        comparison (str): `comparison`. Defaults to 'm2e'.\n",
"        dim (str or list): dimension to apply metric over. default: 'init'\n",
"        sig (int): Significance level for uninitialized and\n",
"                   initialized skill. Defaults to 95.\n",
"        bootstrap (int): number of resampling iterations (bootstrap\n",
"                         with replacement). Defaults to 500.\n",
"        pers_sig (int): Significance level for persistence skill confidence\n",
"                        levels. Defaults to sig.\n",
"        compute (func): function to compute skill.\n",
"                        Choose from\n",
"                        [:py:func:`climpred.prediction.compute_perfect_model`,\n",
"                         :py:func:`climpred.prediction.compute_hindcast`].\n",
"        resample_uninit (func): function to create an uninitialized ensemble\n",
"                        from a control simulation or uninitialized large\n",
"                        ensemble. Choose from:\n",
"                        [:py:func:`bootstrap_uninitialized_ensemble`,\n",
"                         :py:func:`bootstrap_uninit_pm_ensemble_from_control`].\n",
"        ** metric_kwargs (dict): additional keywords to be passed to metric\n",
"            (see the arguments required for a given metric in :ref:`Metrics`).\n",
"\n",
"    Returns:\n",
"        results (xarray object): bootstrapped results concatenated along the\n",
"            `results` dimension ['skill', 'p', 'low_ci', 'high_ci']; the\n",
"            `kind` dimension separates 'init', 'uninit' and 'pers':\n",
"            * skill: skill of `hind` without resampling ('init'), mean of\n",
"              the bootstrapped uninitialized skill ('uninit') and\n",
"              persistence skill ('pers').\n",
"            * p: probability, based on bootstrapping with replacement, that\n",
"              the uninitialized ('uninit') or persistence ('pers') forecast\n",
"              performs better than the initialized forecast.\n",
"            * low_ci, high_ci: confidence bounds from `sig` for 'init' and\n",
"              'uninit' and from `pers_sig` for 'pers'.\n",
"            For probabilistic metrics no persistence skill is computed and\n",
"            the 'pers' entries are filled with placeholders.\n",
"\n",
"    Raises:\n",
"        ValueError: if `dim` does not select `member` or `init` as a\n",
"            dimension of `hind` to resample.\n",
"\n",
"    Reference:\n",
"        * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.\n",
"          Gonzalez, V. Kharin, et al. \"A Verification Framework for\n",
"          Interannual-to-Decadal Predictions Experiments.\" Climate\n",
"          Dynamics 40, no. 1-2 (January 1, 2013): 245-72.\n",
"          https://doi.org/10/f4jjvf.\n",
"\n",
"    See also:\n",
"        * climpred.bootstrap.bootstrap_hindcast\n",
"        * climpred.bootstrap.bootstrap_perfect_model\n",
"    \"\"\"\n",
"    if pers_sig is None:\n",
"        pers_sig = sig\n",
"\n",
"    # turn significance levels (percent) into two-sided quantile bounds\n",
"    alpha = (100 - sig) / 100\n",
"    ci_low = alpha / 2\n",
"    ci_high = 1 - alpha / 2\n",
"    alpha_pers = (100 - pers_sig) / 100\n",
"    ci_low_pers = alpha_pers / 2\n",
"    ci_high_pers = 1 - alpha_pers / 2\n",
"\n",
"    init = []\n",
"    uninit = []\n",
"    pers = []\n",
"\n",
"    # get metric function name, not the alias\n",
"    metric = METRIC_ALIASES.get(metric, metric)\n",
"    # get class Metric(metric)\n",
"    metric = get_metric_class(metric, ALL_METRICS)\n",
"    # get comparison function\n",
"    comparison = get_comparison_class(comparison, ALL_COMPARISONS)\n",
"\n",
"    # which dim should be resampled: member or init\n",
"    if dim == \"member\" and \"member\" in hind.dims:\n",
"        to_be_shuffled = hind.member.values\n",
"        shuffle_dim = \"member\"\n",
"    elif \"init\" in dim and \"init\" in hind.dims:\n",
"        # also allows ['init','member']\n",
"        inits = hind.init.values\n",
"        to_be_shuffled = inits\n",
"        shuffle_dim = \"init\"\n",
"    else:\n",
"        raise ValueError(f\"Shuffle either `member` or `init`; not {dim}\")\n",
"\n",
"    # PM path: without a separate uninitialized simulation, resample from\n",
"    # reference (= control); decided once, it does not change per iteration\n",
"    if hist is None:\n",
"        hist = reference\n",
"\n",
"    # resample with replacement\n",
"    # ToDo: parallelize loop\n",
"    for _ in tqdm(range(bootstrap), desc=\"bootstrapping iteration\"):\n",
"        smp = np.random.choice(to_be_shuffled, len(to_be_shuffled))\n",
"        smp_hind = hind.sel({shuffle_dim: smp})\n",
"        if shuffle_dim == \"member\":\n",
"            # relabel the drawn members 1..n\n",
"            smp_hind[\"member\"] = np.arange(1, 1 + smp_hind.member.size)\n",
"        # compute init skill\n",
"        init_skill = compute(\n",
"            smp_hind,\n",
"            reference,\n",
"            metric=metric,\n",
"            comparison=comparison,\n",
"            add_attrs=False,\n",
"            dim=dim,\n",
"            **metric_kwargs,\n",
"        )\n",
"        # reset inits when probabilistic, otherwise tests fail\n",
"        if (\n",
"            shuffle_dim == \"init\"\n",
"            and metric.probabilistic\n",
"            and \"init\" in init_skill.coords\n",
"        ):\n",
"            init_skill[\"init\"] = inits\n",
"        init.append(init_skill)\n",
"        # generate uninitialized ensemble from hist\n",
"        uninit_hind = resample_uninit(hind, hist)\n",
"        # compute uninit skill\n",
"        uninit.append(\n",
"            compute(\n",
"                uninit_hind,\n",
"                reference,\n",
"                metric=metric,\n",
"                comparison=comparison,\n",
"                dim=dim,\n",
"                add_attrs=False,\n",
"                **metric_kwargs,\n",
"            )\n",
"        )\n",
"        # compute persistence skill\n",
"        # impossible for probabilistic\n",
"        if not metric.probabilistic:\n",
"            pers.append(\n",
"                compute_persistence(smp_hind, reference, metric=metric, **metric_kwargs)\n",
"            )\n",
"    init = _ensure_loaded(xr.concat(init, dim=\"bootstrap\"))\n",
"    # remove useless member = 0 coords after m2c\n",
"    if \"member\" in init.coords and init.member.size == 1:\n",
"        del init[\"member\"]\n",
"    uninit = _ensure_loaded(xr.concat(uninit, dim=\"bootstrap\"))\n",
"    # when persistence is not computed set flag\n",
"    pers_output = bool(pers)\n",
"    if pers_output:\n",
"        pers = _ensure_loaded(xr.concat(pers, dim=\"bootstrap\"))\n",
"\n",
"    # get confidence intervals CI\n",
"    init_ci = _distribution_to_ci(init, ci_low, ci_high)\n",
"    uninit_ci = _distribution_to_ci(uninit, ci_low, ci_high)\n",
"    # probabilistic metrics wont have persistence forecast\n",
"    # therefore only get CI if persistence was computed\n",
"    if pers_output:\n",
"        if set(pers.coords) != set(init.coords):\n",
"            init, pers = xr.broadcast(init, pers)\n",
"        pers_ci = _distribution_to_ci(pers, ci_low_pers, ci_high_pers)\n",
"    else:\n",
"        # otherwise set all persistence outputs to false\n",
"        pers = init.isnull()\n",
"        pers_ci = init_ci == -999\n",
"\n",
"    # pvalue whether uninit or pers better than init forecast\n",
"    p_uninit_over_init = _pvalue_from_distributions(uninit, init, metric=metric)\n",
"    p_pers_over_init = _pvalue_from_distributions(pers, init, metric)\n",
"\n",
"    # calc mean skill without any resampling\n",
"    init_skill = _ensure_loaded(\n",
"        compute(\n",
"            hind,\n",
"            reference,\n",
"            metric=metric,\n",
"            comparison=comparison,\n",
"            dim=dim,\n",
"            **metric_kwargs,\n",
"        )\n",
"    )\n",
"    if \"init\" in init_skill:\n",
"        init_skill = init_skill.mean(\"init\")\n",
"    # remove useless member = 0 coords after m2c\n",
"    if \"member\" in init_skill.coords and init_skill.member.size == 1:\n",
"        del init_skill[\"member\"]\n",
"    # uninit skill as mean resampled uninit skill\n",
"    uninit_skill = uninit.mean(\"bootstrap\")\n",
"    if not metric.probabilistic:\n",
"        pers_skill = _ensure_loaded(\n",
"            compute_persistence(hind, reference, metric=metric, **metric_kwargs)\n",
"        )\n",
"    else:\n",
"        pers_skill = init_skill.isnull()\n",
"    # align to prepare for concat\n",
"    if set(pers_skill.coords) != set(init_skill.coords):\n",
"        init_skill, pers_skill = xr.broadcast(init_skill, pers_skill)\n",
"\n",
"    # wrap results together in one dataarray\n",
"    skill = xr.concat([init_skill, uninit_skill, pers_skill], \"kind\")\n",
"    skill[\"kind\"] = [\"init\", \"uninit\", \"pers\"]\n",
"\n",
"    # probability that uninit or pers beats init\n",
"    p = xr.concat([p_uninit_over_init, p_pers_over_init], \"kind\")\n",
"    p[\"kind\"] = [\"uninit\", \"pers\"]\n",
"\n",
"    # ci for each skill\n",
"    ci = xr.concat([init_ci, uninit_ci, pers_ci], \"kind\").rename(\n",
"        {\"quantile\": \"results\"}\n",
"    )\n",
"    ci[\"kind\"] = [\"init\", \"uninit\", \"pers\"]\n",
"\n",
"    results = xr.concat([skill, p], \"results\")\n",
"    results[\"results\"] = [\"skill\", \"p\"]\n",
"    # keep only coords present on both objects so they can be concatenated\n",
"    if set(results.coords) != set(ci.coords):\n",
"        res_drop = [c for c in results.coords if c not in ci.coords]\n",
"        ci_drop = [c for c in ci.coords if c not in results.coords]\n",
"        results = results.drop_vars(res_drop)\n",
"        ci = ci.drop_vars(ci_drop)\n",
"    results = xr.concat([results, ci], \"results\")\n",
"    results[\"results\"] = [\"skill\", \"p\", \"low_ci\", \"high_ci\"]\n",
"    # Attach climpred compute information to skill\n",
"    metadata_dict = {\n",
"        \"confidence_interval_levels\": f\"{ci_high}-{ci_low}\",\n",
"        \"bootstrap_iterations\": bootstrap,\n",
"        # p counts the iterations in which the simple (uninitialized or\n",
"        # persistence) forecast beats the initialized one, not the reverse\n",
"        \"p\": (\n",
"            \"probability that uninitialized or persistence forecast \"\n",
"            \"performs better than initialized forecast\"\n",
"        ),\n",
"    }\n",
"    metadata_dict.update(metric_kwargs)\n",
"    results = assign_attrs(\n",
"        results,\n",
"        hind,\n",
"        metric=metric,\n",
"        comparison=comparison,\n",
"        function_name=inspect.stack()[0][3],  # take function.__name__\n",
"        metadata_dict=metadata_dict,\n",
"    )\n",
"    return results\n",
"\n",
"\n",
"def bootstrap_hindcast(\n",
"    hind,\n",
"    hist,\n",
"    reference,\n",
"    metric=\"pearson_r\",\n",
"    comparison=\"e2r\",\n",
"    dim=\"init\",\n",
"    sig=95,\n",
"    bootstrap=500,\n",
"    pers_sig=None,\n",
"    **metric_kwargs,\n",
"):\n",
"    \"\"\"Bootstrap hindcast skill with replacement.\n",
"\n",
"    Wrapper of :py:func:`bootstrap_compute` that fixes\n",
"    ``compute=compute_hindcast`` and\n",
"    ``resample_uninit=bootstrap_uninitialized_ensemble``.\n",
"\n",
"    Args:\n",
"        hind (xr.Dataset): prediction ensemble.\n",
"        hist (xr.Dataset): historical/uninitialized simulation.\n",
"        reference (xr.Dataset): reference simulation.\n",
"        metric (str): `metric`. Defaults to 'pearson_r'.\n",
"        comparison (str): `comparison`. Defaults to 'e2r'.\n",
"        dim (str): dimension to apply metric over. default: 'init'\n",
"        sig (int): Significance level for uninitialized and\n",
"                   initialized skill. Defaults to 95.\n",
"        bootstrap (int): number of resampling iterations (bootstrap\n",
"                         with replacement). Defaults to 500.\n",
"        pers_sig (int): Significance level for persistence skill confidence\n",
"                        levels. Defaults to sig.\n",
"        ** metric_kwargs (dict): additional keywords to be passed to metric\n",
"            (see the arguments required for a given metric in :ref:`Metrics`).\n",
"\n",
"    Returns:\n",
"        results: return value of :py:func:`bootstrap_compute`, passed through\n",
"            unchanged.\n",
"\n",
"    Reference:\n",
"        * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.\n",
"          Gonzalez, V. Kharin, et al. \"A Verification Framework for\n",
"          Interannual-to-Decadal Predictions Experiments.\" Climate\n",
"          Dynamics 40, no. 1-2 (January 1, 2013): 245-72.\n",
"          https://doi.org/10/f4jjvf.\n",
"\n",
"    See also:\n",
"        * climpred.bootstrap.bootstrap_compute\n",
"        * climpred.prediction.compute_hindcast\n",
"    \"\"\"\n",
"    hindcast_options = dict(\n",
"        hist=hist,\n",
"        metric=metric,\n",
"        comparison=comparison,\n",
"        dim=dim,\n",
"        sig=sig,\n",
"        bootstrap=bootstrap,\n",
"        pers_sig=pers_sig,\n",
"        compute=compute_hindcast,\n",
"        resample_uninit=bootstrap_uninitialized_ensemble,\n",
"    )\n",
"    return bootstrap_compute(hind, reference, **hindcast_options, **metric_kwargs)\n",
"\n",
"\n",
"def bootstrap_perfect_model(\n",
"    ds,\n",
"    control,\n",
"    metric=\"pearson_r\",\n",
"    comparison=\"m2e\",\n",
"    dim=None,\n",
"    sig=95,\n",
"    bootstrap=500,\n",
"    pers_sig=None,\n",
"    **metric_kwargs,\n",
"):\n",
"    \"\"\"Bootstrap perfect-model skill with replacement.\n",
"\n",
"    Wrapper of :py:func:`bootstrap_compute` that fixes ``hist=None``,\n",
"    ``compute=compute_perfect_model`` and\n",
"    ``resample_uninit=bootstrap_uninit_pm_ensemble_from_control``.\n",
"\n",
"    NOTE(review): the import of `bootstrap_uninit_pm_ensemble_from_control`\n",
"    is commented out in this notebook's import cell, so the name has to be\n",
"    defined in the kernel before this function is called.\n",
"\n",
"    Args:\n",
"        ds (xr.Dataset): prediction ensemble, passed as `hind`.\n",
"        control (xr.Dataset): control simulation, passed as `reference`.\n",
"        metric (str): `metric`. Defaults to 'pearson_r'.\n",
"        comparison (str): `comparison`. Defaults to 'm2e'.\n",
"        dim (str or list): dimension to apply metric over.\n",
"            default: ['init', 'member']\n",
"        sig (int): Significance level for uninitialized and\n",
"                   initialized skill. Defaults to 95.\n",
"        bootstrap (int): number of resampling iterations (bootstrap\n",
"                         with replacement). Defaults to 500.\n",
"        pers_sig (int): Significance level for persistence skill confidence\n",
"                        levels. Defaults to sig.\n",
"        ** metric_kwargs (dict): additional keywords to be passed to metric\n",
"            (see the arguments required for a given metric in :ref:`Metrics`).\n",
"\n",
"    Returns:\n",
"        results: return value of :py:func:`bootstrap_compute`, passed through\n",
"            unchanged.\n",
"\n",
"    Reference:\n",
"        * Goddard, L., A. Kumar, A. Solomon, D. Smith, G. Boer, P.\n",
"          Gonzalez, V. Kharin, et al. \"A Verification Framework for\n",
"          Interannual-to-Decadal Predictions Experiments.\" Climate\n",
"          Dynamics 40, no. 1-2 (January 1, 2013): 245-72.\n",
"          https://doi.org/10/f4jjvf.\n",
"\n",
"    See also:\n",
"        * climpred.bootstrap.bootstrap_compute\n",
"        * climpred.prediction.compute_perfect_model\n",
"    \"\"\"\n",
"    skill_dim = [\"init\", \"member\"] if dim is None else dim\n",
"    perfect_model_options = dict(\n",
"        hist=None,\n",
"        metric=metric,\n",
"        comparison=comparison,\n",
"        dim=skill_dim,\n",
"        sig=sig,\n",
"        bootstrap=bootstrap,\n",
"        pers_sig=pers_sig,\n",
"        compute=compute_perfect_model,\n",
"        resample_uninit=bootstrap_uninit_pm_ensemble_from_control,\n",
"    )\n",
"    return bootstrap_compute(ds, control, **perfect_model_options, **metric_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 236,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'metric': 'mse', 'comparison': 'm2c', 'bootstrap': 8}"
]
},
"execution_count": 236,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kwb = kw.copy()\n",
"kwb[\"bootstrap\"] = bootstrap\n",
"kwb"
]
},
{
"cell_type": "code",
"execution_count": 242,
"metadata": {},
"outputs": [],
"source": [
"# 8 bootstraps 19s\n",
"ds3d, control3d = get_input(nworker, chunk_dim=\"lead\")"
]
},
{
"cell_type": "code",
"execution_count": 244,
"metadata": {},
"outputs": [],
"source": [
"# 8 bootstraps 18s\n",
"ds3d, control3d = get_input(nworker, chunk_dim=\"x\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> it seems irrelevant whether chunking in x, y or lead"
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 67.81 MB </td> <td> 67.81 MB </td></tr>\n",
" <tr><th> Shape </th><td> (301, 220, 256) </td> <td> (301, 220, 256) </td></tr>\n",
" <tr><th> Count </th><td> 1 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"232\" height=\"208\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"87\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"87\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,158.295876 10.000000,87.707641\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"112\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"112\" y1=\"0\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 112.059801,0.000000 182.648036,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"182\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"158\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"158\" style=\"stroke-width:2\" />\n",
" <line x1=\"182\" y1=\"70\" x2=\"182\" y2=\"158\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.588235,70.588235 182.648036,70.588235 182.648036,158.295876 80.588235,158.295876\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"131.618136\" y=\"178.295876\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"202.648036\" y=\"114.442056\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,202.648036,114.442056)\">220</text>\n",
" <text x=\"35.294118\" y=\"143.001759\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,143.001759)\">301</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(301, 220, 256), dtype=float32, chunksize=(301, 220, 256), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 567.71 MB </td> <td> 70.96 MB </td></tr>\n",
" <tr><th> Shape </th><td> (21, 12, 10, 220, 256) </td> <td> (21, 12, 10, 220, 32) </td></tr>\n",
" <tr><th> Count </th><td> 8 Tasks </td><td> 8 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"397\" height=\"172\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"34\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"37\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"37\" style=\"stroke-width:2\" />\n",
" <line x1=\"34\" y1=\"0\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 34.105574,0.000000 34.105574,37.472373 0.000000,37.472373\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"17.052787\" y=\"57.472373\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >12</text>\n",
" <text x=\"54.105574\" y=\"18.736187\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,54.105574,18.736187)\">21</text>\n",
"\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"104\" y1=\"103\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"104\" y2=\"103\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 123.405241,19.405241 123.405241,122.530241 104.000000,103.125000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"224\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"119\" y1=\"0\" x2=\"138\" y2=\"19\" />\n",
" <line x1=\"134\" y1=\"0\" x2=\"153\" y2=\"19\" />\n",
" <line x1=\"149\" y1=\"0\" x2=\"168\" y2=\"19\" />\n",
" <line x1=\"164\" y1=\"0\" x2=\"183\" y2=\"19\" />\n",
" <line x1=\"179\" y1=\"0\" x2=\"198\" y2=\"19\" />\n",
" <line x1=\"194\" y1=\"0\" x2=\"213\" y2=\"19\" />\n",
" <line x1=\"209\" y1=\"0\" x2=\"228\" y2=\"19\" />\n",
" <line x1=\"224\" y1=\"0\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 224.000000,0.000000 243.405241,19.405241 123.405241,19.405241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"122\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
" <line x1=\"138\" y1=\"19\" x2=\"138\" y2=\"122\" />\n",
" <line x1=\"153\" y1=\"19\" x2=\"153\" y2=\"122\" />\n",
" <line x1=\"168\" y1=\"19\" x2=\"168\" y2=\"122\" />\n",
" <line x1=\"183\" y1=\"19\" x2=\"183\" y2=\"122\" />\n",
" <line x1=\"198\" y1=\"19\" x2=\"198\" y2=\"122\" />\n",
" <line x1=\"213\" y1=\"19\" x2=\"213\" y2=\"122\" />\n",
" <line x1=\"228\" y1=\"19\" x2=\"228\" y2=\"122\" />\n",
" <line x1=\"243\" y1=\"19\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"123.405241,19.405241 243.405241,19.405241 243.405241,122.530241 123.405241,122.530241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"183.405241\" y=\"142.530241\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"263.405241\" y=\"70.967741\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,263.405241,70.967741)\">220</text>\n",
" <text x=\"103.702620\" y=\"132.827620\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,103.702620,132.827620)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(21, 12, 10, 220, 256), dtype=float32, chunksize=(21, 12, 10, 220, 32), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(control3d.data)\n",
"display(ds3d.data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time _ = bootstrap_perfect_model(ds3d,control3d,**kwb)"
]
},
{
"cell_type": "code",
"execution_count": 233,
"metadata": {},
"outputs": [],
"source": [
"# compute both inputs so the next timing runs on computed instead of lazy data\n",
"# NOTE(review): this rebinds ds3d/control3d; call get_input again before re-running\n",
"# the timing cells above that expect the chunked inputs.\n",
"ds3d = ds3d.compute()\n",
"control3d = control3d.compute()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time _ = bootstrap_perfect_model(ds3d,control3d,**kwb)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%time _ = climpred.bootstrap.bootstrap_perfect_model(ds3d,control3d,**kwb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> the code from climpred=1.2.0 is very slow\n",
"\n",
"> I think because of some unnecessary ds=ds.compute()"
]
},
{
"cell_type": "code",
"execution_count": 247,
"metadata": {},
"outputs": [],
"source": [
"# doesnt work yet for chunking reasons\n",
"# ValueError: dimension 'member' on 0th function argument to apply_ufunc with dask='parallelized' consists of \n",
"# multiple chunks, but is also a core dimension. To fix, rechunk into a single dask array chunk along this dimension,\n",
"# i.e., ``.chunk({'member': -1})``, but beware that this may significantly increase memory usage.\n",
"# kwb[\"comparison\"] = \"m2e\""
]
},
{
"cell_type": "code",
"execution_count": 257,
"metadata": {},
"outputs": [],
"source": [
"# call the m2e comparison function directly on ds3d\n",
"# NOTE(review): this rebinds `a`, which the earlier `A = a` cells read, and `b`,\n",
"# which is also used as a loop variable in the timing cells.\n",
"a, b = climpred.comparisons.__m2e.function(ds3d)"
]
},
{
"cell_type": "code",
"execution_count": 261,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 567.71 MB </td> <td> 70.96 MB </td></tr>\n",
" <tr><th> Shape </th><td> (21, 12, 10, 220, 256) </td> <td> (21, 12, 10, 220, 32) </td></tr>\n",
" <tr><th> Count </th><td> 8 Tasks </td><td> 8 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"397\" height=\"172\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"34\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"37\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"37\" style=\"stroke-width:2\" />\n",
" <line x1=\"34\" y1=\"0\" x2=\"34\" y2=\"37\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 34.105574,0.000000 34.105574,37.472373 0.000000,37.472373\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"17.052787\" y=\"57.472373\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >12</text>\n",
" <text x=\"54.105574\" y=\"18.736187\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,54.105574,18.736187)\">21</text>\n",
"\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"104\" y1=\"103\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"104\" y2=\"103\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 123.405241,19.405241 123.405241,122.530241 104.000000,103.125000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"224\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"104\" y1=\"0\" x2=\"123\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"119\" y1=\"0\" x2=\"138\" y2=\"19\" />\n",
" <line x1=\"134\" y1=\"0\" x2=\"153\" y2=\"19\" />\n",
" <line x1=\"149\" y1=\"0\" x2=\"168\" y2=\"19\" />\n",
" <line x1=\"164\" y1=\"0\" x2=\"183\" y2=\"19\" />\n",
" <line x1=\"179\" y1=\"0\" x2=\"198\" y2=\"19\" />\n",
" <line x1=\"194\" y1=\"0\" x2=\"213\" y2=\"19\" />\n",
" <line x1=\"209\" y1=\"0\" x2=\"228\" y2=\"19\" />\n",
" <line x1=\"224\" y1=\"0\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"104.000000,0.000000 224.000000,0.000000 243.405241,19.405241 123.405241,19.405241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"243\" y2=\"19\" style=\"stroke-width:2\" />\n",
" <line x1=\"123\" y1=\"122\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"123\" y1=\"19\" x2=\"123\" y2=\"122\" style=\"stroke-width:2\" />\n",
" <line x1=\"138\" y1=\"19\" x2=\"138\" y2=\"122\" />\n",
" <line x1=\"153\" y1=\"19\" x2=\"153\" y2=\"122\" />\n",
" <line x1=\"168\" y1=\"19\" x2=\"168\" y2=\"122\" />\n",
" <line x1=\"183\" y1=\"19\" x2=\"183\" y2=\"122\" />\n",
" <line x1=\"198\" y1=\"19\" x2=\"198\" y2=\"122\" />\n",
" <line x1=\"213\" y1=\"19\" x2=\"213\" y2=\"122\" />\n",
" <line x1=\"228\" y1=\"19\" x2=\"228\" y2=\"122\" />\n",
" <line x1=\"243\" y1=\"19\" x2=\"243\" y2=\"122\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"123.405241,19.405241 243.405241,19.405241 243.405241,122.530241 123.405241,122.530241\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"183.405241\" y=\"142.530241\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"263.405241\" y=\"70.967741\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,263.405241,70.967741)\">220</text>\n",
" <text x=\"103.702620\" y=\"132.827620\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,103.702620,132.827620)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(21, 12, 10, 220, 256), dtype=float32, chunksize=(21, 12, 10, 220, 32), chunktype=numpy.ndarray>"
]
},
"execution_count": 261,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds3d.data"
]
},
{
"cell_type": "code",
"execution_count": 262,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 567.71 MB </td> <td> 7.10 MB </td></tr>\n",
" <tr><th> Shape </th><td> (10, 21, 12, 220, 256) </td> <td> (1, 21, 12, 220, 32) </td></tr>\n",
" <tr><th> Count </th><td> 408 Tasks </td><td> 80 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"404\" height=\"173\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"37\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"3\" x2=\"37\" y2=\"3\" />\n",
" <line x1=\"0\" y1=\"6\" x2=\"37\" y2=\"6\" />\n",
" <line x1=\"0\" y1=\"9\" x2=\"37\" y2=\"9\" />\n",
" <line x1=\"0\" y1=\"13\" x2=\"37\" y2=\"13\" />\n",
" <line x1=\"0\" y1=\"16\" x2=\"37\" y2=\"16\" />\n",
" <line x1=\"0\" y1=\"19\" x2=\"37\" y2=\"19\" />\n",
" <line x1=\"0\" y1=\"23\" x2=\"37\" y2=\"23\" />\n",
" <line x1=\"0\" y1=\"26\" x2=\"37\" y2=\"26\" />\n",
" <line x1=\"0\" y1=\"29\" x2=\"37\" y2=\"29\" />\n",
" <line x1=\"0\" y1=\"32\" x2=\"37\" y2=\"32\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"32\" style=\"stroke-width:2\" />\n",
" <line x1=\"37\" y1=\"0\" x2=\"37\" y2=\"32\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 37.472373,0.000000 37.472373,32.988909 0.000000,32.988909\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"18.736187\" y=\"52.988909\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >21</text>\n",
" <text x=\"57.472373\" y=\"16.494455\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,57.472373,16.494455)\">10</text>\n",
"\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"107\" y1=\"0\" x2=\"127\" y2=\"20\" style=\"stroke-width:2\" />\n",
" <line x1=\"107\" y1=\"103\" x2=\"127\" y2=\"123\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"107\" y1=\"0\" x2=\"107\" y2=\"103\" style=\"stroke-width:2\" />\n",
" <line x1=\"127\" y1=\"20\" x2=\"127\" y2=\"123\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"107.000000,0.000000 127.062102,20.062102 127.062102,123.187102 107.000000,103.125000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"107\" y1=\"0\" x2=\"227\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"127\" y1=\"20\" x2=\"247\" y2=\"20\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"107\" y1=\"0\" x2=\"127\" y2=\"20\" style=\"stroke-width:2\" />\n",
" <line x1=\"122\" y1=\"0\" x2=\"142\" y2=\"20\" />\n",
" <line x1=\"137\" y1=\"0\" x2=\"157\" y2=\"20\" />\n",
" <line x1=\"152\" y1=\"0\" x2=\"172\" y2=\"20\" />\n",
" <line x1=\"167\" y1=\"0\" x2=\"187\" y2=\"20\" />\n",
" <line x1=\"182\" y1=\"0\" x2=\"202\" y2=\"20\" />\n",
" <line x1=\"197\" y1=\"0\" x2=\"217\" y2=\"20\" />\n",
" <line x1=\"212\" y1=\"0\" x2=\"232\" y2=\"20\" />\n",
" <line x1=\"227\" y1=\"0\" x2=\"247\" y2=\"20\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"107.000000,0.000000 227.000000,0.000000 247.062102,20.062102 127.062102,20.062102\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"127\" y1=\"20\" x2=\"247\" y2=\"20\" style=\"stroke-width:2\" />\n",
" <line x1=\"127\" y1=\"123\" x2=\"247\" y2=\"123\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"127\" y1=\"20\" x2=\"127\" y2=\"123\" style=\"stroke-width:2\" />\n",
" <line x1=\"142\" y1=\"20\" x2=\"142\" y2=\"123\" />\n",
" <line x1=\"157\" y1=\"20\" x2=\"157\" y2=\"123\" />\n",
" <line x1=\"172\" y1=\"20\" x2=\"172\" y2=\"123\" />\n",
" <line x1=\"187\" y1=\"20\" x2=\"187\" y2=\"123\" />\n",
" <line x1=\"202\" y1=\"20\" x2=\"202\" y2=\"123\" />\n",
" <line x1=\"217\" y1=\"20\" x2=\"217\" y2=\"123\" />\n",
" <line x1=\"232\" y1=\"20\" x2=\"232\" y2=\"123\" />\n",
" <line x1=\"247\" y1=\"20\" x2=\"247\" y2=\"123\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"127.062102,20.062102 247.062102,20.062102 247.062102,123.187102 127.062102,123.187102\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"187.062102\" y=\"143.187102\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >256</text>\n",
" <text x=\"267.062102\" y=\"71.624602\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,267.062102,71.624602)\">220</text>\n",
" <text x=\"107.031051\" y=\"133.156051\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,107.031051,133.156051)\">12</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<concatenate, shape=(10, 21, 12, 220, 256), dtype=float32, chunksize=(1, 21, 12, 220, 32), chunktype=numpy.ndarray>"
]
},
"execution_count": 262,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.data"
]
},
{
"cell_type": "code",
"execution_count": 270,
"metadata": {},
"outputs": [],
"source": [
"def _transpose_and_rechunk_to(a, ds):\n",
"    \"\"\"Reorder the dimensions of ``a`` to match ``ds`` and apply the chunks of ``ds``.\"\"\"\n",
"    reordered = a.transpose(*ds.dims)\n",
"    return reordered.chunk(ds.chunks)"
]
},
{
"cell_type": "code",
"execution_count": 273,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6 ms, sys: 0 ns, total: 6 ms\n",
"Wall time: 5.71 ms\n"
]
},
{
"data": {
"text/html": [
"<pre>&lt;xarray.DataArray &#x27;tos&#x27; (lead: 21, init: 12, member: 10, y: 220, x: 256)&gt;\n",
"dask.array&lt;rechunk-merge, shape=(21, 12, 10, 220, 256), dtype=float32, chunksize=(21, 12, 10, 220, 32), chunktype=numpy.ndarray&gt;\n",
"Coordinates:\n",
" * init (init) int64 3014 3023 3045 3061 3124 ... 3175 3178 3228 3237 3257\n",
" * lead (lead) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21\n",
" * member (member) int64 0 1 2 3 4 5 6 7 8 9\n",
"Dimensions without coordinates: y, x</pre>"
],
"text/plain": [
"<xarray.DataArray 'tos' (lead: 21, init: 12, member: 10, y: 220, x: 256)>\n",
"dask.array<rechunk-merge, shape=(21, 12, 10, 220, 256), dtype=float32, chunksize=(21, 12, 10, 220, 32), chunktype=numpy.ndarray>\n",
"Coordinates:\n",
" * init (init) int64 3014 3023 3045 3061 3124 ... 3175 3178 3228 3237 3257\n",
" * lead (lead) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21\n",
" * member (member) int64 0 1 2 3 4 5 6 7 8 9\n",
"Dimensions without coordinates: y, x"
]
},
"execution_count": 273,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time _transpose_and_rechunk_to(a, ds3d)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# timing of 1d input"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {},
"outputs": [],
"source": [
"kwb = kw.copy()\n",
"kwb[\"bootstrap\"] = 32"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 9.60 kB </td> <td> 1.92 kB </td></tr>\n",
" <tr><th> Shape </th><td> (20, 12, 10) </td> <td> (4, 12, 10) </td></tr>\n",
" <tr><th> Count </th><td> 5 Tasks </td><td> 5 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"190\" height=\"192\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"72\" x2=\"80\" y2=\"142\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"72\" style=\"stroke-width:2\" />\n",
" <line x1=\"24\" y1=\"14\" x2=\"24\" y2=\"86\" />\n",
" <line x1=\"38\" y1=\"28\" x2=\"38\" y2=\"100\" />\n",
" <line x1=\"52\" y1=\"42\" x2=\"52\" y2=\"114\" />\n",
" <line x1=\"66\" y1=\"56\" x2=\"66\" y2=\"128\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"142\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,142.588235 10.000000,72.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"70\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"24\" y1=\"14\" x2=\"84\" y2=\"14\" />\n",
" <line x1=\"38\" y1=\"28\" x2=\"98\" y2=\"28\" />\n",
" <line x1=\"52\" y1=\"42\" x2=\"112\" y2=\"42\" />\n",
" <line x1=\"66\" y1=\"56\" x2=\"126\" y2=\"56\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"140\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"70\" y1=\"0\" x2=\"140\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 70.000000,0.000000 140.588235,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"140\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"142\" x2=\"140\" y2=\"142\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"142\" style=\"stroke-width:2\" />\n",
" <line x1=\"140\" y1=\"70\" x2=\"140\" y2=\"142\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.588235,70.588235 140.588235,70.588235 140.588235,142.588235 80.588235,142.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"110.588235\" y=\"162.588235\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >10</text>\n",
" <text x=\"160.588235\" y=\"106.588235\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,160.588235,106.588235)\">12</text>\n",
" <text x=\"35.294118\" y=\"127.294118\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,127.294118)\">20</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<getitem, shape=(20, 12, 10), dtype=float32, chunksize=(4, 12, 10), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 1.20 kB </td> <td> 1.20 kB </td></tr>\n",
" <tr><th> Shape </th><td> (300,) </td> <td> (300,) </td></tr>\n",
" <tr><th> Count </th><td> 1 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"170\" height=\"75\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"25\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"25\" style=\"stroke-width:2\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"25\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,25.412617 0.000000,25.412617\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"45.412617\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >300</text>\n",
" <text x=\"140.000000\" y=\"12.706308\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,12.706308)\">1</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<getitem, shape=(300,), dtype=float32, chunksize=(300,), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ds, control = get_input(5, chunk_dim=\"lead\", D=1, use_climpred_data=True)\n",
"\n",
"# select a single area/period from each input and persist the selection\n",
"ds = ds.isel(area=1, period=-1).persist()\n",
"control = control.isel(area=1, period=-1).persist()\n",
"display(ds.data)\n",
"display(control.data)"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "522cb3a97aae4ee7ba002777c7cc3bcf",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='bootstrapping iteration', max=32.0, style=ProgressStyle(d…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"CPU times: user 24.8 s, sys: 1.24 s, total: 26 s\n",
"Wall time: 25.1 s\n"
]
}
],
"source": [
"%time _ = bootstrap_perfect_model(ds,control,**kwb)"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [],
"source": [
"ds = ds.load()\n",
"control = control.load()"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d2ebc6d5e5ce416bbca03e70532c3c4c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='bootstrapping iteration', max=32.0, style=ProgressStyle(d…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"CPU times: user 7.94 s, sys: 172 ms, total: 8.11 s\n",
"Wall time: 7.78 s\n"
]
}
],
"source": [
"%time _ = bootstrap_perfect_model(ds,control,**kwb)"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "88a0cb76c6534726a63cb6083fe5c118",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='bootstrapping iteration', max=32.0, style=ProgressStyle(d…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"CPU times: user 7.75 s, sys: 165 ms, total: 7.91 s\n",
"Wall time: 7.58 s\n"
]
}
],
"source": [
"%time _ = climpred.bootstrap.bootstrap_perfect_model(ds,control,**kwb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
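"> Parallelizing over chunked inputs doesn't make sense for this small 1D case: the chunked dask run above takes ~25 s, while the in-memory runs take ~8 s."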
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## timing of hindcast"
]
},
{
"cell_type": "code",
"execution_count": 282,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>&lt;xarray.Dataset&gt;\n",
"Dimensions: (init: 64, lead: 10, nlat: 37, nlon: 26)\n",
"Coordinates:\n",
" TLAT (nlat, nlon) float64 ...\n",
" TLONG (nlat, nlon) float64 ...\n",
" * init (init) float32 1954.0 1955.0 1956.0 1957.0 ... 2015.0 2016.0 2017.0\n",
" * lead (lead) int32 1 2 3 4 5 6 7 8 9 10\n",
" TAREA (nlat, nlon) float64 ...\n",
"Dimensions without coordinates: nlat, nlon\n",
"Data variables:\n",
" SST (init, lead, nlat, nlon) float32 ...</pre>"
],
"text/plain": [
"<xarray.Dataset>\n",
"Dimensions: (init: 64, lead: 10, nlat: 37, nlon: 26)\n",
"Coordinates:\n",
" TLAT (nlat, nlon) float64 ...\n",
" TLONG (nlat, nlon) float64 ...\n",
" * init (init) float32 1954.0 1955.0 1956.0 1957.0 ... 2015.0 2016.0 2017.0\n",
" * lead (lead) int32 1 2 3 4 5 6 7 8 9 10\n",
" TAREA (nlat, nlon) float64 ...\n",
"Dimensions without coordinates: nlat, nlon\n",
"Data variables:\n",
" SST (init, lead, nlat, nlon) float32 ..."
]
},
"execution_count": 282,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"climpred.tutorial.load_dataset(\"CESM-DP-SST-3D\")"
]
},
{
"cell_type": "code",
"execution_count": 301,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 261.66 kB </td> <td> 261.66 kB </td></tr>\n",
" <tr><th> Shape </th><td> (68, 37, 26) </td> <td> (68, 37, 26) </td></tr>\n",
" <tr><th> Count </th><td> 1 Tasks </td><td> 1 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"176\" height=\"185\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"10\" y1=\"65\" x2=\"80\" y2=\"135\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"65\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"135\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,135.882353 10.000000,65.294118\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"55\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"70\" x2=\"126\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"55\" y1=\"0\" x2=\"126\" y2=\"70\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"10.000000,0.000000 55.882353,0.000000 126.470588,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"126\" y2=\"70\" style=\"stroke-width:2\" />\n",
" <line x1=\"80\" y1=\"135\" x2=\"126\" y2=\"135\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"135\" style=\"stroke-width:2\" />\n",
" <line x1=\"126\" y1=\"70\" x2=\"126\" y2=\"135\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"80.588235,70.588235 126.470588,70.588235 126.470588,135.882353 80.588235,135.882353\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"103.529412\" y=\"155.882353\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >26</text>\n",
" <text x=\"146.470588\" y=\"103.235294\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,146.470588,103.235294)\">37</text>\n",
" <text x=\"35.294118\" y=\"120.588235\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,120.588235)\">68</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<xarray-<this-array>, shape=(68, 37, 26), dtype=float32, chunksize=(68, 37, 26), chunktype=numpy.ndarray>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<table>\n",
"<tr>\n",
"<td>\n",
"<table>\n",
" <thead>\n",
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><th> Bytes </th><td> 4.93 MB </td> <td> 492.54 kB </td></tr>\n",
" <tr><th> Shape </th><td> (2, 64, 10, 37, 26) </td> <td> (1, 64, 2, 37, 26) </td></tr>\n",
" <tr><th> Count </th><td> 10 Tasks </td><td> 10 Chunks </td></tr>\n",
" <tr><th> Type </th><td> float32 </td><td> numpy.ndarray </td></tr>\n",
" </tbody>\n",
"</table>\n",
"</td>\n",
"<td>\n",
"<svg width=\"502\" height=\"143\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"15\" x2=\"120\" y2=\"15\" />\n",
" <line x1=\"0\" y1=\"31\" x2=\"120\" y2=\"31\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"31\" style=\"stroke-width:2\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"31\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.000000,0.000000 120.000000,0.000000 120.000000,31.635229 0.000000,31.635229\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"51.635229\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >64</text>\n",
" <text x=\"140.000000\" y=\"15.817615\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,140.000000,15.817615)\">2</text>\n",
"\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"190\" y1=\"0\" x2=\"214\" y2=\"24\" style=\"stroke-width:2\" />\n",
" <line x1=\"190\" y1=\"69\" x2=\"214\" y2=\"93\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"190\" y1=\"0\" x2=\"190\" y2=\"69\" style=\"stroke-width:2\" />\n",
" <line x1=\"194\" y1=\"4\" x2=\"194\" y2=\"74\" />\n",
" <line x1=\"199\" y1=\"9\" x2=\"199\" y2=\"78\" />\n",
" <line x1=\"204\" y1=\"14\" x2=\"204\" y2=\"83\" />\n",
" <line x1=\"209\" y1=\"19\" x2=\"209\" y2=\"88\" />\n",
" <line x1=\"214\" y1=\"24\" x2=\"214\" y2=\"93\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"190.000000,0.000000 214.059877,24.059877 214.059877,93.434877 190.000000,69.375000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"190\" y1=\"0\" x2=\"238\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"194\" y1=\"4\" x2=\"243\" y2=\"4\" />\n",
" <line x1=\"199\" y1=\"9\" x2=\"248\" y2=\"9\" />\n",
" <line x1=\"204\" y1=\"14\" x2=\"253\" y2=\"14\" />\n",
" <line x1=\"209\" y1=\"19\" x2=\"257\" y2=\"19\" />\n",
" <line x1=\"214\" y1=\"24\" x2=\"262\" y2=\"24\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"190\" y1=\"0\" x2=\"214\" y2=\"24\" style=\"stroke-width:2\" />\n",
" <line x1=\"238\" y1=\"0\" x2=\"262\" y2=\"24\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"190.000000,0.000000 238.750000,0.000000 262.809877,24.059877 214.059877,24.059877\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"214\" y1=\"24\" x2=\"262\" y2=\"24\" style=\"stroke-width:2\" />\n",
" <line x1=\"214\" y1=\"93\" x2=\"262\" y2=\"93\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"214\" y1=\"24\" x2=\"214\" y2=\"93\" style=\"stroke-width:2\" />\n",
" <line x1=\"262\" y1=\"24\" x2=\"262\" y2=\"93\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"214.059877,24.059877 262.809877,24.059877 262.809877,93.434877 214.059877,93.434877\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"238.434877\" y=\"113.434877\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >26</text>\n",
" <text x=\"282.809877\" y=\"58.747377\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(0,282.809877,58.747377)\">37</text>\n",
" <text x=\"192.029938\" y=\"101.404938\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,192.029938,101.404938)\">10</text>\n",
"</svg>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"dask.array<concatenate, shape=(2, 64, 10, 37, 26), dtype=float32, chunksize=(1, 64, 2, 37, 26), chunktype=numpy.ndarray>"
]
},
"execution_count": 301,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_hindcast(\n",
"    nworker,\n",
"    no_time_chunks=True,\n",
"    chunk_dim=\"nlon\",\n",
"    persist=True,\n",
"    rm_coords=True,\n",
"    use_climpred_data=True,\n",
"    var=\"SST\",\n",
"    only_one_control_chunk=True,\n",
"    add_member=True,\n",
"):\n",
"    \"\"\"Return a chunked 3D hindcast and the accompanying ``control3d`` array.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    nworker : int\n",
"        Number of workers; ``chunk_dim`` is split into chunks of\n",
"        ``size // nworker`` elements (at least 1).\n",
"    no_time_chunks : bool\n",
"        Currently unused; kept so existing calls keep working.\n",
"    chunk_dim : str\n",
"        Dimension of the hindcast to chunk along.\n",
"    persist : bool\n",
"        If True, rechunk the hindcast with the same chunks and call\n",
"        ``.persist()`` on both arrays.\n",
"    rm_coords : bool\n",
"        If True, delete the ``lon``/``lat`` coordinates from both arrays\n",
"        when they are present.\n",
"    use_climpred_data : bool\n",
"        If True, use the climpred tutorial datasets ``CESM-DP-{var}-3D`` and\n",
"        ``FOSI-{var}-3D``; otherwise open the files at the hard-coded\n",
"        ``/work/mh0727/...`` paths.\n",
"    var : str\n",
"        Name of the variable to select from the datasets.\n",
"    only_one_control_chunk : bool\n",
"        If True, keep ``control3d`` in a single chunk along ``time``.\n",
"    add_member : bool\n",
"        If True, concatenate the hindcast with itself along a new ``member``\n",
"        dimension labelled 1 and 2.\n",
"\n",
"    Returns\n",
"    -------\n",
"    ds3d, control3d\n",
"        The chunked hindcast and control arrays.\n",
"    \"\"\"\n",
"    D = \"3\"\n",
"    if use_climpred_data:\n",
"        ds3d = climpred.tutorial.load_dataset(f\"CESM-DP-{var}-{D}D\")[var]\n",
"    else:\n",
"\n",
"        def rename(ds):\n",
"            return ds.rename({\"ensemble\": \"init\", \"time\": \"lead\"})\n",
"\n",
"        control_path = (\n",
"            f\"/work/mh0727/m300524/experiments/postprocessed/control_{var}_ym.nc\"\n",
"        )\n",
"        ds_path = f\"/work/mh0727/m300524/experiments/postprocessed/ds_{var}_ym.nc\"\n",
"        ds3d = rename(xr.open_dataset(ds_path))[var]\n",
"    # chunk the array that is already open instead of opening the dataset a second time\n",
"    chunk_size = max(1, ds3d[chunk_dim].size // nworker)\n",
"    chunks = {chunk_dim: chunk_size}\n",
"    ds3d = ds3d.chunk(chunks)\n",
"    if add_member:\n",
"        ds3d = xr.concat([ds3d, ds3d], \"member\")\n",
"        ds3d[\"member\"] = np.arange(1, 1 + ds3d.member.size)\n",
"    if only_one_control_chunk:\n",
"        control_chunks = {\"time\": -1}\n",
"    elif chunk_dim not in [\"lead\", \"member\", \"init\"]:\n",
"        control_chunks = chunks\n",
"    else:\n",
"        control_chunks = {\"time\": chunk_size}\n",
"    if use_climpred_data:\n",
"        control3d = climpred.tutorial.load_dataset(f\"FOSI-{var}-{D}D\")[var].chunk(\n",
"            control_chunks\n",
"        )\n",
"    else:\n",
"        control3d = xr.open_dataset(control_path)[var].chunk(control_chunks)\n",
"    if rm_coords:\n",
"        for arr in [ds3d, control3d]:\n",
"            for coord in [\"lon\", \"lat\"]:  # remove coords for chunk error\n",
"                if coord in arr.coords:\n",
"                    del arr[coord]\n",
"    if persist:\n",
"        ds3d = ds3d.chunk(chunks).persist()\n",
"        control3d = control3d.persist()\n",
"    return ds3d, control3d\n",
"\n",
"\n",
"ds3d, control3d = get_hindcast(4, chunk_dim=\"lead\")\n",
"display(control3d.data)\n",
"ds3d.data"
]
},
{
"cell_type": "code",
"execution_count": 302,
"metadata": {},
"outputs": [],
"source": [
"kwh = {\"metric\": \"mse\", \"comparison\": \"m2r\"}"
]
},
{
"cell_type": "code",
"execution_count": 303,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('member', 'init', 'lead', 'nlat', 'nlon')"
]
},
"execution_count": 303,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds3d.dims"
]
},
{
"cell_type": "code",
"execution_count": 304,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.15 s, sys: 112 ms, total: 2.26 s\n",
"Wall time: 2.56 s\n"
]
}
],
"source": [
"%time _ = climpred.prediction.compute_hindcast(ds3d,control3d,**kwh).compute()"
]
},
{
"cell_type": "code",
"execution_count": 305,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 172 ms, sys: 18 ms, total: 190 ms\n",
"Wall time: 547 ms\n"
]
}
],
"source": [
"# load both hindcast inputs into memory before timing the non-dask path\n",
"ds3d = ds3d.load()\n",
"control3d = control3d.load()\n",
"%time _ = climpred.prediction.compute_hindcast(ds3d,control3d,**kwh)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:pymistral]",
"language": "python",
"name": "conda-env-pymistral-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment