Last active
June 1, 2021 13:24
-
-
Save aaronspring/251553f132202cc91aadde03f2a452f9 to your computer and use it in GitHub Desktop.
xarray histogram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.core.options.set_options at 0x7fbd8ef59150>" | |
] | |
}, | |
"execution_count": 78, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"import xarray as xr\n", | |
"import xskillscore as xs\n", | |
"import matplotlib.pyplot as plt\n", | |
"np.random.seed(seed=42)\n", | |
"xr.set_options(display_style='text')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 120, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Frozen({'time': 5, 'lat': 4, 'lon': 5})" | |
] | |
}, | |
"execution_count": 120, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"n=5\n", | |
"obs = xr.DataArray(\n", | |
" np.random.rand(n, 4, 5),\n", | |
" coords=[\n", | |
" xr.cftime_range(\"2000-01-01\", periods=n, freq=\"D\"),\n", | |
" np.arange(4),\n", | |
" np.arange(5),\n", | |
" ],\n", | |
" dims=[\"time\", \"lat\", \"lon\"],\n", | |
" name='var'\n", | |
" )\n", | |
"obs.sizes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 121, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<pre><xarray.DataArray 'var' (category_edge: 2, lat: 4, lon: 5)>\n", | |
"array([[[0.63921864, 0.72676296, 0.45690626, 0.36150778, 0.18484249],\n", | |
" [0.49546159, 0.07373365, 0.32318401, 0.43600901, 0.17322283],\n", | |
" [0.48097242, 0.30861776, 0.27171448, 0.49101462, 0.17059364],\n", | |
" [0.6491125 , 0.41434815, 0.36527559, 0.30878742, 0.37577337]],\n", | |
"\n", | |
" [[0.74546849, 0.84781136, 0.66679147, 0.55885266, 0.39435487],\n", | |
" [0.58689895, 0.08540946, 0.59092582, 0.63592234, 0.60518469],\n", | |
" [0.75926049, 0.77059137, 0.47549921, 0.667326 , 0.28618977],\n", | |
" [0.7390259 , 0.81822387, 0.56789387, 0.6810793 , 0.59875305]]])\n", | |
"Coordinates:\n", | |
" * lat (lat) int64 0 1 2 3\n", | |
" * lon (lon) int64 0 1 2 3 4\n", | |
" * category_edge (category_edge) float64 0.3333 0.6667</pre>" | |
], | |
"text/plain": [ | |
"<xarray.DataArray 'var' (category_edge: 2, lat: 4, lon: 5)>\n", | |
"array([[[0.63921864, 0.72676296, 0.45690626, 0.36150778, 0.18484249],\n", | |
" [0.49546159, 0.07373365, 0.32318401, 0.43600901, 0.17322283],\n", | |
" [0.48097242, 0.30861776, 0.27171448, 0.49101462, 0.17059364],\n", | |
" [0.6491125 , 0.41434815, 0.36527559, 0.30878742, 0.37577337]],\n", | |
"\n", | |
" [[0.74546849, 0.84781136, 0.66679147, 0.55885266, 0.39435487],\n", | |
" [0.58689895, 0.08540946, 0.59092582, 0.63592234, 0.60518469],\n", | |
" [0.75926049, 0.77059137, 0.47549921, 0.667326 , 0.28618977],\n", | |
" [0.7390259 , 0.81822387, 0.56789387, 0.6810793 , 0.59875305]]])\n", | |
"Coordinates:\n", | |
" * lat (lat) int64 0 1 2 3\n", | |
" * lon (lon) int64 0 1 2 3 4\n", | |
" * category_edge (category_edge) float64 0.3333 0.6667" | |
] | |
}, | |
"execution_count": 121, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bins = obs.quantile(q=[1/3, 2/3], dim='time').rename({'quantile':'category_edge'})\n", | |
"\n", | |
"bins" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 122, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# quantile is the 1d that was allowed before\n", | |
"# lat, lon are vectorized aka. not used for histogram\n", | |
"# rather for each lon, lat an independent histogram is created" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Frozen({'category_edge': 2, 'lat': 4, 'lon': 5})" | |
] | |
}, | |
"execution_count": 123, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bins.sizes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<pre><xarray.DataArray 'var' (bin_dim: 3, lat: 4, lon: 5)>\n", | |
"array([[[2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2]],\n", | |
"\n", | |
" [[1, 1, 1, 1, 1],\n", | |
" [1, 1, 1, 1, 1],\n", | |
" [1, 1, 1, 1, 1],\n", | |
" [1, 1, 1, 1, 1]],\n", | |
"\n", | |
" [[2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2]]])\n", | |
"Coordinates:\n", | |
" * lat (lat) int64 0 1 2 3\n", | |
" * lon (lon) int64 0 1 2 3 4\n", | |
"Dimensions without coordinates: bin_dim</pre>" | |
], | |
"text/plain": [ | |
"<xarray.DataArray 'var' (bin_dim: 3, lat: 4, lon: 5)>\n", | |
"array([[[2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2]],\n", | |
"\n", | |
" [[1, 1, 1, 1, 1],\n", | |
" [1, 1, 1, 1, 1],\n", | |
" [1, 1, 1, 1, 1],\n", | |
" [1, 1, 1, 1, 1]],\n", | |
"\n", | |
" [[2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2],\n", | |
" [2, 2, 2, 2, 2]]])\n", | |
"Coordinates:\n", | |
" * lat (lat) int64 0 1 2 3\n", | |
" * lon (lon) int64 0 1 2 3 4\n", | |
"Dimensions without coordinates: bin_dim" | |
] | |
}, | |
"execution_count": 127, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# the results look a bit odd (maybe thats my input distribution?!), but I am thinking about this kind of API\n", | |
"def xhist(ds, bins, dim=[]):\n", | |
" \"\"\"Counting how often dim is in each quantile/category. ds gets reduced by dim.\"\"\"\n", | |
" cat1 = (ds < bins.isel(category_edge=0, drop=True)) # below first edge\n", | |
" cat2 = (ds < bins.isel(category_edge=1, drop=True)) & (ds >= bins.isel(category_edge=0, drop=True)) # in between\n", | |
" cat3 = (ds >= bins.isel(category_edge=1, drop=True)) # above last edge\n", | |
" binned = xr.concat([cat1, cat2, cat3], 'bin_dim').astype('int').sum(dim)\n", | |
" return binned\n", | |
"\n", | |
"xhist(obs, bins, dim='time')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment