Skip to content

Instantly share code, notes, and snippets.

@aaronspring
Last active June 1, 2021 13:24
Show Gist options
  • Save aaronspring/251553f132202cc91aadde03f2a452f9 to your computer and use it in GitHub Desktop.
Save aaronspring/251553f132202cc91aadde03f2a452f9 to your computer and use it in GitHub Desktop.
xarray histogram
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.core.options.set_options at 0x7fbd8ef59150>"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import xarray as xr\n",
"import xskillscore as xs\n",
"import matplotlib.pyplot as plt\n",
"np.random.seed(seed=42)\n",
"xr.set_options(display_style='text')"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Frozen({'time': 5, 'lat': 4, 'lon': 5})"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n=5\n",
"obs = xr.DataArray(\n",
" np.random.rand(n, 4, 5),\n",
" coords=[\n",
" xr.cftime_range(\"2000-01-01\", periods=n, freq=\"D\"),\n",
" np.arange(4),\n",
" np.arange(5),\n",
" ],\n",
" dims=[\"time\", \"lat\", \"lon\"],\n",
" name='var'\n",
" )\n",
"obs.sizes"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>&lt;xarray.DataArray &#x27;var&#x27; (category_edge: 2, lat: 4, lon: 5)&gt;\n",
"array([[[0.63921864, 0.72676296, 0.45690626, 0.36150778, 0.18484249],\n",
" [0.49546159, 0.07373365, 0.32318401, 0.43600901, 0.17322283],\n",
" [0.48097242, 0.30861776, 0.27171448, 0.49101462, 0.17059364],\n",
" [0.6491125 , 0.41434815, 0.36527559, 0.30878742, 0.37577337]],\n",
"\n",
" [[0.74546849, 0.84781136, 0.66679147, 0.55885266, 0.39435487],\n",
" [0.58689895, 0.08540946, 0.59092582, 0.63592234, 0.60518469],\n",
" [0.75926049, 0.77059137, 0.47549921, 0.667326 , 0.28618977],\n",
" [0.7390259 , 0.81822387, 0.56789387, 0.6810793 , 0.59875305]]])\n",
"Coordinates:\n",
" * lat (lat) int64 0 1 2 3\n",
" * lon (lon) int64 0 1 2 3 4\n",
" * category_edge (category_edge) float64 0.3333 0.6667</pre>"
],
"text/plain": [
"<xarray.DataArray 'var' (category_edge: 2, lat: 4, lon: 5)>\n",
"array([[[0.63921864, 0.72676296, 0.45690626, 0.36150778, 0.18484249],\n",
" [0.49546159, 0.07373365, 0.32318401, 0.43600901, 0.17322283],\n",
" [0.48097242, 0.30861776, 0.27171448, 0.49101462, 0.17059364],\n",
" [0.6491125 , 0.41434815, 0.36527559, 0.30878742, 0.37577337]],\n",
"\n",
" [[0.74546849, 0.84781136, 0.66679147, 0.55885266, 0.39435487],\n",
" [0.58689895, 0.08540946, 0.59092582, 0.63592234, 0.60518469],\n",
" [0.75926049, 0.77059137, 0.47549921, 0.667326 , 0.28618977],\n",
" [0.7390259 , 0.81822387, 0.56789387, 0.6810793 , 0.59875305]]])\n",
"Coordinates:\n",
" * lat (lat) int64 0 1 2 3\n",
" * lon (lon) int64 0 1 2 3 4\n",
" * category_edge (category_edge) float64 0.3333 0.6667"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bins = obs.quantile(q=[1/3, 2/3], dim='time').rename({'quantile':'category_edge'})\n",
"\n",
"bins"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"# quantile is the 1d that was allowed before\n",
"# lat, lon are vectorized aka. not used for histogram\n",
"# rather for each lon, lat an independent histogram is created"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Frozen({'category_edge': 2, 'lat': 4, 'lon': 5})"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bins.sizes"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>&lt;xarray.DataArray &#x27;var&#x27; (bin_dim: 3, lat: 4, lon: 5)&gt;\n",
"array([[[2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2]],\n",
"\n",
" [[1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1]],\n",
"\n",
" [[2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2]]])\n",
"Coordinates:\n",
" * lat (lat) int64 0 1 2 3\n",
" * lon (lon) int64 0 1 2 3 4\n",
"Dimensions without coordinates: bin_dim</pre>"
],
"text/plain": [
"<xarray.DataArray 'var' (bin_dim: 3, lat: 4, lon: 5)>\n",
"array([[[2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2]],\n",
"\n",
" [[1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1],\n",
" [1, 1, 1, 1, 1]],\n",
"\n",
" [[2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2],\n",
" [2, 2, 2, 2, 2]]])\n",
"Coordinates:\n",
" * lat (lat) int64 0 1 2 3\n",
" * lon (lon) int64 0 1 2 3 4\n",
"Dimensions without coordinates: bin_dim"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the results look a bit odd (maybe thats my input distribution?!), but I am thinking about this kind of API\n",
"def xhist(ds, bins, dim=[]):\n",
" \"\"\"Counting how often dim is in each quantile/category. ds gets reduced by dim.\"\"\"\n",
" cat1 = (ds < bins.isel(category_edge=0, drop=True)) # below first edge\n",
" cat2 = (ds < bins.isel(category_edge=1, drop=True)) & (ds >= bins.isel(category_edge=0, drop=True)) # in between\n",
" cat3 = (ds >= bins.isel(category_edge=1, drop=True)) # above last edge\n",
" binned = xr.concat([cat1, cat2, cat3], 'bin_dim').astype('int').sum(dim)\n",
" return binned\n",
"\n",
"xhist(obs, bins, dim='time')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment