kforeman/20170413 - Xarray Groupby Examples.ipynb

## 20170413 - Xarray Groupby Examples.ipynb
{
 "cells": [
  {
   "metadata": {
    "trusted": true,
    "collapsed": true
   },
   "cell_type": "code",
   "source": "import xarray as xr\nfrom fbd_core import db\nimport numpy as np\nimport pandas as pd",
   "execution_count": 1,
   "outputs": []
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Open example `DataArray`"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "da = xr.open_dataarray('/ihme/forecasting/data/fbd_scenarios_data/forecast/sev/20170406_weighted_quantiles/metab_sbp.nc')\nda = da.loc[{'year_id': 2015, 'scenario': 0, 'draw': 0}]\nda",
   "execution_count": 2,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "<xarray.DataArray (location_id: 188, age_group_id: 12, sex_id: 2)>\narray([[[ 0.083082,  0.039269],\n        [ 0.132577,  0.060753],\n        ..., \n        [ 0.426379,  0.436346],\n        [ 0.444403,  0.466056]],\n\n       [[ 0.044293,  0.033703],\n        [ 0.091831,  0.054983],\n        ..., \n        [ 0.39365 ,  0.417222],\n        [ 0.415969,  0.445148]],\n\n       ..., \n       [[ 0.070702,  0.044486],\n        [ 0.129525,  0.073576],\n        ..., \n        [ 0.39822 ,  0.463433],\n        [ 0.399438,  0.500044]],\n\n       [[ 0.076865,  0.067248],\n        [ 0.143387,  0.101176],\n        ..., \n        [ 0.407869,  0.493196],\n        [ 0.407822,  0.516327]]])\nCoordinates:\n  * location_id   (location_id) int32 6 7 8 10 11 12 13 14 15 16 17 18 19 20 ...\n    year_id       int32 2015\n  * age_group_id  (age_group_id) int32 10 11 12 13 14 15 16 17 18 19 20 21\n    rei           |S9 'metab_sbp'\n    measure       |S3 'sev'\n  * sex_id        (sex_id) int32 1 2\n    quantile      float64 0.025\n    scenario      int64 0\n    draw          int64 0"
     },
     "metadata": {},
     "execution_count": 2
    }
   ]
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Map `location_id` to `super_region_id`"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Get the `location_id` to `super_region_id` lookup table (level 3 locations)"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "sr_map = db.get_locations_by_level(3)[['location_id','super_region_id']]\nsr_map.head()",
   "execution_count": 3,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>location_id</th>\n      <th>super_region_id</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>7</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>8</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>10</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>11</td>\n      <td>4</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
      "text/plain": "   location_id  super_region_id\n0            6                4\n1            7                4\n2            8                4\n3           10                4\n4           11                4"
     },
     "metadata": {},
     "execution_count": 3
    }
   ]
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Restrict to the 188 locations in the example `DataArray`"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "sr_map = sr_map.loc[sr_map['location_id'].isin(da.coords['location_id'].values)]",
   "execution_count": 4,
   "outputs": []
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Make an `xarray.Dataset` which maps `location_id` to `super_region_id` using 0/1"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "sr_map['match'] = int(1)\nx_map = sr_map.set_index(['location_id','super_region_id']).to_xarray()\nx_map = x_map.fillna(0)\nx_map",
   "execution_count": 5,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "<xarray.Dataset>\nDimensions:          (location_id: 188, super_region_id: 7)\nCoordinates:\n  * location_id      (location_id) int64 6 7 8 10 11 12 13 14 15 16 17 18 19 ...\n  * super_region_id  (super_region_id) int64 4 31 64 103 137 158 166\nData variables:\n    match            (location_id, super_region_id) float64 1.0 0.0 0.0 0.0 ..."
     },
     "metadata": {},
     "execution_count": 5
    }
   ]
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Use array multiplication to do `groupby('super_region_id')`"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Multiply to stratify locations by super regions"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "prod = da * x_map\nprod",
   "execution_count": 6,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "<xarray.Dataset>\nDimensions:          (age_group_id: 12, location_id: 188, sex_id: 2, super_region_id: 7)\nCoordinates:\n  * location_id      (location_id) int64 6 7 8 10 11 12 13 14 15 16 17 18 19 ...\n  * super_region_id  (super_region_id) int64 4 31 64 103 137 158 166\n    year_id          int32 2015\n  * age_group_id     (age_group_id) int32 10 11 12 13 14 15 16 17 18 19 20 21\n    rei              |S9 'metab_sbp'\n    measure          |S3 'sev'\n  * sex_id           (sex_id) int32 1 2\n    quantile         float64 0.025\n    scenario         int64 0\n    draw             int64 0\nData variables:\n    match            (location_id, age_group_id, sex_id, super_region_id) float64 0.08308 ..."
     },
     "metadata": {},
     "execution_count": 6
    }
   ]
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Then sum across locations within a super region"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "by_sr = prod.sum(dim='location_id')",
   "execution_count": 7,
   "outputs": []
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Compare to `pandas.DataFrame` approach"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Convert to dataframe"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false,
    "scrolled": true
   },
   "cell_type": "code",
   "source": "df = da.to_dataframe(name='value').reset_index()\ndf.head()",
   "execution_count": 8,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>location_id</th>\n      <th>age_group_id</th>\n      <th>sex_id</th>\n      <th>year_id</th>\n      <th>rei</th>\n      <th>measure</th>\n      <th>quantile</th>\n      <th>scenario</th>\n      <th>draw</th>\n      <th>value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6</td>\n      <td>10</td>\n      <td>1</td>\n      <td>2015</td>\n      <td>metab_sbp</td>\n      <td>sev</td>\n      <td>0.025</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.083082</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6</td>\n      <td>10</td>\n      <td>2</td>\n      <td>2015</td>\n      <td>metab_sbp</td>\n      <td>sev</td>\n      <td>0.025</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.039269</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>6</td>\n      <td>11</td>\n      <td>1</td>\n      <td>2015</td>\n      <td>metab_sbp</td>\n      <td>sev</td>\n      <td>0.025</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.132577</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>6</td>\n      <td>11</td>\n      <td>2</td>\n      <td>2015</td>\n      <td>metab_sbp</td>\n      <td>sev</td>\n      <td>0.025</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.060753</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>6</td>\n      <td>12</td>\n      <td>1</td>\n      <td>2015</td>\n      <td>metab_sbp</td>\n      <td>sev</td>\n      <td>0.025</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.159037</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
      "text/plain": "   location_id  age_group_id  sex_id  year_id        rei measure  quantile  \\\n0            6            10       1     2015  metab_sbp     sev     0.025   \n1            6            10       2     2015  metab_sbp     sev     0.025   \n2            6            11       1     2015  metab_sbp     sev     0.025   \n3            6            11       2     2015  metab_sbp     sev     0.025   \n4            6            12       1     2015  metab_sbp     sev     0.025   \n\n   scenario  draw     value  \n0         0     0  0.083082  \n1         0     0  0.039269  \n2         0     0  0.132577  \n3         0     0  0.060753  \n4         0     0  0.159037  "
     },
     "metadata": {},
     "execution_count": 8
    }
   ]
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### Merge on super region"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "df = df.merge(sr_map[['location_id','super_region_id']])",
   "execution_count": 9,
   "outputs": []
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### `groupby`"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "df_by_sr = df.groupby(['super_region_id','age_group_id','sex_id']).aggregate({'value': 'sum'})\ndf_by_sr.head()",
   "execution_count": 10,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>super_region_id</th>\n      <th>age_group_id</th>\n      <th>sex_id</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th rowspan=\"5\" valign=\"top\">4</th>\n      <th rowspan=\"2\" valign=\"top\">10</th>\n      <th>1</th>\n      <td>1.553835</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>0.720195</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">11</th>\n      <th>1</th>\n      <td>3.011857</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1.277670</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <th>1</th>\n      <td>3.415010</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
      "text/plain": "                                        value\nsuper_region_id age_group_id sex_id          \n4               10           1       1.553835\n                             2       0.720195\n                11           1       3.011857\n                             2       1.277670\n                12           1       3.415010"
     },
     "metadata": {},
     "execution_count": 10
    }
   ]
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Compare results"
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "def compare(sr, a, s):\n    pandas_result = df_by_sr.loc[sr, a, s].value\n    xarray_result = by_sr.loc[{'super_region_id': sr, 'age_group_id': a, 'sex_id': s}]['match'].values\n    print(pandas_result, xarray_result)",
   "execution_count": 11,
   "outputs": []
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "compare(4, 11, 1)",
   "execution_count": 12,
   "outputs": [
    {
     "output_type": "stream",
     "text": "(3.0118570055526592, array(3.0118570055526592))\n",
     "name": "stdout"
    }
   ]
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "compare(166, 15, 2)",
   "execution_count": 13,
   "outputs": [
    {
     "output_type": "stream",
     "text": "(11.459929973513688, array(11.459929973513688))\n",
     "name": "stdout"
    }
   ]
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "combined = pd.merge(df_by_sr.reset_index(), by_sr.to_dataframe().reset_index())\ncombined.head()",
   "execution_count": 14,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>super_region_id</th>\n      <th>age_group_id</th>\n      <th>sex_id</th>\n      <th>value</th>\n      <th>match</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>4</td>\n      <td>10</td>\n      <td>1</td>\n      <td>1.553835</td>\n      <td>1.553835</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>4</td>\n      <td>10</td>\n      <td>2</td>\n      <td>0.720195</td>\n      <td>0.720195</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>4</td>\n      <td>11</td>\n      <td>1</td>\n      <td>3.011857</td>\n      <td>3.011857</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>11</td>\n      <td>2</td>\n      <td>1.277670</td>\n      <td>1.277670</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4</td>\n      <td>12</td>\n      <td>1</td>\n      <td>3.415010</td>\n      <td>3.415010</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
      "text/plain": "   super_region_id  age_group_id  sex_id     value     match\n0                4            10       1  1.553835  1.553835\n1                4            10       2  0.720195  0.720195\n2                4            11       1  3.011857  3.011857\n3                4            11       2  1.277670  1.277670\n4                4            12       1  3.415010  3.415010"
     },
     "metadata": {},
     "execution_count": 14
    }
   ]
  },
  {
   "metadata": {
    "trusted": true,
    "collapsed": false
   },
   "cell_type": "code",
   "source": "np.all(combined['value'] == combined['match'])",
   "execution_count": 15,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "True"
     },
     "metadata": {},
     "execution_count": 15
    }
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "python2",
   "display_name": "Python 2",
   "language": "python"
  },
  "language_info": {
   "mimetype": "text/x-python",
   "nbconvert_exporter": "python",
   "name": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12",
   "file_extension": ".py",
   "codemirror_mode": {
    "version": 2,
    "name": "ipython"
   }
  },
  "gist_id": "a549966a39242b717286bb48ada17afb"
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "import xarray as xr\nfrom fbd_core import db\nimport numpy as np\nimport pandas as pd",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Open example `DataArray`"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "da = xr.open_dataarray('/ihme/forecasting/data/fbd_scenarios_data/forecast/sev/20170406_weighted_quantiles/metab_sbp.nc')\nda = da.loc[{'year_id': 2015, 'scenario': 0, 'draw': 0}]\nda",
	"execution_count": 2,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "<xarray.DataArray (location_id: 188, age_group_id: 12, sex_id: 2)>\narray([[[ 0.083082, 0.039269],\n [ 0.132577, 0.060753],\n ..., \n [ 0.426379, 0.436346],\n [ 0.444403, 0.466056]],\n\n [[ 0.044293, 0.033703],\n [ 0.091831, 0.054983],\n ..., \n [ 0.39365 , 0.417222],\n [ 0.415969, 0.445148]],\n\n ..., \n [[ 0.070702, 0.044486],\n [ 0.129525, 0.073576],\n ..., \n [ 0.39822 , 0.463433],\n [ 0.399438, 0.500044]],\n\n [[ 0.076865, 0.067248],\n [ 0.143387, 0.101176],\n ..., \n [ 0.407869, 0.493196],\n [ 0.407822, 0.516327]]])\nCoordinates:\n * location_id (location_id) int32 6 7 8 10 11 12 13 14 15 16 17 18 19 20 ...\n year_id int32 2015\n * age_group_id (age_group_id) int32 10 11 12 13 14 15 16 17 18 19 20 21\n rei |S9 'metab_sbp'\n measure |S3 'sev'\n * sex_id (sex_id) int32 1 2\n quantile float64 0.025\n scenario int64 0\n draw int64 0"
	},
	"metadata": {},
	"execution_count": 2
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Map `location_id` to `super_region_id`"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Get the `location_id` to `super_region_id` lookup table (level 3 locations)"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "sr_map = db.get_locations_by_level(3)[['location_id','super_region_id']]\nsr_map.head()",
	"execution_count": 3,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>location_id</th>\n <th>super_region_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>6</td>\n <td>4</td>\n </tr>\n <tr>\n <th>1</th>\n <td>7</td>\n <td>4</td>\n </tr>\n <tr>\n <th>2</th>\n <td>8</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>11</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>",
	"text/plain": " location_id super_region_id\n0 6 4\n1 7 4\n2 8 4\n3 10 4\n4 11 4"
	},
	"metadata": {},
	"execution_count": 3
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Restrict to the 188 locations in the example `DataArray`"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "sr_map = sr_map.loc[sr_map['location_id'].isin(da.coords['location_id'].values)]",
	"execution_count": 4,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Make an `xarray.Dataset` which maps `location_id` to `super_region_id` using 0/1"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "sr_map['match'] = int(1)\nx_map = sr_map.set_index(['location_id','super_region_id']).to_xarray()\nx_map = x_map.fillna(0)\nx_map",
	"execution_count": 5,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "<xarray.Dataset>\nDimensions: (location_id: 188, super_region_id: 7)\nCoordinates:\n * location_id (location_id) int64 6 7 8 10 11 12 13 14 15 16 17 18 19 ...\n * super_region_id (super_region_id) int64 4 31 64 103 137 158 166\nData variables:\n match (location_id, super_region_id) float64 1.0 0.0 0.0 0.0 ..."
	},
	"metadata": {},
	"execution_count": 5
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Use array multiplication to do `groupby('super_region_id')`"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Multiply to stratify locations by super regions"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "prod = da * x_map\nprod",
	"execution_count": 6,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "<xarray.Dataset>\nDimensions: (age_group_id: 12, location_id: 188, sex_id: 2, super_region_id: 7)\nCoordinates:\n * location_id (location_id) int64 6 7 8 10 11 12 13 14 15 16 17 18 19 ...\n * super_region_id (super_region_id) int64 4 31 64 103 137 158 166\n year_id int32 2015\n * age_group_id (age_group_id) int32 10 11 12 13 14 15 16 17 18 19 20 21\n rei |S9 'metab_sbp'\n measure |S3 'sev'\n * sex_id (sex_id) int32 1 2\n quantile float64 0.025\n scenario int64 0\n draw int64 0\nData variables:\n match (location_id, age_group_id, sex_id, super_region_id) float64 0.08308 ..."
	},
	"metadata": {},
	"execution_count": 6
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Then sum across locations within a super region"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "by_sr = prod.sum(dim='location_id')",
	"execution_count": 7,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Compare to `pandas.DataFrame` approach"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Convert to dataframe"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false,
	"scrolled": true
	},
	"cell_type": "code",
	"source": "df = da.to_dataframe(name='value').reset_index()\ndf.head()",
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>location_id</th>\n <th>age_group_id</th>\n <th>sex_id</th>\n <th>year_id</th>\n <th>rei</th>\n <th>measure</th>\n <th>quantile</th>\n <th>scenario</th>\n <th>draw</th>\n <th>value</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>6</td>\n <td>10</td>\n <td>1</td>\n <td>2015</td>\n <td>metab_sbp</td>\n <td>sev</td>\n <td>0.025</td>\n <td>0</td>\n <td>0</td>\n <td>0.083082</td>\n </tr>\n <tr>\n <th>1</th>\n <td>6</td>\n <td>10</td>\n <td>2</td>\n <td>2015</td>\n <td>metab_sbp</td>\n <td>sev</td>\n <td>0.025</td>\n <td>0</td>\n <td>0</td>\n <td>0.039269</td>\n </tr>\n <tr>\n <th>2</th>\n <td>6</td>\n <td>11</td>\n <td>1</td>\n <td>2015</td>\n <td>metab_sbp</td>\n <td>sev</td>\n <td>0.025</td>\n <td>0</td>\n <td>0</td>\n <td>0.132577</td>\n </tr>\n <tr>\n <th>3</th>\n <td>6</td>\n <td>11</td>\n <td>2</td>\n <td>2015</td>\n <td>metab_sbp</td>\n <td>sev</td>\n <td>0.025</td>\n <td>0</td>\n <td>0</td>\n <td>0.060753</td>\n </tr>\n <tr>\n <th>4</th>\n <td>6</td>\n <td>12</td>\n <td>1</td>\n <td>2015</td>\n <td>metab_sbp</td>\n <td>sev</td>\n <td>0.025</td>\n <td>0</td>\n <td>0</td>\n <td>0.159037</td>\n </tr>\n </tbody>\n</table>\n</div>",
	"text/plain": " location_id age_group_id sex_id year_id rei measure quantile \\\n0 6 10 1 2015 metab_sbp sev 0.025 \n1 6 10 2 2015 metab_sbp sev 0.025 \n2 6 11 1 2015 metab_sbp sev 0.025 \n3 6 11 2 2015 metab_sbp sev 0.025 \n4 6 12 1 2015 metab_sbp sev 0.025 \n\n scenario draw value \n0 0 0 0.083082 \n1 0 0 0.039269 \n2 0 0 0.132577 \n3 0 0 0.060753 \n4 0 0 0.159037 "
	},
	"metadata": {},
	"execution_count": 8
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Merge on super region"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "df = df.merge(sr_map[['location_id','super_region_id']])",
	"execution_count": 9,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### `groupby`"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "df_by_sr = df.groupby(['super_region_id','age_group_id','sex_id']).aggregate({'value': 'sum'})\ndf_by_sr.head()",
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th></th>\n <th>value</th>\n </tr>\n <tr>\n <th>super_region_id</th>\n <th>age_group_id</th>\n <th>sex_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th rowspan=\"5\" valign=\"top\">4</th>\n <th rowspan=\"2\" valign=\"top\">10</th>\n <th>1</th>\n <td>1.553835</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0.720195</td>\n </tr>\n <tr>\n <th rowspan=\"2\" valign=\"top\">11</th>\n <th>1</th>\n <td>3.011857</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1.277670</td>\n </tr>\n <tr>\n <th>12</th>\n <th>1</th>\n <td>3.415010</td>\n </tr>\n </tbody>\n</table>\n</div>",
	"text/plain": " value\nsuper_region_id age_group_id sex_id \n4 10 1 1.553835\n 2 0.720195\n 11 1 3.011857\n 2 1.277670\n 12 1 3.415010"
	},
	"metadata": {},
	"execution_count": 10
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Compare results"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "def compare(sr, a, s):\n pandas_result = df_by_sr.loc[sr, a, s].value\n xarray_result = by_sr.loc[{'super_region_id': sr, 'age_group_id': a, 'sex_id': s}]['match'].values\n print(pandas_result, xarray_result)",
	"execution_count": 11,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "compare(4, 11, 1)",
	"execution_count": 12,
	"outputs": [
	{
	"output_type": "stream",
	"text": "(3.0118570055526592, array(3.0118570055526592))\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "compare(166, 15, 2)",
	"execution_count": 13,
	"outputs": [
	{
	"output_type": "stream",
	"text": "(11.459929973513688, array(11.459929973513688))\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "combined = pd.merge(df_by_sr.reset_index(), by_sr.to_dataframe().reset_index())\ncombined.head()",
	"execution_count": 14,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>super_region_id</th>\n <th>age_group_id</th>\n <th>sex_id</th>\n <th>value</th>\n <th>match</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>4</td>\n <td>10</td>\n <td>1</td>\n <td>1.553835</td>\n <td>1.553835</td>\n </tr>\n <tr>\n <th>1</th>\n <td>4</td>\n <td>10</td>\n <td>2</td>\n <td>0.720195</td>\n <td>0.720195</td>\n </tr>\n <tr>\n <th>2</th>\n <td>4</td>\n <td>11</td>\n <td>1</td>\n <td>3.011857</td>\n <td>3.011857</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>11</td>\n <td>2</td>\n <td>1.277670</td>\n <td>1.277670</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>12</td>\n <td>1</td>\n <td>3.415010</td>\n <td>3.415010</td>\n </tr>\n </tbody>\n</table>\n</div>",
	"text/plain": " super_region_id age_group_id sex_id value match\n0 4 10 1 1.553835 1.553835\n1 4 10 2 0.720195 0.720195\n2 4 11 1 3.011857 3.011857\n3 4 11 2 1.277670 1.277670\n4 4 12 1 3.415010 3.415010"
	},
	"metadata": {},
	"execution_count": 14
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "np.all(combined['value'] == combined['match'])",
	"execution_count": 15,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "True"
	},
	"metadata": {},
	"execution_count": 15
	}
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python2",
	"display_name": "Python 2",
	"language": "python"
	},
	"language_info": {
	"mimetype": "text/x-python",
	"nbconvert_exporter": "python",
	"name": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.12",
	"file_extension": ".py",
	"codemirror_mode": {
	"version": 2,
	"name": "ipython"
	}
	},
	"gist_id": "a549966a39242b717286bb48ada17afb"
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}