Skip to content

Instantly share code, notes, and snippets.

@tgarc
Last active August 29, 2015 14:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tgarc/6c40a65f648302b6b9d7 to your computer and use it in GitHub Desktop.
Save tgarc/6c40a65f648302b6b9d7 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A short tutorial on pandas MultiIndexing with DataFrames"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pandas version: 0.16.2\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from IPython.display import display\n",
"\n",
"print \"pandas version:\",pd.version.version"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">a1</th>\n",
" <th rowspan=\"2\" valign=\"top\">b1</th>\n",
" <th>c1</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">b2</th>\n",
" <th>c1</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">a2</th>\n",
" <th rowspan=\"2\" valign=\"top\">b1</th>\n",
" <th>c1</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">b2</th>\n",
" <th>c1</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">a3</th>\n",
" <th rowspan=\"2\" valign=\"top\">b1</th>\n",
" <th>c1</th>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">b2</th>\n",
" <th>c1</th>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">a4</th>\n",
" <th rowspan=\"2\" valign=\"top\">b1</th>\n",
" <th>c1</th>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">b2</th>\n",
" <th>c1</th>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c1 2 4 3 7 5 3 6 9 0 5 8 3 6 2 3 5\n",
" c2 3 4 6 6 5 6 3 2 9 8 1 8 7 0 6 0\n",
" b2 c1 4 0 5 7 0 4 8 5 7 1 9 4 7 6 6 6\n",
" c2 7 5 8 8 4 7 4 4 9 0 3 6 1 8 0 6\n",
"a2 b1 c1 3 2 4 2 8 5 7 0 4 6 3 7 2 5 1 4\n",
" c2 6 4 6 3 9 3 6 0 3 1 2 3 5 9 8 3\n",
" b2 c1 8 9 0 7 9 8 0 3 7 6 1 8 9 0 3 9\n",
" c2 7 4 9 7 3 7 3 8 4 5 3 1 9 4 0 3\n",
"a3 b1 c1 9 0 8 0 7 5 8 2 7 8 4 3 7 2 1 7\n",
" c2 4 9 5 2 1 2 3 2 7 8 9 6 5 5 9 1\n",
" b2 c1 6 8 9 9 6 0 1 2 2 9 9 3 0 5 2 4\n",
" c2 9 8 4 0 3 3 9 0 7 0 8 8 0 4 2 9\n",
"a4 b1 c1 7 1 6 1 7 2 6 2 4 7 3 2 9 4 0 2\n",
" c2 8 2 7 9 4 7 8 2 2 0 9 7 5 8 6 4\n",
" b2 c1 9 3 8 6 2 4 8 5 8 4 1 5 4 6 3 3\n",
" c2 8 9 9 5 6 1 5 7 9 2 9 5 8 1 2 8"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"midx_col = pd.MultiIndex.from_product([['x1', 'x2', 'x3', 'x4'], ['y1', 'y2'],['z1','z2']],names=['clvl1','clvl2','clvl3'])\n",
"midx_row = pd.MultiIndex.from_product([['a1', 'a2', 'a3', 'a4'], ['b1', 'b2'],['c1','c2']],names=['rlvl1','rlvl2','rlvl3'])\n",
"df = pd.DataFrame(np.random.randint(10,size=(16,16)),index=midx_row,columns=midx_col)\n",
"\n",
"display(df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Single indexing on a MultiIndexed Dataframe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Indexing the first *l < L* levels (where *L* = `len(MultiIndex.levels)`) returns a `DataFrame` with the first *l* levels of the row index dropped.\n",
"\n",
"If *l == L* a `Series` is returned.\n",
"\n",
"Note that the column index is always kept intact."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>c1</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl3 \n",
"c1 2 4 3 7 5 3 6 9 0 5 8 3 6 2 3 5\n",
"c2 3 4 6 6 5 6 3 2 9 8 1 8 7 0 6 0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"clvl1 clvl2 clvl3\n",
"x1 y1 z1 2\n",
" z2 4\n",
" y2 z1 3\n",
" z2 7\n",
"x2 y1 z1 5\n",
" z2 3\n",
" y2 z1 6\n",
" z2 9\n",
"x3 y1 z1 0\n",
" z2 5\n",
" y2 z1 8\n",
" z2 3\n",
"x4 y1 z1 6\n",
" z2 2\n",
" y2 z1 3\n",
" z2 5\n",
"Name: (a1, b1, c1), dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(df.loc['a1','b1'])\n",
"display(df.loc['a1','b1','c1'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also select all indices at a particular level by using a colon `:`. Note that if we use a colon at any index level, the MultiIndex from the original DataFrame is kept intact (i.e. none of the levels are dropped)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a1</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c1</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c1 2 4 3 7 5 3 6 9 0 5 8 3 6 2 3 5\n",
" b2 c1 4 0 5 7 0 4 8 5 7 1 9 4 7 6 6 6"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['a1',:,'c1']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Selecting arbitrary indices at multiple levels <a id=\"arbitrary_multi-indexing\"></a>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The previous examples showed how to select rows that match only a single index at each index level (actually we learned that we can also select all indices at a particular level by using colon `:`). But it's also possible to select arbitrary indices at each index level by using indexing of the form\n",
"\n",
"`df.loc[( (rlvl1 indices), (rlvl2 indices), (rlvl3 indices) ), ( (clvl1 indices), (clvl2 indices), (clvl3 indices) )]`\n",
"\n",
"Note that\n",
"\n",
"1. The column index must be explicitly set \n",
"2. The order of index tuples must match the order of index levels\n",
"\n",
"I'll go into more detail on this syntax in the [next](#ambiguities) section."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here's a few examples"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a1</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a2</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a4</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c2 3 4 6 6 5 6 3 2 9 8 1 8 7 0 6 0\n",
" b2 c2 7 5 8 8 4 7 4 4 9 0 3 6 1 8 0 6\n",
"a2 b1 c2 6 4 6 3 9 3 6 0 3 1 2 3 5 9 8 3\n",
" b2 c2 7 4 9 7 3 7 3 8 4 5 3 1 9 4 0 3\n",
"a4 b1 c2 8 2 7 9 4 7 8 2 2 0 9 7 5 8 6 4\n",
" b2 c2 8 9 9 5 6 1 5 7 9 2 9 5 8 1 2 8"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a1</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a2</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a4</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl2 y1 y2 \n",
"clvl3 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c2 3 4 6 6\n",
" b2 c2 7 5 8 8\n",
"a2 b1 c2 6 4 6 3\n",
" b2 c2 7 4 9 7\n",
"a4 b1 c2 8 2 7 9\n",
" b2 c2 8 9 9 5"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a1</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a2</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a4</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl3 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c2 3 4\n",
" b2 c2 7 5\n",
"a2 b1 c2 6 4\n",
" b2 c2 7 4\n",
"a4 b1 c2 8 2\n",
" b2 c2 8 9"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl1</th>\n",
" <th colspan=\"2\" halign=\"left\">x1</th>\n",
" <th colspan=\"2\" halign=\"left\">x2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a1</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a2</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a4</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 \n",
"clvl2 y1 y1 \n",
"clvl3 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c2 3 4 5 6\n",
" b2 c2 7 5 4 7\n",
"a2 b1 c2 6 4 9 3\n",
" b2 c2 7 4 3 7\n",
"a4 b1 c2 8 2 4 7\n",
" b2 c2 8 9 6 1"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(df.loc[(('a1','a2','a4'), ('b1','b2'), ('c2')), :])\n",
"display(df.loc[(('a1','a2','a4'), ('b1','b2'), ('c2')), 'x1'])\n",
"display(df.loc[(('a1','a2','a4'), ('b1','b2'), ('c2')), ('x1','y1')])\n",
"display(df.loc[(('a1','a2','a4'), ('b1','b2'), ('c2')), (('x1','x2'),('y1'))])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To select all indices at a particular level, a `slice(None)` can be used in place of a tuple. (`slice(None)` takes the place of `:` here since a colon inside a tuple is illegal python syntax).\n",
"\n",
"Just as in the last section, using a `slice(None)` at any index level will assure that the MultiIndex from the original DataFrame is kept intact."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a1</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c1</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a2</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c1</th>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">a4</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b2</th>\n",
" <th>c1</th>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c1 2 4 3 7 5 3 6 9 0 5 8 3 6 2 3 5\n",
" b2 c1 4 0 5 7 0 4 8 5 7 1 9 4 7 6 6 6\n",
"a2 b1 c1 3 2 4 2 8 5 7 0 4 6 3 7 2 5 1 4\n",
" b2 c1 8 9 0 7 9 8 0 3 7 6 1 8 9 0 3 9\n",
"a4 b1 c1 7 1 6 1 7 2 6 2 4 7 3 2 9 4 0 2\n",
" b2 c1 9 3 8 6 2 4 8 5 8 4 1 5 4 6 3 3"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(df.loc[(('a1','a2','a4'), slice(None), ('c1')), :]) # select all indices at row level 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Some ambiguous cases <a id=\"ambiguities\"></a>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Remember how I said in the section [above](#arbitrary_multi-indexing) that for arbitrary multi-indexing you *had* to specify a column index for it to work? I lied :) This actually isn't always necessary. For example the following two are equivalent"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result1 = df.loc[('a1','a2','a4'), ('b1','b2'), ('c2')] # incomplete indexing\n",
"result2 = df.loc[(('a1','a2','a4'), ('b1','b2'), ('c2')),:] # explicit column indexing\n",
"np.all(result1 == result2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The problem with this type of **incomplete indexing** with multi-index dataframes is that the behavior isn't always well defined. Specifically, when the number of index tuples is 1 or 2, pandas has no way of knowing which index tuples belong to the row index and which belong to the column index.\n",
"\n",
"This is actually a python limitation. You see, indexing with 2 tuples *without* specifying a column index is of the form\n",
"\n",
"`df.loc[( (level1 indices), (level2 indices) )]`\n",
"\n",
"But in the context of the `__getitem__` interface, this notation is exactly equivalent to \n",
"\n",
"`df.loc[(level1 indices), (level2 indices)]`\n",
"\n",
"In pandas, this case will always be interpreted as the typical row and column indexing.\n",
"\n",
"Funnily enough, simply appending a comma after the parenthesis will eliminate this ambiguity\n",
"\n",
"`df.loc[( (level1 indices), (level2 indices) ),]` <-- Note the added comma!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In summary, incomplete indexing with 3 or more indexing tuples works"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl1</th>\n",
" <th>rlvl2</th>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a1</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>a2</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>a4</th>\n",
" <th>b1</th>\n",
" <th>c2</th>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl1 rlvl2 rlvl3 \n",
"a1 b1 c2 3 4 6 6 5 6 3 2 9 8 1 8 7 0 6 0\n",
"a2 b1 c2 6 4 6 3 9 3 6 0 3 1 2 3 5 9 8 3\n",
"a4 b1 c2 8 2 7 9 4 7 8 2 2 0 9 7 5 8 6 4"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[('a1','a2','a4'), ('b1','b3'), ('c2')]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"But this does not work when using only 2 (or less) indexing tuples"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KeyError: ('a1', 'a2', 'a4')\n"
]
}
],
"source": [
"try:\n",
" df.loc[('a1','a2','a4'), ('b1','b3')]\n",
"except KeyError as e:\n",
" print \"KeyError:\",e.message"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KeyError: the label [a2] is not in the [columns]\n"
]
}
],
"source": [
"try:\n",
" df.loc[('a1','a2','a4')] # remember: this is the same as df.loc['a1','a2','a4'] to python\n",
"except KeyError as e:\n",
" print \"KeyError:\",e.message"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Perhaps surprisingly, in cases where you only need to specify a single index at every level of the row index this type of shorthand indexing will always work - even if the length of the index tuple is 2."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th>clvl1</th>\n",
" <th colspan=\"4\" halign=\"left\">x1</th>\n",
" <th colspan=\"4\" halign=\"left\">x2</th>\n",
" <th colspan=\"4\" halign=\"left\">x3</th>\n",
" <th colspan=\"4\" halign=\"left\">x4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>clvl2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" <th colspan=\"2\" halign=\"left\">y1</th>\n",
" <th colspan=\"2\" halign=\"left\">y2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>clvl3</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" <th>z1</th>\n",
" <th>z2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>rlvl3</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>c1</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c2</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"clvl1 x1 x2 x3 x4 \n",
"clvl2 y1 y2 y1 y2 y1 y2 y1 y2 \n",
"clvl3 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2 z1 z2\n",
"rlvl3 \n",
"c1 2 4 3 7 5 3 6 9 0 5 8 3 6 2 3 5\n",
"c2 3 4 6 6 5 6 3 2 9 8 1 8 7 0 6 0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['a1','b1']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This works because pandas tries first to match the second index (`b1` in our example) to the column index and falls back to the row index if it is not found. Clearly this behavior has some consequences if your row and column Index's are not unique, so take care."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment