yiboyang/pandas_help.ipynb

## pandas_help.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "57462fc1-d0e5-4dec-b39b-7a98b664e524",
   "metadata": {},
   "source": [
    "# Code snippets to remind myself how pandas works"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "305786fc-ca8c-4e21-ae4f-cac19509266b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ccb94d3f-7744-4193-8310-ab4a46265e35",
   "metadata": {},
   "source": [
    "## Create dataframe\n",
    "\n",
    "See https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#from-dict-of-ndarrays-lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "555a9f98-0c69-43f5-a436-570c6144e4eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# My typical dataframe from model training:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5d0b92a0-03c1-48eb-9e96-3328bc36f4fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame([{'step': 10, 'loss': 0.2, 'lr': 0.01}, {'step': 11, 'loss': 0.18, 'lr': 0.01}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a9afb9b5-2fcc-4b19-a600-7b6fe1444510",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>step</th>\n",
       "      <th>loss</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   step  loss    lr\n",
       "0    10  0.20  0.01\n",
       "1    11  0.18  0.01"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b86e7c78-6d9d-438c-b6e4-6c72b5d49a40",
   "metadata": {},
   "outputs": [],
   "source": [
    "# alternatively, it's also possible to create the same df with\n",
    "df = pd.DataFrame({'step': [10, 11], 'loss': [0.2, 0.18], 'lr': [0.01, 0.01]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8f13f893-aa62-4396-a400-049bf472e092",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>step</th>\n",
       "      <th>loss</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   step  loss    lr\n",
       "0    10  0.20  0.01\n",
       "1    11  0.18  0.01"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17343ed7-f99d-459a-8446-50517b2b2e87",
   "metadata": {},
   "source": [
    "## Create index"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b612e9df-b79f-4c38-92cd-1db7a25eb434",
   "metadata": {},
   "source": [
    "\"Index\" is is what pandas calls the sequence of row labels; exactly analogous to column labels, which are ['step', 'loss', 'lr'] in this case. Since I didn't specify an index, the default index just consists of integers starting from 0. "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "df922877-3f81-4903-b861-b1741df10567",
   "metadata": {},
   "source": [
    "### Override existing index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7c0e74bf-9261-42a9-a628-63eb98c5386d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.index = ['a', 'b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "fb7a456a-c583-42d8-b5e3-1a951de2e7d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>step</th>\n",
       "      <th>loss</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>10</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>11</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   step  loss    lr\n",
       "a    10  0.20  0.01\n",
       "b    11  0.18  0.01"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8b979cb-f002-4e11-90f3-d4c9b6dbbefb",
   "metadata": {},
   "source": [
    "### set_index\n",
    "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d10ee1db-19e2-499a-b63a-ce08a847d712",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.20</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.18</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      loss    lr\n",
       "step            \n",
       "10    0.20  0.01\n",
       "11    0.18  0.01"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.set_index('step')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "bce67f71-65dd-4f3e-95ef-8e07fa6d471a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# above returns a copy\n",
    "df = df.set_index('step')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9887975f-4b93-40e7-8274-06df770ab0e8",
   "metadata": {},
   "source": [
    "## Selection/indexing\n",
    "https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html#selection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "1a239dfb-8bfa-4662-ae8b-b7f817a063de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "step\n",
       "10    0.20\n",
       "11    0.18\n",
       "Name: loss, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by column name\n",
    "df['loss']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c7cd7d26-23e3-498d-80ad-c32209628057",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loss    0.20\n",
       "lr      0.01\n",
       "Name: 10, dtype: float64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by row label\n",
    "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc\n",
    "df.loc[10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e98ab45-bfae-43c6-b89d-33413da6cec6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# by row AND column (analgous to indexing a numpy matrix; syntax also used for MultiIndex (see blelow))\n",
    "df.loc[10, 'loss']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "434f896f-b891-4d37-b661-ffd384f9f7e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.20</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.18</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      loss    lr\n",
       "step            \n",
       "10    0.20  0.01\n",
       "11    0.18  0.01"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by a subset of row and column labels (analgous to indexing a numpy matrix)\n",
    "df.loc[10:11, ['loss', 'lr']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "00a7865d-b854-40f7-95ef-17dcf3552004",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "step\n",
       "10    0.20\n",
       "11    0.18\n",
       "Name: loss, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Indexing by column name is actually equivalent to the following (preferred for more complicated settings):\n",
    "df.loc[:, 'loss']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc336529-bce6-4052-86ef-24eee6ca7b2d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "97e9c7ce-a791-432f-bd80-cc2c59d1d6d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loss    0.20\n",
       "lr      0.01\n",
       "Name: 10, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by position\n",
    "df.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "43f25fad-fa43-4e66-9271-ef1819dbcf6a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loss    0.2\n",
       "Name: 10, dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by position\n",
    "df.iloc[0, 0:1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "38bfa6fc-9fd6-4001-bd8b-bf7588e552ec",
   "metadata": {},
   "source": [
    "## Multi-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "df4f74b2-2694-4a5b-8b63-b45c3265bb4f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>step</th>\n",
       "      <th>loss</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   step  loss    lr\n",
       "0    10  0.20  0.01\n",
       "1    11  0.18  0.01"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame([{'step': 10, 'loss': 0.2, 'lr': 0.01}, {'step': 11, 'loss': 0.18, 'lr': 0.01}])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "65758813-04b8-436d-a69b-2153b81bf132",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step</th>\n",
       "      <th>lr</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <th>0.01</th>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <th>0.01</th>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           loss\n",
       "step lr        \n",
       "10   0.01  0.20\n",
       "11   0.01  0.18"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html\n",
    "df = df.set_index(['step', 'lr'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "01b3acb0-9cdc-41fa-8845-0c9dda9cfbc2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([(10, 0.01),\n",
       "            (11, 0.01)],\n",
       "           names=['step', 'lr'])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "2faecd81-0773-4ff2-9c79-0ecfbc653362",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lr</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.01</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      loss\n",
       "lr        \n",
       "0.01   0.2"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "08c30b2f-e110-45f2-abd6-aa6a3d2e1ba5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loss    0.2\n",
       "Name: (10, 0.01), dtype: float64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[(10, 0.01)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "80defe61-5f98-4523-a4b3-4c6ff7cc80cc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lr</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.01</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      loss\n",
       "lr        \n",
       "0.01   0.2"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[(10, slice(None))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "5bace266-c1ea-4659-971c-80ed2f21521d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loss    0.2\n",
       "Name: (10, 0.01), dtype: float64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[10, 0.01]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "be91c3a3-4731-4171-8be2-26fcaa06453e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step</th>\n",
       "      <th>lr</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <th>0.01</th>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <th>0.01</th>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           loss\n",
       "step lr        \n",
       "10   0.01  0.20\n",
       "11   0.01  0.18"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[(slice(None), slice(None))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "00caf4dc-c625-4c99-85bc-33221d94c4f8",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "0.01",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3800\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3799\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3800\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3801\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:138\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:165\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5745\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5753\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 0.01",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn [40], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df\u001b[38;5;241m.\u001b[39mloc[(\u001b[38;5;28mslice\u001b[39m(\u001b[38;5;28;01mNone\u001b[39;00m), \u001b[38;5;241m0.01\u001b[39m)]\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexing.py:1068\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   1066\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_scalar_access(key):\n\u001b[1;32m   1067\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_value(\u001b[38;5;241m*\u001b[39mkey, takeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_takeable)\n\u001b[0;32m-> 1068\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_tuple\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1069\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1070\u001b[0m     \u001b[38;5;66;03m# we by definition only have the 0th axis\u001b[39;00m\n\u001b[1;32m   1071\u001b[0m     axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexing.py:1248\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_tuple\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m   1246\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m suppress(IndexingError):\n\u001b[1;32m   1247\u001b[0m     tup \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_ellipsis(tup)\n\u001b[0;32m-> 1248\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_lowerdim\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtup\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1250\u001b[0m \u001b[38;5;66;03m# no multi-index, so validate all of the indexers\u001b[39;00m\n\u001b[1;32m   1251\u001b[0m tup \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_tuple_indexer(tup)\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexing.py:942\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_lowerdim\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m    940\u001b[0m \u001b[38;5;66;03m# we may have a nested tuples indexer here\u001b[39;00m\n\u001b[1;32m    941\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_nested_tuple_indexer(tup):\n\u001b[0;32m--> 942\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_nested_tuple\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtup\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    944\u001b[0m \u001b[38;5;66;03m# we maybe be using a tuple to represent multiple dimensions here\u001b[39;00m\n\u001b[1;32m    945\u001b[0m ax0 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_axis(\u001b[38;5;241m0\u001b[39m)\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexing.py:1048\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_nested_tuple\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m   1045\u001b[0m     axis \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m   1046\u001b[0m     \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m-> 1048\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1049\u001b[0m axis \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m   1051\u001b[0m \u001b[38;5;66;03m# if we have a scalar, we are done\u001b[39;00m\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexing.py:1313\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m   1311\u001b[0m \u001b[38;5;66;03m# fall thru to straight lookup\u001b[39;00m\n\u001b[1;32m   1312\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key, axis)\n\u001b[0;32m-> 1313\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_label\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexing.py:1261\u001b[0m, in \u001b[0;36m_LocIndexer._get_label\u001b[0;34m(self, label, axis)\u001b[0m\n\u001b[1;32m   1259\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_label\u001b[39m(\u001b[38;5;28mself\u001b[39m, label, axis: \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m   1260\u001b[0m     \u001b[38;5;66;03m# GH#5567 this will fail if the label is not present in the axis.\u001b[39;00m\n\u001b[0;32m-> 1261\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mxs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/generic.py:4042\u001b[0m, in \u001b[0;36mNDFrame.xs\u001b[0;34m(self, key, axis, level, drop_level)\u001b[0m\n\u001b[1;32m   4040\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m   4041\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m drop_level:\n\u001b[0;32m-> 4042\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m   4043\u001b[0m     index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[1;32m   4044\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/frame.py:3805\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3803\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m   3804\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 3805\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3806\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m   3807\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
      "File \u001b[0;32m/extra/ucibdl0/yiboyang/envs/tf2.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3802\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3800\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m   3801\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3802\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m   3803\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m   3804\u001b[0m     \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m   3805\u001b[0m     \u001b[38;5;66;03m#  InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m   3806\u001b[0m     \u001b[38;5;66;03m#  the TypeError.\u001b[39;00m\n\u001b[1;32m   3807\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
      "\u001b[0;31mKeyError\u001b[0m: 0.01"
     ]
    }
   ],
   "source": [
    "# Try to do a slice on the secondary index, none of them below works\n",
    "\n",
    "df.loc[slice(None), 0.01]\n",
    "\n",
    "df.loc[(slice(None), 0.01)]\n",
    "\n",
    "# https://stackoverflow.com/questions/46532371/pandas-slicing-along-first-level-of-multiindex?noredirect=1&lq=1\n",
    "df.loc[pd.IndexSlice[:, [0.01]]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "59d75da9-f186-470a-aee6-38d9ba7b5b8d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step</th>\n",
       "      <th>lr</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <th>0.01</th>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <th>0.01</th>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           loss\n",
       "step lr        \n",
       "10   0.01  0.20\n",
       "11   0.01  0.18"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Finally this worked!\n",
    "# https://pandas.pydata.org/docs/user_guide/advanced.html#using-slicers\n",
    "df.loc[(slice(None), 0.01), :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "edf44397-d3fd-4310-8f53-345002c1c75f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc\n",
    "# https://stackoverflow.com/questions/13389203/pandas-slice-a-multiindex-by-range-of-secondary-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "577856a7-1898-4fea-9383-08f82fbd9034",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "925760af-6722-4f1e-b763-5b65bc9f32cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Official example: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "e686e235-ba21-4dee-8dad-1bbd812a8924",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>max_speed</th>\n",
       "      <th>shield</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">cobra</th>\n",
       "      <th>mark i</th>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mark ii</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">sidewinder</th>\n",
       "      <th>mark i</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mark ii</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">viper</th>\n",
       "      <th>mark ii</th>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mark iii</th>\n",
       "      <td>16</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     max_speed  shield\n",
       "cobra      mark i           12       2\n",
       "           mark ii           0       4\n",
       "sidewinder mark i           10      20\n",
       "           mark ii           1       4\n",
       "viper      mark ii           7       1\n",
       "           mark iii         16      36"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tuples = [\n",
    "   ('cobra', 'mark i'), ('cobra', 'mark ii'),\n",
    "   ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),\n",
    "   ('viper', 'mark ii'), ('viper', 'mark iii')\n",
    "]\n",
    "index = pd.MultiIndex.from_tuples(tuples)\n",
    "values = [[12, 2], [0, 4], [10, 20],\n",
    "        [1, 4], [7, 1], [16, 36]]\n",
    "df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "a1c9f151-3eb8-4adb-9f53-e7e98ca006ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>max_speed</th>\n",
       "      <th>shield</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cobra</th>\n",
       "      <th>mark i</th>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sidewinder</th>\n",
       "      <th>mark i</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   max_speed  shield\n",
       "cobra      mark i         12       2\n",
       "sidewinder mark i         10      20"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idx = pd.IndexSlice\n",
    "\n",
    "df.loc[idx[:,['mark i']],:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "621b9ebb-e9b6-4196-adbc-396094f04d19",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>max_speed</th>\n",
       "      <th>shield</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cobra</th>\n",
       "      <th>mark i</th>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sidewinder</th>\n",
       "      <th>mark i</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   max_speed  shield\n",
       "cobra      mark i         12       2\n",
       "sidewinder mark i         10      20"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[idx[:,['mark i']], :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "5de54a40-295e-42e9-9e17-bc9ccafad221",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>max_speed</th>\n",
       "      <th>shield</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cobra</th>\n",
       "      <th>mark i</th>\n",
       "      <td>12</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sidewinder</th>\n",
       "      <th>mark i</th>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   max_speed  shield\n",
       "cobra      mark i         12       2\n",
       "sidewinder mark i         10      20"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[(slice(None), 'mark i'), :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c932e6e-f4e9-492c-af40-83a3b9445666",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "a8f1344f-d2f9-45af-934e-c878bf6c48d8",
   "metadata": {},
   "source": [
    "## Reduction"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dbaef952-f353-4945-93a5-2eeb2aac286f",
   "metadata": {},
   "source": [
    "### In this example, we are interested in finding the average loss and acc for a given lr."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3fed1d76-f3a2-4e84-ac53-a8a3b3f7fc72",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame([{'step': 10, 'loss': 0.2, 'acc': 0.7, 'lr': 0.01}, {'step': 11, 'loss': 0.18, 'acc': 0.8, 'lr': 0.01},\n",
    "                   {'step': 12, 'loss': 0.12, 'acc': 0.8, 'lr': 0.001}, {'step': 13, 'loss': 0.15, 'acc': 0.9, 'lr': 0.001},\n",
    "                   {'step': 14, 'loss': 0.1, 'acc': 0.8, 'lr': 0.001}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "0b8cae39-4b88-4d33-a225-4f1c2dd45967",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>step</th>\n",
       "      <th>loss</th>\n",
       "      <th>acc</th>\n",
       "      <th>lr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.7</td>\n",
       "      <td>0.010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11</td>\n",
       "      <td>0.18</td>\n",
       "      <td>0.8</td>\n",
       "      <td>0.010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>0.12</td>\n",
       "      <td>0.8</td>\n",
       "      <td>0.001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>13</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.9</td>\n",
       "      <td>0.001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>14</td>\n",
       "      <td>0.10</td>\n",
       "      <td>0.8</td>\n",
       "      <td>0.001</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   step  loss  acc     lr\n",
       "0    10  0.20  0.7  0.010\n",
       "1    11  0.18  0.8  0.010\n",
       "2    12  0.12  0.8  0.001\n",
       "3    13  0.15  0.9  0.001\n",
       "4    14  0.10  0.8  0.001"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "18822344-7474-4f8f-8a3e-e817a88d272e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First we group rows that share the same distinct values under some column(s).\n",
    "reduce_cols = ['lr']\n",
    "reduction_groups = df.groupby(reduce_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "66284399-5376-4253-b8ca-ba87fafecc6a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f494d737490>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reduction_groups[['loss', 'acc']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "87dd07a7-8ed1-413e-b154-03a5cf23c1d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Then we take the average, holding 'lr' fixed\n",
    "avgs = reduction_groups[['loss', 'acc']].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9f276813-de90-463a-8592-eea980aff6cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "      <th>acc</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lr</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.001</th>\n",
       "      <td>0.123333</td>\n",
       "      <td>0.833333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.010</th>\n",
       "      <td>0.190000</td>\n",
       "      <td>0.750000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           loss       acc\n",
       "lr                       \n",
       "0.001  0.123333  0.833333\n",
       "0.010  0.190000  0.750000"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# This results in a new df, with the index being the `reduce_cols`, and columns being the fields we chose above.\n",
    "avgs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "b1416b85-7070-4ac7-8f21-188df5b95116",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "lr\n",
       "0.001    0.123333\n",
       "0.010    0.190000\n",
       "Name: loss, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "avgs['loss']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "29b8094e-97e1-4951-b2ba-0e69ff73ad0c",
   "metadata": {},
   "source": [
    "### We can reduce/group by multiple fields; this allows us to take averages for each unique combination of the fields to group by"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "dacc44f1-0c48-4470-b441-17bf2ed69af4",
   "metadata": {},
   "outputs": [],
   "source": [
    "reduce_cols = ['lr', 'acc']\n",
    "reduction_groups = df.groupby(reduce_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "63881603-007b-48b0-a139-5c0e8267ca0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "avgs = reduction_groups[['loss']].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e6845e5e-9a21-45bf-a279-ae978b8351e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lr</th>\n",
       "      <th>acc</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.001</th>\n",
       "      <th>0.8</th>\n",
       "      <td>0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.9</th>\n",
       "      <td>0.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.010</th>\n",
       "      <th>0.7</th>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.8</th>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           loss\n",
       "lr    acc      \n",
       "0.001 0.8  0.11\n",
       "      0.9  0.15\n",
       "0.010 0.7  0.20\n",
       "      0.8  0.18"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# This results in a multi-index df, with index being ('lr', 'acc')\n",
    "avgs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "dc0d8e93-f05c-4db3-9247-f476c3a58b7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "avgs = avgs.sort_values('acc', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "d1337c9b-f4ad-4268-9d73-a4eea4ae35f1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>loss</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lr</th>\n",
       "      <th>acc</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.001</th>\n",
       "      <th>0.9</th>\n",
       "      <td>0.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.8</th>\n",
       "      <td>0.11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.010</th>\n",
       "      <th>0.8</th>\n",
       "      <td>0.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0.7</th>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           loss\n",
       "lr    acc      \n",
       "0.001 0.9  0.15\n",
       "      0.8  0.11\n",
       "0.010 0.8  0.18\n",
       "      0.7  0.20"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "avgs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "36415289-b3b4-4128-b74a-29ebfa3335b8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([(0.001, 0.9),\n",
       "            (0.001, 0.8),\n",
       "            ( 0.01, 0.8),\n",
       "            ( 0.01, 0.7)],\n",
       "           names=['lr', 'acc'])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "avgs.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "51aad0e7-7a07-4c3a-8abf-f5e15017c152",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "loss    0.15\n",
       "Name: (0.001, 0.9), dtype: float64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "avgs.loc[(0.001, 0.9)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "d2a1b846-f9fc-4301-926a-b339c56150c4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.001\n",
      "lr     acc\n",
      "0.001  0.9    0.15\n",
      "       0.8    0.11\n",
      "Name: loss, dtype: float64\n",
      "0.01\n",
      "lr    acc\n",
      "0.01  0.8    0.18\n",
      "      0.7    0.20\n",
      "Name: loss, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Loop over one of the reduction columns:\n",
    "for lr, sub_df in avgs.groupby(level=0):  # Trick from https://stackoverflow.com/questions/25929319/how-to-iterate-over-pandas-multiindex-dataframe-using-index\n",
    "  print(lr)\n",
    "  print(sub_df['loss'])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}