Skip to content

Instantly share code, notes, and snippets.

@justmarkham
Last active September 21, 2023 20:47
Show Gist options
  • Save justmarkham/19241df07db2b93283fbb0cfe9c572f9 to your computer and use it in GitHub Desktop.
Save justmarkham/19241df07db2b93283fbb0cfe9c572f9 to your computer and use it in GitHub Desktop.
Lesson on the pandas MultiIndex from the October 23 webcast
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# How do I use the MultiIndex in pandas? ([video](https://www.youtube.com/watch?v=tcRGa2soc-c))"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Symbol</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2016-10-03</td>\n",
" <td>31.50</td>\n",
" <td>14070500</td>\n",
" <td>CSCO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2016-10-03</td>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" <td>AAPL</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2016-10-03</td>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" <td>MSFT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2016-10-04</td>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" <td>AAPL</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2016-10-04</td>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" <td>MSFT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2016-10-04</td>\n",
" <td>31.35</td>\n",
" <td>18460400</td>\n",
" <td>CSCO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2016-10-05</td>\n",
" <td>57.64</td>\n",
" <td>16726400</td>\n",
" <td>MSFT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2016-10-05</td>\n",
" <td>31.59</td>\n",
" <td>11808600</td>\n",
" <td>CSCO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2016-10-05</td>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" <td>AAPL</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Close Volume Symbol\n",
"0 2016-10-03 31.50 14070500 CSCO\n",
"1 2016-10-03 112.52 21701800 AAPL\n",
"2 2016-10-03 57.42 19189500 MSFT\n",
"3 2016-10-04 113.00 29736800 AAPL\n",
"4 2016-10-04 57.24 20085900 MSFT\n",
"5 2016-10-04 31.35 18460400 CSCO\n",
"6 2016-10-05 57.64 16726400 MSFT\n",
"7 2016-10-05 31.59 11808600 CSCO\n",
"8 2016-10-05 113.05 21453100 AAPL"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks = pd.read_csv('http://bit.ly/smallstocks')\n",
"stocks"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=9, step=1)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.index"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Symbol\n",
"AAPL 112.856667\n",
"CSCO 31.480000\n",
"MSFT 57.433333\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.groupby('Symbol').Close.mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Series with MultiIndex"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Symbol Date \n",
"AAPL 2016-10-03 112.52\n",
" 2016-10-04 113.00\n",
" 2016-10-05 113.05\n",
"CSCO 2016-10-03 31.50\n",
" 2016-10-04 31.35\n",
" 2016-10-05 31.59\n",
"MSFT 2016-10-03 57.42\n",
" 2016-10-04 57.24\n",
" 2016-10-05 57.64\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser = stocks.groupby(['Symbol', 'Date']).Close.mean()\n",
"ser"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],\n",
" labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],\n",
" names=['Symbol', 'Date'])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser.index"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Date</th>\n",
" <th>2016-10-03</th>\n",
" <th>2016-10-04</th>\n",
" <th>2016-10-05</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <td>112.52</td>\n",
" <td>113.00</td>\n",
" <td>113.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CSCO</th>\n",
" <td>31.50</td>\n",
" <td>31.35</td>\n",
" <td>31.59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <td>57.42</td>\n",
" <td>57.24</td>\n",
" <td>57.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Date 2016-10-03 2016-10-04 2016-10-05\n",
"Symbol \n",
"AAPL 112.52 113.00 113.05\n",
"CSCO 31.50 31.35 31.59\n",
"MSFT 57.42 57.24 57.64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser.unstack()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Date</th>\n",
" <th>2016-10-03</th>\n",
" <th>2016-10-04</th>\n",
" <th>2016-10-05</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <td>112.52</td>\n",
" <td>113.00</td>\n",
" <td>113.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CSCO</th>\n",
" <td>31.50</td>\n",
" <td>31.35</td>\n",
" <td>31.59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <td>57.42</td>\n",
" <td>57.24</td>\n",
" <td>57.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Date 2016-10-03 2016-10-04 2016-10-05\n",
"Symbol \n",
"AAPL 112.52 113.00 113.05\n",
"CSCO 31.50 31.35 31.59\n",
"MSFT 57.42 57.24 57.64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = stocks.pivot_table(values='Close', index='Symbol', columns='Date')\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Selection from Series with MultiIndex"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Symbol Date \n",
"AAPL 2016-10-03 112.52\n",
" 2016-10-04 113.00\n",
" 2016-10-05 113.05\n",
"CSCO 2016-10-03 31.50\n",
" 2016-10-04 31.35\n",
" 2016-10-05 31.59\n",
"MSFT 2016-10-03 57.42\n",
" 2016-10-04 57.24\n",
" 2016-10-05 57.64\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"Date\n",
"2016-10-03 112.52\n",
"2016-10-04 113.00\n",
"2016-10-05 113.05\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser.loc['AAPL']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"112.52"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser.loc['AAPL', '2016-10-03']"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Symbol\n",
"AAPL 112.52\n",
"CSCO 31.50\n",
"MSFT 57.42\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser.loc[:, '2016-10-03']"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Date</th>\n",
" <th>2016-10-03</th>\n",
" <th>2016-10-04</th>\n",
" <th>2016-10-05</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <td>112.52</td>\n",
" <td>113.00</td>\n",
" <td>113.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CSCO</th>\n",
" <td>31.50</td>\n",
" <td>31.35</td>\n",
" <td>31.59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <td>57.42</td>\n",
" <td>57.24</td>\n",
" <td>57.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Date 2016-10-03 2016-10-04 2016-10-05\n",
"Symbol \n",
"AAPL 112.52 113.00 113.05\n",
"CSCO 31.50 31.35 31.59\n",
"MSFT 57.42 57.24 57.64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Date\n",
"2016-10-03 112.52\n",
"2016-10-04 113.00\n",
"2016-10-05 113.05\n",
"Name: AAPL, dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['AAPL']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"112.52"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['AAPL', '2016-10-03']"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Symbol\n",
"AAPL 112.52\n",
"CSCO 31.50\n",
"MSFT 57.42\n",
"Name: 2016-10-03, dtype: float64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[:, '2016-10-03']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DataFrame with MultiIndex"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>CSCO</th>\n",
" <th>2016-10-03</th>\n",
" <td>31.50</td>\n",
" <td>14070500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <th>2016-10-04</th>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CSCO</th>\n",
" <th>2016-10-04</th>\n",
" <td>31.35</td>\n",
" <td>18460400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <th>2016-10-05</th>\n",
" <td>57.64</td>\n",
" <td>16726400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CSCO</th>\n",
" <th>2016-10-05</th>\n",
" <td>31.59</td>\n",
" <td>11808600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <th>2016-10-05</th>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Symbol Date \n",
"CSCO 2016-10-03 31.50 14070500\n",
"AAPL 2016-10-03 112.52 21701800\n",
"MSFT 2016-10-03 57.42 19189500\n",
"AAPL 2016-10-04 113.00 29736800\n",
"MSFT 2016-10-04 57.24 20085900\n",
"CSCO 2016-10-04 31.35 18460400\n",
"MSFT 2016-10-05 57.64 16726400\n",
"CSCO 2016-10-05 31.59 11808600\n",
"AAPL 2016-10-05 113.05 21453100"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.set_index(['Symbol', 'Date'], inplace=True)\n",
"stocks"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],\n",
" labels=[[1, 0, 2, 0, 2, 1, 2, 1, 0], [0, 0, 0, 1, 1, 1, 2, 2, 2]],\n",
" names=['Symbol', 'Date'])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.index"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">CSCO</th>\n",
" <th>2016-10-03</th>\n",
" <td>31.50</td>\n",
" <td>14070500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>31.35</td>\n",
" <td>18460400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>31.59</td>\n",
" <td>11808600</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>57.64</td>\n",
" <td>16726400</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Symbol Date \n",
"AAPL 2016-10-03 112.52 21701800\n",
" 2016-10-04 113.00 29736800\n",
" 2016-10-05 113.05 21453100\n",
"CSCO 2016-10-03 31.50 14070500\n",
" 2016-10-04 31.35 18460400\n",
" 2016-10-05 31.59 11808600\n",
"MSFT 2016-10-03 57.42 19189500\n",
" 2016-10-04 57.24 20085900\n",
" 2016-10-05 57.64 16726400"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.sort_index(inplace=True)\n",
"stocks"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Selection from DataFrame with MultiIndex"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Date \n",
"2016-10-03 112.52 21701800\n",
"2016-10-04 113.00 29736800\n",
"2016-10-05 113.05 21453100"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc['AAPL']"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Close 112.52\n",
"Volume 21701800.00\n",
"Name: (AAPL, 2016-10-03), dtype: float64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[('AAPL', '2016-10-03'), :]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"112.52"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[('AAPL', '2016-10-03'), 'Close']"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>57.64</td>\n",
" <td>16726400</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Symbol Date \n",
"AAPL 2016-10-03 112.52 21701800\n",
" 2016-10-04 113.00 29736800\n",
" 2016-10-05 113.05 21453100\n",
"MSFT 2016-10-03 57.42 19189500\n",
" 2016-10-04 57.24 20085900\n",
" 2016-10-05 57.64 16726400"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[['AAPL', 'MSFT'], :]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Symbol Date \n",
"AAPL 2016-10-03 112.52 21701800\n",
"MSFT 2016-10-03 57.42 19189500"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), :]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Symbol Date \n",
"AAPL 2016-10-03 112.52\n",
"MSFT 2016-10-03 57.42\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), 'Close']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Symbol Date \n",
"AAPL 2016-10-03 112.52\n",
" 2016-10-04 113.00\n",
"Name: Close, dtype: float64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[('AAPL', ['2016-10-03', '2016-10-04']), 'Close']"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">CSCO</th>\n",
" <th>2016-10-03</th>\n",
" <td>31.50</td>\n",
" <td>14070500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>31.35</td>\n",
" <td>18460400</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Symbol Date \n",
"AAPL 2016-10-03 112.52 21701800\n",
" 2016-10-04 113.00 29736800\n",
"CSCO 2016-10-03 31.50 14070500\n",
" 2016-10-04 31.35 18460400\n",
"MSFT 2016-10-03 57.42 19189500\n",
" 2016-10-04 57.24 20085900"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.loc[(slice(None), ['2016-10-03', '2016-10-04']), :]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Merging DataFrames with MultiIndexes"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>113.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">CSCO</th>\n",
" <th>2016-10-03</th>\n",
" <td>31.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>31.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>31.59</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>57.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>57.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close\n",
"Symbol Date \n",
"AAPL 2016-10-03 112.52\n",
" 2016-10-04 113.00\n",
" 2016-10-05 113.05\n",
"CSCO 2016-10-03 31.50\n",
" 2016-10-04 31.35\n",
" 2016-10-05 31.59\n",
"MSFT 2016-10-03 57.42\n",
" 2016-10-04 57.24\n",
" 2016-10-05 57.64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"close = pd.read_csv('stocks.csv', usecols=[0, 1, 3], index_col=['Symbol', 'Date']).sort_index()\n",
"close"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>21453100</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">CSCO</th>\n",
" <th>2016-10-03</th>\n",
" <td>14070500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>18460400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>11808600</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>20085900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>16726400</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Volume\n",
"Symbol Date \n",
"AAPL 2016-10-03 21701800\n",
" 2016-10-04 29736800\n",
" 2016-10-05 21453100\n",
"CSCO 2016-10-03 14070500\n",
" 2016-10-04 18460400\n",
" 2016-10-05 11808600\n",
"MSFT 2016-10-03 19189500\n",
" 2016-10-04 20085900\n",
" 2016-10-05 16726400"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"volume = pd.read_csv('stocks.csv', usecols=[0, 2, 3], index_col=['Symbol', 'Date']).sort_index()\n",
"volume"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">AAPL</th>\n",
" <th>2016-10-03</th>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">CSCO</th>\n",
" <th>2016-10-03</th>\n",
" <td>31.50</td>\n",
" <td>14070500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>31.35</td>\n",
" <td>18460400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>31.59</td>\n",
" <td>11808600</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">MSFT</th>\n",
" <th>2016-10-03</th>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-04</th>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016-10-05</th>\n",
" <td>57.64</td>\n",
" <td>16726400</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Close Volume\n",
"Symbol Date \n",
"AAPL 2016-10-03 112.52 21701800\n",
" 2016-10-04 113.00 29736800\n",
" 2016-10-05 113.05 21453100\n",
"CSCO 2016-10-03 31.50 14070500\n",
" 2016-10-04 31.35 18460400\n",
" 2016-10-05 31.59 11808600\n",
"MSFT 2016-10-03 57.42 19189500\n",
" 2016-10-04 57.24 20085900\n",
" 2016-10-05 57.64 16726400"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"both = pd.merge(close, volume, left_index=True, right_index=True)\n",
"both"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Symbol</th>\n",
" <th>Date</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AAPL</td>\n",
" <td>2016-10-03</td>\n",
" <td>112.52</td>\n",
" <td>21701800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AAPL</td>\n",
" <td>2016-10-04</td>\n",
" <td>113.00</td>\n",
" <td>29736800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AAPL</td>\n",
" <td>2016-10-05</td>\n",
" <td>113.05</td>\n",
" <td>21453100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CSCO</td>\n",
" <td>2016-10-03</td>\n",
" <td>31.50</td>\n",
" <td>14070500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CSCO</td>\n",
" <td>2016-10-04</td>\n",
" <td>31.35</td>\n",
" <td>18460400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>CSCO</td>\n",
" <td>2016-10-05</td>\n",
" <td>31.59</td>\n",
" <td>11808600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>MSFT</td>\n",
" <td>2016-10-03</td>\n",
" <td>57.42</td>\n",
" <td>19189500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>MSFT</td>\n",
" <td>2016-10-04</td>\n",
" <td>57.24</td>\n",
" <td>20085900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>MSFT</td>\n",
" <td>2016-10-05</td>\n",
" <td>57.64</td>\n",
" <td>16726400</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Symbol Date Close Volume\n",
"0 AAPL 2016-10-03 112.52 21701800\n",
"1 AAPL 2016-10-04 113.00 29736800\n",
"2 AAPL 2016-10-05 113.05 21453100\n",
"3 CSCO 2016-10-03 31.50 14070500\n",
"4 CSCO 2016-10-04 31.35 18460400\n",
"5 CSCO 2016-10-05 31.59 11808600\n",
"6 MSFT 2016-10-03 57.42 19189500\n",
"7 MSFT 2016-10-04 57.24 20085900\n",
"8 MSFT 2016-10-05 57.64 16726400"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"both.reset_index()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Date Close Volume Symbol
2016-10-03 31.50 14070500 CSCO
2016-10-03 112.52 21701800 AAPL
2016-10-03 57.42 19189500 MSFT
2016-10-04 113.00 29736800 AAPL
2016-10-04 57.24 20085900 MSFT
2016-10-04 31.35 18460400 CSCO
2016-10-05 57.64 16726400 MSFT
2016-10-05 31.59 11808600 CSCO
2016-10-05 113.05 21453100 AAPL
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment