Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save theredpea/d7ae13a2a394fad425a7a7ba2561a923 to your computer and use it in GitHub Desktop.
Save theredpea/d7ae13a2a394fad425a7a7ba2561a923 to your computer and use it in GitHub Desktop.
helping neo with some test data
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>date</th>\n",
" <th>code</th>\n",
" <th>Rate</th>\n",
" <th>Unnamed: 4</th>\n",
" <th>Unnamed: 5</th>\n",
" <th>Unnamed: 6</th>\n",
" <th>Unnamed: 7</th>\n",
" <th>Unnamed: 8</th>\n",
" <th>Unnamed: 9</th>\n",
" <th>Unnamed: 10</th>\n",
" <th>Unnamed: 11</th>\n",
" <th>Unnamed: 12</th>\n",
" <th>Unnamed: 13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>2018-05-15</td>\n",
" <td>101</td>\n",
" <td>123</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>2018-05-14</td>\n",
" <td>101</td>\n",
" <td>234</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>2018-05-13</td>\n",
" <td>101</td>\n",
" <td>345</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>2018-05-12</td>\n",
" <td>101</td>\n",
" <td>456</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Code</td>\n",
" <td>Range from</td>\n",
" <td>to</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>2018-05-11</td>\n",
" <td>101</td>\n",
" <td>567</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>202, 101</td>\n",
" <td>2018-05-08 00:00:00</td>\n",
" <td>2018-05-15 00:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region date code Rate Unnamed: 4 Unnamed: 5 Unnamed: 6 \\\n",
"0 0 2018-05-15 101 123 NaN NaN NaN \n",
"1 0 2018-05-14 101 234 NaN NaN NaN \n",
"2 0 2018-05-13 101 345 NaN NaN NaN \n",
"3 0 2018-05-12 101 456 NaN NaN NaN \n",
"4 0 2018-05-11 101 567 NaN NaN NaN \n",
"\n",
" Unnamed: 7 Unnamed: 8 Unnamed: 9 Unnamed: 10 \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 Code Range from to NaN \n",
"4 202, 101 2018-05-08 00:00:00 2018-05-15 00:00:00 NaN \n",
"\n",
" Unnamed: 11 Unnamed: 12 Unnamed: 13 \n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN "
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%matplotlib inline\n",
"\n",
"import os\n",
"import pandas as pd\n",
"#https://github.com/theredpea/theredpea.github.io/blob/master/Test.xlsx",
"neo_original_df = pd.read_excel(os.path.join('Resources', 'Test.xlsx'))\n",
"#Just a preview to show \"Unnamed\" columns which pandas tries to read \n",
"neo_original_df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>date</th>\n",
" <th>code</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>2018-05-15</td>\n",
" <td>101</td>\n",
" <td>123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>2018-05-14</td>\n",
" <td>101</td>\n",
" <td>234</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>2018-05-13</td>\n",
" <td>101</td>\n",
" <td>345</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>2018-05-12</td>\n",
" <td>101</td>\n",
" <td>456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>2018-05-11</td>\n",
" <td>101</td>\n",
" <td>567</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region date code Rate\n",
"0 0 2018-05-15 101 123\n",
"1 0 2018-05-14 101 234\n",
"2 0 2018-05-13 101 345\n",
"3 0 2018-05-12 101 456\n",
"4 0 2018-05-11 101 567"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Modify the dataframe; select specific columns in a specific order\n",
"neo_simplified_df = neo_original_df[['Region', 'date', 'code', 'Rate']]\n",
"\n",
"#Another way to do it; use `loc` as in : `loc[row_indexes, col_indexes]`:\n",
"# neo_simplified_df = neo_original_df.loc[:, ['Region', 'date', 'code', 'Rate']]\n",
"\n",
"#Yet another way, is to parse only up to column 4 (0-indexed), when you read the excel in the first place:\n",
"# neo_original_df = pd.read_excel(os.path.join('Resources', 'Test.xlsx'), usecols=3)\n",
"\n",
"neo_simplified_df.head()\n",
"# neo_simplified_df.sort_values('Rate', ascending=False)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"101 10\n",
"202 9\n",
"99 3\n",
"198 2\n",
"98 1\n",
"Name: code, dtype: int64"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"neo_original_df['code'].value_counts()\n",
"\n",
"#This is another way:\n",
"# neo_original_df['code'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>date</th>\n",
" <th>code</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0</td>\n",
" <td>2018-05-11</td>\n",
" <td>99</td>\n",
" <td>1233</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0</td>\n",
" <td>2018-05-10</td>\n",
" <td>99</td>\n",
" <td>1344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0</td>\n",
" <td>2017-01-01</td>\n",
" <td>99</td>\n",
" <td>1455</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0</td>\n",
" <td>2016-02-28</td>\n",
" <td>98</td>\n",
" <td>1566</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0</td>\n",
" <td>2018-05-15</td>\n",
" <td>202</td>\n",
" <td>1677</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region date code Rate\n",
"10 0 2018-05-11 99 1233\n",
"11 0 2018-05-10 99 1344\n",
"12 0 2017-01-01 99 1455\n",
"13 0 2016-02-28 98 1566\n",
"14 0 2018-05-15 202 1677"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_wants_codes = (202, 99, 198, 98)\n",
"user_wants_date_min = '2018-05-08'\n",
"user_wants_date_max = '2018-05-15'\n",
"#This isin produces a \"boolean series\"; \n",
"where_code_eq_user_wants = neo_simplified_df.code.isin(user_wants_codes) \n",
"\n",
"#Use the boolean series to filter:\n",
"#Just a preview to demonstrate : there are no rows where code == 101 because (101 not in user_wants_code)\n",
"neo_simplified_df[where_code_eq_user_wants].head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>date</th>\n",
" <th>code</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0</td>\n",
" <td>2018-05-11</td>\n",
" <td>99</td>\n",
" <td>1233</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0</td>\n",
" <td>2018-05-10</td>\n",
" <td>99</td>\n",
" <td>1344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0</td>\n",
" <td>2018-05-15</td>\n",
" <td>202</td>\n",
" <td>1677</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0</td>\n",
" <td>2018-05-14</td>\n",
" <td>202</td>\n",
" <td>1788</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0</td>\n",
" <td>2018-05-13</td>\n",
" <td>202</td>\n",
" <td>1899</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region date code Rate\n",
"10 0 2018-05-11 99 1233\n",
"11 0 2018-05-10 99 1344\n",
"14 0 2018-05-15 202 1677\n",
"15 0 2018-05-14 202 1788\n",
"16 0 2018-05-13 202 1899"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"where_date_gte_user_date_min = neo_simplified_df.date >= user_wants_date_min\n",
"where_date_lte_user_date_max = neo_simplified_df.date <= user_wants_date_max\n",
"\n",
"# \"and\" works for individual Python boolean values\n",
"# 3 < 5 and 5 < 10 == True\n",
"# 6 < 5 or 5 < 10 == False\n",
"\n",
"# \"&\" works like \"and\" on a Pandas series of boolean values\n",
"# \"|\" works like \"or\" on a Pandas series of boolean values\n",
"\n",
"# T T T\n",
"# F & F => F\n",
"# T F F\n",
"\n",
"# T T T\n",
"# F | F => F\n",
"# T F T\n",
"\n",
"#Modify the dataframe; select specific rows; aka filter\n",
"#Later, decided *not* to modify the dataframe ;\n",
"#we will need the dates outside of the range, \n",
"#to impute values inside the date range\n",
"neo_filtered_df = neo_simplified_df.loc[\n",
" where_code_eq_user_wants \n",
" & where_date_gte_user_date_min \n",
" & where_date_lte_user_date_max\n",
"]\n",
"#notice the preview below; there are no rows where date == 2017-01-01, nor where code is not in the list of codes\n",
"neo_filtered_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2018-05-08', '2018-05-09', '2018-05-10', '2018-05-11',\n",
" '2018-05-12', '2018-05-13', '2018-05-14', '2018-05-15'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Pandas can make a range of dates\n",
"user_wants_date_range = pd.date_range(user_wants_date_min, user_wants_date_max)# , freq='D' daily frequency is the default\n",
"user_wants_date_range"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">202</th>\n",
" <th>2018-05-08</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">99</th>\n",
" <th>2018-05-08</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [(202, 2018-05-08 00:00:00), (202, 2018-05-09 00:00:00), (202, 2018-05-10 00:00:00), (202, 2018-05-11 00:00:00), (202, 2018-05-12 00:00:00), (202, 2018-05-13 00:00:00), (202, 2018-05-14 00:00:00), (202, 2018-05-15 00:00:00), (99, 2018-05-08 00:00:00), (99, 2018-05-09 00:00:00), (99, 2018-05-10 00:00:00)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# `pd.MultiIndex`:\n",
"# Multiindex, a \"hierarchical\" index; grouped by code, then by date:\n",
"# https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html\n",
"\n",
"# `from_product`:\n",
"# Cartesian product (aka all combinations) of the unique user codes, x the unique dates in the date_range\n",
"# https://stackoverflow.com/a/46744050/1175496\n",
"multi_index = pd.MultiIndex.from_product([user_wants_codes, user_wants_date_range], names=['code', 'date'])\n",
"\n",
"#We'll pass in the index into a DataFrame, just to see the \"pretty formatting\" when we print the head rows\n",
"pd.DataFrame(index=multi_index).head(len(user_wants_date_range)+3)\n",
"# multi_index.get_"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Series([], Name: (202, 2018-05-09 00:00:00), dtype: float64)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Just playing around; we can use DataFrame.loc to get a specific combination\n",
"#https://stackoverflow.com/a/24436783/1175496\n",
"pd.DataFrame(index=multi_index).loc[(202, '2018-05-09')]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [2018-05-08 00:00:00, 2018-05-09 00:00:00, 2018-05-10 00:00:00, 2018-05-11 00:00:00, 2018-05-12 00:00:00, 2018-05-13 00:00:00, 2018-05-14 00:00:00, 2018-05-15 00:00:00]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Just playing around; can use DataFrame.loc to get all values across the other index\n",
"#https://stackoverflow.com/a/24436783/1175496\n",
"pd.DataFrame(index=multi_index).loc[202]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">202</th>\n",
" <th>2018-05-08</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [(202, 2018-05-08 00:00:00), (202, 2018-05-09 00:00:00), (202, 2018-05-10 00:00:00)]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Just playing around; we can slice the index\n",
"pd.DataFrame(index=multi_index[:3])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Just playing around, we cando lookups on the index itself using get_loc\n",
"multi_index.get_loc((202, '2018-05-09'))"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>98</th>\n",
" <th>2016-02-28</th>\n",
" <td>0</td>\n",
" <td>1566</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region Rate\n",
"code date \n",
"98 2016-02-28 0 1566"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# By indexing on the colums ['code', 'date'], ; we can now JOIN our real data \n",
"# with the above \"window\" we need to analyze , represented by multi_index\n",
"# Our data might have few rows; for example only one row for code 98\n",
"indexed_simplified_df = neo_simplified_df.set_index(['code', 'date'])\n",
"indexed_simplified_df.loc[[98]].head(10)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">98</th>\n",
" <th>2018-05-08</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" <td>&lt;get-rate-value-for-this-date&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"code date \n",
"98 2018-05-08 <get-rate-value-for-this-date>\n",
" 2018-05-09 <get-rate-value-for-this-date>\n",
" 2018-05-10 <get-rate-value-for-this-date>\n",
" 2018-05-11 <get-rate-value-for-this-date>\n",
" 2018-05-12 <get-rate-value-for-this-date>\n",
" 2018-05-13 <get-rate-value-for-this-date>\n",
" 2018-05-14 <get-rate-value-for-this-date>\n",
" 2018-05-15 <get-rate-value-for-this-date>"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# But the multi index will create all rows needed for code 98, we just need to fill in the blanks\n",
"pd.DataFrame(['<get-rate-value-for-this-date>']*len(multi_index), index=multi_index).loc[[98]]"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# The columns ; specifically ('code', 'date'), are the same \n",
"# We can show this by using Python equality comparison; it should print \"True\"\n",
"indexed_simplified_df.index.names == multi_index.names"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Note the indexes match:*\n",
"\n",
"![image showing index names are same between our multi index and the actual data dataframe](same_index_names.png)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>fake_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">202</th>\n",
" <th>2018-05-08</th>\n",
" <td>0.811359</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>0.621591</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" <td>0.857150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>0.772003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" <td>0.927317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" <td>0.967827</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" <td>0.972950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" <td>0.411640</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">99</th>\n",
" <th>2018-05-08</th>\n",
" <td>0.725169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>0.593050</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fake_rate\n",
"code date \n",
"202 2018-05-08 0.811359\n",
" 2018-05-09 0.621591\n",
" 2018-05-10 0.857150\n",
" 2018-05-11 0.772003\n",
" 2018-05-12 0.927317\n",
" 2018-05-13 0.967827\n",
" 2018-05-14 0.972950\n",
" 2018-05-15 0.411640\n",
"99 2018-05-08 0.725169\n",
" 2018-05-09 0.593050"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#We could even fill with fake data, random numbers:\n",
"import numpy as np\n",
"fake_rate_df= pd.DataFrame(np.random.rand(len(multi_index)), index=multi_index, columns=['fake_rate'])\n",
"fake_rate_df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_fake_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>0.316849</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>0.446200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>0.658574</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <td>0.792730</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_fake_rate\n",
"code \n",
"98 0.316849\n",
"99 0.446200\n",
"198 0.658574\n",
"202 0.792730"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake_rate_grouped_by_code = fake_rate_df.groupby('code')\n",
"\n",
"(fake_rate_grouped_by_code\n",
" .mean()\n",
" .rename(columns={'fake_rate': 'avg_fake_rate'}))\n",
"# fake_rate_df.rename()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"9\" valign=\"top\">98</th>\n",
" <th>2016-02-28</th>\n",
" <td>0.0</td>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">99</th>\n",
" <th>2017-01-01</th>\n",
" <td>0.0</td>\n",
" <td>1455.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region Rate\n",
"code date \n",
"98 2016-02-28 0.0 1566.0\n",
" 2018-05-08 NaN NaN\n",
" 2018-05-09 NaN NaN\n",
" 2018-05-10 NaN NaN\n",
" 2018-05-11 NaN NaN\n",
" 2018-05-12 NaN NaN\n",
" 2018-05-13 NaN NaN\n",
" 2018-05-14 NaN NaN\n",
" 2018-05-15 NaN NaN\n",
"99 2017-01-01 0.0 1455.0\n",
" 2018-05-08 NaN NaN\n",
" 2018-05-09 NaN NaN"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#So let's JOIN our data, to \"fill in the blanks\" where we have some data:\n",
"merged_dates_df = pd.merge(pd.DataFrame(index=multi_index), \n",
" indexed_simplified_df, \n",
" left_index=True, \n",
" right_index=True, \n",
" how='outer').sort_index()\n",
"\n",
"merged_dates_df[:12]\n",
"# more evidence that assigning with `loc` will align indices\n",
"#https://stackoverflow.com/a/13843741/1175496"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(202, 2018-05-11 00:00:00) 2\n",
"(101, 2018-05-08 00:00:00) 1\n",
"(202, 2018-05-08 00:00:00) 1\n",
"dtype: int64"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# https://stackoverflow.com/a/19324591/1175496\n",
"# Could also use just the multi_index\n",
"#https://stackoverflow.com/a/45620300/1175496\n",
"# No.. not so sure...\n",
"# Ah, there are duplicates, so a reindex wouldnt work...\n",
"# Duplicatse because I did not account for the 'Region' column, so we have two rows for a given code, and on a given date...\n",
"pd.Series(indexed_simplified_df.index.values).value_counts()[:3]"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: PerformanceWarning: indexing past lexsort depth may impact performance.\n",
" \n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">202</th>\n",
" <th>2018-05-11</th>\n",
" <td>1</td>\n",
" <td>2121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>0</td>\n",
" <td>2232</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region Rate\n",
"code date \n",
"202 2018-05-11 1 2121\n",
" 2018-05-11 0 2232"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Here are the duplicate values...:\n",
"indexed_simplified_df.loc[(202, '2018-05-11')]"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"# https://stackoverflow.com/a/34297689/1175496\n",
"# Let's get rid of duplicates, assume \"Regions\" is not an issue; (I think you can re-run this to add the \"Regions\" the user wants...!)\n",
"# unique_indexed_simplified_df = indexed_simplified_df.iloc[~indexed_simplified_df.index.duplicated(keep='first')]\n",
"# pd.Series(unique_indexed_simplified_df.index.values).value_counts()[:5]"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Region</th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"9\" valign=\"top\">98</th>\n",
" <th>2016-02-28</th>\n",
" <td>0.0</td>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">99</th>\n",
" <th>2017-01-01</th>\n",
" <td>0.0</td>\n",
" <td>1455.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Region Rate\n",
"code date \n",
"98 2016-02-28 0.0 1566.0\n",
" 2018-05-08 NaN NaN\n",
" 2018-05-09 NaN NaN\n",
" 2018-05-10 NaN NaN\n",
" 2018-05-11 NaN NaN\n",
" 2018-05-12 NaN NaN\n",
" 2018-05-13 NaN NaN\n",
" 2018-05-14 NaN NaN\n",
" 2018-05-15 NaN NaN\n",
"99 2017-01-01 0.0 1455.0\n",
" 2018-05-08 NaN NaN\n",
" 2018-05-09 NaN NaN"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Sort the index; which should sort first by code, then by date: \n",
"merged_sorted_dates_df = merged_dates_df.sort_index()\n",
"merged_sorted_dates_df.head(12)"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>date</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"9\" valign=\"top\">98</th>\n",
" <th>2016-02-28</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"9\" valign=\"top\">99</th>\n",
" <th>2017-01-01</th>\n",
" <td>1455.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-08</th>\n",
" <td>1455.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-09</th>\n",
" <td>1455.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-10</th>\n",
" <td>1344.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-11</th>\n",
" <td>1233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-12</th>\n",
" <td>1233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-13</th>\n",
" <td>1233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-14</th>\n",
" <td>1233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-15</th>\n",
" <td>1233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">101</th>\n",
" <th>2018-05-06</th>\n",
" <td>1122.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018-05-07</th>\n",
" <td>1011.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Rate\n",
"code date \n",
"98 2016-02-28 1566.0\n",
" 2018-05-08 1566.0\n",
" 2018-05-09 1566.0\n",
" 2018-05-10 1566.0\n",
" 2018-05-11 1566.0\n",
" 2018-05-12 1566.0\n",
" 2018-05-13 1566.0\n",
" 2018-05-14 1566.0\n",
" 2018-05-15 1566.0\n",
"99 2017-01-01 1455.0\n",
" 2018-05-08 1455.0\n",
" 2018-05-09 1455.0\n",
" 2018-05-10 1344.0\n",
" 2018-05-11 1233.0\n",
" 2018-05-12 1233.0\n",
" 2018-05-13 1233.0\n",
" 2018-05-14 1233.0\n",
" 2018-05-15 1233.0\n",
"101 2018-05-06 1122.0\n",
" 2018-05-07 1011.0"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ffilled_dates_df = (merged_dates_df\n",
" #Will remove the Region column, I realize I didn't account for it; \n",
" #It will leave a duplicate in (202, '2018-05-11')\n",
" .drop(columns='Region')\n",
" #fillna will handle na methods; \n",
" # check the arguments to fillna; \n",
" #-can fill all NA cells with one value, \n",
" #-or each NA cell with a particular value, \n",
" #-or in this case use a method 'forward fill'; this is why we sorted earlier\n",
" # > ffill: propagate last valid observation forward to next valid\n",
" .fillna(method='ffill')\n",
")\n",
"ffilled_dates_df.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2018-05-08 00:00:00</th>\n",
" <th>2018-05-09 00:00:00</th>\n",
" <th>2018-05-10 00:00:00</th>\n",
" <th>2018-05-11 00:00:00</th>\n",
" <th>2018-05-12 00:00:00</th>\n",
" <th>2018-05-13 00:00:00</th>\n",
" <th>2018-05-14 00:00:00</th>\n",
" <th>2018-05-15 00:00:00</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>202</th>\n",
" <td>2565.0</td>\n",
" <td>2454.0</td>\n",
" <td>2343.0</td>\n",
" <td>2121.0</td>\n",
" <td>2010.0</td>\n",
" <td>1899.0</td>\n",
" <td>1788.0</td>\n",
" <td>1677.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>1455.0</td>\n",
" <td>1455.0</td>\n",
" <td>1344.0</td>\n",
" <td>1233.0</td>\n",
" <td>1233.0</td>\n",
" <td>1233.0</td>\n",
" <td>1233.0</td>\n",
" <td>1233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" <td>2676.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" <td>1566.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 2018-05-08 2018-05-09 2018-05-10 2018-05-11 2018-05-12 2018-05-13 \\\n",
"code \n",
"202 2565.0 2454.0 2343.0 2121.0 2010.0 1899.0 \n",
"99 1455.0 1455.0 1344.0 1233.0 1233.0 1233.0 \n",
"198 2676.0 2676.0 2676.0 2676.0 2676.0 2676.0 \n",
"98 1566.0 1566.0 1566.0 1566.0 1566.0 1566.0 \n",
"\n",
" 2018-05-14 2018-05-15 \n",
"code \n",
"202 1788.0 1677.0 \n",
"99 1233.0 1233.0 \n",
"198 2676.0 2676.0 \n",
"98 1566.0 1566.0 "
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Can easily pivot this up for easier viewing\n",
"ffilled_pivoted_dates_df = (ffilled_dates_df\n",
" #Get rid of the duplicate rows, otherwise pivot will fail:\n",
" .loc[~ffilled_dates_df.index.duplicated(keep='first')]\n",
" #Turn the indexes into columns, so the `pivot` method works: # https://stackoverflow.com/a/20461206/1175496\n",
" .reset_index(level=ffilled_dates_df.index.names)\n",
" #Pivot; specify \n",
" # which column is the new index aka row labels; \n",
" # which column because the new column index aka column labels;\n",
" # and which column will contain the values\n",
" .pivot(index='code', \n",
" columns='date', \n",
" values='Rate')\n",
" #Only select the dates the user wants, in the order user wants them\n",
" .loc[user_wants_codes, user_wants_date_range]\n",
")\n",
"\n",
"ffilled_pivoted_dates_df"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x24411c90390>"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAERCAYAAABy/XBZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8FfXV+PHPyc5OQoDkJoGALAlZ0bCqLEVWJVFRf+5Ltdq6L23FpY9Vq3Vpq/hIbVGo6GNVVDRBQcAFERTZTAirICCEG3aEsG/n98cMGDGBJCS5ubnn/XrllZszM3fOtHLPnfme+Y6oKsYYYwJPkK8TMMYY4xtWAIwxJkBZATDGmABlBcAYYwKUFQBjjAlQVgCMMSZAWQEwxpgAZQXAGGMClBUAY4wJUCG+TuBkoqOjNTEx0ddpGGOMX1mwYMFWVW15qvXqdAFITExk/vz5vk7DGGP8ioj8UJH17BKQMcYEKCsAxhgToKwAGGNMgLICYIwxAcoKgDHGBCgrAMYYE6CsABhjTICq0/cBnI6n5z7N8u3LfZ2GMcZUSVJUEvd3v79G92FnAMYYE6Dq7RlATVdOY4zxd3YGYIwxAcoKgDHGBCgrAMYYE6CsABhjTICyAmCMMQHqlAVARBJE5HMRWSYiS0TkrlLL7hCRFW78mVLxB0RklbtscKn4EDe2SkRGVv/hGGOMqaiKtIEeBu5T1YUi0gRYICLTgdZADpCuqgdEpBWAiHQBLgdSAA/wiYh0ct9rNDAQKALmiUieqi6t3kMyxhhTEacsAKpaDBS7r0tEZBkQB/wGeEpVD7jLNrub5ABvufE1IrIK6O4uW6WqqwFE5C13XSsAxhjjA5UaAxCRRKAr8A3QCThXRL4RkS9EpJu7WhywvtRmRW6svPiJ+7hZROaLyPwtW7ZUJj1jjDGVUOECICKNgfeAu1V1F87ZQyTQE/gDMEFEBJAyNteTxH8eUB2jqlmqmtWy5SmfaWyMMaaKKjQVhIiE4nz4v6GqE91wETBRVRWYKyJHgWg3nlBq83jA674uL26MMaaWVaQLSICxwDJV/UepRR8Av3LX6QSEAVuBPOByEQkXkXZAR2AuMA/oKCLtRCQMZ6A4rzoPprRDR47W1FsbY0y9UJEzgLOBa4BCEcl3Yw8C44BxIrIYOAhc554NLBGRCTiDu4eB21T1CICI3A5MBYKBcaq6pFqPxrX7wGH6PPM5vc9oQU5mHH06RRMeElwTuzLGGL8lzmd23ZSVlaXz58+v9HZbSg7wwqcr+aiwmO17DtI0IoRhabFkZ3jo0b4FwUFlDUcYY0z9ICILVDXrlOvVxwJwzKEjR5m9ait5+V6mLtnInoNHaNUknAvSPeRkekiPb4ZzhcsYY+oPKwAn2HfwCJ8t30xu/gZmrNjCwSNHSWzRkOwMD9mZHjq0alIt+zHGGF+zAnASO/cdYurijeQVePnq+60cVegS25TsTA/DMzzENW9Q7fs0xpjaYgWggjaX7OejRcXk5nvJX/8jAN0Toxie6eH8tFiiGoXV6P6NMaa6WQGogh+27WFSgZfcfC8rN+8mJEg4p2M0OZkeBnaJoXF4vX2CpjGmHrECcBpUleUbS8gr8JKX72XDj/uICA1iQHJrsjM89Ovc0tpKjTF1lhWAanL0qPLt+h3k5nv5aFEx2/YcpElECENTY8jJjKOntZUaY+oYKwA14PCRo8z+fhu5+RuYtmQTuw8cpmWTcC5Id+4xyExobm2lxhifswJQw/YfctpK8/K9fLZiMwcPH6VNlNNWmpPpoWNrays1xviGFYBatGv/T22ls1c5baXJsU3JzvAwPCOW+MiGvk7RGBNArAD4yJaSA3y0yEtegZeF65y20qy2keRkehiWFkuLxuE+ztAYU99ZAagD1m/fe7yTaMWmEoKDhHM6RJOd4WFQSmuaRIT6OkVjTD1kBaCOWb5xF3n5zplB0Y59hIcEMSC5FdkZcfTr3JKIUGsrNcZUDysAdZSqsnDdj0wq8PLhIi9bdx+kSXgIQ1JjyM700Kt9C0KCK/WkTmOM+RkrAH7g8JGjfL16G7n5XqYu3kjJgcNEN3bbSjM9dLW2UmNMFVgB8DP7Dx1hxorN5BV4+WSZ01aaENXAbSuNo5O1lRpjKsgKgB8r2X+IqUs2HW8rPXJUSYppwvAMD9kZHhKirK3UGFM+KwD1xNbdB5hc6MxWuuCHHQCc2aY5OZlxDEuLpWUTays1xvxctRUAEUkAXgNigKPAGFUdVWr574FngZaqutV9iPwoYBiwF7heVRe6614HPOxu+hdVHX+yfVsB+Ln12/cyaZHTVrp8YwlBAme7baWDU2Noam2lxhiqtwDEArGqulBEmgALgAtVdalbHF4BkoCz3AIwDLgDpwD0AEapag8RiQLmA1mAuu9zlqruKG/fVgDK992mEvLyveQWbGD99n2EhQQxIKkV2Rke+ie1srZSYwJYRQvAKSe4V9VioNh9XSIiy4A4YCnwHPBHILfUJjnAa+pUljki0twtIv2A6aq63U1wOjAEeLMyB2YcnVo34feDO3PfoE7kr/+R3HwvHy4qZsrijTQJD2FQSgw5mR56n2FtpcaYslXqCScikgh0Bb4RkWxgg6oWnNCqGAesL/V3kRsrL37iPm4GbgZo06ZNZdILSCJC1zaRdG0TycPnJzNn9XbyCjYwZfFG3ltYRHTjMM5Pc9pKz2wTaW2lxpjjKlwARKQx8B5wN3AYeAgYVNaqZcT0JPGfB1THAGPAuQRU0fwMhAQHcU7HaM7pGM1jOanMWLGFSQVe3pq3nvFf/0B8ZAOGu7OVJsU09XW6xhgfq1ABEJFQnA//N1R1ooikAe2AY9/+44GFItId55t9QqnN4wGvG+93QnzGaeZvyhERGsyQ1BiGpMZQsv8Q05duIjffy5iZq3lpxvd0at2YnMw4hqd7aNPC2kqNCUQVGQQWYDywXVXvLmedtUCWOwh8PnA7Pw0Cv6Cq3d1B4AXAme5mC3EGgbeXt28bBK5+29y20rwCL/PWOuPvXds0JzvDw/npsbRqEuHjDI0xp6s6u4DOAb4ECnHaQAEeVNXJpdZZy08FQIAXcQZ49wI3qOp8d71fAw+6mz2hqv852b6tANSsoh17+XCRc4/BsuJdBAn0PiOa7EwPg1NiaNbA2kqN8Ud2I5iplJWbSpypqwu8/LBtL2HBQfRPakl2RhwDkq2t1Bh/YgXAVImqUlC0k7x8L5MWedlScoDG4SEM6tKa7EwPZ3eIJtTaSo2p06wAmNN25KgyZ/U28vK9TF5cTMn+w0Q1ctpKf9fvDDzNG/g6RWNMGawAmGp14PARvlixhdwCL58s3URYSBB/uTCVnMxf3MphjPGxihYAO5c3FRIeEsyglBhGX3km0+7pQ8dWjbnrrXzuePNbdu495Ov0jDFVYAXAVFrbFo2YcEsv7hvYiSmFxQwZNZOvVm31dVrGmEqyAmCqJCQ4iDsGdGTirb1pEBbMla98w+MfLmX/oSO+Ts0YU0FWAMxpSY9vzkd3nMs1PdsydtYacl6czbLiXb5OyxhTAVYAzGlrEBbM4xem8p8burF970FyXpzNmJnfc/Ro3W0wMMZYATDVqH/nVky9uw/9k1ry5OTlXPnKHDb8uM/XaRljymEFwFSrqEZh/Ovqs3jmknQKi3Yy5PmZfPDtBupyu7ExgcoKgKl2IsJlWQlMuasPnVs34e63rV3UmLrICoCpMW1aNOTtW3rxh8Gd+XjxRgY/P5PZ1i5qTJ1hBcDUqOAg4bb+HXj/1rNpFB7MVa98w2OTrF3UmLrACoCpFWnxzfjwjnO5tldbxs1eQ/aLs1jqtXZRY3zJCoCpNQ3CgnksJ5VXb+jGjr2HyBk9i3998T1HrF3UGJ+wAmBqXT+3XXRAUmuemrKcK16eQ9GOvb5Oy5iA43ezgR46dIiioiL279/vo6xqRkREBPHx8YSGBs5TuFSVdxcU8eikpQjwaE4KF3WNw33OtDGmiio6G2iFHgpflxQVFdGkSRMSExPrzQeFqrJt2zaKiopo166dr9OpNSLCpVkJ9GzfgnvezufeCQV8umwzT1yUSvOGYb5Oz5h6z+8uAe3fv58WLVrUmw9/cD4IW7RoUe/OaioqIeqndtGpS5x20VkrrV3UmJp2ygIgIgki8rmILBORJSJylxt/VkSWi8giEXlfRJqX2uYBEVklIitEZHCp+BA3tkpERlY16fr04X9MfTymyjjWLvrBbWfTJCKUq8d+w6OTlli7qDE1qCJnAIeB+1Q1GegJ3CYiXYDpQKqqpgPfAQ8AuMsuB1KAIcA/RSRYRIKB0cBQoAtwhbuuMcelxjXjwzvO4freifxn9lqG/+8slnh3+jotY+qlUxYAVS1W1YXu6xJgGRCnqtNU9bC72hwg3n2dA7ylqgdUdQ2wCuju/qxS1dWqehB4y13XbzRu3NjXKQSEiNBg/pydwvhfd2fnvkNcOHo2L82wdlFjqlulxgBEJBHoCnxzwqJfA1Pc13HA+lLLitxYefET93GziMwXkflbtmypTHqmnunbqSVT7+7DwC6tefrj5VwxZg7rt1u7qDHVpcIFQEQaA+8Bd6vqrlLxh3AuE71xLFTG5nqS+M8DqmNUNUtVs1q2bFnR9CrltddeIz09nYyMDK655hp++OEHBgwYQHp6OgMGDGDdunUArFmzhl69etGtWzf+9Kc//ew9nn32Wbp160Z6ejqPPPJIjeRpILJRGKOvPJO/X5rB0uJdDB31Je8tKLLZRY2pBhUqACISivPh/4aqTiwVvw64ALhKf/oXWQQklNo8HvCeJF6rlixZwhNPPMFnn31GQUEBo0aN4vbbb+faa69l0aJFXHXVVdx5550A3HXXXfzud79j3rx5xMTEHH+PadOmsXLlSubOnUt+fj4LFixg5syZtX0oAUNEGHFWPFPuOpfk2Cbc904Bt/13ITv2HPR1asb4tYp0AQkwFlimqv8oFR8C3A9kq2rp8/I84HIRCReRdkBHYC4wD+goIu1EJAxnoDiv+g6lYj777DMuueQSoqOjAYiKiuLrr7/myiuvBOCaa65h1qxZAMyePZsrrrjiePyYadOmMW3aNLp27cqZZ57J8uXLWblyZS0fSeBJiGrIWzf34o9DOjN96SYGPz+Tmd/ZZUJjqqoiN4KdDVwDFIpIvht7EHgBCAemuy2Mc1T1t6q6REQmAEtxLg3dpqpHAETkdmAqEAyMU9Ul1Xo0FaCqp2y5LL28rHVVlQceeIBbbrml2vMzJxccJNzarwN9Orbk7rfzuXbcXK7vncjIoUlEhAb7Oj1j/EpFuoBmqaqoarqqZro/k1W1g6omlIr9ttQ2T6jqGaraWVWnlIpPVtVO7rInauqgTmbAgAFMmDCBbdu2AbB9+3Z69+7NW2+9BcAbb7zBOeecA8DZZ5/9s/gxgwcPZty4cezevRuADRs2sHnz5to8jIBXul301a/WcsH/zmLxBmsXNaYy/O5O4NOVkpLCQw89RN++fcnIyODee+/lhRde4D//+Q/p6em8/vrrjBo1CoBRo0YxevRounXrxs6dP324DBo0iCuvvJJevXqRlpbGJZdcQklJia8OKWAdaxd9/cbulOw/xEX/nM3oz1dZu6gxFeR3k8EtW7aM5ORkH2VUs+rzsdW0H/ce5KH3F/NRYTHdEiP5x2WZJEQ19HVaxvhERSeDC7gzAFM/NW8YxotXduUfl2WwvLiEoaO+5F1rFzXmpKwAmHpDRLj4zHim3H0uXTxN+f07Bdz6xkK2W7uoMWXyu+mgjTmV+MiGvPmbnrz85Wr+Pm0FX3y3hUFdWpOTGcc5HaMJDbbvPcaAFQBTTwUHCb/tewb9Ordk/FdrmVy4kQ/yvUQ2DGVYWizZGR66JUYRFBTYs7CawGYFwNRrSTFN+evF6TyancrM77aQW+Bl4sINvPHNOmKbRTA8w0N2hocUT9OAn5LbBB4rACYghIUEcV6X1pzXpTV7Dhzmk2WbyMv3Mm7WGsbMXM0ZLRuRnRFHdqaHdtGNfJ2uMbXCLoZWwfr16+nfvz/JycmkpKQcv29g+/btDBw4kI4dOzJw4EB27NgBODeRpaenk56eTu/evSkoKPBl+gGvUXgIOZlxjL2+G/MeOo8nL0ojunE4z3/6Hf3/NoPsF2fxyper2bQrMJ/QZgKH3QdQBcXFxRQXF3PmmWdSUlLCWWedxQcffMCrr75KVFQUI0eO5KmnnmLHjh08/fTTfPXVVyQnJxMZGcmUKVP485//zDffnDijdt04tkBWvHMfHxYUk1fgpXDDTkSgZ7sWZGd6GJoaY88pNn6jovcBWAGoBjk5Odx+++3cfvvtzJgxg9jYWIqLi+nXrx8rVqz42bo7duwgNTWVDRs2/OJ96uKxBarVW3aTV+AlL9/L6q17CA0W+nZqSXZmHOclt6JhmF09NXVXRQuAX/9X/OikJSz17jr1ipXQxdOUR4anVHj9tWvX8u2339KjRw82bdpEbGwsALGxsWXODzR27FiGDh1abfmamtG+ZWPuPq8Tdw3oyBLvLnLzNzCpoJhPlm2mYVgwA7u0JjvDw7kdWxIWYldSjX/y6wLga7t372bEiBE8//zzNG3a9JTrf/7554wdO/b4dNOm7hMRUuOakRrXjAeGJjN37XZy871MWVxMbr6X5g1DGZoaS06mh+7WVmr8jF8XgMp8U69uhw4dYsSIEVx11VVcfPHFALRu3Zri4uLjl4BatWp1fP1FixZx0003MWXKFFq0aOGrtM1pCAoSerZvQc/2LXg0O4UvV24hr8DLB99u4M2564hpGsHwjFhyMuOsrdT4Bb8uAL6iqtx4440kJydz7733Ho9nZ2czfvx4Ro4cyfjx48nJcZ55v27dOi6++GJef/11OnXq5Ku0TTUKCwliQHJrBiS3Zu/Bw3yybDN5+Rt49au1vPzlGtpHNyI707nHoH3Lxr5O15gy2SBwFcyaNYtzzz2XtLQ0goKc679PPvkkPXr04LLLLmPdunW0adOGd955h6ioKG666Sbee+892rZtC0BISAgnHhfUjWMzp+fHvQeZsngjefle5qzZhiqkxTUjO8PDBRmxxDZr4OsUTQCwLiA/VJ+PLRBt3LmfDxd5ySvwsqjIaSvtnhhFdqaHYamxRDaytlJTMwKiC8iYuiymWQQ3nduem85tz5qte8jL95JbsIGH3l/MI7lL3LZSD+clt6ZRuP1TNLXvlP/ViUgC8BoQAxwFxqjqKBGJAt4GEoG1wGWqusN9iPwoYBiwF7heVRe673Ud8LD71n9R1fHVezjG1E3tohtx13kduXNAB5Z4d5FX4GVSgZdPl2+mQWgw53VpTU6Ghz6drK3U1J6KfO04DNynqgtFpAmwQESmA9cDn6rqUyIyEhgJ3A8MBTq6Pz2Al4AebsF4BMgC1H2fPFXdUd0HZUxdVbqtdOSQJOat3U5egZfJhcVMKvDSrEEow9JiGJ7hoUe7FgRbW6mpQacsAKpaDBS7r0tEZBkQB+QA/dzVxgMzcApADvCaOoMLc0SkuYjEuutOV9XtAG4RGQK8WY3HY4zfCAoSerRvQY/2LfhzdgqzVm4lN38Dufle3py7ntZNw7kg3UNOpoe0uGbWVmqqXaUuPIpIItAV+AZo7RYHVLVYRI41vccB60ttVuTGyoufuI+bgZsB2rRpU5n0jPFbocFB9E9qRf+kVuw7eMSZrbTAy2tfr2XsrDUktmhIdmYc2RkeOrSytlJTPSpcAESkMfAecLeq7jrJt5GyFuhJ4j8PqI4BxoDTBVTR/IypLxqEBTM8w8PwDA879x7i4yXOXcf/+9lKXvh0JSmepuRkergg3YOnubWVmqqr0GiTiITifPi/oaoT3fAm99IO7u9jE98UAQmlNo8HvCeJ+6VRo0aRmppKSkoKzz//PAAFBQX06tWLtLQ0hg8fzq5d1TtPkQk8zRqG8v+6teG/v+nJnAcG8KcLuhASJDw5eTm9n/qMy/79Nf835wd77rGpklMWALerZyywTFX/UWpRHnCd+/o6ILdU/Fpx9AR2upeKpgKDRCRSRCKBQW7M7yxevJiXX36ZuXPnUlBQwIcffsjKlSu56aabeOqppygsLOSiiy7i2Wef9XWqph5p3TSCG89pR+7t5zDj9/24d2Antu0+wMMfLKb7E59ww3/m8sG3G9hz4LCvUzV+oiKXgM4GrgEKRSTfjT0IPAVMEJEbgXXApe6yyTgtoKtw2kBvAFDV7SLyODDPXe+xYwPC/mbZsmX07NmThg0bAtC3b1/ef/99VqxYQZ8+fQAYOHAggwcP5vHHH/dlqqaeSoxuxJ0DOnLHrzqwtNhtK833cvfb+USEBnFesjNbad/OLQkPCfZ1uqaOqkgX0CzKvn4PMKCM9RW4rZz3GgeMq0yCJzVlJGwsrLa3AyAmDYY+ddJVUlNTeeihh9i2bRsNGjRg8uTJZGVlkZqaSl5eHjk5ObzzzjusX7/+pO9jzOkSEVI8zUjxNOP+wUksWLeD3PwNTC7cyIeLimkaEXJ8ttIe7a2t1Pyc3X5YBcnJydx///0MHDiQxo0bk5GRQUhICOPGjePOO+/kscceIzs7m7Awu9Xf1J6gIKFbYhTdEqN4ZHgKs1dtJS/fy4eLvLw9fz2tmjhtpdmZHjLira3U2FxA1eLBBx8kPj6eW2+99Xjsu+++4+qrr2bu3LkVfp+6eGzG/+07eITPlm8mN38DM1Zs4eCRo7Rt0ZCcDKcYdGjVxNcpmmpmcwHVsM2bN9OqVSvWrVvHxIkT+frrr4/Hjh49yl/+8hd++9vf+jpNY2gQFsz56bGcnx7Lzn2HmLp4I3kFXl78fBUvfLaKLrFNyc502k7jrK00oFgBqKIRI0awbds2QkNDGT16NJGRkYwaNYrRo0cDcPHFF3PDDTf4OEtjfq5Zg1Au65bAZd0S2Fyyn48WOfcYPDVlOU9NWU63xEiyM+MYlhpDi8bhvk7X1DC7BFSH1OdjM3XbD9v2MKnAS26+l5WbdxMcJJzbMZrsDA+DUmJobLOV+hW7BGSMqbC2LRpx+686clv/DizfWEJegZe8fC/3TiggPKTQaSvN9NDP2krrFSsAxpjjRITk2KYkxzblD4M68+36HeTme/loUTEfFRbTJCKEoakxZGfE0esMayv1d1YAjDFlCgoSzmobxVlto/ifC7ow+/tt5OV7mVy4kQnzi2jZJJzz05x7DDITmltbqR+yAmCMOaWQ4CD6dmpJ304teeJQKp8v30xuvpf/zl3Hq1+tpU1UQ7IznKmrO7a2tlJ/YQXAGFMpEaHBDE2LZWhaLLv2/9RW+s8Zq3jx81UkxTQhJzOO4RmxxEc29HW65iSsABhjqqxpRCiXZiVwaVYCW0oO8NEiL3kFXp7+eDlPf7ycrLaRZGd6GJYWS7S1ldY59vDRKvr1r39Nq1atSE1NPR4rbzroQ4cOcd1115GWlkZycjJ//etffZW2MTWmZZNwrj+7HRNvPZsv/9ifPwzuTMn+w/xP7hJ6PPkp146by3sLiijZf8jXqRqXFYAquv766/n4449/FitvOuh33nmHAwcOUFhYyIIFC/j3v//N2rVrfZC1MbUjIaoht/XvwNR7+vDx3edyS5/2rN6ym/veKSDrL59w6xsL+HhxMfsPHfF1qgHNLgFVUZ8+fX7xIV7edNAiwp49ezh8+DD79u0jLCyMpk2b+iBrY2pfUkxTkoY05Q+DO7Nw3Y9MKnAmqJtcuJEm4SEMSY0hO9NDr/YtCAm276S1ya8LwNNzn2b59uXV+p5JUUnc3/3+Km1b3nTQl1xyCbm5ucTGxrJ3716ee+45oqKiqjNtY+o8EeGstpGc1TaSh89P5uvV28jN9/Lx4o28s6CI6MbhXJAeS3amh67WVlorrNxWo3HjxjF69GjOOussSkpKjk8HPXfuXIKDg/F6vaxZs4a///3vrF692sfZGuM7IcFBnNuxJX+7NIN5D5/Hv64+k+7tIvnv3HVc/M+v6PPs5zw7dTkrNpb4OtV6za/PAKr6Tb2mJCUlMW3aNMCZDvqjjz4C4L///S9DhgwhNDSUVq1acfbZZzN//nzat2/vy3SNqRMiQoMZkhrLkNRYSvYfYuqSTeQVePnXF6sZ/fn3JMU0YXiGh+wMDwlR1lZanewMoBpt3rwZ4BfTQbdp04bPPvsMVWXPnj3MmTOHpKQkX6ZqTJ3UJCKUS86K57Vfd+ebBwfwWE4KjcJDeHbqCs595nMu/udsxn+1li0lB3ydar3g12cAvnTFFVcwY8YMtm7dSnx8PI8++ii7d+8uczro2267jRtuuIHU1FRUlRtuuIH09HRfpm9MnRfdOJxreyVyba9E1m/fy6RFzgR1j+Qt4dFJSzi7gzNb6eDUGJpGhPo6Xb90yumgRWQccAGwWVVT3Vgm8C8gAjgM3Kqqc8UZtRmF81D4vcD1qrrQ3eY64GH3bf+iquNPlZxNB22MOdF3m0rIy/eSW7CB9dv3ERYSxICkVmRneOif1IqIUJuttDqng34VeBF4rVTsGeBRVZ0iIsPcv/sBQ4GO7k8P4CWgh4hEAY8AWYACC0QkT1V3VPiIjDEG6NS6Cb8f3Jn7BnUif/2P5OZ7+XBRMVMWO22lg1JiyMn00PsMays9lVMWAFWdKSKJJ4aBY43szQCv+zoHeE2d04o5ItJcRGJxisN0Vd0OICLTgSHAm6d7AMaYwCQidG0TSdc2TlvpnNXbyc3fwMdLNvLewiKiG4dxflos2ZlxnNnG2krLUtUxgLuBqSLyN5yB5N5uPA5YX2q9IjdWXvwXRORm4GZwBk+NMeZUQoKDOKdjNOd0jObxC1OZsWILkwq8vDVvPeO//oH4yAYMd2crTYqxmzCPqWoB+B1wj6q+JyKXAWOB84CySqyeJP7LoOoYYAw4YwBVzM8YE6CcttIYhqTGULL/ENOXbiI338uYmat5acb3dG7dhOxMayuFqheA64C73NfvAK+4r4uAhFLrxeNcHirCuQxUOj6jivs2xpgKaRIRysVnxnPxmfFs232AyYXF5OZ7eXbqCp6duoKubZqTk+Hh/HQPLZsE3mylVR0h8QJ93de/Ala6r/OAa8XRE9i7KvWsAAAU+ElEQVSpqsXAVGCQiESKSCQwyI0ZY0ytaNE4nGt6JfLu73oz6/7+3D8kif2HjvLnSUvp8eQnXDP2GybMX8+uAJqt9JQFQETeBL4GOotIkYjcCPwG+LuIFABP4l6zByYDq4FVwMvArQDu4O/jwDz357FjA8L+atSoUaSmppKSksLzzz8PQH5+Pj179iQzM5OsrCzmzp3r4yyNMWWJj2zI7/qdwZS7zmX6PX24rX8Hfti2lz++u4isv3zCLa/PZ3Jh/Z+t9JT3AfhSXb0PYPHixVx++eXMnTuXsLAwhgwZwksvvcRtt93GPffcw9ChQ5k8eTLPPPMMM2bMqPD71oVjMyZQqSoFRTvJzd/Ah4uK2VJygMbhIQxKaU12hodzOkT7TVtpdd4HYE6wbNkyevbsScOGzgBS3759ef/99xGR4w+B2blzJx6Px5dpGmMqQUTITGhOZkJzHj6/C3NWbyMv38vkxcVMXLiBFo3CGJYWS06mhzPbRBIU5P9tpX59BrDxySc5sKx6p4MOT04i5sEHT7rOsmXLyMnJ4euvv6ZBgwYMGDCArKwsbr31VgYPHoyqcvToUb766ivatm1b4X3bGYAxdc+Bw0f4YsUWcgu8fLJ0EwcOHyWueYPjE9Qlxzapc/cY2BlADUpOTub+++9n4MCBNG7cmIyMDEJCQnjppZd47rnnGDFiBBMmTODGG2/kk08+8XW6xpjTEB4SzKCUGAalxLD7wGGmL91IXr6Xl79czb+++J6OrRqTk+khOyOONi38q63Ur88A6ooHH3yQ+Ph4HnjgAX788UdEBFWlWbNmxy8JVURdPDZjTNm27znI5MJi8vK9zF3r9LRkJjQnO8PDBemxtGoa4bPcKnoG4B8jGnXQsamf161bx8SJE7niiivweDx88cUXAHz22Wd07NjRlykaY2pQVKMwru7Zlgm/7cXskb/igaFJHDpylMc+XErPv37KVa/MYcK89ezcV3fbSu0SUBWNGDGCbdu2ERoayujRo4mMjOTll1/mrrvu4vDhw0RERDBmzBhfp2mMqQVxzRtwS98zuKXvGazavJu8Ai95+Rv443uLePiDxfTr3JLsTA8DklrTIKzuzFZql4DqkPp8bMYEGlVlUdFO8gq8TCrwsrnkAI3CnPGE7EynrTS0htpKbRDYGGN8SETISGhORkJzHhyWzDdr3LbSwmLe/3YDUY3CGJYWQ3ZGHFltfdNWagXAGGNqWHCQ0PuMaHqfEc2jOSnM/G4reQVe3l1QxP/NWYenWQTD3QnqusQ2rbW2Ur8sAKpa5/puT1ddvhRnjKk+4SHBDOzSmoFdWrPnwGE+WebMVjr2yzX8+4vVdGjVmGz3HoPE6EY1movfFYCIiAi2bdtGixYt6k0RUFW2bdtGRITv2saMMbWvUXgIOZlx5GTGsWPPQSYvdtpK/zH9Oz5aVMzUe/rU6P79rgDEx8dTVFTEli1bfJ1KtYqIiCA+Pt7XaRhjfCSyURhX9WjLVT3aUrxzHxt37q/xffpdAQgNDaVdu3a+TsMYY2pMbLMGxDZrUOP7sRvBjDEmQFkBMMaYAGUFwBhjApQVAGOMCVBWAIwxJkBV5JnA40Rks4gsPiF+h4isEJElIvJMqfgDIrLKXTa4VHyIG1slIiOr9zCMMcZUVkXaQF8FXgReOxYQkf5ADpCuqgdEpJUb7wJcDqQAHuATEenkbjYaGAgUAfNEJE9Vl1bXgRhjjKmcUxYAVZ0pIoknhH8HPKWqB9x1NrvxHOAtN75GRFYB3d1lq1R1NYCIvOWuawXAGGN8pKpjAJ2Ac0XkGxH5QkS6ufE4YH2p9YrcWHnxXxCRm0VkvojMr293+xpjTF1S1QIQAkQCPYE/ABPEmZinrMl59CTxXwZVx6hqlqpmtWzZsorpGWOMOZWqTgVRBExUZwrLuSJyFIh24wml1osHvO7r8uLGGGN8oKpnAB8AvwJwB3nDgK1AHnC5iISLSDugIzAXmAd0FJF2IhKGM1Ccd7rJG2OMqbpTngGIyJtAPyBaRIqAR4BxwDi3NfQgcJ17NrBERCbgDO4eBm5T1SPu+9wOTAWCgXGquqQGjue4jU8+yYFly2tyF8YYU2PCk5OIefDBGt1HRbqArihn0dXlrP8E8EQZ8cnA5EplZ4wxpsb43XTQFVXTldMYY/ydTQVhjDEBygqAMcYEKCsAxhgToKwAGGNMgLICYIwxAcoKgDHGBCgrAMYYE6CsABhjTICyAmCMMQHKCoAxxgQoKwDGGBOgrAAYY0yAqp+Twe3dDmMH+TqLymncCrpcCCkXQWN7EpoxpubVzwIQFAIxab7OonI2L4Mpf4CPR0L7vpB2KSRdABFNfZ2ZMaaeEuc5LnVTVlaWzp8/39dp1J5NS6DwXVj8Lvy4DoLDodNgpxh0HAShEb7O0BjjB0RkgapmnWq9+nkG4K9apzg/A/4HiuZB4Tuw5H1YlgfhTSF5OKSOgHZ9Idj+rzPGnB47A6jrjhyGNV/A4vdg2SQ4sAsatXTGCtIuhfhuIOLrLI0xdUhFzwBO2QUkIuNEZLP7/N8Tl/1eRFREot2/RUReEJFVIrJIRM4ste51IrLS/bmusgcUsIJDoMMAuPCf8PuVcNnr0KYXLBgPYwfCqHT45FHYtNTXmRpj/MwpzwBEpA+wG3hNVVNLxROAV4Ak4CxV3Soiw4A7gGFAD2CUqvYQkShgPpAFKLDA3WbHyfZtZwAnsX8nLP/IGTNYPQP0CLTqAmmXOJeJIhN9naExxkeq7QxAVWcC28tY9BzwR5wP9GNycAqFquocoLmIxAKDgemqut390J8ODKnAcZjyRDSDzCvhmolw3woY9jcIbwKfPgajMuCVgfDNv2H3Zl9naoypo6p0I5iIZAMbVLXghEVxwPpSfxe5sfLiZb33zSIyX0Tmb9mypSrpBZ7GLaH7b+DGaXDXIhjwCBzaC1P+CH/vDK9dCN++4Zw1GGOMq9IFQEQaAg8B/1PW4jJiepL4L4OqY1Q1S1WzWra0G6IqLbItnHsv/G423DoHzrkXtq+G3Fvh2Y7w9tWw5AM4tM/XmRpjfKwqvYRnAO2AAnG6T+KBhSLSHeebfUKpdeMBrxvvd0J8RhX2bSqjVTIM+BP86mHYsMBpK1080ekmCmsCyRc4Ywbt+llbqTEBqEJtoCKSCHxYehC41LK1QJY7CHw+cDs/DQK/oKrd3UHgBcCxrqCFOIPAZY0tHGeDwDXgyGFY+6Vzs9nSSXBgJzSM/qmtNKG7tZUa4+eq7UYwEXkT59t7tIgUAY+o6thyVp+M8+G/CtgL3ACgqttF5HFgnrveY6f68Dc1JDgEzujv/Az7O6ya7nQSffs6zHsZmrWBtBGQeolzU5oVA2PqLbsRzDj274IVk53LRN9/7rSVtkz+qRhEtfN1hsaYCqroGYAVAPNLe7Y6U1Asfg/Wfe3E4rKcS0QpF0GT1r7NzxhzUlYATPX4cZ0zcFz4LmwqBAmCdn2cs4Lk4dCgua8zNMacwAqAqX6blzuDx4Xvwo41EBzmzFKadgl0GgKhDXydoTEGKwCmJqnChoVOMVj8HuzeBGGNnecXpF3qPM8gONTXWRoTsKwAmNpx9IjTVlr4LizNc9tKWzhjBamXQEIPCLInjxpTm6wAmNp3+ACs+sQpBiumwOF90CwBUi92zgxap1pbqTG1wAqA8a0DJU4RKHwHvv8Mjh6G6M5OIUgbAVHtfZ2hMfWWFQBTd+zZBks/cMYLfpjtxOLOci4RpV4MTWJ8m58x9YwVAFM37SxyCkHhu7BxkdNWmniOc2aQPBwaRPo6Q2P8nhUAU/dt+c5tK33HmbE0OAw6DPyprTSsoa8zNMYvWQEw/kMVvN86ZwVLJkJJsdNW2nmYc2ZwRn9rKzWmEqwAGP909IgzTlD4LizNhf0/QoMoSLnQGTNo08vaSo05BSsAxv8dPgjff+pcIloxxXnKWdO4n9pKY9KtrdSYMlgBMPXLgd1OEVj8rnOvwdHDEN3JOStIuwRanOHrDI2pM6wAmPpr73bn8lDhu25bqYKnqztb6cXQNNbXGRrjU1YATGDYucEZOC58F4rzAXHbSi+B5GxoGOXrDI2pdVYATODZuuqnttJtqyAoFDqc5xSEoGBfZ2dM5TRuBakjqrSpFQATuFShuMApBIsnQonX1xkZU3lxWfCbT6u0abU9E9gYvyMCnkznZ+Djzgylxvgbqfmz1oo8FH4ccAGwWVVT3dizwHDgIPA9cIOq/uguewC4ETgC3KmqU934EGAUEAy8oqpPVf/hGHOCoCCbXsKYclTkjppXgSEnxKYDqaqaDnwHPAAgIl2Ay4EUd5t/ikiwiAQDo4GhQBfgCnddY4wxPnLKAqCqM4HtJ8Smqeph9885QLz7Ogd4S1UPqOoaYBXQ3f1ZpaqrVfUg8Ja7rjHGGB+pjnvqfw1McV/HAetLLStyY+XFf0FEbhaR+SIyf8uWLdWQnjHGmLKcVgEQkYeAw8Abx0JlrKYnif8yqDpGVbNUNatly5ank54xxpiTqHIXkIhchzM4PEB/6iUtAhJKrRYPHOvBKy9ujDHGB6p0BuB29NwPZKvq3lKL8oDLRSRcRNoBHYG5wDygo4i0E5EwnIHivNNL3RhjzOmoSBvom0A/IFpEioBHcLp+woHp4szGOEdVf6uqS0RkArAU59LQbap6xH2f24GpOG2g41R1SQ0cjzHGmAqq03cCi8gW4IfTeItmgL/cBeRPuYJ/5etPuYJ/5etPuYJ/5dsGWFfFbduq6ikHUet0AThdIjJGVW/2dR4V4U+5gn/l60+5gn/l60+5gn/lKyJbKvIhfjrq+6OVJvk6gUrwp1zBv/L1p1zBv/L1p1zBv/L9saZ3UK/PAIwxxl+JyPyKTOh2Our7GYAxxvirMTW9AzsDMMaYAGVnAMYYE6DqTQEQkSEiskJEVonISDc2QEQWiki+iMwSkQ6+zhPKzfVXbq6LRWS8iNSJZzWIyDgR2Swii0vFokRkuoisdH/XmfmWy8n3UhFZIiJHRaRGr6lWRjm5Pisiy0VkkYi8LyLNfZljaeXk+7iba76ITBMRjy9zPKasXEst+72IqIhE+yK3uqReFICTTDf9EnCVqmYC/wUe9l2WjpPkOh643H3mwg/Adb7L8mde5ZfTgY8EPlXVjsCn7t91xav8Mt/FwMXAzFrP5uRepYJTrdcRr/LLfJ9V1XT339iHwP/UelZle5Vf5oqIJAADqXp/fY0op7j+WUQ2uMU1X0SGVfd+60UBoPzpphVo6q7TjLox/1BZuY4ADqjqd+46092Yz5U1HTjO/7bj3dfjgQtrNamTKGf68mWqusJHKZWrklOt+1w5+e4q9WcjypnksbaV898twHPAH6kjeZbyKmUULOA5Vc10fyZX907rxGWGalDWdNM9gJuAySKyD9gF9PRBbicqK9eeQKiIZKnqfOASfj55Xl3TWlWLAVS1WERa+TqheurXwNu+TuJUROQJ4FqcO2z7+zidcolINrBBVQvcKWzqDFWdKSKJtb3f+nIGUN500/cAw1Q1HvgP8I9azapsZeV6FGeCvOdEZC5QgjOXkglQZUy1Xmep6kOqmoCT6+2+zqcsItIQeIi6c4mqom53x1jG1cRYW30pAGVNQ70ZyFDVb9zY20Dv2k6sDGVOma2qX6vquaraHeda9UqfZFcxm0QkFsD9vdnH+dQrpaZav6rUVOv+4L/UkUuXZTgDaAcUiMhanH93C0UkxqdZndxLOHlnAsXA36t7B/WlAJQ33XQzEenkrjMQWOarBEspM9djl1FEJBxnqu1/+TDHU8njp0Hq64BcH+ZSr5xkqvU6SUQ6lvozG1juq1xORlULVbWVqiaqaiLOF7EzVXWjj1Mrl6puUtUjqnoUeBln/LDad1IvfoBhOF0T3wMPubGLgEKgAJgBtPd1nifJ9VmcArUCuNvXOZbK9U2cbx+HcP7R3Ai0wOn+Wen+jvJ1nqfI9yL39QFgEzDV13meJNdVOGNE+e7Pv3yd5ynyfQ+ny2oRzjw7cb7Os7xcT1i+Foj2dZ4n5JQILC71d2yp1/fgPG+9WvdpdwIbY4yPlX7uCs6XlEfcvzNxxjPXAreo23xRbfu1AmCMMYGpvowBGGOMqSS/LADubdyvl/o7RES2iMiHvszLGGP8iV8WAGAPkCoiDdy/BwIbfJiPMcb4HX8tAABTgPPd11fgjPoDICLdReQrEfnW/d3ZjX8pIpml1pstIum1mrUxxtQR/lwA3gIuF5EIIB34ptSy5UAfVe2Kc+ffk278FeB6APf+gHBVXVRrGRtjTB3itwXA/eBOxPn2f+IkSc2Ad9yZ9Z4DUtz4O8AFIhKKM8/Kq7WSrDHG1EF+WwBcecDfKHX5x/U48Lk6UysPByIA1LmzcjrObJaX4dy6bowxAcnfZwMdB+xU1UIR6Vcq3oyfBoWvP2GbV3DuWPxSVcuaLtYYYwKCX58BqGqRqo4qY9EzwF9FZDYQfMI2C3Cmhv5PLaRojDF1VsDdCew+sm4GkKTOJEvGGBOQ/PoMoLJE5FqcbqGH7MPfGBPoAu4MwBhjjCOgzgCMMcb8xK8KgIgkiMjnIrJMRJaIyF1uPEpEpovISvd3pBtPEpGvReSAiPz+hPe6x32PxSLypntDmTHGBAy/KgA4z0i9T1WTcR6kfpuIdAFGAp+qakecB5SMdNffDtyJc6/AcSIS58az3HsFgnGezGWMMQHDrwqAqhar6kL3dQnOE7TicG7sGu+uNh640F1ns6rOw3kq0IlCgAYiEgI0BLw1nL4xxtQpflUAShORRKArTldP62NPynF/tzrZtqq6AeesYB3OY+N2quq0mszXGGPqGr8sACLSGOdZpHer6q4qbB+Jc9bQDvAAjUTk6urN0hhj6ja/KwDuRG7vAW+o6kQ3vElEYt3lscDmU7zNecAaVd2iqoeAiUDvmsrZGGPqIr8qACIiwFhgmar+o9SiPOA69/V1QO4p3mod0FNEGrrvOQBnPMEYYwKGX90IJiLnAF8ChcCxO3kfxBkHmAC0wflwv1RVt4tIDDAfaOquvxvooqq7RORR4P/hdBZ9C9ykqgdq83iMMcaX/KoAGGOMqT5+dQnIGGNM9bECYIwxAcoKgDHGBCgrAMYYE6CsABhjTICyAmCMMQHKCoAxxgSo/w+lNDKU96j90AAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"(ffilled_pivoted_dates_df\n",
" #We can easily \"transpose\" this:\n",
" .T\n",
" #So that it can be plotted: (We're reviewing matplotlib in Module 5)\n",
" .plot()\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>1566.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>1302.375</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>2676.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <td>2121.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg_rate\n",
"code \n",
"98 1566.000\n",
"99 1302.375\n",
"198 2676.000\n",
"202 2121.000"
]
},
"execution_count": 209,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Finally, the aggregation?\n",
"\n",
"ffilled_dates_avg_rates_df = (ffilled_dates_df\n",
" #NOW we will apply the filter, after we fffilled the values forward we can ignore historic/future values\n",
" # use the original multi_index which already contains the combos we want\n",
" .loc[multi_index]\n",
" #Could also do this, to filter each \"level\" of the index \n",
" #.loc[idx[user_wants_codes, user_wants_date_range], :]\n",
" .groupby('code')\n",
" .mean()\n",
" .rename(columns={'Rate': 'avg_rate'})\n",
")\n",
"ffilled_dates_avg_rates_df\n"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x24411dbfb00>"
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAETCAYAAAA/NdFSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFh5JREFUeJzt3X20XXV95/H3pzyIkIwGiMgkgWAnijgFhICMYTGoU0CtPDhSwBnJoG3KTLBUax2sM0sqOkVWtQIiNGoGVEBZpQoWlgxkjC4pUYJShERKCjS5A0IERCikEPjOH2cHr3CT+5Cbe7j5vV9r3XXO+e7f3ue798o6n+yHs0+qCklSe36j3w1IkvrDAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1att+N7Apu+66a82ePbvfbUjSpHLLLbf8vKqmDzfuRR0As2fPZvny5f1uQ5ImlST/NJJxHgKSpEYZAJLUKANAkhr1oj4HIKkNTz/9NAMDA6xbt67frUwqO+ywAzNnzmS77bYb0/wGgKS+GxgYYOrUqcyePZsk/W5nUqgqHnroIQYGBthrr73GtAwPAUnqu3Xr1rHLLrv44T8KSdhll102a6/JAJD0ouCH/+ht7jYzACSpUZ4DkPSiM/uMa8Z1efee/fZxXd6WsHTpUrbffnve+MY3Tth7GgDSi8h4f/BtKZPhA/XFaP369Wy77dAfu0uXLmXKlCkGgCRNtGOPPZY1a9awbt06Tj/9dJ555hnuuecezjnnHAAuvvhibrnlFs4//3zOOussLr30UmbNmsWuu+7KgQceyIc+9KEhl3v44Yfzxje+kRtvvJGjjz6aV7/61XziE5/gqaeeYpddduHSSy/lySef5KKLLmKbbbbhq1/9Kueffz577703p556KqtXrwbgs5/9LPPmzRvXdTYAJAlYvHgxO++8M08++SQHHXQQS5YsYd68ec8FwNe//nU++tGPsnz5cq688kp+/OMfs379eg444AAOPPDATS77F7/4Bd/97ncBeOSRR1i2bBlJ+OIXv8g555zDpz/9aU499VSmTJnyXJC8+93v5gMf+ACHHnooq1ev5sgjj2TlypXjus4GgCQB5513Ht/4xjcAWLNmDffccw+vetWrWLZsGXPmzOHOO+9k3rx5nHvuuRxzzDG89KUvBeAd73jHsMs+4YQTnns+MDDACSecwP33389TTz210Wv4b7jhBlasWPHc61/+8pc89thjTJ06dXNW89cYAJKat3TpUm644QZuuukmdtxxRw4//HDWrVvHCSecwBVXXMHee+/NcccdRxKqatTL32mnnZ57/v73v58PfvCDHH300SxdupQzzzxzyHmeffZZbrrppueCZkvwMlBJzXv00UeZNm0aO+64Iz/96U9ZtmwZAO985zv55je/yeWXX/7c/+IPPfRQvvWtb7Fu3Toef/xxrrlmdCfuH330UWbMmAHAJZdc8lx96tSpPPbYY8+9PuKII/jc5z733Otbb711zOu3Me4BSHrRmeirjI466iguuugi9t13X17zmtdwyCGHADBt2jT22WcfVqxYwcEHHwzAQQcdxNFHH81+++3Hnnvuydy5c3nZy1424vc688wzOf7445kxYwaHHHII99xzD9A7lPSud72Lq666ivPPP5/zzjuPhQsXsu+++7J+/XoOO+wwLrroonFd74xld2aizJ07t/xBGLWk1ctAV65cyWtf+9pxXeaW9PjjjzNlyhSeeOIJDjvsMBYtWsQBBxzQl16G2nZJbqmqucPN6x6AJI3SggULWLFiBevWrWP+/Pl9+/DfXAaAJI3SZZdd9oLawoULufHGG3+tdvrpp3PKKadMVFujNmwAJJkFfBl4JfAssKiqzk1yJvD7wNpu6J9W1bXdPB8B3gc8A/xhVV3X1Y8CzgW2Ab5YVWeP7+pIUn9ccMEF/W5h1EayB7Ae+OOq+lGSqcAtSa7vpv1lVf3F4MFJ9gFOBF4H/GvghiSv7iZfAPw2MADcnOTqqlqBpOZVlXcEHaXNPYc77GWgVXV/Vf2oe/4YsBKYsYlZjgG+VlX/UlX3AKuAg7u/VVV1d1U9BXytGyupcTvssAMPPfTQZn+gtWTDD8LssMMOY17GqM4BJJkNvB74ATAPOC3JycByensJj9ALh2WDZhvgV4Gx5nn1NwzxHguABQB77LHHaNqTNEnNnDmTgYEB1q5dO/xgPWfDT0KO1YgDIMkU4Ergj6rql0kuBM4Cqnv8NPBeYKh9uGLovY0XxH1VLQIWQe8y0JH2J2ny2m677cb8s4YauxEFQJLt6H34X1pVfwNQVQ8Mmv4F4G+7lwPArEGzzwTu655vrC5JmmDDngNI76zMl4CVVfWZQfXdBw07Dri9e341cGKSlyTZC5gD/BC4GZiTZK8k29M7UXz1+KyGJGm0RrIHMA94D/CTJBtuRvGnwElJ9qd3GOde4A8AquqOJFcAK+hdQbSwqp4BSHIacB29y0AXV9Ud47gukqRRGDYAqur7DH1c/9pNzPNJ4JND1K/d1HySpInj3UAlqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmN2rbfDUjSljD7jGv63cKI3Hv22/v23u4BSFKjDABJapQBIEmNMgAkqVHDBkCSWUm+k2RlkjuSnN7Vd05yfZK7usdpXT1JzkuyKsltSQ4YtKz53fi7kszfcqslSRrOSPYA1gN/XFWvBQ4BFibZBzgDWFJVc4Al3WuAtwJzur8FwIXQCwzgY8AbgIOBj20IDUnSxBs2AKrq/qr6Uff8MWAlMAM4BrikG3YJcGz3/Bjgy9WzDHh5kt2BI4Hrq+rhqnoEuB44alzXRpI0YqM6B5BkNvB64AfAblV1P/RCAnhFN2wGsGbQbANdbWN1SVIfjDgAkkwBrgT+qKp+uamhQ9RqE/Xnv8+CJMuTLF+7du1I25MkjdKIAiDJdvQ+/C+tqr/pyg90h3boHh/s6gPArEGzzwTu20T911TVoqqaW1Vzp0+fPpp1kSSNwkiuAgrwJWBlVX1m0KSrgQ1X8swHrhpUP7m7GugQ4NHuENF1wBFJpnUnf4/oapKkPhjJvYDmAe8BfpLk1q72p8DZwBVJ3gesBo7vpl0LvA1YBTwBnAJQVQ8nOQu4uRv38ap6eFzWQpI0asMGQFV9n6GP3wO8ZYjxBSzcyLIWA4tH06Akacvwm8CS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY3att8NTLTZZ1zT7xZG5N6z397vFiRt5YbdA0iyOMmDSW4fVDszyf9Lcmv397ZB0z6SZFWSO5McOah+VFdbleSM8V8VSdJojOQQ0MXAUUPU/7Kq9u/+rgVIsg9wIvC6bp7PJ9kmyTbABcBbgX2Ak7qxkqQ+GfYQUFV9L8nsES7vGOBrVfUvwD1JVgEHd9NWVdXdAEm+1o1dMeqOJUnjYnNOAp+W5LbuENG0rjYDWDNozEBX21hdktQnYw2AC4HfBPYH7gc+3dUzxNjaRP0FkixIsjzJ8rVr146xPUnScMYUAFX1QFU9U1XPAl/gV4d5BoBZg4bOBO7bRH2oZS+qqrlVNXf69OljaU+SNAJjCoAkuw96eRyw4Qqhq4ETk7wkyV7AHOCHwM3AnCR7Jdme3oniq8fetiRpcw17EjjJ5cDhwK5JBoCPAYcn2Z/eYZx7gT8AqKo7klxB7+TuemBhVT3TLec04DpgG2BxVd0x7msjSRqxkVwFdNIQ5S9tYvwngU8OUb8WuHZU3UmSthhvBSFJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUc39JrDGl7+xLE1e7gFIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjRo2AJIsTvJgktsH1XZOcn2Su7rHaV09Sc5LsirJbUkOGDTP/G78XUnmb5nVkSSN1Ej2AC4Gjnpe7QxgSVXNAZZ0rwHeCszp/hYAF0IvMICPAW8ADgY+tiE0JEn9MWwAVNX3gIefVz4GuKR7fglw7KD6l6tnGfDyJLsDRwLXV9XDVfUIcD0vDBVJ0gQa6zmA3arqfoDu8RVdfQawZtC4ga62sfoLJFmQZHmS5WvXrh1je5Kk4Yz3SeAMUatN1F9YrFpUVXOrau706dPHtTlJ0q+MNQAe6A7t0D0+2NUHgFmDxs0E7ttEXZLUJ2MNgKuBDVfyzAeuGlQ/ubsa6BDg0e4Q0XXAEUmmdSd/j+hqkqQ+2Xa4AUkuBw4Hdk0yQO9qnrOBK5K8D1gNHN8NvxZ4G7AKeAI4BaCqHk5yFnBzN+7jVfX8E8uSpAk0bABU1UkbmfSWIcYWsHAjy1kMLB5Vd5KkLcZvAktSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNWqzAiDJvUl+kuTWJMu72s5Jrk9yV/c4rasnyXlJViW5LckB47ECkqSxGY89gDdV1f5VNbd7fQawpKrmAEu61wBvBeZ0fwuAC8fhvSVJY7QlDgEdA1zSPb8EOHZQ/cvVswx4eZLdt8D7S5JGYHMDoID/k+SWJAu62m5VdT9A9/iKrj4DWDNo3oGuJknqg203c/55VXVfklcA1yf56SbGZohavWBQL0gWAOyxxx6b2Z4kaWM2aw+gqu7rHh8EvgEcDDyw4dBO9/hgN3wAmDVo9pnAfUMsc1FVza2qudOnT9+c9iRJmzDmAEiyU5KpG54DRwC3A1cD87th84GruudXAyd3VwMdAjy64VCRJGnibc4hoN2AbyTZsJzLqurbSW4GrkjyPmA1cHw3/lrgbcAq4AnglM14b0nSZhpzAFTV3cB+Q9QfAt4yRL2AhWN9P0nS+PKbwJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlREx4ASY5KcmeSVUnOmOj3lyT1TGgAJNkGuAB4K7APcFKSfSayB0lSz0TvARwMrKqqu6vqKeBrwDET3IMkiYkPgBnAmkGvB7qaJGmCbTvB75chavVrA5IFwILu5eNJ7tziXW2+XYGfj+cC86nxXNqk4/YcX27P8TNZtuWeIxk00QEwAMwa9HomcN/gAVW1CFg0kU1triTLq2puv/vYWrg9x5fbc/xsbdtyog8B3QzMSbJXku2BE4GrJ7gHSRITvAdQVeuTnAZcB2wDLK6qOyayB0lSz0QfAqKqrgWunej33cIm1SGrScDtOb7cnuNnq9qWqarhR0mStjreCkKSGmUASFKjDABJapQBIEmNMgBGKcncJN9J8tUks5Jcn+TRJDcneX2/+5tskkxJ8vEkd3TbcW2SZUn+S797m6yS7JHk5d3z2UneleTf9ruvySbJb3X/FtckWZRk2qBpP+xnb+PFABi9zwPnANcAfwf8VVW9DDijm6bRuRS4GzgS+DPgPOA9wJuS/K9+NjYZdbdY/y6wLMnvAd+md/fdryf5YF+bm3wuBM4Efgv4B+D7SX6zm7Zdv5oaT14GOkpJflxVr++er66qPYaappFJ8vdVtd+g1zdX1UFJfgNYUVV797G9SSfJHcBcYEfgXuBVVbU2yU7AD6rKPYERSnJrVe0/6PWb6H0P4D3A56vqgL41N07cAxi9dUmOSHI8UEmOBUjy74Fn+tvapPTPSQ4FSHI08DBAVT3L0DcP1KY9U1VPAr8AngQeAqiqf+5rV5NTkrxsw4uq+g7wH4GvMMKbrb3YuQcwSkn2o3cI6FngA8B/BU6md1O7BVV1Yx/bm3S67fkF4NXA7cB7q+ofkkwHTqqq8/ra4CST5GJge2An4AlgPb3DQG8GplbV7/avu8klybuBu6tq2fPqewD/s6p+vz+djR8DYAyS/BvgOHp3M10PrAIuq6pH+9rYJDVoe84CngbuAi53e45ekm2B4+ndZv2vgTcAJwGrgQvcE9BgBsAoJflD4HeA7wFvA24FHqH3Afbfqmpp/7qbfNyeerHqDv98BDgWmN6VHwSuAs6uql/0q7fxYgCMUpKfAPtX1TNJdgSurarDu93CqzwJPDpuz/GVZArwYXrHqmcCTwH/CFxYVZf0s7fJJsl1wP8FLqmqn3W1VwLzgf9QVb/dz/7GgyeBx2bDXVRfAkwFqKrVbCWXhvWB23P8bOyy2jd7We2oza6qT2348Aeoqp9V1aeAPTYx36RhAIzeF4GbkywCbgI+B9CdtHy4n41NUm7P8TW7qi6uqoGq+gxwdFXdBZwCvLPPvU02/5Tkw0l221BIsluS/86v/7b5pOUhoDFI8jrgtcDtVfXTfvcz2bk9x0+SvwM+XFXfT/IO4LSqOrKbdmdVvaa/HU4e3Td/zwCOAXajd2L9AXq/Yvipqpr0/0ExAKStSJJ96e1VeVntOEiyN71zKcuq6vFB9aOq6tv962x8GABSI5KcUlX/u999TBbdFWoLgZXA/sDpVXVVN+1HW8M3gQ0AqRHPv3WJNq27Qu3fVdXjSWbT+17FV6rq3K3lti8T/pvAkracJLdtbBK949gauW02HPapqnuTHA78dZI92UpuU2IASFuX3ehdAvrI8+qhd/dajdzPkuxfVbcCdHsCvwMspneH0EnPAJC2Ln8LTNnwoTVYkqUT386kdjK9W708p6rWAycn+av+tDS+PAcgSY3yi2CS1CgDQJIaZQBI4yDJ48OPkl5cDABJapQBIA2S5OQktyX5+yRfSbJnkiVdbUl3m2qS7JXkpiQ3Jznrecv4k65+W5I/68+aSMMzAKROd1O6jwJv7n6o/nR6dyf9clXtS+9WyxvupXMuvXvsHwT8bNAyjgDmAAfTu33AgUkOm7i1kEbOy0ClTpL3A6+sqo8Oqv0c2L2qnk6yHXB/Ve2a5KFu7NNJ/hVwX1VNSfIXwLvo/Sg7wBTgz6vqSxO8OtKw/CKY9Cuhd8vfTamNPB+8jD+vqq3ii0LaunkISPqVJcDvJtkFIMnO9G6fcGI3/T8B3++e3/i8+gbXAe/tfpqRJDOSvGJLNy6NhYeApEGSzAf+BHgG+DFwJr17v+wKrAVOqarVSfYCLqO3F30l8D+qasOH/unA73WLfBz4z1X1jxO5HtJIGACS1CgPAUlSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIa9f8Bj8lwMQWFGwgAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"(\n",
" ffilled_dates_avg_rates_df\n",
" .plot(kind='bar')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {},
"outputs": [],
"source": [
"def mklbl(prefix, n): return [\"%s%s\" % (prefix, i) for i in range(n)]\n",
"miindex = pd.MultiIndex.from_product([mklbl('A', 4),\n",
" mklbl('B', 2),\n",
" mklbl('C', 4),\n",
" mklbl('D', 2)])\n",
" \n",
"micolumns = pd.MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),\n",
" ('b', 'foo'), ('b', 'bah')],\n",
" names=['lvl0', 'lvl1'])\n",
"\n",
"dfmi = pd.DataFrame(np.arange(len(miindex) * len(micolumns))\n",
" .reshape((len(miindex), len(micolumns))),\n",
" index=miindex,\n",
" columns=micolumns).sort_index().sort_index(axis=1)\n",
"#doesnt work\n",
"# dfmi.loc[idx[:, 'B0', :], :]\n",
"#works\n",
"#dfmi.loc[idx[:, 'B0', :], :]\n",
"\n",
"toy_df = pd.DataFrame(np.random.randn(6,1), index=pd.MultiIndex.from_product([(1,2,3), list('AB')]))\n",
"#doesnt work\n",
"# toy_df.loc[idx[:,['A']]]\n",
"#works\n",
"# toy_df.loc[idx[:,['A']], :]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment