Created
October 16, 2017 08:24
-
-
Save yingminc/4db5f12deeb8b38e868a54c8853447b2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"from datetime import datetime, timedelta\n", | |
"from holidays_jp import CountryHolidays" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# First and last calendar day (both inclusive) of the generated table.\n", | |
"ds = datetime(year=2004, month=1, day=1)\n", | |
"de = datetime(year=2018, month=2, day=7)\n", | |
"\n", | |
"# Span between the two ends as a timedelta.\n", | |
"delta = de - ds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One datetime per calendar day from ds through de, both ends included.\n", | |
"dates = [ds + timedelta(days=offset) for offset in range(delta.days + 1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"d_dates = pd.DataFrame(dates)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"d_dates.columns = ['date']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2004-01-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2004-01-02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2004-01-03</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2004-01-04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2004-01-05</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date\n", | |
"0 2004-01-01\n", | |
"1 2004-01-02\n", | |
"2 2004-01-03\n", | |
"3 2004-01-04\n", | |
"4 2004-01-05" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"d_dates.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# datetime.weekday(): Monday is 0 ... Sunday is 6.\n", | |
"d_dates['weekday'] = [day.weekday() for day in dates]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# First element of every entry returned by CountryHolidays.between (the date).\n", | |
"holidays = [entry[0] for entry in CountryHolidays.between('JP',2004,2018)]\n", | |
"# Every Saturday/Sunday (weekday() > 4) inside `dates` counts as a holiday too.\n", | |
"holidays.extend(day for day in dates if day.weekday() > 4)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"holidays = sorted(holidays)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Set membership is O(1); scanning the `holidays` list once per date is not.\n", | |
"holiday_lookup = set(holidays)\n", | |
"# 1 when the day appears in `holidays` (national holiday or weekend), else 0.\n", | |
"d_dates['holiday'] = [1 if day in holiday_lookup else 0 for day in dates]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>weekday</th>\n", | |
" <th>holiday</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2004-01-01</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2004-01-02</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2004-01-03</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2004-01-04</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2004-01-05</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date weekday holiday\n", | |
"0 2004-01-01 3 1\n", | |
"1 2004-01-02 4 0\n", | |
"2 2004-01-03 5 1\n", | |
"3 2004-01-04 6 1\n", | |
"4 2004-01-05 0 0" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"d_dates.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# For every day: minus the number of days strictly between the most recent\n", | |
"# earlier holiday/weekend and that day (0 when the previous day is a holiday).\n", | |
"holiday_lookup = set(holidays)\n", | |
"one_day = timedelta(days=1)\n", | |
"holiday_p = []\n", | |
"for day in dates:\n", | |
"    previous = day - one_day\n", | |
"    # Weekends are detected via weekday() because `holidays` has no weekend\n", | |
"    # entries before dates[0]; this replaces the hard-coded -3 for the first row.\n", | |
"    # NOTE(review): national holidays earlier than those in `holidays` are unknown here.\n", | |
"    while previous.weekday() <= 4 and previous not in holiday_lookup:\n", | |
"        previous -= one_day\n", | |
"    holiday_p.append((previous - day).days + 1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Store the distances as non-negative values. The list is built explicitly\n", | |
"# because map() only returns a list on Python 2.\n", | |
"d_dates['holiday_previous'] = [abs(offset) for offset in holiday_p]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>weekday</th>\n", | |
" <th>holiday</th>\n", | |
" <th>holiday_previous</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2004-01-01</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2004-01-02</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2004-01-03</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2004-01-04</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2004-01-05</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2004-01-06</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2004-01-07</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2004-01-08</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>2004-01-09</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>2004-01-10</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date weekday holiday holiday_previous\n", | |
"0 2004-01-01 3 1 3\n", | |
"1 2004-01-02 4 0 0\n", | |
"2 2004-01-03 5 1 1\n", | |
"3 2004-01-04 6 1 0\n", | |
"4 2004-01-05 0 0 0\n", | |
"5 2004-01-06 1 0 1\n", | |
"6 2004-01-07 2 0 2\n", | |
"7 2004-01-08 3 0 3\n", | |
"8 2004-01-09 4 0 4\n", | |
"9 2004-01-10 5 1 5" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"d_dates.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# For every day: number of days strictly between that day and the next\n", | |
"# holiday/weekend (0 when the following day is a holiday).\n", | |
"holiday_lookup = set(holidays)\n", | |
"one_day = timedelta(days=1)\n", | |
"holiday_n = []\n", | |
"for day in dates:\n", | |
"    upcoming = day + one_day\n", | |
"    # Weekends are detected via weekday() because `holidays` only contains the\n", | |
"    # weekends inside `dates`; without this the last rows skip the next Saturday.\n", | |
"    while upcoming.weekday() <= 4 and upcoming not in holiday_lookup:\n", | |
"        upcoming += one_day\n", | |
"    holiday_n.append((upcoming - day).days - 1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"d_dates['holiday_next'] = holiday_n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5152" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(d_dates)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Label each run of consecutive equal `holiday` values, then give every row the\n", | |
"# number of holidays in its run: the run length on holidays, 0 elsewhere.\n", | |
"run_id = (d_dates['holiday'] != d_dates['holiday'].shift()).cumsum()\n", | |
"# Cast to float to keep the dtype the row-by-row version produced (0.0, 2.0, ...).\n", | |
"d_dates['holiday_length'] = d_dates['holiday'].groupby(run_id).transform('sum').astype(float)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>weekday</th>\n", | |
" <th>holiday</th>\n", | |
" <th>holiday_previous</th>\n", | |
" <th>holiday_next</th>\n", | |
" <th>holiday_length</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>5122</th>\n", | |
" <td>2018-01-09</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5123</th>\n", | |
" <td>2018-01-10</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5124</th>\n", | |
" <td>2018-01-11</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5125</th>\n", | |
" <td>2018-01-12</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5126</th>\n", | |
" <td>2018-01-13</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5127</th>\n", | |
" <td>2018-01-14</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5128</th>\n", | |
" <td>2018-01-15</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5129</th>\n", | |
" <td>2018-01-16</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5130</th>\n", | |
" <td>2018-01-17</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5131</th>\n", | |
" <td>2018-01-18</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5132</th>\n", | |
" <td>2018-01-19</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5133</th>\n", | |
" <td>2018-01-20</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5134</th>\n", | |
" <td>2018-01-21</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5135</th>\n", | |
" <td>2018-01-22</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5136</th>\n", | |
" <td>2018-01-23</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5137</th>\n", | |
" <td>2018-01-24</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5138</th>\n", | |
" <td>2018-01-25</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5139</th>\n", | |
" <td>2018-01-26</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5140</th>\n", | |
" <td>2018-01-27</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5141</th>\n", | |
" <td>2018-01-28</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5142</th>\n", | |
" <td>2018-01-29</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5143</th>\n", | |
" <td>2018-01-30</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5144</th>\n", | |
" <td>2018-01-31</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5145</th>\n", | |
" <td>2018-02-01</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5146</th>\n", | |
" <td>2018-02-02</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5147</th>\n", | |
" <td>2018-02-03</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5148</th>\n", | |
" <td>2018-02-04</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5149</th>\n", | |
" <td>2018-02-05</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5150</th>\n", | |
" <td>2018-02-06</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5151</th>\n", | |
" <td>2018-02-07</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date weekday holiday holiday_previous holiday_next \\\n", | |
"5122 2018-01-09 1 0 0 3 \n", | |
"5123 2018-01-10 2 0 1 2 \n", | |
"5124 2018-01-11 3 0 2 1 \n", | |
"5125 2018-01-12 4 0 3 0 \n", | |
"5126 2018-01-13 5 1 4 0 \n", | |
"5127 2018-01-14 6 1 0 5 \n", | |
"5128 2018-01-15 0 0 0 4 \n", | |
"5129 2018-01-16 1 0 1 3 \n", | |
"5130 2018-01-17 2 0 2 2 \n", | |
"5131 2018-01-18 3 0 3 1 \n", | |
"5132 2018-01-19 4 0 4 0 \n", | |
"5133 2018-01-20 5 1 5 0 \n", | |
"5134 2018-01-21 6 1 0 5 \n", | |
"5135 2018-01-22 0 0 0 4 \n", | |
"5136 2018-01-23 1 0 1 3 \n", | |
"5137 2018-01-24 2 0 2 2 \n", | |
"5138 2018-01-25 3 0 3 1 \n", | |
"5139 2018-01-26 4 0 4 0 \n", | |
"5140 2018-01-27 5 1 5 0 \n", | |
"5141 2018-01-28 6 1 0 5 \n", | |
"5142 2018-01-29 0 0 0 4 \n", | |
"5143 2018-01-30 1 0 1 3 \n", | |
"5144 2018-01-31 2 0 2 2 \n", | |
"5145 2018-02-01 3 0 3 1 \n", | |
"5146 2018-02-02 4 0 4 0 \n", | |
"5147 2018-02-03 5 1 5 0 \n", | |
"5148 2018-02-04 6 1 0 6 \n", | |
"5149 2018-02-05 0 0 0 5 \n", | |
"5150 2018-02-06 1 0 1 4 \n", | |
"5151 2018-02-07 2 0 2 3 \n", | |
"\n", | |
" holiday_length \n", | |
"5122 0.0 \n", | |
"5123 0.0 \n", | |
"5124 0.0 \n", | |
"5125 0.0 \n", | |
"5126 2.0 \n", | |
"5127 2.0 \n", | |
"5128 0.0 \n", | |
"5129 0.0 \n", | |
"5130 0.0 \n", | |
"5131 0.0 \n", | |
"5132 0.0 \n", | |
"5133 2.0 \n", | |
"5134 2.0 \n", | |
"5135 0.0 \n", | |
"5136 0.0 \n", | |
"5137 0.0 \n", | |
"5138 0.0 \n", | |
"5139 0.0 \n", | |
"5140 2.0 \n", | |
"5141 2.0 \n", | |
"5142 0.0 \n", | |
"5143 0.0 \n", | |
"5144 0.0 \n", | |
"5145 0.0 \n", | |
"5146 0.0 \n", | |
"5147 2.0 \n", | |
"5148 2.0 \n", | |
"5149 0.0 \n", | |
"5150 0.0 \n", | |
"5151 0.0 " | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"d_dates.tail(30)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"holy=pd.DataFrame(CountryHolidays.between('JP',2004,2018))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"holy.columns = ['date', 'holiday_name']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Number the distinct holiday names from 1. Sorting first keeps the codes stable\n", | |
"# across runs, since set iteration order is arbitrary.\n", | |
"holyct = {name: code for code, name in enumerate(sorted(set(holy['holiday_name'])), 1)}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"holyct['祝祭日以外']=0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"d_dates=pd.merge(d_dates,holy,on='date',how='left')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# The left merge leaves `holiday_name` missing on days without a named holiday.\n", | |
"# Fill only that column so a NaN in a numeric column is never replaced by a string.\n", | |
"d_dates = d_dates.fillna({'holiday_name': '祝祭日以外'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"d_dates['holiday_ind']=d_dates['holiday_name'].replace(holyct)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# For each row, describe the neighbours 1-3 rows ahead (holiday_N / holiday_name_N)\n", | |
"# and 1-3 rows behind (holiday_-N / holiday_name_-N):\n", | |
"#   holiday_*      -> 1.0 when the neighbour's `holiday` flag is 1, else 0.0\n", | |
"#   holiday_name_* -> the neighbour's `holiday_ind` code when it is a holiday, else 0.0\n", | |
"# Neighbours that fall outside the frame count as 0 in both columns, and a missing\n", | |
"# backward neighbour no longer resets the forward-looking columns.\n", | |
"hd = d_dates\n", | |
"for step in range(1, 4):\n", | |
"    # shift(-step) aligns row ind+step onto row ind; shift(step) aligns row ind-step.\n", | |
"    for suffix, periods in (('{}'.format(step), -step), ('-{}'.format(step), step)):\n", | |
"        neighbour_flag = hd['holiday'].shift(periods)\n", | |
"        neighbour_code = hd['holiday_ind'].shift(periods)\n", | |
"        is_holiday = neighbour_flag == 1  # NaN (no neighbour) compares False\n", | |
"        hd['holiday_' + suffix] = is_holiday.astype(float)\n", | |
"        hd['holiday_name_' + suffix] = neighbour_code.where(is_holiday, 0).astype(float)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>weekday</th>\n", | |
" <th>holiday</th>\n", | |
" <th>holiday_previous</th>\n", | |
" <th>holiday_next</th>\n", | |
" <th>holiday_length</th>\n", | |
" <th>holiday_name</th>\n", | |
" <th>holiday_ind</th>\n", | |
" <th>holiday_1</th>\n", | |
" <th>holiday_name_1</th>\n", | |
" <th>holiday_-1</th>\n", | |
" <th>holiday_name_-1</th>\n", | |
" <th>holiday_2</th>\n", | |
" <th>holiday_name_2</th>\n", | |
" <th>holiday_-2</th>\n", | |
" <th>holiday_name_-2</th>\n", | |
" <th>holiday_3</th>\n", | |
" <th>holiday_name_3</th>\n", | |
" <th>holiday_-3</th>\n", | |
" <th>holiday_name_-3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2004-01-01</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1.0</td>\n", | |
" <td>元日</td>\n", | |
" <td>9</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2004-01-02</td>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>祝祭日以外</td>\n", | |
" <td>0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2004-01-03</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>祝祭日以外</td>\n", | |
" <td>0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>9.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2004-01-04</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>5</td>\n", | |
" <td>2.0</td>\n", | |
" <td>祝祭日以外</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>9.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2004-01-05</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>祝祭日以外</td>\n", | |
" <td>0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date weekday holiday holiday_previous holiday_next \\\n", | |
"0 2004-01-01 3 1 3 1 \n", | |
"1 2004-01-02 4 0 0 0 \n", | |
"2 2004-01-03 5 1 1 0 \n", | |
"3 2004-01-04 6 1 0 5 \n", | |
"4 2004-01-05 0 0 0 4 \n", | |
"\n", | |
" holiday_length holiday_name holiday_ind holiday_1 holiday_name_1 \\\n", | |
"0 1.0 元日 9 0.0 0.0 \n", | |
"1 0.0 祝祭日以外 0 1.0 0.0 \n", | |
"2 2.0 祝祭日以外 0 1.0 0.0 \n", | |
"3 2.0 祝祭日以外 0 0.0 0.0 \n", | |
"4 0.0 祝祭日以外 0 0.0 0.0 \n", | |
"\n", | |
" holiday_-1 holiday_name_-1 holiday_2 holiday_name_2 holiday_-2 \\\n", | |
"0 0.0 0.0 0.0 0.0 0.0 \n", | |
"1 1.0 9.0 0.0 0.0 0.0 \n", | |
"2 0.0 0.0 0.0 0.0 1.0 \n", | |
"3 1.0 0.0 0.0 0.0 0.0 \n", | |
"4 1.0 0.0 0.0 0.0 1.0 \n", | |
"\n", | |
" holiday_name_-2 holiday_3 holiday_name_3 holiday_-3 holiday_name_-3 \n", | |
"0 0.0 0.0 0.0 0.0 0.0 \n", | |
"1 0.0 0.0 0.0 0.0 0.0 \n", | |
"2 9.0 0.0 0.0 0.0 0.0 \n", | |
"3 0.0 0.0 0.0 1.0 9.0 \n", | |
"4 0.0 0.0 0.0 0.0 0.0 " | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"hd.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"hd.to_csv('holiday_info.csv',header= True,index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment