Skip to content

Instantly share code, notes, and snippets.

@yingminc
Created October 16, 2017 08:24
Show Gist options
  • Save yingminc/4db5f12deeb8b38e868a54c8853447b2 to your computer and use it in GitHub Desktop.
Save yingminc/4db5f12deeb8b38e868a54c8853447b2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from bisect import bisect_left, bisect_right\n",
"from datetime import datetime, timedelta\n",
"\n",
"import pandas as pd\n",
"from holidays_jp import CountryHolidays"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# First and last calendar day (both inclusive) covered by the holiday table.\n",
"ds = datetime(year=2004, month=1, day=1)\n",
"de = datetime(year=2018, month=2, day=7)\n",
"\n",
"# Width of the window; delta.days + 1 is the number of rows generated next.\n",
"delta = de - ds"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# One datetime per day from ds through de, endpoints included.\n",
"day_count = delta.days + 1\n",
"dates = [ds + timedelta(days=offset) for offset in range(day_count)]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Start the feature table with a single column holding every date.\n",
"d_dates = pd.DataFrame(data=dates)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# The frame was built from a plain list, so its only column still needs a name.\n",
"column_labels = ['date']\n",
"d_dates.columns = column_labels"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2004-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2004-01-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2004-01-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2004-01-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2004-01-05</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date\n",
"0 2004-01-01\n",
"1 2004-01-02\n",
"2 2004-01-03\n",
"3 2004-01-04\n",
"4 2004-01-05"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_dates.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Day of week per row, Monday=0 ... Sunday=6 (same convention as datetime.weekday()).\n",
"d_dates['weekday'] = d_dates['date'].dt.weekday"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# National holidays reported by holidays_jp; the first field of each entry is used as the date.\n",
"holidays = [entry[0] for entry in CountryHolidays.between('JP', 2004, 2018)]\n",
"\n",
"# Saturdays and Sundays are treated as holidays too. The scan is padded by one week on\n",
"# each side of [ds, de] so that days at the edges of the table still see the weekend\n",
"# just outside it (otherwise e.g. 2018-02-04 skips Saturday 2018-02-10 and measures\n",
"# its distance to the next national holiday instead).\n",
"pad_days = 7\n",
"scan_start = ds - timedelta(days=pad_days)\n",
"scan_length = (de - ds).days + 2 * pad_days + 1\n",
"for offset in range(scan_length):\n",
"    day = scan_start + timedelta(days=offset)\n",
"    if day.weekday() > 4:  # 5 = Saturday, 6 = Sunday\n",
"        holidays.append(day)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Put the combined national-holiday + weekend list in chronological order.\n",
"holidays.sort()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 1 when the date is a national holiday or a weekend day, else 0.\n",
"# Membership is tested against a set: looking each date up in the list itself\n",
"# rescanned the whole list for every row.\n",
"holiday_lookup = set(holidays)\n",
"d_dates['holiday'] = [1 if day in holiday_lookup else 0 for day in dates]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>weekday</th>\n",
" <th>holiday</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2004-01-01</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2004-01-02</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2004-01-03</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2004-01-04</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2004-01-05</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date weekday holiday\n",
"0 2004-01-01 3 1\n",
"1 2004-01-02 4 0\n",
"2 2004-01-03 5 1\n",
"3 2004-01-04 6 1\n",
"4 2004-01-05 0 0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_dates.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Signed gap to the most recent holiday strictly before each date, stored as\n",
"# (previous_holiday - day).days + 1: 0 for the day right after a holiday and\n",
"# increasingly negative further out (the sign is dropped when the column is created).\n",
"# The per-date value is kept in `gap` so the notebook-level `delta` from the\n",
"# date-range cell is no longer overwritten.\n",
"ordered_holidays = sorted(holidays)\n",
"holiday_p = []\n",
"for day in dates:\n",
"    if day == dates[0]:\n",
"        # The first date is not looked up; -3 is the hand-entered value kept as is\n",
"        # (presumably Sunday 2003-12-28 as the previous day off -- TODO confirm).\n",
"        holiday_p.append(-3)\n",
"        continue\n",
"    # bisect_left gives the position of the first holiday >= day, so the entry\n",
"    # just before it is the latest holiday strictly earlier than day.\n",
"    prev_pos = bisect_left(ordered_holidays, day) - 1\n",
"    if prev_pos < 0:\n",
"        raise ValueError('no holiday before {}'.format(day))\n",
"    gap = ordered_holidays[prev_pos] - day\n",
"    holiday_p.append(gap.days + 1)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Store the gap as a non-negative day count. The list is built explicitly because\n",
"# map() returns a lazy iterator on Python 3, which is not a usable column value.\n",
"d_dates['holiday_previous'] = [abs(gap) for gap in holiday_p]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>weekday</th>\n",
" <th>holiday</th>\n",
" <th>holiday_previous</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2004-01-01</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2004-01-02</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2004-01-03</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2004-01-04</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2004-01-05</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2004-01-06</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2004-01-07</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2004-01-08</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2004-01-09</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2004-01-10</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date weekday holiday holiday_previous\n",
"0 2004-01-01 3 1 3\n",
"1 2004-01-02 4 0 0\n",
"2 2004-01-03 5 1 1\n",
"3 2004-01-04 6 1 0\n",
"4 2004-01-05 0 0 0\n",
"5 2004-01-06 1 0 1\n",
"6 2004-01-07 2 0 2\n",
"7 2004-01-08 3 0 3\n",
"8 2004-01-09 4 0 4\n",
"9 2004-01-10 5 1 5"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_dates.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Number of days strictly between each date and the first holiday after it,\n",
"# i.e. (next_holiday - day).days - 1, so 0 means the following day is a holiday.\n",
"# The first date needs no special case: the general rule already yields the 1 that\n",
"# used to be hard-coded for it (the next holiday after 2004-01-01 is Saturday 2004-01-03).\n",
"# The per-date value is kept in `gap` so the notebook-level `delta` is not overwritten.\n",
"ordered_holidays = sorted(holidays)\n",
"holiday_n = []\n",
"for day in dates:\n",
"    # bisect_right gives the position of the first holiday strictly later than day.\n",
"    next_pos = bisect_right(ordered_holidays, day)\n",
"    if next_pos == len(ordered_holidays):\n",
"        raise ValueError('no holiday after {}'.format(day))\n",
"    gap = ordered_holidays[next_pos] - day\n",
"    holiday_n.append(gap.days - 1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Attach the gap-to-following-holiday values as a new column.\n",
"d_dates = d_dates.assign(holiday_next=holiday_n)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5152"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(d_dates)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# holiday_length = size of the block of consecutive holiday rows a day belongs to,\n",
"# and 0 for non-holiday rows. Computed in one vectorised pass instead of walking\n",
"# outwards from every row with per-cell .loc lookups.\n",
"holiday_flag = d_dates['holiday']\n",
"\n",
"# Give every maximal run of equal flag values its own id: the counter advances\n",
"# whenever the flag differs from the previous row.\n",
"run_id = holiday_flag.ne(holiday_flag.shift()).cumsum()\n",
"\n",
"# The flag is 0/1, so summing it inside a run yields the run length for holiday\n",
"# runs and 0 for working-day runs. Cast to float to keep the dtype the row-wise\n",
"# loop produced (values such as 0.0 / 2.0).\n",
"d_dates['holiday_length'] = holiday_flag.groupby(run_id).transform('sum').astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>weekday</th>\n",
" <th>holiday</th>\n",
" <th>holiday_previous</th>\n",
" <th>holiday_next</th>\n",
" <th>holiday_length</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5122</th>\n",
" <td>2018-01-09</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5123</th>\n",
" <td>2018-01-10</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5124</th>\n",
" <td>2018-01-11</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5125</th>\n",
" <td>2018-01-12</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5126</th>\n",
" <td>2018-01-13</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5127</th>\n",
" <td>2018-01-14</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5128</th>\n",
" <td>2018-01-15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5129</th>\n",
" <td>2018-01-16</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5130</th>\n",
" <td>2018-01-17</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5131</th>\n",
" <td>2018-01-18</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5132</th>\n",
" <td>2018-01-19</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5133</th>\n",
" <td>2018-01-20</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5134</th>\n",
" <td>2018-01-21</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5135</th>\n",
" <td>2018-01-22</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5136</th>\n",
" <td>2018-01-23</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5137</th>\n",
" <td>2018-01-24</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5138</th>\n",
" <td>2018-01-25</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5139</th>\n",
" <td>2018-01-26</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5140</th>\n",
" <td>2018-01-27</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5141</th>\n",
" <td>2018-01-28</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5142</th>\n",
" <td>2018-01-29</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5143</th>\n",
" <td>2018-01-30</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5144</th>\n",
" <td>2018-01-31</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5145</th>\n",
" <td>2018-02-01</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5146</th>\n",
" <td>2018-02-02</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5147</th>\n",
" <td>2018-02-03</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5148</th>\n",
" <td>2018-02-04</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5149</th>\n",
" <td>2018-02-05</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5150</th>\n",
" <td>2018-02-06</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5151</th>\n",
" <td>2018-02-07</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date weekday holiday holiday_previous holiday_next \\\n",
"5122 2018-01-09 1 0 0 3 \n",
"5123 2018-01-10 2 0 1 2 \n",
"5124 2018-01-11 3 0 2 1 \n",
"5125 2018-01-12 4 0 3 0 \n",
"5126 2018-01-13 5 1 4 0 \n",
"5127 2018-01-14 6 1 0 5 \n",
"5128 2018-01-15 0 0 0 4 \n",
"5129 2018-01-16 1 0 1 3 \n",
"5130 2018-01-17 2 0 2 2 \n",
"5131 2018-01-18 3 0 3 1 \n",
"5132 2018-01-19 4 0 4 0 \n",
"5133 2018-01-20 5 1 5 0 \n",
"5134 2018-01-21 6 1 0 5 \n",
"5135 2018-01-22 0 0 0 4 \n",
"5136 2018-01-23 1 0 1 3 \n",
"5137 2018-01-24 2 0 2 2 \n",
"5138 2018-01-25 3 0 3 1 \n",
"5139 2018-01-26 4 0 4 0 \n",
"5140 2018-01-27 5 1 5 0 \n",
"5141 2018-01-28 6 1 0 5 \n",
"5142 2018-01-29 0 0 0 4 \n",
"5143 2018-01-30 1 0 1 3 \n",
"5144 2018-01-31 2 0 2 2 \n",
"5145 2018-02-01 3 0 3 1 \n",
"5146 2018-02-02 4 0 4 0 \n",
"5147 2018-02-03 5 1 5 0 \n",
"5148 2018-02-04 6 1 0 6 \n",
"5149 2018-02-05 0 0 0 5 \n",
"5150 2018-02-06 1 0 1 4 \n",
"5151 2018-02-07 2 0 2 3 \n",
"\n",
" holiday_length \n",
"5122 0.0 \n",
"5123 0.0 \n",
"5124 0.0 \n",
"5125 0.0 \n",
"5126 2.0 \n",
"5127 2.0 \n",
"5128 0.0 \n",
"5129 0.0 \n",
"5130 0.0 \n",
"5131 0.0 \n",
"5132 0.0 \n",
"5133 2.0 \n",
"5134 2.0 \n",
"5135 0.0 \n",
"5136 0.0 \n",
"5137 0.0 \n",
"5138 0.0 \n",
"5139 0.0 \n",
"5140 2.0 \n",
"5141 2.0 \n",
"5142 0.0 \n",
"5143 0.0 \n",
"5144 0.0 \n",
"5145 0.0 \n",
"5146 0.0 \n",
"5147 2.0 \n",
"5148 2.0 \n",
"5149 0.0 \n",
"5150 0.0 \n",
"5151 0.0 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d_dates.tail(30)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Holiday entries from holidays_jp for 2004-2018, loaded into their own frame.\n",
"jp_holiday_entries = CountryHolidays.between('JP', 2004, 2018)\n",
"holy = pd.DataFrame(jp_holiday_entries)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Name the two columns so the frame can be joined on 'date' later.\n",
"holy_columns = ['date', 'holiday_name']\n",
"holy.columns = holy_columns"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Assign integer codes 1..N to the distinct holiday names. The names are sorted\n",
"# first so the mapping is reproducible: iterating a bare set yields an arbitrary\n",
"# order, which made the codes written to the CSV unstable across environments.\n",
"# NOTE: codes produced here will generally differ from previously exported files.\n",
"holiday_names = sorted(set(holy['holiday_name']))\n",
"holyct = {name: code for code, name in enumerate(holiday_names, 1)}"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Code 0 is reserved for the placeholder name given to days without a national holiday.\n",
"holyct.update({'祝祭日以外': 0})"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Left join keeps every calendar day; days with no match in holy get NaN in 'holiday_name'.\n",
"d_dates = d_dates.merge(holy, how='left', on='date')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Days without a match in the left join have NaN in 'holiday_name'; give them the\n",
"# placeholder label. Only that column is filled, so a NaN that ever shows up in a\n",
"# numeric column cannot be silently replaced by text.\n",
"d_dates['holiday_name'] = d_dates['holiday_name'].fillna('祝祭日以外')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Translate each holiday name into its integer code from holyct.\n",
"holiday_codes = d_dates['holiday_name'].replace(holyct)\n",
"d_dates['holiday_ind'] = holiday_codes"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# NOTE: hd is another name for d_dates (no copy), so the columns added here\n",
"# appear on d_dates as well.\n",
"hd = d_dates\n",
"\n",
"# For each offset i in 1..3 record, for the row i days ahead (suffix i) and the row\n",
"# i days back (suffix -i), whether it is a holiday ('holiday_<suffix>') and which\n",
"# holiday code it carries ('holiday_name_<suffix>', 0 when it is not a holiday).\n",
"# A neighbour that falls outside the table yields 0 for that direction only.\n",
"# Previously a missing backward neighbour also zeroed the forward columns\n",
"# (2004-01-01 showed holiday_2 = 0 although 2004-01-03 is a Saturday), and rows\n",
"# near the end never had their forward columns set.\n",
"# Assumes consecutive rows are consecutive days on a default 0..n-1 index.\n",
"for i in range(1, 4):\n",
"    # shift(-i) pulls values from i rows ahead, shift(i) from i rows back.\n",
"    for suffix, periods in ((str(i), -i), ('-' + str(i), i)):\n",
"        neighbour_is_holiday = hd['holiday'].shift(periods).fillna(0) == 1\n",
"        neighbour_code = hd['holiday_ind'].shift(periods).fillna(0)\n",
"        # Floats keep the dtype of the columns the row-wise loop used to create.\n",
"        hd['holiday_' + suffix] = neighbour_is_holiday.astype(float)\n",
"        hd['holiday_name_' + suffix] = neighbour_code.where(neighbour_is_holiday, 0).astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>weekday</th>\n",
" <th>holiday</th>\n",
" <th>holiday_previous</th>\n",
" <th>holiday_next</th>\n",
" <th>holiday_length</th>\n",
" <th>holiday_name</th>\n",
" <th>holiday_ind</th>\n",
" <th>holiday_1</th>\n",
" <th>holiday_name_1</th>\n",
" <th>holiday_-1</th>\n",
" <th>holiday_name_-1</th>\n",
" <th>holiday_2</th>\n",
" <th>holiday_name_2</th>\n",
" <th>holiday_-2</th>\n",
" <th>holiday_name_-2</th>\n",
" <th>holiday_3</th>\n",
" <th>holiday_name_3</th>\n",
" <th>holiday_-3</th>\n",
" <th>holiday_name_-3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2004-01-01</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>元日</td>\n",
" <td>9</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2004-01-02</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>祝祭日以外</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2004-01-03</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>祝祭日以外</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2004-01-04</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>祝祭日以外</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2004-01-05</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>祝祭日以外</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date weekday holiday holiday_previous holiday_next \\\n",
"0 2004-01-01 3 1 3 1 \n",
"1 2004-01-02 4 0 0 0 \n",
"2 2004-01-03 5 1 1 0 \n",
"3 2004-01-04 6 1 0 5 \n",
"4 2004-01-05 0 0 0 4 \n",
"\n",
" holiday_length holiday_name holiday_ind holiday_1 holiday_name_1 \\\n",
"0 1.0 元日 9 0.0 0.0 \n",
"1 0.0 祝祭日以外 0 1.0 0.0 \n",
"2 2.0 祝祭日以外 0 1.0 0.0 \n",
"3 2.0 祝祭日以外 0 0.0 0.0 \n",
"4 0.0 祝祭日以外 0 0.0 0.0 \n",
"\n",
" holiday_-1 holiday_name_-1 holiday_2 holiday_name_2 holiday_-2 \\\n",
"0 0.0 0.0 0.0 0.0 0.0 \n",
"1 1.0 9.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 1.0 \n",
"3 1.0 0.0 0.0 0.0 0.0 \n",
"4 1.0 0.0 0.0 0.0 1.0 \n",
"\n",
" holiday_name_-2 holiday_3 holiday_name_3 holiday_-3 holiday_name_-3 \n",
"0 0.0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 \n",
"2 9.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 1.0 9.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hd.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write the finished table, with a header row and without the index column.\n",
"hd.to_csv('holiday_info.csv', index=False, header=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment