-
-
Save prithwi/339f87bf9c3c37bb3188 to your computer and use it in GitHub Desktop.
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import cStringIO as StringIO" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data = '''\n", | |
"Date Open High Low Close Volume\n", | |
"2010-01-04 38.660000 39.299999 38.509998 39.279999 1293400 \n", | |
"2010-01-05 39.389999 39.520000 39.029999 39.430000 1261400 \n", | |
"2010-01-06 39.549999 40.700001 39.020000 40.250000 1879800 \n", | |
"2010-01-07 40.090000 40.349998 39.910000 40.090000 836400 \n", | |
"2010-01-08 40.139999 40.310001 39.720001 40.290001 654600 \n", | |
"2010-01-11 40.209999 40.520000 40.040001 40.290001 963600 \n", | |
"2010-01-12 40.160000 40.340000 39.279999 39.980000 1012800 \n", | |
"2010-01-13 39.930000 40.669998 39.709999 40.560001 1773400 \n", | |
"2010-01-14 40.490002 40.970001 40.189999 40.520000 1240600 \n", | |
"2010-01-15 40.570000 40.939999 40.099998 40.450001 1244200 \n", | |
"'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2010-01-04</th>\n", | |
" <td>38.660000</td>\n", | |
" <td>39.299999</td>\n", | |
" <td>38.509998</td>\n", | |
" <td>39.279999</td>\n", | |
" <td>1293400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2010-01-05</th>\n", | |
" <td>39.389999</td>\n", | |
" <td>39.520000</td>\n", | |
" <td>39.029999</td>\n", | |
" <td>39.430000</td>\n", | |
" <td>1261400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2010-01-06</th>\n", | |
" <td>39.549999</td>\n", | |
" <td>40.700001</td>\n", | |
" <td>39.020000</td>\n", | |
" <td>40.250000</td>\n", | |
" <td>1879800</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2010-01-07</th>\n", | |
" <td>40.090000</td>\n", | |
" <td>40.349998</td>\n", | |
" <td>39.910000</td>\n", | |
" <td>40.090000</td>\n", | |
" <td>836400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2010-01-08</th>\n", | |
" <td>40.139999</td>\n", | |
" <td>40.310001</td>\n", | |
" <td>39.720001</td>\n", | |
" <td>40.290001</td>\n", | |
" <td>654600</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume\n", | |
"Date \n", | |
"2010-01-04 38.660000 39.299999 38.509998 39.279999 1293400\n", | |
"2010-01-05 39.389999 39.520000 39.029999 39.430000 1261400\n", | |
"2010-01-06 39.549999 40.700001 39.020000 40.250000 1879800\n", | |
"2010-01-07 40.090000 40.349998 39.910000 40.090000 836400\n", | |
"2010-01-08 40.139999 40.310001 39.720001 40.290001 654600" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.read_csv(StringIO.StringIO(data), delimiter=r'\\s+')\n", | |
"\n", | |
"\n", | |
"df['Date'] = pd.to_datetime(df['Date'])\n", | |
"df.set_index('Date', inplace=True)\n", | |
"df.sort_index(inplace=True)\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2010-01-04</th>\n", | |
" <td>38.660000</td>\n", | |
" <td>40.700001</td>\n", | |
" <td>38.509998</td>\n", | |
" <td>40.290001</td>\n", | |
" <td>5925600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2010-01-11</th>\n", | |
" <td>40.209999</td>\n", | |
" <td>40.970001</td>\n", | |
" <td>39.279999</td>\n", | |
" <td>40.450001</td>\n", | |
" <td>6234600</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume\n", | |
"Date \n", | |
"2010-01-04 38.660000 40.700001 38.509998 40.290001 5925600\n", | |
"2010-01-11 40.209999 40.970001 39.279999 40.450001 6234600" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def take_first(array_like):\n", | |
" return array_like[0]\n", | |
"\n", | |
"def take_last(array_like):\n", | |
" return array_like[-1]\n", | |
"\n", | |
"output = df.resample('W', # Weekly resample\n", | |
" how={'Open': take_first, \n", | |
" 'High': 'max',\n", | |
" 'Low': 'min',\n", | |
" 'Close': take_last,\n", | |
" 'Volume': 'sum'}, \n", | |
" loffset=pd.offsets.timedelta(days=-6)) # to put the labels to Monday\n", | |
"\n", | |
"output = output[['Open', 'High', 'Low', 'Close', 'Volume']]\n", | |
"output" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
this is fantastic and has helped me move along, thanks.
It gives a warning:
Warning (from warnings module):line 79
loffset=pandas.offsets.timedelta(days=-6)) # to put the labels to Monday
FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...)..apply()
I am not skilled enough to rewrite it into the new syntax, anyone up to the task?
thanks in advance!
I can't comment on original stackoverflow post but I have two questions for this:
- Why using custom handlers for taking the first and last value, when pandas has 'first' and 'last' aggregators
- What's the purpose of the last line of code
output = output[['Open', 'High', 'Low', 'Close', 'Volume']]
Isn'toutput
the same before and after this line? This is only for ordering the columns?
@aristid73, you can use agg()
function, simply:
output = df.resample('W') # Weekly resample
.agg({'Open': take_first,
'High': 'max',
'Low': 'min',
'Close': take_last,
'Volume': 'sum'})
output.index = output.index + pd.DateOffset(days=-6 # to put the labels to Monday
Thanks all of you:
the final code that worked for me after reading all of those comments and some other:
`import pandas as pd
import pandas_datareader as web
import datetime
ticker = "MMM"
start = datetime.datetime(2018, 2, 5)
end = datetime.datetime(2018, 2, 25)
f = web.DataReader(ticker, 'yahoo', start, end)
f.index = pd.to_datetime(f.index)
def take_first(array_like):
return array_like[0]
def take_last(array_like):
return array_like[-1]
output = f.resample('W').agg({'Open': take_first,
'High': 'max',
'Low': 'min',
'Close': take_last,
'Adj Close': take_last,
'Volume': 'sum'}) # to put the labels to Monday
output = output[['Open', 'High', 'Low', 'Close','Adj Close', 'Volume']]
output.index = output.index + pd.DateOffset(days=-6)
print (output)`
Now (pandas 1.0.5) we don't even need to define the take_first and take_last functions. 'first'
and 'last'
would work fine.
output = df.resample('W') # Weekly resample
.agg({'Open': 'first',
'High': 'max',
'Low': 'min',
'Close': 'last',
'Volume': 'sum'})
output.index = output.index + pd.DateOffset(days=-6) # to put the labels to Monday
Beautiful !! thanks.