Skip to content

Instantly share code, notes, and snippets.

@prithwi
Created January 4, 2016 20:27
Show Gist options
  • Save prithwi/339f87bf9c3c37bb3188 to your computer and use it in GitHub Desktop.
Save prithwi/339f87bf9c3c37bb3188 to your computer and use it in GitHub Desktop.
daily stock to weekly stock values
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import cStringIO as StringIO"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = '''\n",
"Date Open High Low Close Volume\n",
"2010-01-04 38.660000 39.299999 38.509998 39.279999 1293400 \n",
"2010-01-05 39.389999 39.520000 39.029999 39.430000 1261400 \n",
"2010-01-06 39.549999 40.700001 39.020000 40.250000 1879800 \n",
"2010-01-07 40.090000 40.349998 39.910000 40.090000 836400 \n",
"2010-01-08 40.139999 40.310001 39.720001 40.290001 654600 \n",
"2010-01-11 40.209999 40.520000 40.040001 40.290001 963600 \n",
"2010-01-12 40.160000 40.340000 39.279999 39.980000 1012800 \n",
"2010-01-13 39.930000 40.669998 39.709999 40.560001 1773400 \n",
"2010-01-14 40.490002 40.970001 40.189999 40.520000 1240600 \n",
"2010-01-15 40.570000 40.939999 40.099998 40.450001 1244200 \n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010-01-04</th>\n",
" <td>38.660000</td>\n",
" <td>39.299999</td>\n",
" <td>38.509998</td>\n",
" <td>39.279999</td>\n",
" <td>1293400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2010-01-05</th>\n",
" <td>39.389999</td>\n",
" <td>39.520000</td>\n",
" <td>39.029999</td>\n",
" <td>39.430000</td>\n",
" <td>1261400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2010-01-06</th>\n",
" <td>39.549999</td>\n",
" <td>40.700001</td>\n",
" <td>39.020000</td>\n",
" <td>40.250000</td>\n",
" <td>1879800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2010-01-07</th>\n",
" <td>40.090000</td>\n",
" <td>40.349998</td>\n",
" <td>39.910000</td>\n",
" <td>40.090000</td>\n",
" <td>836400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2010-01-08</th>\n",
" <td>40.139999</td>\n",
" <td>40.310001</td>\n",
" <td>39.720001</td>\n",
" <td>40.290001</td>\n",
" <td>654600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume\n",
"Date \n",
"2010-01-04 38.660000 39.299999 38.509998 39.279999 1293400\n",
"2010-01-05 39.389999 39.520000 39.029999 39.430000 1261400\n",
"2010-01-06 39.549999 40.700001 39.020000 40.250000 1879800\n",
"2010-01-07 40.090000 40.349998 39.910000 40.090000 836400\n",
"2010-01-08 40.139999 40.310001 39.720001 40.290001 654600"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(StringIO.StringIO(data), delimiter=r'\\s+')\n",
"\n",
"\n",
"df['Date'] = pd.to_datetime(df['Date'])\n",
"df.set_index('Date', inplace=True)\n",
"df.sort_index(inplace=True)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010-01-04</th>\n",
" <td>38.660000</td>\n",
" <td>40.700001</td>\n",
" <td>38.509998</td>\n",
" <td>40.290001</td>\n",
" <td>5925600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2010-01-11</th>\n",
" <td>40.209999</td>\n",
" <td>40.970001</td>\n",
" <td>39.279999</td>\n",
" <td>40.450001</td>\n",
" <td>6234600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume\n",
"Date \n",
"2010-01-04 38.660000 40.700001 38.509998 40.290001 5925600\n",
"2010-01-11 40.209999 40.970001 39.279999 40.450001 6234600"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def take_first(array_like):\n",
" return array_like[0]\n",
"\n",
"def take_last(array_like):\n",
" return array_like[-1]\n",
"\n",
"output = df.resample('W', # Weekly resample\n",
" how={'Open': take_first, \n",
" 'High': 'max',\n",
" 'Low': 'min',\n",
" 'Close': take_last,\n",
" 'Volume': 'sum'}, \n",
" loffset=pd.offsets.timedelta(days=-6)) # to put the labels to Monday\n",
"\n",
"output = output[['Open', 'High', 'Low', 'Close', 'Volume']]\n",
"output"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@kier0n
Copy link

kier0n commented Oct 25, 2017

Beautiful !! thanks.

@aristid73
Copy link

this is fantastic and has helped me move along, thanks.
It gives a warning:

Warning (from warnings module):line 79
loffset=pandas.offsets.timedelta(days=-6)) # to put the labels to Monday
FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...)..apply()

I am not skilled enough to rewrite it into the new syntax, anyone up to the task?
thanks in advance!

@underdpt
Copy link

I can't comment on original stackoverflow post but I have two questions for this:

  1. Why using custom handlers for taking the first and last value, when pandas has 'first' and 'last' aggregators
  2. What's the purpose of the last line of code
    output = output[['Open', 'High', 'Low', 'Close', 'Volume']]
    Isn't output the same before and after this line? This is only for ordering the columns?

@underdpt
Copy link

@aristid73, you can use agg() function, simply:

output = df.resample('W')                                 # Weekly resample
                    .agg({'Open': take_first, 
                          'High': 'max',
                          'Low': 'min',
                          'Close': take_last,
                          'Volume': 'sum'}) 
output.index = output.index + pd.DateOffset(days=-6  # to put the labels to Monday

@arbelkf
Copy link

arbelkf commented Mar 29, 2018

Thanks all of you:
the final code that worked for me after reading all of those comments and some other:

`import pandas as pd
import pandas_datareader as web
import datetime

ticker = "MMM"
start = datetime.datetime(2018, 2, 5)
end = datetime.datetime(2018, 2, 25)

f = web.DataReader(ticker, 'yahoo', start, end)
f.index = pd.to_datetime(f.index)

def take_first(array_like):
return array_like[0]

def take_last(array_like):
return array_like[-1]
output = f.resample('W').agg({'Open': take_first,
'High': 'max',
'Low': 'min',
'Close': take_last,
'Adj Close': take_last,
'Volume': 'sum'}) # to put the labels to Monday

output = output[['Open', 'High', 'Low', 'Close','Adj Close', 'Volume']]
output.index = output.index + pd.DateOffset(days=-6)
print (output)`

@gswaroop-g1
Copy link

Now (pandas 1.0.5) we don't even need to define the take_first and take_last functions. 'first' and 'last' would work fine.

output = df.resample('W')                                 # Weekly resample
                    .agg({'Open': 'first', 
                          'High': 'max',
                          'Low': 'min',
                          'Close': 'last',
                          'Volume': 'sum'}) 
output.index = output.index + pd.DateOffset(days=-6)  # to put the labels to Monday

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment