Skip to content

Instantly share code, notes, and snippets.

@clholgat
Created March 12, 2011 03:39
Show Gist options
  • Save clholgat/867008 to your computer and use it in GitHub Desktop.
Save clholgat/867008 to your computer and use it in GitHub Desktop.
"""
This assignment will walk you through gathering data for a
pair of stocks and calculating the correlation between those stocks
using the daily returns.
Yahoo makes their historical stock data available in CSV format.
Below is the url for IBM stock data from Jan 1, 2010 to Dec 31, 2010.
http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2010&d=11&e=31&f=2010&g=d&ignore=.csv
The data is returned in CSV (comma separated format) with the following columns:
Date, Open, High, Low, Close, Volume, Adj Close
The daily return of
a stock is defined by:
(C_n - C_n-1) / C_n-1
where C_n denotes the nth adjusted close and C_n-1 denotes the (n-1)th
adjusted close.
The function signatures for various steps of this process have been
given below. The names and parameters of these functions should not
be changed. You are free to write additional functions or classes as
needed. You are welcome to use any modules in the Python
standard library as well as NumPy, SciPy, and Matplotlib external
libraries. All code must run on Python 2.6.5.
"""
import csv
import os
import urllib2
from datetime import datetime

import numpy
import scipy
from matplotlib import pylab
from scipy.stats import linregress
def build_request_url(symbol, start_date, end_date):
    """
    Build the Yahoo historical-quotes CSV download url for one stock.

    symbol is the ticker as a string; start_date and end_date are
    Python dates bounding the requested range.  The month fields of
    the url (a and d) are zero-based, so each month number is sent
    minus one (January -> 0, December -> 11).
    """
    template = u'http://ichart.finance.yahoo.com/table.csv?s=%(symbol)s&a=%(a)i&b=%(b)i&c=%(c)i&d=%(d)i&e=%(e)i&f=%(f)i&g=d&ignore=.csv'
    fields = dict(symbol=symbol)
    # a/b/c describe the first day of the range, d/e/f the last one.
    fields.update(a=start_date.month - 1,
                  b=start_date.day,
                  c=start_date.year)
    fields.update(d=end_date.month - 1,
                  e=end_date.day,
                  f=end_date.year)
    return template % fields
def get_yahoo_data(url):
    """
    Download the CSV at url (as returned by build_request_url) and
    return a list of tuples, each holding the date (as a Python
    datetime) and the adjusted close (as a float).

    The rows are reversed after parsing, so the returned list is in
    the opposite order from the one the server sent.

    If the request fails with an HTTPError or URLError the error is
    printed and an empty list is returned.  A KeyError is raised when
    the 'Date' or 'Adj Close' column is missing from the CSV header,
    and a ValueError when a value cannot be parsed.
    """
    try:
        response = urllib2.urlopen(url, timeout=10)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be caught first.
        print(u'HTTPError getting data: %s' % e)
        return []
    except urllib2.URLError as e:
        print(u'URLError getting data: %s' % e)
        return []

    try:
        data = [(datetime.strptime(row['Date'], '%Y-%m-%d'),
                 float(row['Adj Close']))
                for row in csv.DictReader(response)]
    finally:
        # Always release the connection, even when a row fails to parse.
        response.close()
    data.reverse()
    return data
def _paired_daily_returns(data):
    """
    Return the daily returns of both series as a pair of lists.

    data is a list containing two lists of (date, adj. close) tuples.
    Only dates that appear in both series are used, so the return at a
    given index covers the same pair of days in both lists even when
    one series has extra or missing days.
    """
    shared_dates = (set(day for day, _ in data[0]) &
                    set(day for day, _ in data[1]))
    returns = []
    for series in data[:2]:
        closes = [close for day, close in series if day in shared_dates]
        # Daily return: (C_n - C_n-1) / C_n-1 for consecutive closes.
        returns.append([(current - previous) / previous
                        for previous, current in zip(closes, closes[1:])])
    return returns[0], returns[1]


def calculate_stock_correlation(data):
    """
    This function takes a list containing two lists of the form
    returned by get_yahoo_data (list of date, adj. close tuples) and
    returns the correlation of the daily returns of the two stocks.

    The series are matched on their dates before the returns are
    computed.  When fewer than two return pairs are available the
    correlation is undefined and nan is returned.  A ZeroDivisionError
    is raised if an adjusted close used as a divisor is zero.
    """
    first, second = _paired_daily_returns(data)
    if len(first) < 2:
        # corrcoef would only produce nan (plus runtime warnings) here.
        return float('nan')
    # Entry [0][1] of the 2x2 matrix is the correlation between the series.
    return numpy.corrcoef(first, second)[0][1]


def graph_stock_regression(data, filename):
    """
    This function takes a list containing two lists of the form
    returned by get_yahoo_data (list of date, adj. close tuples) and
    saves the graph of the series of daily return pairs as well as
    the regression line.  The graph is saved to the given filename.

    The plot title is the file's base name without its extension; the
    text before the first 'vs' in it labels the x axis and the text
    after it labels the y axis (empty when there is no 'vs').

    A ValueError is raised when fewer than two daily return pairs are
    available, since no regression line can be fitted.
    """
    xs, ys = _paired_daily_returns(data)
    if len(xs) < 2:
        raise ValueError('need at least two daily return pairs '
                         'to fit a regression line')

    slope, intercept, _r, _p, _stderr = linregress(xs, ys)
    fitted = [slope * x + intercept for x in xs]

    # Use only the file name, so directories or dots in the path do not
    # leak into (or truncate) the title and axis labels.
    title = os.path.splitext(os.path.basename(filename))[0]
    x_label, _, y_label = title.partition('vs')

    # Draw on a dedicated figure and close it afterwards so repeated
    # calls do not pile up on pylab's implicit current figure.
    figure = pylab.figure()
    try:
        pylab.title(title)
        pylab.plot(xs, ys, 'r.', xs, fitted, 'k')
        pylab.xlabel(x_label)
        pylab.ylabel(y_label)
        pylab.legend(['data', 'regression'])
        pylab.savefig(filename)
    finally:
        pylab.close(figure)
def main():
    """
    Fetch the Google (GOOG) and Apple (AAPL) stock data for
    Jan 1, 2010 to Dec 31, 2010, print the correlation of their daily
    returns and save the Google vs Apple regression graph as
    GOOGvsAAPL.png
    """
    date_format = '%Y-%m-%d'
    start_date, end_date = [datetime.strptime(text, date_format)
                            for text in ('2010-1-1', '2010-12-31')]
    # Google first, then Apple: Google ends up on the x axis of the graph.
    data = [get_yahoo_data(build_request_url(ticker, start_date, end_date))
            for ticker in ('GOOG', 'AAPL')]
    print(calculate_stock_correlation(data))
    graph_stock_regression(data, 'GOOGvsAAPL.png')
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    """
    When this module as run as a script it will call the main function.
    You should not modify this code.
    """
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment