Created
March 12, 2011 03:39
-
-
Save clholgat/867008 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This assignment will walk you through gathering data for a
pair of stocks and calculating the correlation between those stocks
using the daily returns.

Yahoo makes their historical stock data available in CSV format.
Below is the url for IBM stock data from Jan 1, 2010 to Dec 31, 2010.

http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2010&d=11&e=31&f=2010&g=d&ignore=.csv

The data is returned in CSV (comma separated format) with the following columns:

Date, Open, High, Low, Close, Volume, Adj Close

The daily return of a stock is defined by:

(C_n - C_n-1) / C_n-1

where C_n denotes the nth adjusted close and C_n-1 denotes the (n-1)th
adjusted close.

The function signatures for various steps of this process have been
given below. The names and parameters of these functions should not
be changed. You are free to write additional functions or classes as
needed. You are welcome to use any modules in the Python
standard library as well as NumPy, SciPy, and Matplotlib external
libraries. All code must run on Python 2.6.5.
""" | |
import csv
import os
import urllib2
from datetime import datetime

import numpy
import scipy
from matplotlib import pylab
from scipy.stats import linregress
def build_request_url(symbol, start_date, end_date):
    """
    Build the Yahoo CSV download url for a stock's daily price history.

    symbol is the stock symbol as a string; start_date and end_date are
    Python dates bounding the requested range.  Returns the url as a
    unicode string.

    The symbol is percent-encoded so that symbols containing reserved
    characters (for example '^GSPC') cannot corrupt the query string.
    Plain alphanumeric symbols are left unchanged.
    """
    # quote lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    values = {
        'symbol': quote(symbol, safe=''),
        # The url carries zero-based months (January is 0, December is 11).
        'a': start_date.month - 1,
        'b': start_date.day,
        'c': start_date.year,
        'd': end_date.month - 1,
        'e': end_date.day,
        'f': end_date.year,
    }
    return (u'http://ichart.finance.yahoo.com/table.csv'
            u'?s=%(symbol)s&a=%(a)i&b=%(b)i&c=%(c)i'
            u'&d=%(d)i&e=%(e)i&f=%(f)i&g=d&ignore=.csv' % values)
def get_yahoo_data(url):
    """
    Download a Yahoo historical-price CSV and extract the adjusted closes.

    url is a download url as returned by build_request_url.

    Returns a list of (date, adjusted close) tuples.  The date is a
    datetime parsed from the 'Date' column ('%Y-%m-%d') and the adjusted
    close is a float taken from the 'Adj Close' column.  The rows are
    returned in the reverse of the order in which they appear in the CSV.

    If the request fails with an HTTPError or URLError, the error is
    printed and an empty list is returned.
    """
    # print(...) with a single argument behaves the same as the print
    # statement on Python 2.
    try:
        response = urllib2.urlopen(url, timeout=10)
    except urllib2.HTTPError as e:
        print(u'HTTPError getting data: %s' % e)
        return []
    except urllib2.URLError as e:
        print(u'URLError getting data: %s' % e)
        return []

    # Always release the connection, even if a row fails to parse.
    try:
        data = [(datetime.strptime(row.get('Date'), '%Y-%m-%d'),
                 float(row.get('Adj Close')))
                for row in csv.DictReader(response)]
    finally:
        response.close()

    # Presumably Yahoo lists the newest day first, so this puts the
    # oldest day first -- TODO confirm against a live response.
    data.reverse()
    return data
def calculate_stock_correlation(data):
    """
    Return the correlation of the daily returns of two stocks.

    data is a list containing two lists of the form returned by
    get_yahoo_data (lists of (date, adj. close) tuples).  The daily
    return is (C_n - C_n-1) / C_n-1, computed between consecutive entries.

    Only dates present in both lists are used, so each pair of returns
    covers the same interval even when one list is missing a day.  When
    both lists hold the same dates the pairing is simply entry by entry.

    Returns the correlation coefficient of the two return series, or NaN
    when fewer than two pairs of returns are available (the correlation
    is undefined in that case).
    """
    first_series, second_series = data[0], data[1]

    # Look up the second stock's close by date to pair the two stocks.
    second_closes = dict(second_series)
    paired = [(close, second_closes[day])
              for day, close in first_series if day in second_closes]

    one = []
    two = []
    for (prev_one, prev_two), (cur_one, cur_two) in zip(paired, paired[1:]):
        one.append((cur_one - prev_one) / prev_one)
        two.append((cur_two - prev_two) / prev_two)

    if len(one) < 2:
        return float('nan')

    # numpy.corrcoef is the function behind the deprecated scipy.corrcoef
    # alias; entry [0][1] is the coefficient between the two series.
    return numpy.corrcoef(one, two)[0][1]
def graph_stock_regression(data, filename):
    """
    Plot the daily returns of two stocks against each other and save it.

    data is a list containing two lists of the form returned by
    get_yahoo_data (lists of (date, adj. close) tuples).  Each plotted
    point pairs a daily return of the first stock (x) with the daily
    return of the second stock (y) over the same dates; only dates
    present in both lists are used.  The least-squares regression line
    through those points is drawn on top.

    The graph is saved to filename.  The title is the file's base name
    without its extension, and the axis labels are the parts of the title
    before and after 'vs' (the y label is empty if there is no 'vs').
    """
    first_series, second_series = data[0], data[1]

    # Look up the second stock's close by date to pair the two stocks.
    second_closes = dict(second_series)
    paired = [(close, second_closes[day])
              for day, close in first_series if day in second_closes]

    one = []
    two = []
    for (prev_one, prev_two), (cur_one, cur_two) in zip(paired, paired[1:]):
        one.append((cur_one - prev_one) / prev_one)
        two.append((cur_two - prev_two) / prev_two)

    # The first two values returned by linregress are slope and intercept.
    slope, intercept = linregress(one, two)[:2]
    line = numpy.polyval([slope, intercept], one)

    # Use the base name so directories or dots in the path do not end up
    # in (or truncate) the title.
    title = os.path.splitext(os.path.basename(filename))[0]
    labels = title.split('vs')
    x_label = labels[0]
    y_label = labels[1] if len(labels) > 1 else ''

    # Draw on a fresh figure and close it afterwards so that repeated
    # calls do not pile their plots onto one shared figure.
    figure = pylab.figure()
    try:
        pylab.title(title)
        pylab.plot(one, two, 'r.', one, line, 'k')
        pylab.xlabel(x_label)
        pylab.ylabel(y_label)
        pylab.legend(['data', 'regression'])
        pylab.savefig(filename)
    finally:
        pylab.close(figure)
def main():
    """
    Get the stock data for Google (GOOG) and Apple (AAPL) for
    Jan 1, 2010 to Dec 31, 2010, print the correlation of the daily
    returns and graph the regression of Google vs Apple.  The graph is
    saved as GOOGvsAAPL.png.

    If either download yields no data, a message is printed and nothing
    is calculated or graphed.
    """
    start_date = datetime(2010, 1, 1)
    end_date = datetime(2010, 12, 31)

    # Google first, then Apple: the order fixes the x and y axes.
    data = [get_yahoo_data(build_request_url(symbol, start_date, end_date))
            for symbol in ('GOOG', 'AAPL')]

    # get_yahoo_data returns an empty list when the download fails; there
    # is nothing meaningful to correlate or plot in that case.
    if not (data[0] and data[1]):
        print(u'Could not download data for GOOG and AAPL; nothing to do.')
        return

    print(calculate_stock_correlation(data))
    graph_stock_regression(data, 'GOOGvsAAPL.png')
if __name__ == "__main__":
    """
    When this module as run as a script it will call the main function.
    You should not modify this code.
    """
    # Only run the assignment when executed directly, not when imported.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment