Created
July 1, 2012 05:45
-
-
Save anonymous/3027014 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import datetime | |
import requests | |
import dateutil.parser | |
from BeautifulSoup import BeautifulSoup | |
def yahoo_finance_ticker_scrape(ticker, date):
    '''Return the adjusted-close text Yahoo Finance lists for ticker on date.

    Requests the historical-prices page for ``ticker``, locates the table
    with class 'yfnc_datamodoutline1' and searches it for a cell whose text
    equals the requested day formatted as '%b %d, %Y'.

    ticker -- ticker symbol; interpolated into the request URL as-is.
    date   -- date string in any form dateutil.parser.parse accepts.

    Returns the text of the last cell of the first matching row that has
    seven child nodes. Tickers on the skip list (currently only 'CEG')
    return '' without any request being made.

    This function does not raise ScraperError to its caller. On a non-200
    response, a missing table, or no usable row for the day, it prints a
    message and terminates the process via sys.exit(1).
    '''
    if ticker in ('CEG',):
        return ''

    try:
        url = 'http://finance.yahoo.com/q/hp?s=%s' % ticker
        resp = requests.get(url)
        if resp.status_code != 200:
            raise ScraperError('non-200 status code returned from get request')
        soup = BeautifulSoup(resp.content)
        fin_table = soup.find('table', 'yfnc_datamodoutline1')
        if not fin_table:
            raise ScraperError('bad url or new page structure: ' + url)
    except ScraperError as e:
        print(e)
        sys.exit(1)

    date = dateutil.parser.parse(date)
    day = date.strftime('%b %d, %Y')
    tags = fin_table.findAll('td', text=day)

    # There can be more than one match for a day due to a dividend entry;
    # only rows with seven child nodes are kept.
    data_rows = (tag.findParent('tr') for tag in tags)
    data_cols = [row.findAll('td') for row in data_rows if len(row) == 7]

    # Covers both "day not on the page" and "day matched, but every matching
    # row was filtered out" -- the latter used to escape as an IndexError.
    if not data_cols:
        not_found = 'Could not find data for ticker/day combo of: '
        print(not_found + ticker + ' ' + day)
        sys.exit(1)

    return data_cols[0][-1].getText()
class ScraperError(Exception):
    '''Error class for this module.

    message -- description of the failure. It is stored on the instance as
    ``message`` with a 'ScraperError: ' prefix, and that prefixed text is
    what str() returns. The unprefixed text is available as ``args[0]``.
    '''

    def __init__(self, message):
        # Forward the raw text to Exception so ``args`` is populated; the
        # prefix is applied only to the display form kept in self.message.
        super(ScraperError, self).__init__(message)
        self.message = 'ScraperError: ' + message

    def __str__(self):
        return self.message
if __name__ == '__main__':
    # Command-line entry point: choose a day from the arguments and write
    # one CSV of scraped values for the fixed ticker list below.
    # sys and datetime are already imported at module level.
    import time

    tickers = ('AFL', 'ALTR', 'AMGN', 'BDX', 'CAM', 'CEG', 'D', 'ECL', 'EL',
               'EXC', 'GOOG', 'GPS', 'HES', 'HNZ', 'GILD', 'LOW', 'MMM',
               'NKE', 'PEP', 'PPG', 'QCOM', 'SLB', 'SCHW', 'SIAL',
               'T', 'TRV', 'UTX', 'VZ', 'WM', 'WIN')

    def call_wrapper(date):
        '''Scrape every ticker for date into yahoo_ticker_scrape-<date>.csv.

        date -- day to scrape, as a string; used both in the file name and
        as the date argument of yahoo_finance_ticker_scrape.
        '''
        with open('yahoo_ticker_scrape-' + date + '.csv', 'w') as f:
            for ticker in tickers:
                value = yahoo_finance_ticker_scrape(ticker, date)
                f.write(ticker + ', ' + value + '\n')
                # Pause after each ticker -- presumably to space out the
                # page requests; confirm before shortening.
                time.sleep(3)

    if any(s in sys.argv for s in ['-h', '--help']):
        print(
            'Usage: ./yahoo_scraper.py [-y] or [-t] or [YYYY-MM-DD]. Specify\n'
            'the option in order to choose what day to scrape. This produces a file\n'
            'labeled as yahoo_ticker_scrape-YYYY-MM-DD.csv where YYYY-MM-DD is\n'
            'replaced by the indicated day. Specifying the -t option will produce a\n'
            'file for today and the -y option will produce a file for yesterday.\n'
            'Modify the internal tickers tuple to change what is fetched.'
        )
    elif '-t' in sys.argv:
        call_wrapper(str(datetime.date.today()))
    elif '-y' in sys.argv:
        call_wrapper(str(datetime.date.today() - datetime.timedelta(days=1)))
    elif len(sys.argv) == 2:
        # A single remaining argument is taken to be the date itself.
        call_wrapper(sys.argv[1])
    else:
        print('Error in usage, retry with -h or --help')
        # Report the usage error through the exit status too, like the
        # other failure paths in this file.
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment