-
-
Save doda-zz/3029058 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import datetime | |
import requests | |
import dateutil.parser | |
from BeautifulSoup import BeautifulSoup | |
import time | |
import datetime | |
import sys | |
class ScraperError(Exception):
    '''Raised when the Yahoo Finance request returns a non-2xx status.'''


def yahoo_finance_ticker_scrape(ticker, date):
    '''Scrape a single value for ``ticker`` on ``date`` from Yahoo Finance.

    Fetches ``http://finance.yahoo.com/q/hp?s=<ticker>``, locates the table
    with class ``yfnc_datamodoutline1`` and looks for cells whose text is
    ``date`` formatted as ``'%b %d, %Y'``.

    ticker -- ticker symbol interpolated into the URL.
    date   -- any date string understood by ``dateutil.parser.parse``.

    Returns the text of the last ``<td>`` of the first matching row that
    has exactly 7 children (presumably the adjusted close, going by the
    original variable name -- confirm against the live page layout).
    Returns ``''`` without any network access for tickers in the skip list.

    Raises ScraperError when the HTTP status is outside 200-299.
    Calls ``sys.exit(1)`` (after printing a message) when the table is
    missing or no usable row exists for the requested day.
    '''
    # 'CEG' is skipped on purpose; the reason is not recorded in this file.
    if ticker in ('CEG',):
        return ''

    url = 'http://finance.yahoo.com/q/hp?s=%s' % ticker
    resp = requests.get(url)
    if not 200 <= resp.status_code < 300:
        raise ScraperError('non-200 status code returned from get request')

    soup = BeautifulSoup(resp.content)
    fin_table = soup.find('table', 'yfnc_datamodoutline1')
    if not fin_table:
        print('bad url or new page structure: %s' % url)
        sys.exit(1)

    day = dateutil.parser.parse(date).strftime('%b %d, %Y')

    # There can be more than one match for a day due to a dividend row, so
    # keep only rows with the 7-child shape of a regular price row.
    # NOTE: this is fragile and could probably be done with xpath (lxml).
    data_cols = []
    for tag in fin_table.findAll('td', text=day):
        row = tag.findParent('tr')
        if row is not None and len(row) == 7:
            data_cols.append(row.findAll('td'))

    # Covers both "date not on the page" and "date found, but only in rows
    # of the wrong shape"; the latter used to surface as an IndexError.
    if not data_cols:
        print('Could not find data for ticker/day combo of: %s %s'
              % (ticker, day))
        sys.exit(1)

    return data_cols[0][-1].getText()
# Symbols fetched on every run; edit this tuple to change what is scraped.
tickers = ('AFL', 'ALTR', 'AMGN', 'BDX', 'CAM', 'CEG', 'D', 'ECL', 'EL',
           'EXC', 'GOOG', 'GPS', 'HES', 'HNZ', 'GILD', 'LOW', 'MMM',
           'NKE', 'PEP', 'PPG', 'QCOM', 'SLB', 'SCHW', 'SIAL',
           'T', 'TRV', 'UTX', 'VZ', 'WM', 'WIN')

USAGE = '''Usage: ./yahoo_scraper.py [-y] or [-t] or [YYYY-MM-DD]. Specify
the option in order to choose what day to scrape. This produces a file
labeled as yahoo_ticker_scrape-YYYY-MM-DD.csv where YYYY-MM-DD is
replaced by the indicated day. Specifying the -t option will produce a
file for today and the -y option will produce a file for yesterday.
Modify the internal tickers tuple to change what is fetched.'''


def call_wrapper(date):
    '''Scrape every ticker for ``date`` into yahoo_ticker_scrape-<date>.csv.

    One ``ticker, value`` line is written per symbol, with a 3 second
    pause after each request.
    '''
    with open('yahoo_ticker_scrape-%s.csv' % date, 'w') as f:
        for ticker in tickers:
            value = yahoo_finance_ticker_scrape(ticker, date)
            f.write(ticker + ', ' + value + '\n')
            time.sleep(3)


def resolve_date(args):
    '''Return the date string selected by ``args``, or None if unusable.

    ``-t`` means today and ``-y`` means yesterday (``-t`` wins when both
    are given); otherwise a single positional argument is returned as-is.
    '''
    if '-t' in args:
        return str(datetime.date.today())
    if '-y' in args:
        return str(datetime.date.today() - datetime.timedelta(days=1))
    if len(args) == 1:
        # Take the date from args, not sys.argv: sys.argv also holds the
        # script name, so unpacking it into one name always failed.
        return args[0]
    return None


def main(args):
    '''Run the scraper for the command-line arguments ``args``.

    ``args`` excludes the script name. Prints the usage text for
    ``-h``/``--help`` and an error hint when no date can be determined.
    '''
    # TODO: use optparse or argparse here
    if any(flag in args for flag in ('-h', '--help')):
        print(USAGE)
        return
    date = resolve_date(args)
    if date is None:
        print('Error in usage, retry with -h or --help')
        return
    call_wrapper(date)


if __name__ == '__main__':
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment