Skip to content

Instantly share code, notes, and snippets.

@doda-zz
Forked from anonymous/test.py
Created July 1, 2012 17:41
Show Gist options
  • Save doda-zz/3029058 to your computer and use it in GitHub Desktop.
Save doda-zz/3029058 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import datetime
import requests
import dateutil.parser
from BeautifulSoup import BeautifulSoup
import time
import datetime
import sys
class ScraperError(Exception):
    '''Raised when the Yahoo Finance request returns a non-2xx status.'''


def yahoo_finance_ticker_scrape(ticker, date):
    '''Return the last cell of the historical-prices row for ticker on date.

    Fetches http://finance.yahoo.com/q/hp?s=<ticker>, locates the table with
    class "yfnc_datamodoutline1" and looks for the 7-cell row whose date cell
    equals `date` formatted as "%b %d, %Y".  The text of that row's last
    cell is returned (named adj_close by the original author).

    Args:
        ticker: ticker symbol to look up.
        date: any date string dateutil.parser.parse accepts.

    Returns:
        The cell text, or '' for tickers in the hard-coded skip list.

    Raises:
        ScraperError: the HTTP status code is outside 200-299.
        SystemExit: (status 1, after printing a message) when the table is
            missing from the page or no 7-cell row matches the day.
    '''
    if ticker in ('CEG',):
        # Hard-coded skip: no request is made and an empty value is reported.
        return ''

    url = 'http://finance.yahoo.com/q/hp?s=%s' % ticker
    resp = requests.get(url)
    if not 200 <= resp.status_code < 300:
        raise ScraperError('non-200 status code returned from get request')

    soup = BeautifulSoup(resp.content)
    fin_table = soup.find('table', 'yfnc_datamodoutline1')
    if not fin_table:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('bad url or new page structure: %s' % url)
        sys.exit(1)

    date = dateutil.parser.parse(date)
    # NOTE(review): '%d' zero-pads the day ("Jul 01, 2012"); confirm this
    # matches how the page renders single-digit days.
    day = datetime.datetime.strftime(date, '%b %d, %Y')

    # The same day can match more than one row (e.g. a dividend row), so keep
    # only rows that really have the seven price columns.  Counting the <td>
    # cells, rather than len(row), ignores whitespace text nodes in the row.
    data_cols = []
    for tag in fin_table.findAll('td', text=day):
        row = tag.findParent('tr')
        if row is None:
            continue
        cols = row.findAll('td')
        if len(cols) == 7:
            data_cols.append(cols)

    if not data_cols:
        # Covers both "day not on the page" and "day only matched non-price
        # rows"; the latter used to surface as an IndexError.
        print('Could not find data for ticker/day combo of: %s %s'
              % (ticker, day))
        sys.exit(1)

    return data_cols[0][-1].getText()
def date_from_args(args):
    '''Return the date string selected by the command-line args, or None.

    Precedence: '-t' (today) wins over '-y' (yesterday), which wins over a
    single positional argument that is returned verbatim.  None means the
    arguments do not select a date.  Help flags are not handled here.
    '''
    if '-t' in args:
        return str(datetime.date.today())
    if '-y' in args:
        return str(datetime.date.today() - datetime.timedelta(days=1))
    if len(args) == 1:
        # Unpack from args, not sys.argv: sys.argv also holds the script
        # name, so a one-element unpack of it always raised ValueError.
        (date,) = args
        return date
    return None


if __name__ == '__main__':
    tickers = ('AFL', 'ALTR', 'AMGN', 'BDX', 'CAM', 'CEG', 'D', 'ECL', 'EL',
               'EXC', 'GOOG', 'GPS', 'HES', 'HNZ', 'GILD', 'LOW', 'MMM',
               'NKE', 'PEP', 'PPG', 'QCOM', 'SLB', 'SCHW', 'SIAL',
               'T', 'TRV', 'UTX', 'VZ', 'WM', 'WIN')

    def call_wrapper(date):
        '''Write one "TICKER, value" line per ticker to a per-date CSV file.'''
        with open('yahoo_ticker_scrape-%s.csv' % date, 'w') as f:
            for ticker in tickers:
                value = yahoo_finance_ticker_scrape(ticker, date)
                f.write(ticker + ', ' + value + '\n')
                # Pause between tickers so requests are spaced out.
                time.sleep(3)

    # use optparse or argparse here
    args = sys.argv[1:]
    if any(s in args for s in ['-h', '--help']):
        # Continuation lines stay at column 0 so the printed text is not
        # indented.  Single-argument print(...) behaves the same on 2 and 3.
        print('''Usage: ./yahoo_scraper.py [-y] or [-t] or [YYYY-MM-DD]. Specify
the option in order to choose what day to scrape. This produces a file
labled as yahoo_ticker_scrape-YYYY-MM-DD.csv where YYYY-MM-DD is
replaced by the indicated day. Specifying the -t option will produce a
file for today and the -y option will produce a file for yesterday.
Modify the internal tickers tuple to change what is fetched.''')
    else:
        date = date_from_args(args)
        if date is None:
            print('Error in usage, retry with -h or --help')
        else:
            call_wrapper(date)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment