doda-zz/test.py

## test.py
#!/usr/bin/env python
import sys
import datetime
import requests
import dateutil.parser
from BeautifulSoup import BeautifulSoup
import time
import datetime
import sys

class ScraperError(Exception):
    '''Raised when the quote page request returns a non-2xx status code.'''


def yahoo_finance_ticker_scrape(ticker, date):
    '''Scrape one value for ticker on date from Yahoo's historical prices.

    Downloads ``http://finance.yahoo.com/q/hp?s=<ticker>``, locates the table
    found by ``soup.find('table', 'yfnc_datamodoutline1')`` and searches it
    for cells whose text equals ``date`` formatted as ``'%b %d, %Y'``.

    Args:
        ticker: Ticker symbol substituted into the request URL.
        date: Date string in any format ``dateutil.parser.parse`` accepts.

    Returns:
        The text of the last ``td`` of the first matching row of length 7
        (the original code names this value ``adj_close``), or ``''`` for
        tickers in the skip list.

    Raises:
        ScraperError: If the HTTP status code is outside 200-299.
        SystemExit: With status 1, after printing a message, when the table
            or a usable row for the requested day cannot be found.
    '''
    # Tickers listed here are skipped: '' is returned without any request.
    if ticker in ('CEG',):
        return ''

    url = 'http://finance.yahoo.com/q/hp?s=%s' % ticker
    resp = requests.get(url)
    if not 200 <= resp.status_code < 300:
        raise ScraperError('non-200 status code returned from get request')

    soup = BeautifulSoup(resp.content)
    fin_table = soup.find('table', 'yfnc_datamodoutline1')
    if not fin_table:
        # Single-argument print() gives the same output on Python 2 and 3.
        print('bad url or new page structure: %s' % url)
        sys.exit(1)

    # NOTE(review): %d zero-pads the day ("Jan 05, 2012") - confirm the page
    # renders single-digit days the same way.
    day = dateutil.parser.parse(date).strftime('%b %d, %Y')
    tags = fin_table.findAll('td', text=day)

    # The same day can match more than once (e.g. because of a dividend row),
    # so only rows of length 7 are kept.
    # TODO: this is ugly, error-prone and could probably be done with xpath
    # (using lxml).
    rows = (tag.findParent('tr') for tag in tags)
    data_cols = [row.findAll('td') for row in rows
                 if row is not None and len(row) == 7]

    # Covers both "no cell matched the day" and "matches exist but none sits
    # in a usable row"; the latter used to escape as an IndexError.
    if not data_cols or not data_cols[0]:
        print('Could not find data for ticker/day combo of: %s %s'
              % (ticker, day))
        sys.exit(1)

    return data_cols[0][-1].getText()

def date_from_args(args):
    '''Return the date string selected by the command-line args, or None.

    ``-t`` selects today and ``-y`` selects yesterday (``-t`` is checked
    first); otherwise a single argument is returned unchanged. None means
    the arguments do not identify a date.
    '''
    if '-t' in args:
        return str(datetime.date.today())
    if '-y' in args:
        return str(datetime.date.today() - datetime.timedelta(days=1))
    if len(args) == 1:
        # Read from args rather than sys.argv: sys.argv also contains the
        # script name, so unpacking it into one name raised ValueError.
        return args[0]
    return None


if __name__ == '__main__':
    tickers = ('AFL', 'ALTR', 'AMGN', 'BDX', 'CAM', 'CEG', 'D', 'ECL', 'EL',
               'EXC', 'GOOG', 'GPS', 'HES', 'HNZ', 'GILD', 'LOW', 'MMM',
               'NKE', 'PEP', 'PPG', 'QCOM', 'SLB', 'SCHW', 'SIAL',
               'T', 'TRV', 'UTX', 'VZ', 'WM', 'WIN')

    def call_wrapper(date):
        '''Write one "TICKER, value" line per ticker to the CSV for date.'''
        with open('yahoo_ticker_scrape-%s.csv' % date, 'w') as f:
            for ticker in tickers:
                value = yahoo_finance_ticker_scrape(ticker, date)
                f.write(ticker + ', ' + value + '\n')
                # Pause between consecutive tickers.
                time.sleep(3)

    # TODO: use optparse or argparse here

    args = sys.argv[1:]
    if any(s in args for s in ['-h', '--help']):
        # Single-argument print() gives the same output on Python 2 and 3.
        print('''Usage: ./yahoo_scraper.py [-y] or [-t] or [YYYY-MM-DD]. Specify
        the option in order to choose what day to scrape. This produces a file
        labeled as yahoo_ticker_scrape-YYYY-MM-DD.csv where YYYY-MM-DD is
        replaced by the indicated day. Specifying the -t option will produce a
        file for today and the -y option will produce a file for yesterday.
        Modify the internal tickers tuple to change what is fetched.''')
    else:
        date = date_from_args(args)
        if date is None:
            print('Error in usage, retry with -h or --help')
        else:
            call_wrapper(date)
	#!/usr/bin/env python
	import sys
	import datetime
	import requests
	import dateutil.parser
	from BeautifulSoup import BeautifulSoup
	import time
	import datetime
	import sys

	class ScraperError(Exception):
	    '''Raised when the quote page request returns a non-2xx status code.'''


	def yahoo_finance_ticker_scrape(ticker, date):
	    '''Scrape one value for ticker on date from Yahoo's historical prices.

	    Downloads ``http://finance.yahoo.com/q/hp?s=<ticker>``, locates the
	    table found by ``soup.find('table', 'yfnc_datamodoutline1')`` and
	    searches it for cells whose text equals ``date`` formatted as
	    ``'%b %d, %Y'``.

	    Args:
	        ticker: Ticker symbol substituted into the request URL.
	        date: Date string in any format ``dateutil.parser.parse`` accepts.

	    Returns:
	        The text of the last ``td`` of the first matching row of length 7
	        (the original code names this value ``adj_close``), or ``''`` for
	        tickers in the skip list.

	    Raises:
	        ScraperError: If the HTTP status code is outside 200-299.
	        SystemExit: With status 1, after printing a message, when the
	            table or a usable row for the requested day cannot be found.
	    '''
	    # Tickers listed here are skipped: '' is returned without any request.
	    if ticker in ('CEG',):
	        return ''

	    url = 'http://finance.yahoo.com/q/hp?s=%s' % ticker
	    resp = requests.get(url)
	    if not 200 <= resp.status_code < 300:
	        raise ScraperError('non-200 status code returned from get request')

	    soup = BeautifulSoup(resp.content)
	    fin_table = soup.find('table', 'yfnc_datamodoutline1')
	    if not fin_table:
	        # Single-argument print() gives the same output on Python 2 and 3.
	        print('bad url or new page structure: %s' % url)
	        sys.exit(1)

	    # NOTE(review): %d zero-pads the day ("Jan 05, 2012") - confirm the
	    # page renders single-digit days the same way.
	    day = dateutil.parser.parse(date).strftime('%b %d, %Y')
	    tags = fin_table.findAll('td', text=day)

	    # The same day can match more than once (e.g. because of a dividend
	    # row), so only rows of length 7 are kept.
	    # TODO: this is ugly, error-prone and could probably be done with
	    # xpath (using lxml).
	    rows = (tag.findParent('tr') for tag in tags)
	    data_cols = [row.findAll('td') for row in rows
	                 if row is not None and len(row) == 7]

	    # Covers both "no cell matched the day" and "matches exist but none
	    # sits in a usable row"; the latter used to escape as an IndexError.
	    if not data_cols or not data_cols[0]:
	        print('Could not find data for ticker/day combo of: %s %s'
	              % (ticker, day))
	        sys.exit(1)

	    return data_cols[0][-1].getText()

	def date_from_args(args):
	    '''Return the date string selected by the command-line args, or None.

	    ``-t`` selects today and ``-y`` selects yesterday (``-t`` is checked
	    first); otherwise a single argument is returned unchanged. None means
	    the arguments do not identify a date.
	    '''
	    if '-t' in args:
	        return str(datetime.date.today())
	    if '-y' in args:
	        return str(datetime.date.today() - datetime.timedelta(days=1))
	    if len(args) == 1:
	        # Read from args rather than sys.argv: sys.argv also contains the
	        # script name, so unpacking it into one name raised ValueError.
	        return args[0]
	    return None


	if __name__ == '__main__':
	    tickers = ('AFL', 'ALTR', 'AMGN', 'BDX', 'CAM', 'CEG', 'D', 'ECL', 'EL',
	               'EXC', 'GOOG', 'GPS', 'HES', 'HNZ', 'GILD', 'LOW', 'MMM',
	               'NKE', 'PEP', 'PPG', 'QCOM', 'SLB', 'SCHW', 'SIAL',
	               'T', 'TRV', 'UTX', 'VZ', 'WM', 'WIN')

	    def call_wrapper(date):
	        '''Write one "TICKER, value" line per ticker to the CSV for date.'''
	        with open('yahoo_ticker_scrape-%s.csv' % date, 'w') as f:
	            for ticker in tickers:
	                value = yahoo_finance_ticker_scrape(ticker, date)
	                f.write(ticker + ', ' + value + '\n')
	                # Pause between consecutive tickers.
	                time.sleep(3)

	    # TODO: use optparse or argparse here

	    args = sys.argv[1:]
	    if any(s in args for s in ['-h', '--help']):
	        # Single-argument print() gives the same output on Python 2 and 3.
	        print('''Usage: ./yahoo_scraper.py [-y] or [-t] or [YYYY-MM-DD]. Specify
	the option in order to choose what day to scrape. This produces a file
	labeled as yahoo_ticker_scrape-YYYY-MM-DD.csv where YYYY-MM-DD is
	replaced by the indicated day. Specifying the -t option will produce a
	file for today and the -y option will produce a file for yesterday.
	Modify the internal tickers tuple to change what is fetched.''')
	    else:
	        date = date_from_args(args)
	        if date is None:
	            print('Error in usage, retry with -h or --help')
	        else:
	            call_wrapper(date)