# chadmhorner/oscars_odds.py

## oscars_odds.py
from readypipe import requests, starting_task, subtask, schedule, save, save_many

# Site root; scrape_movie prepends it to the relative option URLs it builds.
BASE_URL = 'https://www.oddschecker.com'

# Seed bet-history page for each award category, keyed by a short nominee label.
URLS = {
    'Roma': 'https://www.oddschecker.com/awards/oscars/best-picture/bet-history/roma',
    'Cuaron': 'https://www.oddschecker.com/awards/oscars/best-director/bet-history/alfonso-cuaron',
    'Malek': 'https://www.oddschecker.com/awards/oscars/best-actor/bet-history/rami-malek',
    'Close': 'https://www.oddschecker.com/awards/oscars/best-actress/bet-history/glenn-close',
    'Ali': 'https://www.oddschecker.com/awards/oscars/best-supporting-actor/bet-history/mahershala-ali',
    'King': 'https://www.oddschecker.com/awards/oscars/best-supporting-actress/bet-history/regina-king',
}

# Nominee labels in scheduling order; identical to the insertion order of URLS.
NOMS_LIST = list(URLS)

@starting_task
def load_awards():
    """Schedule one ``scrape_movie`` subtask for every label in NOMS_LIST.

    Each seed URL is looked up in URLS and scheduled with True as the
    subtask's ``first`` argument.
    """
    for nominee in NOMS_LIST:
        seed_url = URLS[nominee]
        schedule('scrape_movie', (seed_url, True))

@subtask
def scrape_movie(movie_url, first=False):
    """Scrape one bet-history page and save every price found in its table.

    Args:
        movie_url: URL of the bet-history page to fetch.
        first: When true, also schedule a ``scrape_movie`` subtask (with
            ``first`` set to False) for every other option listed in the
            page's ``<select class="field">`` element.

    For each history row, the collected records are handed to ``save_many``
    under the name ``'oscars_odds_new'``. Every record has the keys
    ``bookie_page``, ``movie``, ``odds_date``, ``odds``, ``odds_fractional``
    and ``odds_decimal``.

    Raises:
        IndexError: From the ``[0]`` / ``[1]`` lookups when the select, the
            header row or the second ``tbodyToScroll`` body is absent, or
            when a row has more price cells than the header has bookmakers
            (assuming ``xpath`` returns a list -- confirm against the DOM
            library in use).
    """
    page = requests.get_dom_from_content(movie_url)

    if first:
        select = page.xpath('//*/select[@class="field"]')[0]
        pre_url = select.attrib['data-pre-url']
        for option in select.xpath('child::option'):
            option_url = BASE_URL + pre_url + option.attrib['value']
            # Skip the page currently being scraped so it is not queued again.
            if option_url != movie_url:
                schedule('scrape_movie', (option_url, False))

    # The first header cell is the date column; the rest carry a bookmaker id.
    header_cells = page.xpath('//*/tr[@class="eventTableHeader"]')[0].xpath('child::td')
    bookies_list = [cell.xpath('span')[0].attrib['data-bk'] for cell in header_cells[1:]]

    # Index 1 selects the all-history table (second tbodyToScroll on the page).
    table = page.xpath('//*/tbody[@class="tbodyToScroll"]')[1]
    for row in table.xpath('child::tr[@class="eventTableRow"]'):  # one date per row
        cells = row.xpath('child::td')
        date = cells[0].text
        odds_list_to_save = []
        for column, cell in enumerate(cells[1:]):
            # Every child <div> of a price cell is saved as its own record.
            for num in cell.xpath('child::div'):
                _odds = num.text
                _odds_fractional = convert_to_float(_odds)
                # Compare with None explicitly: a parsed value of 0.0 is a
                # real price and must still receive a decimal equivalent.
                if _odds_fractional is not None:
                    _odds_decimal = _odds_fractional + 1
                else:
                    _odds_decimal = None
                odds_list_to_save.append({'bookie_page': bookies_list[column],
                                          'movie': movie_url,
                                          'odds_date': date,
                                          'odds': _odds,
                                          'odds_fractional': _odds_fractional,
                                          'odds_decimal': _odds_decimal})
        save_many('oscars_odds_new', odds_list_to_save)

#source: https://ideone.com/ItifKv
def convert_to_float(frac_str):
    """Parse a decimal, fraction or mixed-number string into a float.

    Accepts plain numbers ('3', '2.5'), fractions ('5/2') and mixed numbers
    ('1 1/2', '-1 1/2'). Leading and trailing whitespace is ignored.

    Args:
        frac_str: Text to parse, or None.

    Returns:
        The parsed value as a float, or None when *frac_str* is None or
        contains no digit at all.

    Raises:
        ValueError: If the text contains a digit but is neither a number nor
            a single 'a/b' fraction.
        ZeroDivisionError: If the denominator is zero.
    """
    # None has no characters to inspect; report it as non-numeric instead of
    # raising TypeError while iterating over it.
    if frac_str is None:
        return None
    text = frac_str.strip()
    if not any(char.isdigit() for char in text):
        return None
    try:
        return float(text)
    except ValueError:
        pass

    num, denom = text.split('/')
    try:
        # Mixed number: "<whole> <numerator>" in front of the slash.
        leading, num = num.split(' ')
        whole = float(leading)
    except ValueError:
        # No separate whole part, just "numerator/denominator".
        whole = 0
    frac = float(num) / float(denom)
    # Keep the fractional part on the same side of zero as the whole part.
    return whole - frac if whole < 0 else whole + frac

#source: https://stackoverflow.com/questions/19859282/check-if-a-string-contains-a-number
def hasNumbers(inputString):
    """Return True if at least one character of *inputString* is a digit."""
    for character in inputString:
        if character.isdigit():
            return True
    return False
	from readypipe import requests, starting_task, subtask, schedule, save, save_many

	# NOTE(review): these constants repeat the ones defined earlier in this file.
	# Site root; scrape_movie prepends it to the relative option URLs it builds.
	BASE_URL = 'https://www.oddschecker.com'

	# Seed bet-history page for each award category, keyed by a short nominee label.
	URLS = {
	    'Roma': 'https://www.oddschecker.com/awards/oscars/best-picture/bet-history/roma',
	    'Cuaron': 'https://www.oddschecker.com/awards/oscars/best-director/bet-history/alfonso-cuaron',
	    'Malek': 'https://www.oddschecker.com/awards/oscars/best-actor/bet-history/rami-malek',
	    'Close': 'https://www.oddschecker.com/awards/oscars/best-actress/bet-history/glenn-close',
	    'Ali': 'https://www.oddschecker.com/awards/oscars/best-supporting-actor/bet-history/mahershala-ali',
	    'King': 'https://www.oddschecker.com/awards/oscars/best-supporting-actress/bet-history/regina-king',
	}

	# Nominee labels in scheduling order; identical to the insertion order of URLS.
	NOMS_LIST = list(URLS)

	# NOTE(review): duplicate of the load_awards defined earlier in this file;
	# consider removing one copy.
	@starting_task
	def load_awards():
	    """Schedule one ``scrape_movie`` subtask for every label in NOMS_LIST.

	    Each seed URL is looked up in URLS and scheduled with True as the
	    subtask's ``first`` argument.
	    """
	    for name in NOMS_LIST:
	        schedule('scrape_movie', (URLS[name], True))

	# NOTE(review): duplicate of the scrape_movie defined earlier in this file;
	# consider removing one copy.
	@subtask
	def scrape_movie(movie_url, first=False):
	    """Scrape one bet-history page and save every price found in its table.

	    Args:
	        movie_url: URL of the bet-history page to fetch.
	        first: When true, also schedule a ``scrape_movie`` subtask (with
	            ``first`` set to False) for every other option listed in the
	            page's ``<select class="field">`` element.

	    For each history row, the collected records are handed to ``save_many``
	    under the name ``'oscars_odds_new'``. Every record has the keys
	    ``bookie_page``, ``movie``, ``odds_date``, ``odds``,
	    ``odds_fractional`` and ``odds_decimal``.

	    Raises:
	        IndexError: From the ``[0]`` / ``[1]`` lookups when the select, the
	            header row or the second ``tbodyToScroll`` body is absent, or
	            when a row has more price cells than the header has
	            bookmakers (assuming ``xpath`` returns a list -- confirm
	            against the DOM library in use).
	    """
	    page = requests.get_dom_from_content(movie_url)

	    if first:
	        select = page.xpath('//*/select[@class="field"]')[0]
	        _data_pre_url = select.attrib['data-pre-url']
	        for option in select.xpath('child::option'):
	            option_url = BASE_URL + _data_pre_url + option.attrib['value']
	            # Skip the page currently being scraped so it is not queued again.
	            if option_url != movie_url:
	                schedule('scrape_movie', (option_url, False))

	    # The first header cell is the date column; the rest carry a bookmaker id.
	    bookies = page.xpath('//*/tr[@class="eventTableHeader"]')[0].xpath('child::td')
	    bookies_list = [bookie.xpath('span')[0].attrib['data-bk'] for bookie in bookies[1:]]

	    # Index 1 selects the all-history table (second tbodyToScroll on the page).
	    table = page.xpath('//*/tbody[@class="tbodyToScroll"]')[1]
	    for row in table.xpath('child::tr[@class="eventTableRow"]'):  # one date per row
	        odds = row.xpath('child::td')
	        date = odds[0].text
	        odds_list_to_save = []
	        for column, cell in enumerate(odds[1:]):
	            # Every child <div> of a price cell is saved as its own record.
	            for num in cell.xpath('child::div'):
	                _odds = num.text
	                _odds_fractional = convert_to_float(_odds)
	                # Compare with None explicitly: a parsed value of 0.0 is a
	                # real price and must still receive a decimal equivalent.
	                if _odds_fractional is not None:
	                    _odds_decimal = _odds_fractional + 1
	                else:
	                    _odds_decimal = None
	                odds_list_to_save.append({'bookie_page': bookies_list[column],
	                                          'movie': movie_url,
	                                          'odds_date': date,
	                                          'odds': _odds,
	                                          'odds_fractional': _odds_fractional,
	                                          'odds_decimal': _odds_decimal})
	        save_many('oscars_odds_new', odds_list_to_save)

	#source: https://ideone.com/ItifKv
	# NOTE(review): duplicate of the convert_to_float defined earlier in this
	# file; consider removing one copy.
	def convert_to_float(frac_str):
	    """Parse a decimal, fraction or mixed-number string into a float.

	    Accepts plain numbers ('3', '2.5'), fractions ('5/2') and mixed numbers
	    ('1 1/2', '-1 1/2'). Leading and trailing whitespace is ignored.

	    Args:
	        frac_str: Text to parse, or None.

	    Returns:
	        The parsed value as a float, or None when *frac_str* is None or
	        contains no digit at all.

	    Raises:
	        ValueError: If the text contains a digit but is neither a number
	            nor a single 'a/b' fraction.
	        ZeroDivisionError: If the denominator is zero.
	    """
	    # None has no characters to inspect; report it as non-numeric instead
	    # of raising TypeError while iterating over it.
	    if frac_str is None:
	        return None
	    text = frac_str.strip()
	    if not any(char.isdigit() for char in text):
	        return None
	    try:
	        return float(text)
	    except ValueError:
	        pass

	    num, denom = text.split('/')
	    try:
	        # Mixed number: "<whole> <numerator>" in front of the slash.
	        leading, num = num.split(' ')
	        whole = float(leading)
	    except ValueError:
	        # No separate whole part, just "numerator/denominator".
	        whole = 0
	    frac = float(num) / float(denom)
	    # Keep the fractional part on the same side of zero as the whole part.
	    return whole - frac if whole < 0 else whole + frac

	#source: https://stackoverflow.com/questions/19859282/check-if-a-string-contains-a-number
	# NOTE(review): duplicate of the hasNumbers defined earlier in this file;
	# consider removing one copy.
	def hasNumbers(inputString):
	    """Return True if at least one character of *inputString* is a digit."""
	    return any(char.isdigit() for char in inputString)