Created
February 22, 2019 19:04
-
-
Save chadmhorner/349c634f68f803d5af7fa3c2084bc03d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from readypipe import requests, starting_task, subtask, schedule, save, save_many | |
# Site root; scrape_movie prepends it to the relative paths found on a page.
BASE_URL = 'https://www.oddschecker.com'

# Bet-history page of one nominee per award category, keyed by a short label.
URLS = {
    'Roma': 'https://www.oddschecker.com/awards/oscars/best-picture/bet-history/roma',
    'Cuaron': 'https://www.oddschecker.com/awards/oscars/best-director/bet-history/alfonso-cuaron',
    'Malek': 'https://www.oddschecker.com/awards/oscars/best-actor/bet-history/rami-malek',
    'Close': 'https://www.oddschecker.com/awards/oscars/best-actress/bet-history/glenn-close',
    'Ali': 'https://www.oddschecker.com/awards/oscars/best-supporting-actor/bet-history/mahershala-ali',
    'King': 'https://www.oddschecker.com/awards/oscars/best-supporting-actress/bet-history/regina-king',
}

# Labels iterated by load_awards. Derived from URLS (insertion order) instead of
# being repeated by hand, so every label is guaranteed to have a URL.
NOMS_LIST = list(URLS)
@starting_task
def load_awards():
    """Queue one ``scrape_movie`` subtask for every label in ``NOMS_LIST``.

    Each subtask is scheduled with the label's URL from ``URLS`` and
    ``first=True``, which makes ``scrape_movie`` also schedule the other
    options listed in that page's drop-down.
    """
    for nominee in NOMS_LIST:
        seed_url = URLS[nominee]
        schedule('scrape_movie', (seed_url, True))
@subtask
def scrape_movie(movie_url, first=False):
    """Scrape the bet-history table at ``movie_url`` and save its odds.

    Args:
        movie_url: URL of a bet-history page.
        first: when true, also schedule a ``scrape_movie`` subtask (with
            ``first=False``) for every other option of the page's
            ``select.field`` drop-down.

    For every row of the history table, one record per quoted price is built
    (bookie, page URL, row date, raw odds text, fractional odds, decimal odds)
    and the row's records are written to ``oscars_odds_new`` with
    ``save_many``.

    Raises:
        IndexError: if the expected select/header/table elements are missing,
            or a row has more odds cells than the header has bookies.
    """
    page = requests.get_dom_from_content(movie_url)

    if first:
        select = page.xpath('//*/select[@class="field"]')[0]
        pre_url = select.attrib['data-pre-url']
        for option in select.xpath('child::option'):
            option_url = BASE_URL + pre_url + option.attrib['value']
            # Don't want to re-add the page being scraped (Roma, e.g.).
            if option_url != movie_url:
                schedule('scrape_movie', (option_url, False))

    # Header row: the first cell is the date column, the rest name the bookies.
    header_cells = page.xpath('//*/tr[@class="eventTableHeader"]')[0].xpath('child::td')
    bookies_list = [cell.xpath('span')[0].attrib['data-bk'] for cell in header_cells[1:]]

    table = page.xpath('//*/tbody[@class="tbodyToScroll"]')[1]  # all history table
    for row in table.xpath('child::tr[@class="eventTableRow"]'):  # one date per row
        cells = row.xpath('child::td')
        date = cells[0].text
        odds_list_to_save = []
        for column, cell in enumerate(cells[1:]):
            # Indexed lookup (not zip) so a row wider than the header fails
            # loudly instead of silently dropping columns.
            bookie = bookies_list[column]
            for num in cell.xpath('child::div'):
                raw_odds = num.text
                # NOTE(review): presumably .text is None for a <div> without
                # direct text; skip the conversion rather than crash on it.
                fractional = convert_to_float(raw_odds) if raw_odds is not None else None
                # Compare against None explicitly: fractional odds of 0.0 are
                # a real value (decimal 1.0), not a missing one.
                decimal = fractional + 1 if fractional is not None else None
                odds_list_to_save.append({'bookie_page': bookie,
                                          'movie': movie_url,
                                          'odds_date': date,
                                          'odds': raw_odds,
                                          'odds_fractional': fractional,
                                          'odds_decimal': decimal})
        save_many('oscars_odds_new', odds_list_to_save)
#source: https://ideone.com/ItifKv
def convert_to_float(frac_str):
    """Parse odds text into a float.

    Accepted forms are a plain number (``'7'``, ``'2.5'``), a fraction
    (``'5/2'``) and a mixed number (``'1 1/2'``, ``'-1 1/2'``); surrounding
    whitespace is ignored.

    Args:
        frac_str: the text to parse, or None.

    Returns:
        The parsed value, or None when ``frac_str`` is None, contains no
        digit, is not one of the accepted forms, or has a zero denominator.
    """
    if not hasNumbers(frac_str):
        return None
    text = frac_str.strip()
    try:
        return float(text)
    except ValueError:
        pass  # not a plain number; try "<whole> <num>/<denom>" below

    # Split an optional whole part off the fraction: '1 1/2' -> ('1', '1/2'),
    # '5/2' -> ('', '5/2').
    whole_text, _, fraction_text = text.rpartition(' ')
    whole_text = whole_text.strip()
    numerator, slash, denominator = fraction_text.partition('/')
    if not slash:
        return None
    try:
        whole = float(whole_text) if whole_text else 0.0
        frac = float(numerator) / float(denominator)
    except (ValueError, ZeroDivisionError):
        return None
    # Use the textual sign so that '-0 1/2' is negative too (-0.0 < 0 is False).
    return whole - frac if whole_text.startswith('-') else whole + frac


#source: https://stackoverflow.com/questions/19859282/check-if-a-string-contains-a-number
def hasNumbers(inputString):
    """Return True if ``inputString`` contains at least one digit character.

    None and the empty string yield False.
    """
    if not inputString:
        return False
    return any(char.isdigit() for char in inputString)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment