Skip to content

Instantly share code, notes, and snippets.

@chadmhorner
Created February 22, 2019 19:04
Show Gist options
  • Save chadmhorner/349c634f68f803d5af7fa3c2084bc03d to your computer and use it in GitHub Desktop.
Save chadmhorner/349c634f68f803d5af7fa3c2084bc03d to your computer and use it in GitHub Desktop.
from readypipe import requests, starting_task, subtask, schedule, save, save_many
# Site root; prepended to the relative paths read from each page's selector.
BASE_URL = 'https://www.oddschecker.com'

# Seed bet-history page for one nominee per award category, keyed by a short
# label for that nominee.
URLS = {
    'Roma': 'https://www.oddschecker.com/awards/oscars/best-picture/bet-history/roma',
    'Cuaron': 'https://www.oddschecker.com/awards/oscars/best-director/bet-history/alfonso-cuaron',
    'Malek': 'https://www.oddschecker.com/awards/oscars/best-actor/bet-history/rami-malek',
    'Close': 'https://www.oddschecker.com/awards/oscars/best-actress/bet-history/glenn-close',
    'Ali': 'https://www.oddschecker.com/awards/oscars/best-supporting-actor/bet-history/mahershala-ali',
    'King': 'https://www.oddschecker.com/awards/oscars/best-supporting-actress/bet-history/regina-king',
}

# Labels in the order they are scheduled; dicts preserve insertion order, so
# this is exactly the key order of URLS above.
NOMS_LIST = list(URLS)
@starting_task
def load_awards():
    """Seed the crawl with one ``scrape_movie`` subtask per entry in NOMS_LIST.

    Every seed is scheduled with ``first=True`` so that ``scrape_movie`` also
    queues the other pages offered by that seed page's selector.
    """
    # Generator (not a list) so each URL is looked up right before it is
    # scheduled, one nominee at a time.
    seed_urls = (URLS[nominee] for nominee in NOMS_LIST)
    for seed_url in seed_urls:
        schedule('scrape_movie', (seed_url, True))
@subtask
def scrape_movie(movie_url, first=False):
    """Scrape one bet-history page and save its odds, one batch per date row.

    Args:
        movie_url: Absolute URL of the bet-history page to fetch.
        first: True for a seed page. In that case every other page listed in
            the page's ``<select class="field">`` is scheduled as a follow-up
            ``scrape_movie`` subtask with ``first=False``, so follow-up pages
            never fan out again.

    Raises:
        IndexError / KeyError: if the page lacks the expected selector, header
            row, second ``tbodyToScroll`` table, or attributes (for example
            after a site layout change), or if a row has more odds cells than
            the header has bookies.

    Each saved record holds the bookie code, the page URL, the row's date
    text, the raw odds text, the odds parsed as a float and the decimal odds
    (parsed value + 1). The parsed and decimal values are None when the cell
    text cannot be parsed.
    """
    page = requests.get_dom_from_content(movie_url)

    if first:
        select = page.xpath('//*/select[@class="field"]')[0]
        pre_url = select.attrib['data-pre-url']
        for option in select.xpath('child::option'):
            option_url = BASE_URL + pre_url + option.attrib['value']
            # The seed page appears among its own options; don't re-add it.
            if option_url != movie_url:
                schedule('scrape_movie', (option_url, False))

    # Header row: the first cell is the date column, the rest are bookies.
    header_cells = page.xpath('//*/tr[@class="eventTableHeader"]')[0].xpath('child::td')
    bookies_list = [
        cell.xpath('span')[0].attrib['data-bk'] for cell in header_cells[1:]
    ]

    # The second tbodyToScroll on the page is the "all history" table.
    table = page.xpath('//*/tbody[@class="tbodyToScroll"]')[1]
    for row in table.xpath('child::tr[@class="eventTableRow"]'):  # one date per row
        cells = row.xpath('child::td')
        date = cells[0].text
        odds_list_to_save = []
        # cells[1:] line up positionally with bookies_list.
        for column, cell in enumerate(cells[1:]):
            for num in cell.xpath('child::div'):
                raw_odds = num.text
                # An element with no text reports None (presumably lxml
                # semantics -- confirm for the ReadyPipe DOM); record it as
                # unparsed instead of crashing inside convert_to_float.
                if raw_odds is None:
                    odds_fractional = None
                else:
                    odds_fractional = convert_to_float(raw_odds)
                # Compare against None explicitly: 0.0 is a real parsed value
                # and must still yield decimal odds of 1.0.
                if odds_fractional is not None:
                    odds_decimal = odds_fractional + 1
                else:
                    odds_decimal = None
                odds_list_to_save.append({
                    'bookie_page': bookies_list[column],
                    'movie': movie_url,
                    'odds_date': date,
                    'odds': raw_odds,
                    'odds_fractional': odds_fractional,
                    'odds_decimal': odds_decimal,
                })
        save_many('oscars_odds_new', odds_list_to_save)
# source: https://ideone.com/ItifKv
def convert_to_float(frac_str):
    """Parse a number, fraction or mixed number into a float.

    Accepted forms: a plain number (``"3"``, ``"2.5"``), a fraction
    (``"5/2"``) and a mixed number (``"1 1/2"``, ``"-1 1/2"``).

    Args:
        frac_str: Text to parse. May be None or empty.

    Returns:
        The parsed value as a float, or None when the text is None, contains
        no digit, or is not one of the accepted forms (no single ``/``,
        non-numeric parts, zero denominator).
    """
    if not hasNumbers(frac_str):
        return None
    try:
        return float(frac_str)
    except ValueError:
        pass  # not a plain number; fall through to fraction parsing

    parts = frac_str.split('/')
    if len(parts) != 2:
        # Digits but no single slash (e.g. "5 to 2"): nothing we can parse.
        return None
    num, denom = parts

    whole = 0
    # A mixed number carries its whole part before the numerator. Splitting
    # on any whitespace run tolerates padding and repeated spaces.
    pieces = num.split()
    if len(pieces) == 2:
        leading, num = pieces
        try:
            whole = float(leading)
        except ValueError:
            whole = 0  # non-numeric prefix is ignored, fraction is still used

    try:
        frac = float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        return None
    # For a negative mixed number the fraction extends away from zero.
    return whole - frac if whole < 0 else whole + frac


# source: https://stackoverflow.com/questions/19859282/check-if-a-string-contains-a-number
def hasNumbers(inputString):
    """Return True if *inputString* contains at least one digit character.

    None and the empty string yield False rather than raising.
    """
    if not inputString:
        return False
    return any(char.isdigit() for char in inputString)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment