Skip to content

Instantly share code, notes, and snippets.

@chadmhorner
Created January 31, 2019 22:56
Show Gist options
  • Save chadmhorner/161122b594082c2407d2cad57df6bc9d to your computer and use it in GitHub Desktop.
Save chadmhorner/161122b594082c2407d2cad57df6bc9d to your computer and use it in GitHub Desktop.
from readypipe import requests, starting_task, subtask, schedule, save
# Origin of the site being scraped; relative game links are appended to it.
BASE_URL = 'https://www.pro-football-reference.com'
# Weekly scoreboard page for the 2018 season; the ``%s`` placeholder is
# filled with the week number. Built from BASE_URL so the host is defined
# in exactly one place.
SCOREBOARD_URL = BASE_URL + '/years/2018/week_%s.htm'
@starting_task
def sweep_game_urls():
    """Schedule a ``scrape_scores`` subtask for every game linked from the weekly scoreboards.

    Visits the scoreboard page of each week from 1 through 17, collects the
    game-link cells on it and schedules one ``scrape_scores`` run per game
    with the absolute game URL.

    Raises:
        IndexError: If a game-link cell contains no ``<a>`` descendant.
    """
    last_week = 17
    for week_number in range(1, last_week + 1):
        # Fetch and parse the scoreboard page for this week.
        scoreboard_page = requests.get_dom_from_content(SCOREBOARD_URL % week_number)
        # Each matching cell wraps the link to one game's page.
        for link_cell in scoreboard_page.xpath('//*/td[@class="right gamelink"]'):
            anchor = link_cell.xpath('descendant::a')[0]
            # The href is site-relative, so prefix it with the site origin.
            schedule('scrape_scores', BASE_URL + anchor.attrib['href'])
@subtask
def scrape_scores(game_url):
    """Scrape the per-quarter line score of one game and save a record per team.

    Fetches the page at ``game_url`` and looks for the line-score table. If
    the table is present, its first two rows are read: the first row is
    recorded as the ``'away'`` team and the second as the ``'home'`` team.
    For each row a ``'scores'`` record is saved with the team name, the
    points scored in each of the four quarters and the running total after
    each quarter. Cells beyond index 5 of a row are not read. Nothing is
    saved when the table is missing.

    Args:
        game_url: URL of the game page to fetch; also stored in each record.

    Raises:
        IndexError: If the table has fewer than two rows, a row has fewer
            than six cells, or the team cell contains no ``<a>`` descendant.
        ValueError: If a quarter cell's text is not an integer.
        TypeError: If a quarter cell has no text.
    """
    page = requests.get_dom_from_content(game_url)
    tables = page.xpath('//*/table[@class="linescore nohover stats_table no_freeze"]/tbody')
    if not tables:
        return

    # The row list does not change between teams, so look it up once.
    rows = tables[0].xpath('descendant::tr')

    # Row 0 is labelled 'away' and row 1 'home'. Indexing (rather than
    # zipping) keeps the IndexError when a row is missing.
    for row_index, home_away in enumerate(('away', 'home')):
        cells = rows[row_index].xpath('descendant::td')
        # Cells 2..5 hold the points for quarters one through four.
        q1 = int(cells[2].text)
        q2 = int(cells[3].text)
        q3 = int(cells[4].text)
        q4 = int(cells[5].text)
        save('scores',
             {
                 # Cell 1 wraps the team name in a link.
                 'team': cells[1].xpath('descendant::a')[0].text,
                 'game_url': game_url,
                 'home_away': home_away,
                 'first_quarter_points': q1,
                 'second_quarter_points': q2,
                 'third_quarter_points': q3,
                 'fourth_quarter_points': q4,
                 # Running totals at the end of each quarter.
                 'first_quarter_total': q1,
                 'second_quarter_total': q1 + q2,
                 'third_quarter_total': q1 + q2 + q3,
                 'fourth_quarter_total': q1 + q2 + q3 + q4,
             })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment