Skip to content

Instantly share code, notes, and snippets.

@enkeboll
Last active February 8, 2016 17:59
Show Gist options
  • Save enkeboll/c1fa7df2533a9939ec9a to your computer and use it in GitHub Desktop.
Save enkeboll/c1fa7df2533a9939ec9a to your computer and use it in GitHub Desktop.
Scrapes pro-football-reference.com for the scoring summary of every Super Bowl, in order to build "Super Bowl Boxes" statistics.
import csv
import requests
from bs4 import BeautifulSoup
# Root of the site being scraped; the page paths below are appended to it.
base_url = "http://www.pro-football-reference.com"
# Path of the first page to fetch. The scraping loop rebinds this name to the
# target of each page's "Next" link, and stops once it becomes falsy.
next_page = "/super-bowl/i.htm"
# The site has no "Next" link leading to Super Bowl XLIX, so that page is
# queued here and fetched once the chain of "Next" links runs out.
extra = ['/super-bowl/xlix.htm']
def pair(a, b):
    """Concatenate the string forms of ``a`` and ``b``, smaller value first.

    The arguments are compared with ``<`` as given (not after conversion to
    ``str``), so for two distinct comparable values the result does not
    depend on the order in which they are passed.
    """
    first, second = (a, b) if a < b else (b, a)
    return str(first) + str(second)
# Payout schedule applied to every game's pool.
POT_TOTAL = 1250            # pool per game; the remainder goes to the final score
START_PAYOUT = 50           # paid on the opening 0-0 score ('S' row)
HALFTIME_PAYOUT = 150       # paid on the score at the end of the first half ('H' row)
SCORE_CHANGE_PAYOUT = 37.5  # paid on every individual score change
# Period labels meaning the first half is over. Only '3rd' is confirmed by the
# scoring table this script was written against; '4th' and 'OT' are assumed to
# follow the same style -- TODO confirm against the site.
SECOND_HALF_LABELS = ('3rd', '4th', 'OT')


def _digits(home, away):
    """Return the box key for a score: the last digit of each side, via pair()."""
    return pair(str(home)[-1], str(away)[-1])


# One (bowl_number, payout_reason, digits, payout) tuple per payout.
scores = []
sb = 0           # running game number, incremented once per page visited
visited = set()  # page paths already fetched, so no game is counted twice
while next_page:
    sb += 1
    visited.add(next_page)
    pot = POT_TOTAL
    print(next_page)
    url = base_url + next_page

    # Fail loudly on network/HTTP problems instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'scoring'})
    if table is None:
        raise ValueError("no scoring table found at " + url)
    # The first row is skipped (presumably the column header).
    rows = table.find_all('tr')[1:]

    score_home, score_away = 0, 0
    td_cnt = 0             # extra score changes inserted for touchdown + PAT rows
    halftime_paid = False

    scores.append((sb, 'S', '00', START_PAYOUT))
    pot -= START_PAYOUT

    for scorechange, row in enumerate(rows):
        cols = row.find_all('td')
        # The last two cells hold the running totals after this scoring play.
        score_home_new = int(cols[-2].text)
        score_away_new = int(cols[-1].text)

        # The first row belonging to the second half marks halftime: the
        # score *before* this play is the halftime score. Paid once only.
        if not halftime_paid and cols[0].text in SECOND_HALF_LABELS:
            scores.append(
                (sb, 'H', _digits(score_home, score_away), HALFTIME_PAYOUT))
            pot -= HALFTIME_PAYOUT
            halftime_paid = True

        # A 7-point jump is a touchdown plus extra point listed as one row;
        # split it so the score after the touchdown alone is paid as well.
        # NOTE(review): an 8-point jump (touchdown + two-point conversion) is
        # still recorded as a single score change.
        if score_home_new - score_home == 7:
            after_td = (score_home_new - 1, score_away_new)
        elif score_away_new - score_away == 7:
            after_td = (score_home_new, score_away_new - 1)
        else:
            after_td = None
        if after_td is not None:
            scores.append((sb, str(scorechange + td_cnt + 1),
                           _digits(*after_td), SCORE_CHANGE_PAYOUT))
            pot -= SCORE_CHANGE_PAYOUT
            td_cnt += 1

        scores.append((sb, str(scorechange + td_cnt + 1),
                       _digits(score_home_new, score_away_new),
                       SCORE_CHANGE_PAYOUT))
        pot -= SCORE_CHANGE_PAYOUT
        score_home = score_home_new
        score_away = score_away_new

    # No second-half row was seen, so the score did not change after the
    # half: the halftime payout goes to the last known score.
    if not halftime_paid:
        scores.append(
            (sb, 'H', _digits(score_home, score_away), HALFTIME_PAYOUT))
        pot -= HALFTIME_PAYOUT

    # Whatever is left in the pool goes to the final score. The tracked totals
    # are used so an empty table yields 0-0 rather than a NameError or the
    # previous game's score.
    scores.append((sb, 'F', _digits(score_home, score_away), pot))

    # Follow the page's "Next" link; fall back to the manually queued pages
    # when there is none. Pages already fetched are never revisited.
    next_link = soup.find('a', href=True, text="Next")
    next_page = next_link.get('href') if next_link is not None else None
    if next_page in visited:
        next_page = None
    while not next_page and extra:
        candidate = extra.pop()
        if candidate not in visited:
            next_page = candidate
# Write the collected payout rows to a CSV file, column names first.
header = ['bowl_number','payout_reason', 'digits', 'payout']
with open('super_bowl_scores.csv','w') as out:
    writer = csv.writer(out)
    writer.writerow(header)
    writer.writerows(scores)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment