Skip to content

Instantly share code, notes, and snippets.

@gjreda
Created November 11, 2018 01:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gjreda/b3f2fa16096dfc8990f9b8e5af72f834 to your computer and use it in GitHub Desktop.
Save gjreda/b3f2fa16096dfc8990f9b8e5af72f834 to your computer and use it in GitHub Desktop.
Scraping Nick Saban's seasons as Alabama head coach
"""
Scraping Nick Saban's seasons as Alabama head coach
I was curious what percentage of his tenure Alabama has spent ranked #1.
"""
from collections import Counter
from bs4 import BeautifulSoup
import requests
def to_numeric(s):
    """Convert *s* to an ``int``, or return ``None`` if it cannot be converted.

    Args:
        s: Value to convert, e.g. the text content of a table cell.

    Returns:
        ``int(s)`` when the conversion succeeds, otherwise ``None``.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # Only swallow the errors int() raises for unconvertible input
        # (non-numeric text, None, infinities). A bare ``except`` would also
        # hide KeyboardInterrupt and SystemExit.
        return None
def parse_season(season: int, timeout: float = 30.0):
    """Scrape the poll table from Alabama's schedule page for one season.

    Fetches the sports-reference.com schedule page for ``season`` and reads
    the first two rows of the table whose id is ``polls``.

    Args:
        season: Season year, interpolated into the schedule URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict whose keys are ``"<data-stat>/<season>"`` (one per ``th`` in
        the table's first row) and whose values are the integers parsed from
        the positionally matching children of the second row, or ``None``
        where a cell's text is not an integer.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        ValueError: If the page has no ``polls`` table, or the table has
            fewer than two rows.
    """
    url = f"https://www.sports-reference.com/cfb/schools/alabama/{season}-schedule.html"
    # Without a timeout, requests may block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    table = soup.find('table', id='polls')
    if table is None:
        raise ValueError(f"no 'polls' table found at {url}")
    rows = table.find_all('tr')
    if len(rows) < 2:
        raise ValueError(f"'polls' table at {url} has fewer than two rows")

    # Suffix every header with the season so that keys from different
    # seasons do not collide when the caller merges the dicts.
    headers = [th.get('data-stat') + f'/{season}'
               for th in rows[0].find_all('th', recursive=False)]
    # Walk the direct children of the second row; they are paired with the
    # header cells purely by position.
    ranks = [to_numeric(td.get_text()) for td in rows[1].children]
    return dict(zip(headers, ranks))
def main():
    """Print the fraction of scraped poll entries spent at each rank 1-25.

    Collects the poll data for seasons 2007 through 2018 into one dict and,
    for every rank from 1 to 25, prints the rank followed by the share of
    all collected entries holding that rank. The denominator is the total
    number of entries, including those whose value is ``None``.

    Raises:
        SystemExit: If no poll entries were collected at all.
    """
    career = {}
    for season in range(2007, 2019):
        career.update(parse_season(season))

    total = len(career)
    if total == 0:
        # Avoid a bare ZeroDivisionError when nothing could be scraped.
        raise SystemExit("No poll data was scraped; nothing to report.")

    counts = Counter(career.values())
    # The upper bound is exclusive: 26 is needed so rank 25 is reported too.
    for rank in range(1, 26):
        weeks_at_rank = counts.get(rank, 0)
        print(rank, weeks_at_rank / total)
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment