Skip to content

Instantly share code, notes, and snippets.

@amcclosky
Forked from ramhiser/download-espn-mlb-standings.py
Last active December 21, 2015 16:09
Show Gist options
  • Save amcclosky/6331174 to your computer and use it in GitHub Desktop.
Save amcclosky/6331174 to your computer and use it in GitHub Desktop.
# The following script scrapes ESPN's MLB Standings Grid and writes the
# standings for each American League (AL) team to a CSV file, which has the following
# format:
# Team, Opponent, Wins, Losses
from bs4 import BeautifulSoup
import urllib2
import re
import csv
csv_filename = 'AL-standings.csv'
year = '2013'
url = 'http://espn.go.com/mlb/standings/grid/_/year/' + year

# Downloads the standings grid. The Python 2 urllib2 response object is not a
# context manager, so it is closed explicitly once the body has been read.
page = urllib2.urlopen(url)
try:
    html = page.read()
finally:
    page.close()

# Names the parser explicitly so the parse does not depend on which optional
# parsers happen to be installed alongside BeautifulSoup.
soup = BeautifulSoup(html, 'html.parser')

# Extracts the table for the American League (AL) and the rows for each team.
# Fails with a clear message if the page layout is not what is expected,
# instead of an AttributeError on None further down.
AL_heading = soup.find(text=re.compile("American"))
AL_table = AL_heading.find_parent("table") if AL_heading is not None else None
if AL_table is None:
    raise ValueError(
        'Could not locate the American League table at {0}'.format(url))
AL_rows = AL_table.findAll('tr', class_=re.compile("team"))

# Creates a list of the AL teams (one name per row, taken from the row's <b>
# tag) and then appends NL for National League.
# NOTE(review): this assumes the win-loss columns of each row appear in the
# same order as the team rows, followed by one NL column -- confirm against
# the page markup.
AL_row_teams = [team_row.find('b').text for team_row in AL_rows]
AL_teams = AL_row_teams + ["NL"]

# Opens a CSV file for the AL standings. Binary mode is what the Python 2 csv
# module expects.
with open(csv_filename, 'wb') as f:
    csv_out = csv.writer(f)
    csv_out.writerow(['Team', 'Opponent', 'Wins', 'Losses'])

    # For each team in the AL table, identifies the team's name, the opponent,
    # and their wins and losses (WL) against that opponent. Then outputs the
    # results to the open CSV file
    for team, team_row in zip(AL_row_teams, AL_rows):
        # A cell has the following form:
        # <td align="right">
        # 7-9</td>
        WL_cells = team_row.findAll('td', align="right")

        # Extracts the values for both wins and losses from each WL table
        # cell. strip() removes all surrounding whitespace, not only newlines,
        # so the CSV values carry no stray spaces or tabs.
        wins_losses = [td_cell.text.strip().split('-') for td_cell in WL_cells]

        # Every opponent needs a cell; report a short row explicitly rather
        # than failing with a bare IndexError.
        if len(wins_losses) < len(AL_teams):
            raise ValueError(
                'Expected at least {0} win-loss cells for {1}, found {2}'.format(
                    len(AL_teams), team, len(wins_losses)))

        # Pairs each opponent with its cell; the team's own column is skipped.
        for al_team, record in zip(AL_teams, wins_losses):
            if team != al_team:
                csv_out.writerow([team, al_team, record[0], record[1]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment