Skip to content

Instantly share code, notes, and snippets.

@jweissbock
Last active January 3, 2016 02:19
Show Gist options
  • Save jweissbock/8395285 to your computer and use it in GitHub Desktop.
Save jweissbock/8395285 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import requests
import re
def getYearIDs(year):
    """Placeholder that is not implemented yet.

    The ``year`` argument is accepted but ignored, and None is returned.

    NOTE(review): judging by the name, this is presumably meant to collect
    the game ids for one season -- confirm before implementing.
    """
    return None
def writeLine(fileName, listOfLists):
    """Placeholder that is not implemented yet.

    Both arguments are accepted but ignored; nothing is written and None is
    returned.

    NOTE(review): presumably intended to write each inner list of
    ``listOfLists`` as one line of ``fileName`` -- confirm before
    implementing.
    """
    return None
def _periodRow(gameid, side, row, width=9):
    """Return ``[gameid, side] + cell texts`` for one per-period table row.

    Rows that come out shorter than ``width`` are padded with ``u'0'``
    inserted immediately before the last cell, so the last cell (presumably
    the total -- TODO confirm) stays at the end of the list.
    """
    values = [gameid, side] + [cell.text for cell in row.findAll('td')]
    missing = width - len(values)
    if missing > 0:
        insertAt = len(values) - 1
        values[insertAt:insertAt] = [u'0'] * missing
    return values


def _rosterAndGoalies(gameid, side, rows):
    """Split one team's table rows into (skater rows, goalie rows).

    The last two rows are treated as goalies; every earlier row is a skater
    unless its second cell is a lone non-breaking space (an empty slot).
    Each output row is ``[str(gameid), side] + cell texts``.
    """
    prefix = [str(gameid), side]
    skaters = []
    for row in rows[:-2]:
        cells = row.findAll('td')
        if cells[1].text != u'\xa0':
            skaters.append(prefix + [cell.text for cell in cells])
    goalies = [prefix + [cell.text for cell in row.findAll('td')]
               for row in rows[-2:]]
    return skaters, goalies


def parseSheet(gameid):
    """Download one OHL game summary page and print the data parsed from it.

    gameid -- game identifier substituted into the schedule URL.

    Prints, in this order: the game id, the date, scoring by period (away,
    home), shots by period (away, home), skater rosters (away, home),
    goalies (away, home), the goal records, the on-ice players for each
    goal, and the penalties.  Returns None.

    Page elements are addressed by fixed position, so an IndexError (or a
    KeyError for a missing ``onmouseover`` attribute) is raised when the
    page does not have the expected layout.  Errors raised by
    ``requests.get`` propagate unchanged.
    """
    url = "http://ontariohockeyleague.com/schedule/show/game/%s" % (gameid)
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html5lib")

    # game summary meta information
    date = soup.findAll("td", "content")[0].findAll('b')[1].text
    print(gameid)
    print(date)
    # need to get home / away teams here for meta - can get from next section

    # scoring by period (first row is away, second row is home)
    topTwo = soup.findAll("table", "margin10 floatLeft")
    scoringRows = topTwo[0].findAll('tr', 'light')
    awayScoring = _periodRow(gameid, 'Away', scoringRows[0])
    homeScoring = _periodRow(gameid, 'Home', scoringRows[1])
    print(awayScoring)
    print(homeScoring)

    # shots by period
    shotRows = topTwo[1].findAll('tr', 'light')
    awayShots = _periodRow(gameid, 'Away', shotRows[0])
    homeShots = _periodRow(gameid, 'Home', shotRows[1])
    print(awayShots)
    print(homeShots)

    # rosters and goalies of both teams
    middleTables = soup.findAll('td', valign="top")  # away is 0, home is 1
    awayRoster, awayGoalies = _rosterAndGoalies(
        gameid, 'Away', middleTables[0].findAll('tr', 'light'))
    # The home rows used to be tagged 'Away' as well (copy-paste slip).
    homeRoster, homeGoalies = _rosterAndGoalies(
        gameid, 'Home', middleTables[1].findAll('tr', 'light'))
    print(awayRoster)
    print(homeRoster)
    print(awayGoalies)
    print(homeGoalies)

    # scoring data
    scoringOutput = []
    onIceGoals = []
    for goal in middleTables[2].findAll('tr', 'light'):
        cells = goal.findAll('td')
        scoringOutput.append(
            [str(gameid)] + [link.text for link in cells[0].findAll('a')])
        # The on-ice (plus/minus) table is HTML embedded, between single
        # quotes, in the onmouseover attribute of the row's first "#" link.
        tooltip = goal.findAll('a', href="#")[0]['onmouseover']
        start = tooltip.find("'")
        end = tooltip.rfind("'")
        # NOTE(review): no parser is named here, so bs4 picks whichever one
        # is installed; kept as-is because the fragment's structure cannot
        # be verified from this file.
        tipSoup = BeautifulSoup(tooltip[start + 1:end])
        teams = tipSoup.findAll("td", "dark")
        onIceData = tipSoup.findAll("td", "light")
        plus = []
        minus = []
        for line in onIceData[0].findAll("tr"):
            plus += line.text.split(" ", 1)
        for line in onIceData[1].findAll("tr"):
            minus += line.text.split(" ", 1)
        onIceGoals.append(
            [str(gameid), teams[0].text] + plus + [teams[1].text] + minus)
    print(scoringOutput)
    print(onIceGoals)

    # penalties data
    penaltiesOutput = []
    for pen in middleTables[3].findAll('tr', 'light'):
        text = pen.text
        per = pen.findAll('i')[0].text
        person = pen.findAll("a")[0].text
        clock = re.findall(r'\b\d?\d:\d\d\b', text)[0]
        penLen = re.findall(r'\b\d min\b', text)[0]
        penTeam = re.findall(r'\b\w\w\w\b', text)[0]
        # Infraction name: from two characters past the last "-" up to the
        # last ",".
        thePenalty = text[text.rfind("-") + 2:text.rfind(",")]
        penaltiesOutput.append(
            [per, penTeam, person, clock, penLen, thePenalty])
    print(penaltiesOutput)

    # shootout data
    if "shootout" in middleTables[4].text.lower():
        pass  # TODO: shootout parsing is not implemented yet

    # Still to do:
    # PP / PK data
    # arena
    # stars
    # on ice officials
    # off ice officials
# Ad-hoc manual run: fetch and print the parsed summary for game 63565.
# NOTE(review): this call sits at module level, so it runs (and performs a
# network request) every time the file is executed or imported.
parseSheet(63565)
# 66528 -- presumably another game id used for manual testing (TODO confirm)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment