Skip to content

Instantly share code, notes, and snippets.

@jweissbock
Created March 28, 2012 01:06
Show Gist options
  • Save jweissbock/2222606 to your computer and use it in GitHub Desktop.
Save jweissbock/2222606 to your computer and use it in GitHub Desktop.
Scrape TimeOnIce.com
import urllib2
from bs4 import BeautifulSoup
import datetime
# Scrape the last row of the first table on each timeonice.com "mplayershots"
# page (one page per team / season / score situation) and append selected
# cells of that row to stats.csv as one comma-separated line. The elapsed
# wall-clock time of the whole run is printed at the end.
start = datetime.datetime.now()

# NOTE(review): the list starts at "MIN"; any team codes that sort before it
# are not scraped -- confirm this is intentional.
teams = ["MIN", "MTL", "N.J", "NSH", "NYI", "NYR",
         "OTT", "PHI", "PHX", "PIT", "S.J", "STL", "T.B", "TOR", "VAN", "WSH"]
seasons = ["0809", "0910", "1011"]
# Maps the label written to the CSV to the suffix inserted into the page name
# ("mplayershots<season>tied.php" vs. "mplayershots<season>.php").
tied = {'tied': 'tied', 'close': ''}

# Indexes of the <td> cells copied into the CSV, in output order.
_STAT_COLUMNS = (2, 3, 4, 5, 7, 8, 10, 11)

for team in teams:
    for season in seasons:
        for label in tied:
            url = ("http://timeonice.com/mplayershots" + season + tied[label]
                   + ".php?team=" + team + "&first=20001&last=21230&hv=0")
            print("opening: " + url)

            response = urllib2.urlopen(url)
            try:
                html = response.read()
            finally:
                # Release the connection even if the read fails.
                response.close()

            # NOTE(review): no parser is named here, so the result depends on
            # which parser library bs4 finds installed -- consider pinning one.
            soup = BeautifulSoup(html)
            print("analyzing")

            # Only the last <tr> of the first <table> is used.
            table = soup.table
            rows = table.find_all('tr') if table is not None else []
            cells = rows[-1].find_all('td') if rows else []
            if len(cells) <= max(_STAT_COLUMNS):
                # Without this guard a page with no table (or a short row)
                # aborts the whole run with an AttributeError/IndexError.
                print("skipping, last table row has too few cells: " + url)
                continue

            fields = [season, team, label]
            fields.extend(cells[n].text for n in _STAT_COLUMNS)
            line = ",".join(str(x) for x in fields)

            # Append one row per page so finished rows survive a later crash;
            # the with-block closes the file even if the write raises.
            with open('stats.csv', 'a') as out:
                out.write(line + "\n")
            print("done")

end = datetime.datetime.now()
print(end - start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment