Skip to content

Instantly share code, notes, and snippets.

@jweissbock
Created April 24, 2013 01:53
Show Gist options
  • Save jweissbock/5448995 to your computer and use it in GitHub Desktop.
Save jweissbock/5448995 to your computer and use it in GitHub Desktop.
Gets the data daily for the teams before and after the games. getData.py was set up as a cron job and was able to figure out which information to grab for each team — bit of a hack job. dailyData.py gets the data and stats for each team before the game; normally you would pass a list of teams via the command line. dailyScores.py gets the scores of the…
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys
"""teams = ['Washington', 'Boston', 'NY Rangers', 'Pittsburgh', 'Ottawa', 'Buffalo', 'Minnesota', 'Colorado', 'Winnipeg', 'Toronto', 'Montreal',
'New Jersey', 'Carolina', 'Tampa Bay', 'Phoenix', 'Columbus', 'NY Islanders', 'Florida', 'Anaheim', 'St Louis', 'Chicago',
'Dallas', 'Detroit', 'Vancouver', 'San Jose', 'Los Angeles']"""
teams = sys.argv[1:]
if len(teams) == 0:
print "No supplied teams"
sys.exit()
# so far doesnt like NY Islanders, and CGY but only in behindthenet sites
finalData = [[None]*11 for _ in range(len(teams))]
teamToAbrv = {'Phoenix': "PHX", 'Philadelphia': 'PHI', 'Ottawa': 'OTT', 'NY Rangers': "NYR", 'Nashville': 'NSH', 'NY Islanders': 'NYI',
'Pittsburgh': 'PIT', 'San Jose': "S.J", 'Washington': 'WSH', 'Vancouver': 'VAN', 'Toronto': 'TOR', 'St Louis': 'STL',
'Tampa Bay': 'T.B', 'New Jersey': 'N.J', 'Montreal': "MTL", "Columbus": 'CBJ', 'Calgary': 'CGY', 'Carolina': "CAR",
'Buffalo': 'BUF', 'Winnipeg': 'WPG', 'Boston': 'BOS', 'Chicago': 'CHI', 'Colorado': 'COL', 'Los Angeles': 'L.A',
'Minnesota': 'MIN', 'Florida': 'FLA', 'Edmonton': 'EDM', 'Dallas': 'DAL', 'Detroit': 'DET', 'Anaheim': "ANA",
'Winnipeg': 'ATL'}
todaysTeams = [teamToAbrv[k] for k in teams if k in teamToAbrv]
teams = [t.lower() for t in teams]
# For each requested team, pull its stats into the matching finalData row.
# Row layout: [Team, FenwickClose, GF, GA, PP%, PK%, sh%, sv%, winstreak, standings, 5-5 F/A]

# --- Fenwick Close (behindthenet.ca) ---
fenwickURL = 'http://behindthenet.ca/fenwick_2012.php'
request = urllib2.Request(fenwickURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
# Skip the two header rows, then scan every data row for our teams.
for dataRow in soup.findAll('tr')[2:]:
    cells = dataRow.findAll('td')
    abbrev = cells[0].text
    # Column 6 of a matching row holds the Fenwick Close value.
    if abbrev in todaysTeams:
        finalData[todaysTeams.index(abbrev)][1] = cells[6].text
"""# goals for
gfURL = 'http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAAll&sort=avgGoalsPerGame&viewName=goalsFor'
request = urllib2.Request(gfURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
allTDs = r.findAll('td')
t = allTDs[1].text.lower()
# if a team in a row is a team we are looking for
# then store its appropriate value into the final data
if t in teams:
tKey = teams.index(t)
finalData[tKey][2] = allTDs[14].text
# goals against
gaURL = 'http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAAll&sort=avgGoalsAgainstPerGame&viewName=goalsAgainst'
request = urllib2.Request(gaURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
rows = soup.findAll('table', 'data stats')[0].findAll('tr')[2:]
# loop through each row
for r in rows:
allTDs = r.findAll('td')
t = allTDs[1].text.lower()
# if a team in a row is a team we are looking for
# then store its appropriate value into the final data
if t in teams:
tKey = teams.index(t)
finalData[tKey][3] = allTDs[14].text
"""
# --- Power-play %, penalty-kill %, and 5-on-5 F/A ratio (nhl.com) ---
ppURL = "http://www.nhl.com/ice/teamstats.htm"
request = urllib2.Request(ppURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
statsTable = soup.findAll('table', 'data stats')[0]
# First two rows are headers; the rest are one row per team.
for dataRow in statsTable.findAll('tr')[2:]:
    cells = dataRow.findAll('td')
    name = cells[1].text.lower()
    if name in teams:
        idx = teams.index(name)
        finalData[idx][4] = cells[11].text   # PP%
        finalData[idx][5] = cells[12].text   # PK%
        finalData[idx][10] = cells[10].text  # 5-on-5 F/A ratio
# --- Shooting % and save % (behindthenet.ca PDO page) ---
PDOurl = 'http://www.behindthenet.ca/2012/team_data3.php'
request = urllib2.Request(PDOurl)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
# Two header rows precede the per-team data rows.
for dataRow in soup.findAll('tr')[2:]:
    cells = dataRow.findAll('td')
    abbrev = cells[0].text
    if abbrev in todaysTeams:
        idx = todaysTeams.index(abbrev)
        finalData[idx][6] = cells[16].text.strip()  # sh%
        finalData[idx][7] = cells[19].text.strip()  # sv%
# --- Win streak, standings position, and GF/GA (tsn.ca standings) ---
winURL = "http://www.tsn.ca/nhl/standings/"
request = urllib2.Request(winURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
standingsTable = soup.findAll('table')[0]
bodies = standingsTable.findAll('tbody')
# Row 8 of each conference tbody is a divider row, so stitch together
# the 8 rows above it and the 7 below, first conference then second.
teamRows = []
for body in (bodies[0], bodies[1]):
    trs = body.findAll('tr')
    teamRows = teamRows + trs[0:8] + trs[9:16]
for dataRow in teamRows:
    cells = dataRow.findAll('td')
    headers = dataRow.findAll('th')
    name = cells[0].findAll('a')[0].text.strip().lower()
    if name in teams:
        idx = teams.index(name)
        # Normalise the streak text: "Won 3" -> "3", "Lost 3"/"OT 3" -> "-3"
        streak = cells[6].text
        streak = streak.replace('Lost', '-').replace('OT', '-')
        streak = streak.replace('Won', '').replace(' ', '').strip()
        finalData[idx][8] = streak
        finalData[idx][9] = headers[0].text  # standings position
        finalData[idx][2] = headers[4].text  # GF
        finalData[idx][3] = cells[4].text    # GA
i = 0
for t in teams:
finalData[i][0] = t.title()
i = i + 1
print finalData
now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailydata/"+str(now.day)+"-"+str(now.month)+"-"+str(now.year)+".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for line in finalData:
wr.writerow(line)
print "Wrote file"
# great, let's dump into a CSV
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys
if len(sys.argv[1:]) == 0:
print "No supplied URLs"
sys.exit()
gameIDs = sys.argv[1:]
# [TeamAway, Win/Loss, GF, GA, ShFor, ShA]
# [TeamHome, Loss/Win, GA, GF, ShFor, ShA]
now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailyscores/"+str(now.day)+"-"+str(now.month)+"-"+str(now.year)+".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
# loop through all gameIDs
for g in gameIDs:
fenwickURL = "http://www.tsn.ca"+g
request = urllib2.Request(fenwickURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
box = soup.findAll('table', 'boxScore') # store the tables
col = len(box[0].findAll('tr')[1].findAll('td')) # count if it is OT or not
#print col
TeamAway = box[0].findAll('tr')[1].findAll('td')[0].text # get the home/away team name
TeamHome = box[0].findAll('tr')[2].findAll('td')[0].text
GF = box[0].findAll('tr')[1].findAll('td')[col-1].text # get the teams scores
GA = box[0].findAll('tr')[2].findAll('td')[col-1].text
statusAway = "Win" if GF > GA else "Loss" # determine who won
statusHome = "Loss" if statusAway == "Win" else "Win"
try:
isSO = box[0].findAll('tr')[0].findAll('th')[5].text
sub = 2 if isSO == 'SO' else 1
except:
sub = 1
ShFor = box[1].findAll('tr')[1].findAll('td')[col-sub].text # get shots for
ShA = box[1].findAll('tr')[2].findAll('td')[col-sub].text
Line1 = [TeamAway, statusAway, GF, GA, ShFor, ShA] # store them in the right format
Line2 = [TeamHome, statusHome, GA, GF, ShA, ShFor]
# output the lines
print Line1
print Line2
# write to csv
wr.writerow(Line1)
wr.writerow(Line2)
from bs4 import BeautifulSoup
import urllib2
import csv
import datetime
import sys
if len(sys.argv[1:]) == 0:
print "No supplied URLs"
sys.exit()
gameIDs = sys.argv[1:]
# [TeamAway, Win/Loss, GF, GA, ShFor, ShA]
# [TeamHome, Loss/Win, GA, GF, ShFor, ShA]
now = datetime.datetime.now()
fileName = "/Users/joshuaweissbock/Dropbox/CSI-5388/Project/dailyscores/"+str(now.day)+"-"+str(now.month)+"-"+str(now.year)+".csv"
myfile = open(fileName, 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
# loop through all gameIDs
for g in gameIDs:
fenwickURL = "http://www.tsn.ca"+g
request = urllib2.Request(fenwickURL)
response = urllib2.urlopen(request)
the_page = response.read()
soup = BeautifulSoup(the_page)
box = soup.findAll('table', 'boxScore') # store the tables
col = len(box[0].findAll('tr')[1].findAll('td')) # count if it is OT or not
#print col
TeamAway = box[0].findAll('tr')[1].findAll('td')[0].text # get the home/away team name
TeamHome = box[0].findAll('tr')[2].findAll('td')[0].text
GF = box[0].findAll('tr')[1].findAll('td')[col-1].text # get the teams scores
GA = box[0].findAll('tr')[2].findAll('td')[col-1].text
statusAway = "Win" if GF > GA else "Loss" # determine who won
statusHome = "Loss" if statusAway == "Win" else "Win"
try:
isSO = box[0].findAll('tr')[0].findAll('th')[5].text
sub = 2 if isSO == 'SO' else 1
except:
sub = 1
ShFor = box[1].findAll('tr')[1].findAll('td')[col-sub].text # get shots for
ShA = box[1].findAll('tr')[2].findAll('td')[col-sub].text
Line1 = [TeamAway, statusAway, GF, GA, ShFor, ShA] # store them in the right format
Line2 = [TeamHome, statusHome, GA, GF, ShA, ShFor]
# output the lines
print Line1
print Line2
# write to csv
wr.writerow(Line1)
wr.writerow(Line2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment