Skip to content

Instantly share code, notes, and snippets.

@GlenHertz
Created August 27, 2013 23:28
Show Gist options
  • Save GlenHertz/6360352 to your computer and use it in GitHub Desktop.
Save GlenHertz/6360352 to your computer and use it in GitHub Desktop.
Game Results
#!/usr/bin/env julia
# Select the rows of `games` that involve the listed teams.
#
# Arguments:
#   games      - DataFrame with "AwayTeam" and "HomeTeam" columns.
#   team_names - vector of team names to look for.
#   and        - when true (the default) a game is kept only if BOTH of its
#                teams are listed; when false it is kept if EITHER team is.
#
# Returns a DataFrame with the matching rows in their original order. When no
# game matches, the result is a zero-row DataFrame rather than the "" sentinel
# the earlier implementation leaked to callers (which then failed on `nrow`).
function get_team_games(games::DataFrame, team_names::Vector, and=true)
    keep = falses(nrow(games))
    for i in 1:nrow(games)
        away_listed = in(games[i, "AwayTeam"], team_names)
        home_listed = in(games[i, "HomeTeam"], team_names)
        keep[i] = and ? (away_listed && home_listed) : (away_listed || home_listed)
    end
    # A single mask selection avoids re-copying the accumulated result for
    # every matching row, as repeated row-binding would.
    return games[keep, :]
end
# Collect every distinct team name that appears in either the "AwayTeam" or
# the "HomeTeam" column of `games` and return the names as a sorted vector.
function get_teams(games)
    names = Set()
    for column in ("AwayTeam", "HomeTeam")
        union!(names, vector(games[column]))
    end
    return sort!(collect(names))
end
# Estimate one rating per entry of `teams` by least squares on game margins.
#
# Each game contributes one equation
#     rating[away] - rating[home] = WinMargin
# where "WinMargin" is read from `games` and, in the sample data, equals
# AwayScore - HomeScore.
#
# Arguments:
#   games - DataFrame with "AwayTeam", "HomeTeam" and "WinMargin" columns.
#   teams - vector of team names; defaults to every team found in `games`.
#
# Returns the least-squares solution of the system, one value per team in the
# order of `teams`.
function calculate_ratings(games, teams=get_teams(games))
    # Only games whose two sides are both in `teams` can be placed in X.
    games = get_team_games(games, teams)
    X = zeros(Float64, nrow(games), length(teams))
    Y = zeros(Float64, nrow(games))
    for i in 1:nrow(games)
        away = findfirst(t -> t == games[i, "AwayTeam"], teams)
        home = findfirst(t -> t == games[i, "HomeTeam"], teams)
        # The margin already carries the sign of the result, so the row must
        # always be away = +1, home = -1. Flipping the row by sign(margin) as
        # well applied the sign twice and reversed every home win.
        X[i, away] = 1.0
        X[i, home] = -1.0
        Y[i] = games[i, "WinMargin"]
    end
    # Every row of X sums to zero, so ratings are only determined up to a
    # common additive offset; differences between teams are what matter.
    return X \ Y
end
# Build a standings table for `teams` from the game log `games`.
#
# Arguments:
#   games - DataFrame with "AwayTeam", "AwayScore", "HomeTeam", "HomeScore"
#           and "WinMargin" columns.
#   teams - vector of team names; defaults to every team found in `games`.
#
# Returns a DataFrame with one row per team and the columns
# Team, GP, W, L, T, PTS, GF, GA, DIFF, PCT, Rating.
# A win is worth 2 points and a tie 1; PCT counts a tie as half a win.
function calculate_standings(games, teams=get_teams(games))
    # Restrict to games between listed teams, as calculate_ratings does.
    # Without this, a game involving an unlisted team has no slot in the
    # tally arrays and the index lookup below fails.
    games = get_team_games(games, teams)
    N = length(teams)
    gp = zeros(Int64, N)
    wins = zeros(Int64, N)
    loss = zeros(Int64, N)
    ties = zeros(Int64, N)
    gf = zeros(Int64, N)
    ga = zeros(Int64, N)
    ratings = calculate_ratings(games, teams)
    for i in 1:nrow(games)
        away = findfirst(t -> t == games[i, "AwayTeam"], teams)
        home = findfirst(t -> t == games[i, "HomeTeam"], teams)
        spread = games[i, "WinMargin"]
        gp[away] += 1
        gp[home] += 1
        # A positive margin is an away win, a negative one a home win.
        if spread > 0
            wins[away] += 1
            loss[home] += 1
        elseif spread < 0
            wins[home] += 1
            loss[away] += 1
        else
            ties[away] += 1
            ties[home] += 1
        end
        gf[away] += games[i, "AwayScore"]
        gf[home] += games[i, "HomeScore"]
        ga[away] += games[i, "HomeScore"]
        ga[home] += games[i, "AwayScore"]
    end
    pts = zeros(Int64, N)
    goal_diff = zeros(Int64, N)
    winpct = zeros(Float64, N)
    for team in 1:N
        pts[team] = wins[team] * 2 + ties[team]
        goal_diff[team] = gf[team] - ga[team]
        # A team with no games played yields NaN here (0.0 / 0).
        winpct[team] = (wins[team] + ties[team] / 2) / gp[team]
    end
    return DataFrame(Team=teams, GP=gp, W=wins, L=loss, T=ties, PTS=pts, GF=gf, GA=ga, DIFF=goal_diff, PCT=winpct, Rating=ratings)
end
# Driver: load the game log, fit the ratings and build the standings table,
# then produce one copy sorted by points and one sorted by rating.
games = readtable("games.csv")
ratings = calculate_ratings(games)
# Compute the standings once and sort them twice. The second sort previously
# referenced `A_standings`, a name that is never defined in this script.
standings = calculate_standings(games)
standings_by_points = sortby(standings, "PTS")
standings_by_rating = sortby(standings, "Rating")
AwayTeam AwayScore HomeTeam HomeScore WinMargin
C 0 A 4 -4
D 1 B 4 -3
E 3 F 2 1
B 4 C 3 1
D 0 C 4 -4
A 12 F 1 11
A 1 D 3 -2
E 4 B 8 -4
E 3 C 5 -2
A 6 B 1 5
F 1 C 7 -6
A 7 E 3 4
F 3 D 4 -1
E 3 D 4 -1
F 0 B 1 -1
A 8 B 2 6
D 6 F 2 4
B 6 E 0 6
C 7 F 1 6
D 3 E 4 -1
F 4 A 6 -2
B 2 C 2 0
E 1 A 5 -4
B 8 F 1 7
C 0 A 1 -1
B 10 D 2 8
E 5 F 1 4
A 4 C 1 3
C 3 F 2 1
F 2 D 3 -1
B 3 A 4 -1
F 2 E 3 -1
C 0 B 5 -5
A 6 D 4 2
B 6 D 3 3
E 4 A 6 -2
E 2 D 4 -2
D 2 C 6 -4
B 4 E 2 2
D 4 F 4 0
D 1 A 7 -6
C 4 B 2 2
C 4 A 0 4
B 5 D 0 5
E 5 F 1 4
A 5 B 2 3
C 6 A 3 3
julia> reload("calc_spread.jl")
julia> standings_by_points
6x11 DataFrame:
Team GP W L T PTS GF GA DIFF PCT Rating
[1,] "F" 15 0 14 1 1 27 77 -50 0.0333333 -1.48166
[2,] "E" 14 5 9 0 10 42 58 -16 0.357143 0.456697
[3,] "D" 16 6 9 1 13 44 69 -25 0.40625 -0.239425
[4,] "C" 15 9 5 1 19 52 34 18 0.633333 0.377248
[5,] "B" 17 11 5 1 23 73 45 28 0.676471 0.309839
[6,] "A" 17 14 3 0 28 85 40 45 0.823529 0.5773
julia> standings_by_rating
6x12 DataFrame:
Team GP W L T OT PTS GF GA DIFF PCT Rating
[1,] "F" 15 0 14 1 0 1 27 77 -50 0.0333333 -1.48166
[2,] "D" 16 6 9 1 0 13 44 69 -25 0.40625 -0.239425
[3,] "B" 17 11 5 1 0 23 73 45 28 0.676471 0.309839
[4,] "C" 15 9 5 1 0 19 52 34 18 0.633333 0.377248
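[5,] "E" 14 5 9 0 0 10 42 58 -16 0.357143 0.456697 # This rating seems wrong: E has a losing record (5-9, DIFF -16) yet gets the second-highest rating, which suggests home wins enter X \ Y with the wrong sign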
[6,] "A" 17 14 3 0 0 28 85 40 45 0.823529 0.5773
# Stand-alone reproduction of the solve: read the tab-delimited matrix from
# X.tsv and the column from Y.tsv, then evaluate the left-division so the
# result is shown as the value of the last expression.
X = readdlm("X.tsv", '\t')
Y = readdlm("Y.tsv", '\t')
X \ Y
1 0 -1 0 0 0
0 1 0 -1 0 0
0 0 0 0 1 -1
0 1 -1 0 0 0
0 0 1 -1 0 0
1 0 0 0 0 -1
-1 0 0 1 0 0
0 1 0 0 -1 0
0 0 1 0 -1 0
1 -1 0 0 0 0
0 0 1 0 0 -1
1 0 0 0 -1 0
0 0 0 1 0 -1
0 0 0 1 -1 0
0 1 0 0 0 -1
1 -1 0 0 0 0
0 0 0 1 0 -1
0 1 0 0 -1 0
0 0 1 0 0 -1
0 0 0 -1 1 0
1 0 0 0 0 -1
0 1 -1 0 0 0
1 0 0 0 -1 0
0 1 0 0 0 -1
1 0 -1 0 0 0
0 1 0 -1 0 0
0 0 0 0 1 -1
1 0 -1 0 0 0
0 0 1 0 0 -1
0 0 0 1 0 -1
1 -1 0 0 0 0
0 0 0 0 1 -1
0 1 -1 0 0 0
1 0 0 -1 0 0
0 1 0 -1 0 0
1 0 0 0 -1 0
0 0 0 1 -1 0
0 0 1 -1 0 0
0 1 0 0 -1 0
0 0 0 1 0 -1
1 0 0 -1 0 0
0 -1 1 0 0 0
-1 0 1 0 0 0
0 1 0 -1 0 0
0 0 0 0 1 -1
1 -1 0 0 0 0
-1 0 1 0 0 0
We can make this file beautiful and searchable if this error is corrected: No tabs found in this TSV file in line 0.
-4
-3
1
1
-4
11
-2
-4
-2
5
-6
4
-1
-1
-1
6
4
6
6
-1
-2
0
-4
7
-1
8
4
3
1
-1
-1
-1
-5
2
3
-2
-2
-4
2
0
-6
2
4
5
4
3
3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment