Last active
November 15, 2016 18:16
-
-
Save seanjtaylor/aa9df06c7c9c6ea19af81affad9ed350 to your computer and use it in GitHub Desktop.
Simple ridge regression team ranking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(tidyr) | |
## 2015 season only | |
g2015 <- game %>% | |
select(seas, wk, ptsh, ptsv, h, v) %>% | |
filter(seas == 2015) %>% | |
arrange(gid) | |
## Makes a 32-column matrix with indicators for home team | |
home <- g2015 %>% | |
mutate(val = 1) %>% | |
spread(h, val, fill = 0) %>% | |
arrange(gid) %>% | |
select(ARI:WAS) %>% | |
as.matrix | |
## Makes a 32-column matrix with indicators for away team | |
away <- g2015 %>% | |
mutate(val = 1) %>% | |
spread(v, val, fill = 0) %>% | |
arrange(gid) %>% | |
select(ARI:WAS) %>% | |
as.matrix | |
## Makes matrix and target variable for regression | |
y <- with(g2015, ptsh - ptsv) | |
X <- cbind(home - away) | |
## Training is up to week 3 | |
## Testing is week 4-8 | |
training <- with(g2015, wk <= 3) | |
testing <- with(g2015, wk > 3 & wk <= 8) | |
## Ridge regression estimates | |
library(glmnet) | |
m1 <- glmnet(X[training,], y[training], alpha = 0.0) | |
# No penalty | |
coef(m1, s = 0.0) | |
# Small penalty | |
coef(m1, s = 10.0) | |
plot(coef(m1, s = 0.0), coef(m1, s = 10.0)) | |
## MSE for a variety of penalties | |
mse1 <- mean((y[testing] - predict(m1, X[testing,], s = 0.0))^2) | |
mse2 <- mean((y[testing] - predict(m1, X[testing,], s = 10.0))^2) | |
mse3 <- mean((y[testing] - predict(m1, X[testing,], s = 100.0))^2) | |
mse4 <- mean((y[testing] - predict(m1, X[testing,], s = 1000.0))^2) | |
## Whole regularization path | |
yhat <- predict(m1, X[testing,]) | |
## Plot of penalty vs MSE | |
plot(log(m1$lambda), colMeans((yhat - y[testing])^2)) | |
## Best penalty | |
best.penalty <- data_frame(lambda = m1$lambda, | |
mse = colMeans((yhat - y[testing])^2)) %>% | |
arrange(mse) %>% | |
head(1) %>% | |
with(lambda) | |
## Rank the teams | |
values <- coef(m1, s = best.penalty) | |
## How many points we expect a team to be favored over an average team | |
data_frame(points.favored = values[,1], team = rownames(values)) %>% | |
filter(team != '(Intercept)') %>% | |
arrange(-points.favored) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment