Skip to content

Instantly share code, notes, and snippets.

@seanjtaylor
Last active November 15, 2016 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seanjtaylor/aa9df06c7c9c6ea19af81affad9ed350 to your computer and use it in GitHub Desktop.
Save seanjtaylor/aa9df06c7c9c6ea19af81affad9ed350 to your computer and use it in GitHub Desktop.
Simple ridge regression team ranking
library(dplyr)
library(tidyr)
## 2015 season only
g2015 <- game %>%
select(seas, wk, ptsh, ptsv, h, v) %>%
filter(seas == 2015) %>%
arrange(gid)
## Makes a 32-column matrix with indicators for home team
home <- g2015 %>%
mutate(val = 1) %>%
spread(h, val, fill = 0) %>%
arrange(gid) %>%
select(ARI:WAS) %>%
as.matrix
## Makes a 32-column matrix with indicators for away team
away <- g2015 %>%
mutate(val = 1) %>%
spread(v, val, fill = 0) %>%
arrange(gid) %>%
select(ARI:WAS) %>%
as.matrix
## Makes matrix and target variable for regression
y <- with(g2015, ptsh - ptsv)
X <- cbind(home - away)
## Training is up to week 3
## Testing is week 4-8
training <- with(g2015, wk <= 3)
testing <- with(g2015, wk > 3 & wk <= 8)
## Ridge regression estimates
library(glmnet)
m1 <- glmnet(X[training,], y[training], alpha = 0.0)
# No penalty
coef(m1, s = 0.0)
# Small penalty
coef(m1, s = 10.0)
plot(coef(m1, s = 0.0), coef(m1, s = 10.0))
## MSE for a variety of penalties
mse1 <- mean((y[testing] - predict(m1, X[testing,], s = 0.0))^2)
mse2 <- mean((y[testing] - predict(m1, X[testing,], s = 10.0))^2)
mse3 <- mean((y[testing] - predict(m1, X[testing,], s = 100.0))^2)
mse4 <- mean((y[testing] - predict(m1, X[testing,], s = 1000.0))^2)
## Whole regularization path
yhat <- predict(m1, X[testing,])
## Plot of penalty vs MSE
plot(log(m1$lambda), colMeans((yhat - y[testing])^2))
## Best penalty
best.penalty <- data_frame(lambda = m1$lambda,
mse = colMeans((yhat - y[testing])^2)) %>%
arrange(mse) %>%
head(1) %>%
with(lambda)
## Rank the teams
values <- coef(m1, s = best.penalty)
## How many points we expect a team to be favored over an average team
data_frame(points.favored = values[,1], team = rownames(values)) %>%
filter(team != '(Intercept)') %>%
arrange(-points.favored)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment