Skip to content

Instantly share code, notes, and snippets.

@chrswt
Created February 23, 2016 07:19
Show Gist options
  • Save chrswt/7c297c6a5f2077dee0b1 to your computer and use it in GitHub Desktop.
Save chrswt/7c297c6a5f2077dee0b1 to your computer and use it in GitHub Desktop.
---
title: "NBA Daily Fantasy"
output: pdf_document
---
```{r}
library(plyr)
library(TTR)
library(tree)
# Player stat columns (from the player CSV) that each get a trailing
# moving-average feature via mov_avg().
mv_avg_stats <- c(
  "MIN",
  "FG", "FGA", "X3P", "X3PA", "FT", "FTA",
  "OR", "DR", "TOT",
  "A", "PF", "ST", "TO", "BL", "PTS"
)
# Build the names of the moving-average feature columns.
#
# Args:
#   stats: Character vector of stat names (e.g. "PTS").
#   k: Window size; it is embedded verbatim in each name.
#
# Returns:
#   Character vector of the form "<stat>_avg_past_<k>", one per stat.
mv_avg_colnames <- function(stats, k) {
  paste0(stats, "_avg_past_", k)
}
# Maximum number of past games averaged into each moving-average feature.
mv_avg_window <- 20

# TODO Match up player and team datasets.
# Load the per-player and per-team CSVs and turn their DATE columns
# (text in month/day/four-digit-year form) into Date objects.
# The `format` argument is spelled out in full rather than relying on
# partial matching of `f`.
nba_player <- read.csv("2016/player.csv")
nba_player$DATE <- as.Date(strptime(nba_player$DATE, format = "%m/%d/%Y"))
nba_team <- read.csv("2016/team.csv")
nba_team$DATE <- as.Date(strptime(nba_team$DATE, format = "%m/%d/%Y"))
# Add trailing moving-average columns to `dest_df`.
#
# For every stat in `stats`, the new column holds, for each row, the
# equally weighted mean of that stat over the `n` rows that precede it in
# `source_df`. The current row gets weight 0 so only historical values are
# used. `n` is `k` capped at `nrow(source_df) - 1`. Rows without a full
# window come back as NA from WMA().
#
# Args:
#   dest_df: Data frame that receives the new columns; expected to have the
#     same number of rows, in the same order, as `source_df`.
#   source_df: Data frame holding the raw stat columns, already sorted by
#     the caller in the order the window should run over.
#   stats: Character vector of column names in `source_df`.
#   k: Desired window size (number of preceding rows).
#
# Returns:
#   `dest_df` with one extra column per stat, named by mv_avg_colnames().
mov_avg <- function(dest_df, source_df, stats, k) {
  dest_colnames <- mv_avg_colnames(stats, k)
  n <- min(k, nrow(source_df) - 1)

  # With no preceding row there is nothing to average: the weight vector
  # would sum to zero (or rep() would fail on a negative count). Emit
  # explicit NAs so the caller's na.omit() drops these rows.
  if (n < 1) {
    for (i in seq_along(stats)) {
      dest_df[[dest_colnames[i]]] <- rep(NA_real_, nrow(dest_df))
    }
    return(dest_df)
  }

  # Weight of current stat is 0 as we only want historical stats.
  # The vector does not depend on the stat, so build it once.
  wts <- c(rep(1, n), 0)

  # seq_along() keeps the loop empty when `stats` is empty.
  for (i in seq_along(stats)) {
    dest_df[, dest_colnames[i]] <- WMA(source_df[, stats[i]],
                                       n = n + 1, wts = wts)
  }
  dest_df
}
# Build, per player, one row of features and responses for every row of
# that player in nba_player.
indiv_train <- ddply(nba_player, .(PLAYER.FULL.NAME), function(games) {
  # Chronological order is needed for the moving averages and days_rest.
  games <- games[order(games$DATE), ]

  # Features
  feats <- data.frame(venue = games$VENUE..R.H., date = games$DATE)
  feats$team <- games$OWN.TEAM
  feats$opp <- games$OPP.TEAM
  feats$pos <- games$POSITION
  # Gap to the previous row's date; the first row has no predecessor and
  # is given 0.
  feats$days_rest <- c(0, diff(games$DATE))
  feats <- mov_avg(feats, games, mv_avg_stats, mv_avg_window)

  # Response variables: response column name -> source stat column.
  resp_source <- c(
    resp_pts = "PTS",
    resp_3pts = "X3P",
    resp_rebounds = "TOT",
    resp_assists = "A",
    resp_steals = "ST",
    resp_blocks = "BL",
    resp_turnovers = "TO"
  )
  for (resp_name in names(resp_source)) {
    feats[[resp_name]] <- games[[resp_source[[resp_name]]]]
  }

  # Count of the five listed stats that reached 10 or more in the row.
  feats$resp_doubles <- rowSums(
    games[, c("PTS", "TOT", "A", "ST", "BL")] >= 10
  )
  feats
})

# The moving averages are NA wherever there was not enough history for a
# full window; drop those rows.
indiv_train <- na.omit(indiv_train)
# Get aggregate stats of other players.
#
# For every (player, join_x) row of the global `indiv_train`, finds the rows
# of *other* players that match on the join keys, averages their `days_rest`
# and moving-average stats weighted by their moving-average minutes, and
# merges those averages onto `curr_train`.
#
# Reads the globals `indiv_train`, `mv_avg_stats`, `mv_avg_window` and
# `mv_avg_colnames()`.
#
# Args:
#   curr_train: Data frame to extend; must contain PLAYER.FULL.NAME and the
#     `join_x` columns.
#   join_x_only: Key column(s) used on the identifying (x) side only.
#   join_y_only: Key column(s) on the other-players (y) side, matched
#     position by position against `join_x_only`.
#   join_both: Key column(s) that carry the same name on both sides.
#
# Returns:
#   `curr_train` inner-joined with the aggregated columns. Column names
#   that clash receive merge()'s default suffixes.
add_players_stats <- function(curr_train, join_x_only, join_y_only, join_both) {
  join_x <- c(join_x_only, join_both)
  join_y <- c(join_y_only, join_both)
  id_cols <- c("PLAYER.FULL.NAME", join_x)
  id_frame <- indiv_train[, id_cols]

  # An x-side key that is not also a y-side key (e.g. "team" when joining
  # team against opp) would otherwise be duplicated in the merged data
  # frame, so it is removed from the y side. A logical mask is used because
  # negating an empty which() result would select no columns at all.
  dup_cols <- setdiff(join_x, join_y)
  join_frame <- indiv_train[, !(names(indiv_train) %in% dup_cols),
                            drop = FALSE]

  other_train <- merge(id_frame, join_frame, by.x = join_x, by.y = join_y)
  # Keep only rows that pair a player with somebody else.
  is_other <- other_train$PLAYER.FULL.NAME.x != other_train$PLAYER.FULL.NAME.y
  other_train <- other_train[is_other, ]

  # Moving-average minutes are the weights, so they are not averaged.
  min_col <- mv_avg_colnames("MIN", mv_avg_window)
  other_stats <- c("days_rest",
                   mv_avg_colnames(setdiff(mv_avg_stats, "MIN"),
                                   mv_avg_window))

  # Group by the same keys that are used for the final merge below.
  other_train <- ddply(other_train, c("PLAYER.FULL.NAME.x", join_x),
                       function(df) {
    minutes <- df[[min_col]]
    vapply(df[other_stats], weighted.mean, numeric(1), w = minutes)
  })

  merge(curr_train, other_train,
        by.x = id_cols,
        by.y = c("PLAYER.FULL.NAME.x", join_x))
}
# Attach aggregates of the other players that share the row's team and
# date, then of the players whose `opp` equals the row's team on that date.
all_train <- add_players_stats(
  curr_train = indiv_train,
  join_x_only = "team",
  join_y_only = "team",
  join_both = "date"
)
all_train <- add_players_stats(
  curr_train = all_train,
  join_x_only = "team",
  join_y_only = "opp",
  join_both = "date"
)

# TODO Verify that join is correct.
# Attach the team-level columns twice: first matched on the row's own team,
# then on its opponent (both together with the date).
team_agg_stats <- c("TEAMS", "DATE", "POSS", "PACE", "OEFF", "DEFF")
team_agg_frame <- nba_team[, team_agg_stats]
all_train <- Reduce(
  function(acc, side) {
    merge(acc, team_agg_frame,
          by.x = c(side, "date"), by.y = c("TEAMS", "DATE"))
  },
  c("team", "opp"),
  all_train
)
# Regression tree for resp_pts on every remaining column. The identifier
# columns, `team`, and the other response columns are excluded from the
# predictors.
tree.pts <- tree(
  resp_pts ~ . -
    PLAYER.FULL.NAME - date - team -
    resp_3pts - resp_rebounds - resp_assists - resp_steals -
    resp_blocks - resp_turnovers - resp_doubles,
  data = all_train,
  mindev = 0.001
)

# Cross-validate (seeded for reproducibility) and, among the sizes that
# reach the minimum CV deviance, keep the smallest tree.
set.seed(1337)
pts.cv <- cv.tree(tree.pts)
opt.tree <- which(pts.cv$dev == min(pts.cv$dev))
best.leaves <- min(pts.cv$size[opt.tree])
pts.pruned <- prune.tree(tree.pts, best = best.leaves)

# Draw the pruned tree with its split labels.
infos <- summary(pts.pruned)
plot(pts.pruned)
text(pts.pruned)

# Square root of the deviance per degree of freedom reported by summary().
sqrt(infos$dev / infos$df)
# Linear model for resp_doubles on every remaining column. The identifier
# columns, `team`, and the other response columns are excluded from the
# predictors.
# NOTE(review): the object is named lm.pts although the response is
# resp_doubles, not resp_pts -- confirm which response was intended.
lm.pts <- lm(
  resp_doubles ~ . -
    PLAYER.FULL.NAME - date - team -
    resp_pts - resp_3pts - resp_rebounds - resp_assists -
    resp_steals - resp_blocks - resp_turnovers,
  data = all_train
)

# Residual standard error of the fit.
summary(lm.pts)$sigma
# Neural networks
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment