Skip to content

Instantly share code, notes, and snippets.

@ccagrawal
Created September 9, 2015 21:12
Show Gist options
  • Save ccagrawal/efc1e4fe6d451a8f4901 to your computer and use it in GitHub Desktop.
Save ccagrawal/efc1e4fe6d451a8f4901 to your computer and use it in GitHub Desktop.
Calculate variability in NBA and NFL games
library(ggplot2)
library(moments)
library(sportsTools)
library(dplyr)
# Drop games that have no home betting line and derive the margin columns.
#
# Args:
#   data: data frame with the columns `home.line`, `home.score` and
#         `away.score`.
#
# Returns:
#   `data` without the rows whose `home.line` is NA, plus three new columns:
#     expected - the negated home line
#     actual   - home score minus away score
#     residual - actual minus expected
clean.lines <- function(data) {
  # Use a logical mask rather than `-which(...)`: when no line is missing,
  # `which()` returns integer(0) and `data[-integer(0), ]` would silently
  # drop EVERY row instead of keeping them all.
  data <- data[!is.na(data$home.line), , drop = FALSE]
  data$expected <- -data$home.line
  data$actual <- data$home.score - data$away.score
  data$residual <- data$actual - data$expected
  data
}
# Load and clean the lines for each league, NBA first and then NFL. Both
# requests cover 2012 through 2015 with type = "both".
# NOTE(review): GetLinesRange() is presumably provided by sportsTools; its
# result is assumed to carry the home.line, home.score and away.score columns
# that clean.lines() reads -- confirm against the package.
nba.lines <- GetLinesRange(
  sport = "NBA", year.start = 2012, year.end = 2015, type = "both"
)
nba.clean <- clean.lines(nba.lines)

nfl.lines <- GetLinesRange(
  sport = "NFL", year.start = 2012, year.end = 2015, type = "both"
)
nfl.clean <- clean.lines(nfl.lines)
# NBA Expected Home Margin of Victory
# Density-scaled histogram of `expected`, written to ./nba_expected_hist.png.
ggplot(nba.clean, aes(x = expected)) +
  # One-point bins with edges on the half points, so every bin is centred on
  # a whole number. `boundary` replaces the deprecated `origin` argument and
  # `after_stat(density)` replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "grey70", fill = "grey90", binwidth = 1, boundary = -16.5
  ) +
  scale_x_continuous(breaks = seq(from = -15, to = 20, by = 5)) +
  # No padding on the y axis, so the bars sit directly on the x axis.
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "NBA: Expected Home Margin of Victory") +
  # Strip everything except the title and the x-axis labels.
  theme(
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    axis.ticks.y = element_blank(), axis.text.y = element_blank(),
    panel.border = element_blank(), panel.background = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    plot.title = element_text(size = rel(1.25), face = "bold", vjust = 1.5),
    axis.text.x = element_text(colour = "black", size = rel(1.25))
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(
  filename = "./nba_expected_hist.png", width = 14, height = 6, units = "in"
)
# NFL Expected Home Margin of Victory
# Density-scaled histogram of `expected`, written to ./nfl_expected_hist.png.
ggplot(nfl.clean, aes(x = expected)) +
  # One-point bins with edges on the half points, so every bin is centred on
  # a whole number. `boundary` replaces the deprecated `origin` argument and
  # `after_stat(density)` replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "grey70", fill = "grey90", binwidth = 1, boundary = -16.5
  ) +
  scale_x_continuous(breaks = seq(from = -15, to = 25, by = 5)) +
  # No padding on the y axis, so the bars sit directly on the x axis.
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "NFL: Expected Home Margin of Victory") +
  # Strip everything except the title and the x-axis labels.
  theme(
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    axis.ticks.y = element_blank(), axis.text.y = element_blank(),
    panel.border = element_blank(), panel.background = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    plot.title = element_text(size = rel(1.25), face = "bold", vjust = 1.5),
    axis.text.x = element_text(colour = "black", size = rel(1.25))
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(
  filename = "./nfl_expected_hist.png", width = 14, height = 6, units = "in"
)
# NBA Residual Home Margin of Victory
# Density-scaled histogram of `residual` with a normal curve overlaid,
# written to ./nba_residual_hist.png.
ggplot(nba.clean, aes(x = residual)) +
  # One-point bins with edges on the half points, so every bin is centred on
  # a whole number. `boundary` replaces the deprecated `origin` argument and
  # `after_stat(density)` replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "grey70", fill = "grey90", binwidth = 1, boundary = -50.5
  ) +
  scale_x_continuous(breaks = seq(from = -50, to = 45, by = 5)) +
  # No padding on the y axis, so the bars sit directly on the x axis.
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "NBA: Residual (Actual Outcome - Expected Outcome)") +
  # Strip everything except the title and the x-axis labels.
  theme(
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    axis.ticks.y = element_blank(), axis.text.y = element_blank(),
    panel.border = element_blank(), panel.background = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    plot.title = element_text(size = rel(1.25), face = "bold", vjust = 1.5),
    axis.text.x = element_text(colour = "black", size = rel(1.25))
  ) +
  # Long-dashed normal density using the residuals' own mean and sd.
  stat_function(
    fun = dnorm,
    args = list(mean = mean(nba.clean$residual), sd = sd(nba.clean$residual)),
    linetype = "longdash"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(
  filename = "./nba_residual_hist.png", width = 14, height = 6, units = "in"
)
# NFL Residual Home Margin of Victory
# Density-scaled histogram of `residual` with a normal curve overlaid,
# written to ./nfl_residual_hist.png.
ggplot(nfl.clean, aes(x = residual)) +
  # One-point bins with edges on the half points, so every bin is centred on
  # a whole number. `boundary` replaces the deprecated `origin` argument and
  # `after_stat(density)` replaces the deprecated `..density..` notation.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "grey70", fill = "grey90", binwidth = 1, boundary = -40.5
  ) +
  scale_x_continuous(breaks = seq(from = -40, to = 50, by = 5)) +
  # No padding on the y axis, so the bars sit directly on the x axis.
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "NFL: Residual (Actual Outcome - Expected Outcome)") +
  # Strip everything except the title and the x-axis labels.
  theme(
    axis.title.x = element_blank(), axis.title.y = element_blank(),
    axis.ticks.y = element_blank(), axis.text.y = element_blank(),
    panel.border = element_blank(), panel.background = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    plot.title = element_text(size = rel(1.25), face = "bold", vjust = 1.5),
    axis.text.x = element_text(colour = "black", size = rel(1.25))
  ) +
  # Long-dashed normal density using the residuals' own mean and sd.
  stat_function(
    fun = dnorm,
    args = list(mean = mean(nfl.clean$residual), sd = sd(nfl.clean$residual)),
    linetype = "longdash"
  )
# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(
  filename = "./nfl_residual_hist.png", width = 14, height = 6, units = "in"
)
# Standard deviation of the residual column for each league.
nba.sd <- sd(nba.clean$residual)
nfl.sd <- sd(nfl.clean$residual)

# 5th, 25th, 50th, 75th and 95th percentiles of the absolute expected margin.
nba.margins <- quantile(
  abs(nba.clean$expected),
  probs = c(.05, .25, .5, .75, .95)
)
nfl.margins <- quantile(
  abs(nfl.clean$expected),
  probs = c(.05, .25, .5, .75, .95)
)

# Probability mass above zero for a normal distribution whose mean is each
# percentile margin and whose sd is the league's residual sd.
nba.win.probs <- 1 - pnorm(q = 0, mean = nba.margins, sd = nba.sd)
nfl.win.probs <- 1 - pnorm(q = 0, mean = nfl.margins, sd = nfl.sd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment