Skip to content

Instantly share code, notes, and snippets.

@ccagrawal
Created December 23, 2015 10:35
Show Gist options
  • Save ccagrawal/f1a91bd3a94aafa94f93 to your computer and use it in GitHub Desktop.
Save ccagrawal/f1a91bd3a94aafa94f93 to your computer and use it in GitHub Desktop.
Calculate homecourt advantage for each NBA team
library(sportsTools)
library(dplyr)
library(ggplot2)
# Seasons to include (inclusive); each value is passed to GetSchedule() as-is.
start.year <- 2001
end.year <- 2015

# Build one design-matrix data frame per season and stack them once at the
# end. Collecting into a list avoids re-copying the accumulated data frame on
# every iteration, which is what growing it with bind_rows() in a loop does.
season.models <- lapply(start.year:end.year, function(year) {
  # The schedule must provide home.name, away.name and home.margin columns.
  schedule <- GetSchedule(year, 'regular')

  # One indicator column per team, for the home side and the away side
  home.model <- model.matrix(~ home.name + 0, data = schedule)
  away.model <- model.matrix(~ away.name + 0, data = schedule)

  # Team-season terms: home team gets a 1, away team gets a -1.
  # NOTE(review): the subtraction assumes both matrices list the same teams
  # in the same column order -- confirm against GetSchedule() output.
  team.season <- data.frame(home.model - away.model)
  colnames(team.season) <- gsub(
    'home.name(.*)', paste0('\\1.', year), colnames(team.season)
  )

  # Home-court terms: a 1 marks the team whose home floor hosted the game.
  # These names carry no year, so they line up across seasons when stacked.
  home.court <- data.frame(home.model)
  colnames(home.court) <- gsub(
    'home.name(.*)', '\\1.Home', colnames(home.court)
  )

  model <- cbind(team.season, home.court)

  # Add outcome column
  model$Outcome <- schedule$home.margin
  model
})

# Columns missing from a given season are filled with NA by bind_rows().
all.games <- bind_rows(season.models)
# Replace all NA with 0 (columns absent from a season were filled with NA
# when the per-season frames were stacked).
all.games[is.na(all.games)] <- 0

# Remove NOP / OKC home column.
# Columns are selected by name rather than by a negated which() index:
# if the column is absent, which() returns integer(0) and
# all.games[, -integer(0)] would silently drop EVERY column.
keep.cols <- setdiff(
  colnames(all.games), 'New.Orleans.Oklahoma.City.Hornets.Home'
)
all.games <- all.games[, keep.cols, drop = FALSE]

# Fold each renamed franchise's old home column into its current one:
#   Charlotte Bobcats   -> Charlotte Hornets
#   New Orleans Hornets -> New Orleans Pelicans
franchise.renames <- c(
  Charlotte.Bobcats.Home = 'Charlotte.Hornets.Home',
  New.Orleans.Hornets.Home = 'New.Orleans.Pelicans.Home'
)
for (old.name in names(franchise.renames)) {
  new.name <- franchise.renames[[old.name]]

  # Nothing to merge when the old name never appears in the selected seasons
  if (!old.name %in% colnames(all.games)) {
    next
  }

  if (new.name %in% colnames(all.games)) {
    # Both exist: add the indicators together, then drop the old column
    all.games[[new.name]] <- all.games[[old.name]] + all.games[[new.name]]
    all.games[[old.name]] <- NULL
  } else {
    # Only the old name exists: renaming it is equivalent to merging
    colnames(all.games)[colnames(all.games) == old.name] <- new.name
  }
}
# Compute regression and save coefficients.
# Outcome (home margin) is regressed on every other column of all.games:
# the team-season terms plus the per-team home-court indicators.
fit <- lm(Outcome ~ ., data = all.games)
coef.table <- as.data.frame(summary(fit)$coefficients)
intercept <- coef.table[1, ]

# Keep only the home-court rows; the pattern is anchored so that only names
# ending in ".Home" are selected.
results <- coef.table[grep('\\.Home$', row.names(coef.table)), ]

# Incorporate intercept
results$Estimate <- results$Estimate + intercept[1, 'Estimate']
#results$`Std. Error` <- sqrt(results$`Std. Error`^2 + intercept[1, 2]^2)

# Calculate quantity for each home: the column sum of each home indicator,
# i.e. the number of games flagged for that home court.
results$Sample <- colSums(all.games[, row.names(results), drop = FALSE])

# Calculate 90% confidence interval for each home
# (qt(.95, ...) is the two-sided 90% critical value).
# NOTE(review): the standard error used here is the coefficient's own; the
# intercept's uncertainty is not included (see the commented-out line above).
# NOTE(review): Upper and Lower are dropped by the column selection below and
# therefore never reach the CSV -- confirm whether they should be exported.
results$crit.T <- qt(.95, results$Sample - 1)
results <- results[, c('Estimate', 'Std. Error', 'crit.T')]
margin <- results[['Std. Error']] * results$crit.T
results$Upper <- results$Estimate + margin
results$Lower <- results$Estimate - margin

# Clean Up Results: turn "Some.Team.Home" row names into "Some Team"
results$Team <- row.names(results)
row.names(results) <- NULL
results <- results[, c('Team', 'Estimate')]
results$Team <- gsub('.', ' ', results$Team, fixed = TRUE)
results$Team <- sub(' Home$', '', results$Team)

# Largest home-court estimate first
results <- results[order(results$Estimate, decreasing = TRUE), ]
write.csv(results, 'hca_results.csv', row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment