Skip to content

Instantly share code, notes, and snippets.

@nathanesau
Created October 21, 2015 15:53
Show Gist options
  • Save nathanesau/af974da3d581596f9a24 to your computer and use it in GitHub Desktop.
Save nathanesau/af974da3d581596f9a24 to your computer and use it in GitHub Desktop.
Parse NHL boxscores from http://hockey-reference.com/boxscores using R
library(XML)
# Parses boxscores from hockey-reference.com.
# Note: the program may take several hours to run, because it requests one
# page per (month, day, team) combination.
#
# Configuration:
#   out_dir       - folder where the CSV files are stored
#   months / year - date range to scrape
out_dir <- "~/Desktop/Hockey"
# Create the output folder when it is missing so setwd() cannot abort the run.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
setwd(out_dir)

months <- c("03", "04", "10", "11", "12")
# Two-digit day strings "01".."31". Every month is tried with all 31 days;
# the scraping loop is expected to skip pages that cannot be read.
days <- sprintf("%02d", 1:31)
year <- "2014"

# Three-letter team codes used both in the boxscore URL and to recognise the
# per-team tables on each page.
teams <- c(
  "CAR", "FLA", "VAN", "CGY",
  "DAL", "EDM", "TBL", "LAK",
  "NJD", "NSH", "WPG", "MTL",
  "PIT", "SJS", "WSH", "STL",
  "BUF", "BOS", "NYI", "DET",
  "CBJ", "PHI", "OTT", "COL",
  "ARI", "MIN", "NYR", "CHI",
  "TOR", "ANA"
)
# Try every (month, day, team) combination as a boxscore URL and save the
# per-team tables found on each page as CSV files in the working directory.
# Output files are named <TEAM><year><month><day>.csv.
for (month in months) {
  for (day in days) {
    # Date stamp shared by every file written for this day.
    csv_name <- paste0(year, month, day)
    for (team in teams) {
      url <- paste0(
        "http://www.hockey-reference.com/boxscores/",
        year, month, day, "0", team, ".html"
      )
      # Most combinations have no page; treat any read failure as "no game".
      # NULL is the sentinel so the check below is always a scalar condition
      # (is.na() on a list of tables yields a vector, which if() rejects).
      team_data <- tryCatch(readHTMLTable(url), error = function(e) NULL)
      if (is.null(team_data) || length(team_data) == 0L) {
        next
      }
      # Original author's note: first team is home -> second team is away.
      # Tables are recognised by the first three characters of their name;
      # match() gives the index of the first table for each team code (or NA).
      table_prefix <- substr(names(team_data), 1, 3)
      z <- match(teams, table_prefix)
      names(z) <- teams
      z <- z[!is.na(z)]
      # seq_along() makes this a no-op when no table matched a team code.
      for (i in seq_along(z)) {
        write.csv(
          team_data[[z[[i]]]],
          file = paste0(names(z)[i], csv_name, ".csv")
        )
      }
    }
  }
}
# NOTE(review): `carolina_data` is not created anywhere in the visible script;
# this looks like a leftover from an interactive session. Guard the lookup so
# the script does not end with "object 'carolina_data' not found".
if (exists("carolina_data")) {
  # Keep the 13th element of that object for inspection.
  player_data <- carolina_data[[13]]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment