Skip to content

Instantly share code, notes, and snippets.

@cpsievert
Forked from bayesball/parse.retrosheet2.pbp.R
Last active September 4, 2015 19:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cpsievert/ebb57feda5f66d0d38ec to your computer and use it in GitHub Desktop.
Save cpsievert/ebb57feda5f66d0d38ec to your computer and use it in GitHub Desktop.
R function for downloading, unzipping, and appending Retrosheet play-by-play data
# Download, unzip, and convert one season of Retrosheet play-by-play data.
#
# Works on Windows and on Unix-alikes (including macOS).
#
# Layout: all files live under "download.folder" in the current working
# directory, in the subfolders "zipped" and "unzipped" (both are created if
# they do not exist yet). The Chadwick program cwevent is invoked by bare
# name from inside the unzipped folder, so it must either sit in that folder
# (cwevent.exe on Windows) or be reachable through the PATH.
#
# Arguments:
#   season  Season (year) to fetch; it is pasted into the archive name
#           "<season>eve.zip" and into the output file names.
#
# Side effects:
#   * writes all<season>.csv and roster<season>.csv into
#     download.folder/unzipped
#   * afterwards deletes every *.EVN, *.EVA, *.ROS and TEAM* file in the
#     unzipped folder and every *.zip file in the zipped folder
#   * the caller's working directory is always restored, even on error
#
# Returns NULL, invisibly.
parse.retrosheet2.pbp <- function(season) {
  zipped.dir <- file.path("download.folder", "zipped")
  unzipped.dir <- file.path("download.folder", "unzipped")
  dir.create(zipped.dir, recursive = TRUE, showWarnings = FALSE)
  dir.create(unzipped.dir, recursive = TRUE, showWarnings = FALSE)

  # Fetch the season's event archive from the Retrosheet website.
  download.retrosheet <- function(season) {
    zip.name <- paste0(season, "eve.zip")
    download.file(
      url = paste0("http://www.retrosheet.org/events/", zip.name),
      destfile = file.path(zipped.dir, zip.name),
      mode = "wb" # binary transfer; a text-mode transfer corrupts zips on Windows
    )
  }

  # Extract the downloaded archive into the unzipped folder.
  unzip.retrosheet <- function(season) {
    unzip(file.path(zipped.dir, paste0(season, "eve.zip")),
          exdir = unzipped.dir)
  }

  # Run cwevent over all of the season's event files and collect the output
  # in all<year>.csv.
  # http://chadwick.sourceforge.net/doc/cwevent.html#cwtools-cwevent
  create.csv.file <- function(year) {
    out.file <- paste0("all", year, ".csv")
    command <- paste0("cwevent -y ", year, " -f 0-96 ",
                      year, "*.EV* > ", out.file)
    # The command uses relative file names, so it has to run inside the
    # unzipped folder; on.exit() puts the caller's directory back no matter
    # how this function ends.
    old.wd <- setwd(unzipped.dir)
    on.exit(setwd(old.wd), add = TRUE)
    status <- if (.Platform$OS.type == "unix") {
      system(command)
    } else {
      shell(command)
    }
    if (!identical(as.integer(status), 0L)) {
      warning("cwevent exited with status ", status, call. = FALSE)
    }
    # The shell redirect creates the output file even when cwevent could not
    # run, so an empty file means nothing was converted. Stop here so that
    # cleanup() does not delete the still-unconverted event files.
    out.size <- file.info(out.file)$size
    if (is.na(out.size) || out.size == 0) {
      stop("cwevent produced no output in ", out.file,
           "; is cwevent installed and are the event files present?",
           call. = FALSE)
    }
    invisible(out.file)
  }

  # Stack the season's team roster files into a single roster<year>.csv.
  create.csv.roster <- function(year) {
    all.files <- list.files(path = unzipped.dir)
    # Roster files are named <3-letter team><year>.ROS, so characters 4-11
    # of the file name hold "<year>.ROS".
    roster.files <-
      all.files[substr(all.files, 4, 11) == paste0(year, ".ROS")]
    if (length(roster.files) == 0) {
      stop("no roster files found for ", year, " in ", unzipped.dir,
           call. = FALSE)
    }
    read.roster <- function(file) {
      read.csv(file.path(unzipped.dir, file), header = FALSE)
    }
    rosters <- do.call("rbind", lapply(roster.files, read.roster))
    names(rosters)[1:6] <- c("Player.ID", "Last.Name", "First.Name",
                             "Bats", "Pitches", "Team")
    write.csv(rosters,
              file = file.path(unzipped.dir, paste0("roster", year, ".csv")))
  }

  # Remove the Retrosheet files that are no longer needed once the csv files
  # exist. unlink() expands the wildcards itself, so no shell commands and no
  # change of working directory are required.
  cleanup <- function() {
    unlink(file.path(unzipped.dir, c("*.EVN", "*.EVA", "*.ROS", "TEAM*")))
    unlink(file.path(zipped.dir, "*.zip"))
  }

  download.retrosheet(season)
  unzip.retrosheet(season)
  create.csv.file(season)
  create.csv.roster(season)
  cleanup()
  invisible(NULL)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment