Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active July 20, 2016 21:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/959d671c4e1db59aca0e14a80019ab79 to your computer and use it in GitHub Desktop.
Save bayesball/959d671c4e1db59aca0e14a80019ab79 to your computer and use it in GitHub Desktop.
Compare batting trajectories by scraping baseball-reference data
#' Compare batting trajectories by scraping baseball-reference data
#'
#' For each requested player, reads the player's page on
#' baseball-reference.com, extracts one season-by-season batting table,
#' keeps the AL/NL rows, reduces them to one row per season, and plots the
#' chosen statistic against age with one facet per player.
#'
#' @param Names Character vector of players. Each entry is either a
#'   "First Last" name (looked up in the Lahman player table) or, when the
#'   matching entry of `playerIDs` is `TRUE`, a Lahman player id.
#' @param table Id of the table to read from the player page; one of
#'   "batting_standard", "batting_value".
#' @param stat Name of the column of `table` to plot.
#' @param NCOL Number of columns used by the faceted plot.
#' @param playerIDs Logical flag(s): `TRUE` when the corresponding entry of
#'   `Names` is a player id rather than a name. Either a single value
#'   (applied to every entry) or one value per entry of `Names`.
#' @return A data frame with columns `Player`, `Age` and `Statistic`
#'   (one row per player-season). The plot is printed as a side effect.
compare_batting_trajectories <- function(Names,
                                         table = "batting_value",
                                         stat = "oWAR",
                                         NCOL = 1,
                                         playerIDs = FALSE) {
  # library() (unlike require()) stops immediately if a package is missing.
  library(Lahman)
  library(XML)
  library(ggplot2)
  library(ggthemes)
  library(dplyr)

  table <- match.arg(table, c("batting_standard", "batting_value"))
  stopifnot(is.character(Names), length(Names) > 0)

  J <- length(Names)
  if (length(playerIDs) == 1) {
    playerIDs <- rep(playerIDs, J)
  } else if (length(playerIDs) != J) {
    stop("playerIDs must have length 1 or the same length as Names",
         call. = FALSE)
  }

  # The player table is looked up as `Master`; if that object is absent
  # (newer Lahman releases are believed to name it `People`) fall back.
  lahman_env <- as.environment("package:Lahman")
  people <- if (exists("Master", envir = lahman_env, inherits = FALSE)) {
    get("Master", envir = lahman_env)
  } else {
    get("People", envir = lahman_env)
  }

  # Resolve every entry to a (player id, display name) pair.
  ids <- character(J)
  for (j in seq_len(J)) {
    if (!playerIDs[j]) {
      parts <- unlist(strsplit(Names[j], split = " "))
      given_name <- parts[1]
      family_name <- parts[2]
      hits <- dplyr::filter(people, nameFirst == given_name,
                            nameLast == family_name)$playerID
      if (length(hits) == 0) {
        stop("No player named '", Names[j], "' found in the Lahman data",
             call. = FALSE)
      }
      if (length(hits) > 1) {
        warning("Several players are named '", Names[j], "'; using '",
                hits[1], "'. Pass a player id to pick another.",
                call. = FALSE)
      }
      ids[j] <- hits[1]
    } else {
      player_id <- Names[j]
      B <- dplyr::filter(people, playerID == player_id)
      if (nrow(B) == 0) {
        stop("No player with id '", player_id, "' found in the Lahman data",
             call. = FALSE)
      }
      ids[j] <- player_id
      Names[j] <- paste(B$nameFirst[1], B$nameLast[1])
    }
  }

  # Scrape one table per player; collect the pieces and bind them once.
  pieces <- vector("list", J)
  for (j in seq_len(J)) {
    filename <- paste0("http://www.baseball-reference.com/players/",
                       substr(ids[j], 1, 1), "/", ids[j], ".shtml")
    d <- readHTMLTable(filename)[[table]]
    if (is.null(d)) {
      stop("Table '", table, "' not found at ", filename, call. = FALSE)
    }
    if (!stat %in% names(d)) {
      stop("Column '", stat, "' not found in table '", table, "'",
           call. = FALSE)
    }

    # remove minor league stats
    d <- dplyr::filter(d, Lg %in% c("AL", "NL"))

    if (table == "batting_value") {
      # sum over multiple teams per season
      d$Stat <- as.numeric(as.character(d[[stat]]))
      S <- dplyr::summarize(dplyr::group_by(d, Age), Stat = sum(Stat))
      ages <- S$Age
      values <- S$Stat
    } else {
      # For seasons split across teams keep only the "TOT" (total) row;
      # for all other seasons keep the single team row.
      d$Year <- as.numeric(substr(as.character(d$Year), 1, 4))
      mt_seasons <- dplyr::filter(d, Tm == "TOT")$Year
      S <- dplyr::filter(d, !(Year %in% mt_seasons) | Tm == "TOT")
      ages <- S$Age
      values <- as.numeric(as.character(S[[stat]]))
    }

    # rep() keeps this valid even when the player has no qualifying rows.
    pieces[[j]] <- data.frame(Player = rep(Names[j], length(values)),
                              Age = as.numeric(as.character(ages)),
                              Statistic = values)
  }
  data <- do.call(rbind, pieces)

  print(ggplot(data, aes(Age, Statistic)) +
          geom_point(color = "red") +
          geom_smooth() +
          facet_wrap(~ Player, ncol = NCOL) +
          theme_fivethirtyeight() +
          theme(plot.title = element_text(size = rel(2), hjust = 0.5,
                                          color = "blue")) +
          theme(strip.text = element_text(size = rel(1.5), hjust = 0.5,
                                          color = "red")) +
          ggtitle(paste(stat, "Career Trajectories")))
  data
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment