Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active May 18, 2016 13:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/d1c3e86ec09eb4895befd814de2699b5 to your computer and use it in GitHub Desktop.
Save bayesball/d1c3e86ec09eb4895befd814de2699b5 to your computer and use it in GitHub Desktop.
Pitch count graphs
# The inputs to these functions are:
# data - Retrosheet play-by-play data frame with the columns PIT_ID and BAT_ID, a variable RUNS.VALUE that gives the
#        runs value of each play, and indicator variables c10, c01, etc. that flag whether the PA went through each
#        specific pitch count (the 0-0 indicator c00 is added by the functions themselves)
# p - name of the player, written as "First Last"
# type - by default, type = "p" (pitcher); use any other value of type for a batter
#' Plot the mean runs value of plate appearances passing through each count
#'
#' Looks the player up in the Lahman player table, keeps that player's plate
#' appearances from `data`, and for every ball-strike count computes the mean
#' `RUNS.VALUE` of the plate appearances that passed through the count. The
#' twelve counts are plotted against the number of pitches thrown, joined by
#' blue paths along constant-strike and constant-ball sequences.
#'
#' @param data Play-by-play data frame with columns `PIT_ID`, `BAT_ID`,
#'   `RUNS.VALUE` and the count indicators `c10`, `c01`, `c20`, `c11`, `c02`,
#'   `c30`, `c21`, `c12`, `c31`, `c22`, `c32`.
#' @param p Player name written as "First Last"; it is matched against
#'   `paste(nameFirst, nameLast)` in the Lahman player table.
#' @param type `"p"` (default) selects rows by `PIT_ID`; any other value
#'   selects rows by `BAT_ID`.
#' @return The per-count summary (columns `Count`, `Runs`, `N.Pitches`, `P`,
#'   `balls`, `strikes`). The plot is printed as a side effect.
count_plot <- function(data, p, type = "p") {
  # library() stops with an error when a package is missing, whereas
  # require() only returns FALSE and lets the function fail later.
  library(ggplot2)
  library(Lahman)
  library(tidyr)
  library(dplyr)

  # Newer Lahman releases call the player table `People`; older ones call it
  # `Master`. Use whichever one the installed version provides.
  roster <- tryCatch(
    getExportedValue("Lahman", "People"),
    error = function(e) getExportedValue("Lahman", "Master")
  )

  # Match on the full name so multi-word surnames are found too.
  full_name <- trimws(gsub("\\s+", " ", p))
  hit <- paste(roster$nameFirst, roster$nameLast) == full_name
  ids <- as.character(roster$retroID[hit])
  ids <- unique(ids[!is.na(ids)])
  if (length(ids) == 0) {
    stop("No player named '", p, "' found in the Lahman player table.",
         call. = FALSE)
  }

  id_col <- if (type == "p") "PIT_ID" else "BAT_ID"
  needed <- c(id_col, "RUNS.VALUE", "c10", "c01", "c20", "c11", "c02",
              "c30", "c21", "c12", "c31", "c22", "c32")
  absent <- setdiff(needed, names(data))
  if (length(absent) > 0) {
    stop("`data` is missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # Several players can share a name; keep only the ids that actually occur
  # in the requested role. `%in%` avoids the silent recycling of `==`.
  ids <- ids[ids %in% data[[id_col]]]
  if (length(ids) == 0) {
    stop("No plate appearances for '", p, "' found in `data`.", call. = FALSE)
  }
  if (length(ids) > 1) {
    warning("Several players named '", p, "' appear in `data`; pooling ids ",
            paste(ids, collapse = ", "), ".", call. = FALSE)
  }

  # Every plate appearance passes through the 0-0 count.
  data$c00 <- 1
  pa <- select(data[data[[id_col]] %in% ids, , drop = FALSE],
               RUNS.VALUE, c00, c10, c01, c20, c11,
               c02, c30, c21, c12, c31, c22, c32)

  # One row per (plate appearance, count); the indicator name "cBS" encodes
  # balls in character 2 and strikes in character 3.
  long <- pa %>%
    gather(count, value, -RUNS.VALUE) %>%
    mutate(Balls = as.numeric(substr(count, 2, 2)),
           Strikes = as.numeric(substr(count, 3, 3)),
           N_Pitches = Balls + Strikes,
           Count = paste(Balls, Strikes, sep = "-"))

  # Runs: mean runs value over PAs that reached the count.
  # P: percentage of the player's PAs that reached the count.
  S <- long %>%
    group_by(Count) %>%
    summarize(Runs = sum(RUNS.VALUE * value, na.rm = TRUE) /
                sum(value, na.rm = TRUE),
              N.Pitches = N_Pitches[1],
              P = 100 * sum(value, na.rm = TRUE) / n(),
              balls = Balls[1],
              strikes = Strikes[1])

  # Paths along constant strikes (0-2) first, then constant balls (0-3),
  # each ordered by pitch number so the path runs left to right.
  along <- function(column, k) {
    seg <- S[S[[column]] == k, , drop = FALSE]
    seg[order(seg$N.Pitches), , drop = FALSE]
  }
  segments <- c(lapply(0:2, function(k) along("strikes", k)),
                lapply(0:3, function(k) along("balls", k)))

  the_title <- paste(p, ": Mean Runs Value of PAs Passing Through Different Counts")
  plt <- ggplot(S, aes(N.Pitches, Runs, label = Count)) +
    geom_point()
  for (seg in segments) {
    plt <- plt +
      geom_path(data = seg, aes(N.Pitches, Runs), color = "blue")
  }
  plt <- plt +
    xlab("Number of Pitch") +
    ylab("Runs Value") +
    ggtitle(the_title) +
    geom_hline(yintercept = 0, color = "red") +
    geom_label()
  print(plt)
  S
}
#' Plot mean runs value by count, with path width showing how often each
#' count is reached
#'
#' Same summary and layout as a plain count plot: the player is looked up in
#' the Lahman player table, the player's plate appearances are kept from
#' `data`, and for every ball-strike count the mean `RUNS.VALUE` of the plate
#' appearances passing through the count is computed. In addition, each path
#' segment's `size` is mapped to `Pct`, the percentage of the player's plate
#' appearances that reached the count at the far end of the segment.
#'
#' @param data Play-by-play data frame with columns `PIT_ID`, `BAT_ID`,
#'   `RUNS.VALUE` and the count indicators `c10`, `c01`, `c20`, `c11`, `c02`,
#'   `c30`, `c21`, `c12`, `c31`, `c22`, `c32`.
#' @param p Player name written as "First Last"; it is matched against
#'   `paste(nameFirst, nameLast)` in the Lahman player table.
#' @param type `"p"` (default) selects rows by `PIT_ID`; any other value
#'   selects rows by `BAT_ID`.
#' @return The per-count summary (columns `Count`, `Runs`, `N.Pitches`, `P`,
#'   `balls`, `strikes`). The plot is printed as a side effect.
count_plot_e <- function(data, p, type = "p") {
  # library() stops with an error when a package is missing, whereas
  # require() only returns FALSE and lets the function fail later.
  library(ggplot2)
  library(Lahman)
  library(tidyr)
  library(dplyr)

  # Newer Lahman releases call the player table `People`; older ones call it
  # `Master`. Use whichever one the installed version provides.
  roster <- tryCatch(
    getExportedValue("Lahman", "People"),
    error = function(e) getExportedValue("Lahman", "Master")
  )

  # Match on the full name so multi-word surnames are found too.
  full_name <- trimws(gsub("\\s+", " ", p))
  hit <- paste(roster$nameFirst, roster$nameLast) == full_name
  ids <- as.character(roster$retroID[hit])
  ids <- unique(ids[!is.na(ids)])
  if (length(ids) == 0) {
    stop("No player named '", p, "' found in the Lahman player table.",
         call. = FALSE)
  }

  id_col <- if (type == "p") "PIT_ID" else "BAT_ID"
  needed <- c(id_col, "RUNS.VALUE", "c10", "c01", "c20", "c11", "c02",
              "c30", "c21", "c12", "c31", "c22", "c32")
  absent <- setdiff(needed, names(data))
  if (length(absent) > 0) {
    stop("`data` is missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # Several players can share a name; keep only the ids that actually occur
  # in the requested role. `%in%` avoids the silent recycling of `==`.
  ids <- ids[ids %in% data[[id_col]]]
  if (length(ids) == 0) {
    # Without this guard the Pct assignment below fails on zero-row subsets.
    stop("No plate appearances for '", p, "' found in `data`.", call. = FALSE)
  }
  if (length(ids) > 1) {
    warning("Several players named '", p, "' appear in `data`; pooling ids ",
            paste(ids, collapse = ", "), ".", call. = FALSE)
  }

  # Every plate appearance passes through the 0-0 count.
  data$c00 <- 1
  pa <- select(data[data[[id_col]] %in% ids, , drop = FALSE],
               RUNS.VALUE, c00, c10, c01, c20, c11,
               c02, c30, c21, c12, c31, c22, c32)

  # One row per (plate appearance, count); the indicator name "cBS" encodes
  # balls in character 2 and strikes in character 3.
  long <- pa %>%
    gather(count, value, -RUNS.VALUE) %>%
    mutate(Balls = as.numeric(substr(count, 2, 2)),
           Strikes = as.numeric(substr(count, 3, 3)),
           N_Pitches = Balls + Strikes,
           Count = paste(Balls, Strikes, sep = "-"))

  # Runs: mean runs value over PAs that reached the count.
  # P: percentage of the player's PAs that reached the count.
  S <- long %>%
    group_by(Count) %>%
    summarize(Runs = sum(RUNS.VALUE * value, na.rm = TRUE) /
                sum(value, na.rm = TRUE),
              N.Pitches = N_Pitches[1],
              P = 100 * sum(value, na.rm = TRUE) / n(),
              balls = Balls[1],
              strikes = Strikes[1])

  # Paths along constant strikes (0-2) first, then constant balls (0-3),
  # each ordered by pitch number. Pct shifts P forward by one row so a
  # segment carries the percentage of the count it leads to; the last row
  # starts no segment and gets 0.
  along <- function(column, k) {
    seg <- S[S[[column]] == k, , drop = FALSE]
    seg <- seg[order(seg$N.Pitches), , drop = FALSE]
    seg$Pct <- c(seg$P[-1], 0)
    seg
  }
  segments <- c(lapply(0:2, function(k) along("strikes", k)),
                lapply(0:3, function(k) along("balls", k)))

  the_title <- paste(p, ": Mean Runs Value of PAs Passing Through Different Counts")
  plt <- ggplot(S, aes(N.Pitches, Runs, label = Count)) +
    geom_point()
  for (seg in segments) {
    # NOTE(review): ggplot2 >= 3.4 prefers `linewidth` for lines; `size` is
    # kept so the function still runs on older ggplot2 versions.
    plt <- plt +
      geom_path(data = seg, aes(N.Pitches, Runs, size = Pct), color = "blue")
  }
  plt <- plt +
    xlab("Number of Pitch") +
    ylab("Runs Value") +
    ggtitle(the_title) +
    geom_hline(yintercept = 0, color = "red") +
    geom_label()
  print(plt)
  S
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment