Last active
May 18, 2016 13:34
-
-
Save bayesball/d1c3e86ec09eb4895befd814de2699b5 to your computer and use it in GitHub Desktop.
Pitch count graphs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The inputs to these functions are:
#   data - Retrosheet play-by-play data frame with a variable RUNS.VALUE that gives the runs value of each play, and
#          variables c01, c10, etc. that indicate whether the PA went through the specific pitch count
#   p    - name of the player
#   type - by default, type = "p" (pitcher); use any other value of type for a batter
# Plot the mean runs value of a player's plate appearances (PAs) that pass
# through each pitch count, and return the per-count summary.
#
# Arguments:
#   data - Retrosheet play-by-play data frame containing RUNS.VALUE, the
#          pitch-count indicator columns c10, c01, ..., c32, and the player
#          id column PIT_ID (pitchers) or BAT_ID (batters).
#   p    - player name as "First Last"; it is matched against nameFirst and
#          nameLast in the Lahman people table to obtain the retroID.
#   type - "p" (default) selects rows by PIT_ID; any other value uses BAT_ID.
#
# Side effects: attaches ggplot2, Lahman, tidyr and dplyr, and prints the plot.
#
# Returns: one row per count with the columns Count, Runs (mean RUNS.VALUE of
#   the PAs passing through the count), N.Pitches (balls + strikes), P
#   (percentage of the player's PAs passing through the count), balls and
#   strikes.
count_plot <- function(data, p, type = "p") {
  # Validate the arguments with base R first so bad calls fail early and
  # with a message that names the actual problem.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  if (!is.character(p) || length(p) != 1L || is.na(p) || !nzchar(trimws(p))) {
    stop("`p` must be a single player name of the form \"First Last\".",
         call. = FALSE)
  }
  is_pitcher <- isTRUE(type == "p")
  id_col <- if (is_pitcher) "PIT_ID" else "BAT_ID"
  count_cols <- c("c10", "c01", "c20", "c11", "c02", "c30",
                  "c21", "c12", "c31", "c22", "c32")
  absent <- setdiff(c(id_col, "RUNS.VALUE", count_cols), names(data))
  if (length(absent) > 0) {
    stop("`data` is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # library() stops when a package is missing; require() merely returned
  # FALSE and let the function fail later with an unrelated error.
  for (pkg in c("ggplot2", "Lahman", "tidyr", "dplyr")) {
    library(pkg, character.only = TRUE)
  }

  # Newer Lahman releases ship this table as `People`; older ones call it
  # `Master`. Try the new name first and fall back to the old one.
  people <- tryCatch(Lahman::People, error = function(e) Lahman::Master)

  # Compare against the full "First Last" string so names containing extra
  # spaces ("Jorge De La Rosa", "J. D. Martinez") can be found as well.
  wanted <- gsub("\\s+", " ", trimws(p))
  hits <- which(paste(people$nameFirst, people$nameLast) == wanted)
  retro_ids <- unique(people$retroID[hits])
  retro_ids <- retro_ids[!is.na(retro_ids)]
  if (length(retro_ids) == 0L) {
    stop("No player named \"", p, "\" with a Retrosheet ID was found in the ",
         "Lahman people table.", call. = FALSE)
  }
  if (length(retro_ids) > 1L) {
    warning("Several players are named \"", p, "\" (",
            paste(retro_ids, collapse = ", "),
            "); their plate appearances are pooled.", call. = FALSE)
  }

  # Every PA passes through the 0-0 count.
  data$c00 <- rep(1, nrow(data))

  # %in% (rather than ==) keeps the row selection correct when more than one
  # id matched; == would silently recycle the ids along the rows.
  d_player <- if (is_pitcher) {
    filter(data, PIT_ID %in% retro_ids)
  } else {
    filter(data, BAT_ID %in% retro_ids)
  }
  d_player <- select(d_player, RUNS.VALUE,
                     c00, c10, c01, c20, c11,
                     c02, c30, c21, c12, c31, c22, c32)
  if (nrow(d_player) == 0L) {
    warning("No rows of `data` belong to \"", p, "\"; the summary is empty.",
            call. = FALSE)
  }

  # Long format: one row per (PA, count) with the indicator in `value`.
  d_long <- gather(d_player, count, value, -RUNS.VALUE)
  d_long <- mutate(d_long,
                   Balls = as.numeric(substr(count, 2, 2)),
                   Strikes = as.numeric(substr(count, 3, 3)),
                   N_Pitches = Balls + Strikes,
                   Count = paste(Balls, Strikes, sep = "-"))
  S <- summarize(group_by(d_long, Count),
                 Runs = sum(RUNS.VALUE * value, na.rm = TRUE) /
                   sum(value, na.rm = TRUE),
                 N.Pitches = N_Pitches[1],
                 P = 100 * sum(value, na.rm = TRUE) / n(),
                 balls = Balls[1], strikes = Strikes[1])

  # One path per fixed strike count (0-2) and per fixed ball count (0-3),
  # joining the counts that differ by a single additional pitch.
  path_data <- c(
    lapply(0:2, function(k) filter(S, strikes == k)),
    lapply(0:3, function(k) filter(S, balls == k))
  )
  path_layers <- lapply(path_data, function(d) {
    geom_path(data = d, aes(N.Pitches, Runs), color = "blue")
  })

  the_title <- paste(p, ": Mean Runs Value of PAs Passing Through Different Counts")
  plt <- ggplot(S, aes(N.Pitches, Runs, label = Count)) +
    geom_point() +
    path_layers +
    xlab("Number of Pitch") +
    ylab("Runs Value") +
    ggtitle(the_title) +
    geom_hline(yintercept = 0, color = "red") +
    geom_label()
  print(plt)
  S
}
# Variant of the pitch-count plot in which the width of each path segment is
# mapped to a percentage of the player's plate appearances (PAs).
#
# Arguments:
#   data - Retrosheet play-by-play data frame containing RUNS.VALUE, the
#          pitch-count indicator columns c10, c01, ..., c32, and the player
#          id column PIT_ID (pitchers) or BAT_ID (batters).
#   p    - player name as "First Last"; it is matched against nameFirst and
#          nameLast in the Lahman people table to obtain the retroID.
#   type - "p" (default) selects rows by PIT_ID; any other value uses BAT_ID.
#
# Side effects: attaches ggplot2, Lahman, tidyr and dplyr, and prints the plot.
#
# Returns: one row per count with the columns Count, Runs (mean RUNS.VALUE of
#   the PAs passing through the count), N.Pitches (balls + strikes), P
#   (percentage of the player's PAs passing through the count), balls and
#   strikes.
count_plot_e <- function(data, p, type = "p") {
  # Validate the arguments with base R first so bad calls fail early and
  # with a message that names the actual problem.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  if (!is.character(p) || length(p) != 1L || is.na(p) || !nzchar(trimws(p))) {
    stop("`p` must be a single player name of the form \"First Last\".",
         call. = FALSE)
  }
  is_pitcher <- isTRUE(type == "p")
  id_col <- if (is_pitcher) "PIT_ID" else "BAT_ID"
  count_cols <- c("c10", "c01", "c20", "c11", "c02", "c30",
                  "c21", "c12", "c31", "c22", "c32")
  absent <- setdiff(c(id_col, "RUNS.VALUE", count_cols), names(data))
  if (length(absent) > 0) {
    stop("`data` is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # library() stops when a package is missing; require() merely returned
  # FALSE and let the function fail later with an unrelated error.
  for (pkg in c("ggplot2", "Lahman", "tidyr", "dplyr")) {
    library(pkg, character.only = TRUE)
  }

  # Newer Lahman releases ship this table as `People`; older ones call it
  # `Master`. Try the new name first and fall back to the old one.
  people <- tryCatch(Lahman::People, error = function(e) Lahman::Master)

  # Compare against the full "First Last" string so names containing extra
  # spaces ("Jorge De La Rosa", "J. D. Martinez") can be found as well.
  wanted <- gsub("\\s+", " ", trimws(p))
  hits <- which(paste(people$nameFirst, people$nameLast) == wanted)
  retro_ids <- unique(people$retroID[hits])
  retro_ids <- retro_ids[!is.na(retro_ids)]
  if (length(retro_ids) == 0L) {
    stop("No player named \"", p, "\" with a Retrosheet ID was found in the ",
         "Lahman people table.", call. = FALSE)
  }
  if (length(retro_ids) > 1L) {
    warning("Several players are named \"", p, "\" (",
            paste(retro_ids, collapse = ", "),
            "); their plate appearances are pooled.", call. = FALSE)
  }

  # Every PA passes through the 0-0 count.
  data$c00 <- rep(1, nrow(data))

  # %in% (rather than ==) keeps the row selection correct when more than one
  # id matched; == would silently recycle the ids along the rows.
  d_player <- if (is_pitcher) {
    filter(data, PIT_ID %in% retro_ids)
  } else {
    filter(data, BAT_ID %in% retro_ids)
  }
  d_player <- select(d_player, RUNS.VALUE,
                     c00, c10, c01, c20, c11,
                     c02, c30, c21, c12, c31, c22, c32)
  # Without rows the per-path Pct columns below cannot be built; the original
  # code failed at that point with an obscure size-mismatch error.
  if (nrow(d_player) == 0L) {
    stop("No rows of `data` belong to \"", p, "\".", call. = FALSE)
  }

  # Long format: one row per (PA, count) with the indicator in `value`.
  d_long <- gather(d_player, count, value, -RUNS.VALUE)
  d_long <- mutate(d_long,
                   Balls = as.numeric(substr(count, 2, 2)),
                   Strikes = as.numeric(substr(count, 3, 3)),
                   N_Pitches = Balls + Strikes,
                   Count = paste(Balls, Strikes, sep = "-"))
  S <- summarize(group_by(d_long, Count),
                 Runs = sum(RUNS.VALUE * value, na.rm = TRUE) /
                   sum(value, na.rm = TRUE),
                 N.Pitches = N_Pitches[1],
                 P = 100 * sum(value, na.rm = TRUE) / n(),
                 balls = Balls[1], strikes = Strikes[1])

  # Shift P up by one row within a path: each point carries the percentage of
  # the NEXT count on that path, and the last point gets 0.
  # NOTE(review): geom_path appears to size a segment from its starting
  # point, which would make each segment's width the share of PAs reaching
  # the count it leads to - confirm against the ggplot2 documentation.
  with_next_pct <- function(d) {
    d$Pct <- c(d$P[-1], 0)
    d
  }

  # One path per fixed strike count (0-2) and per fixed ball count (0-3),
  # joining the counts that differ by a single additional pitch.
  path_data <- c(
    lapply(0:2, function(k) with_next_pct(filter(S, strikes == k))),
    lapply(0:3, function(k) with_next_pct(filter(S, balls == k)))
  )
  # `size` is kept (rather than the newer `linewidth`) so the function keeps
  # working with the ggplot2 versions it was written for.
  path_layers <- lapply(path_data, function(d) {
    geom_path(data = d, aes(N.Pitches, Runs, size = Pct), color = "blue")
  })

  the_title <- paste(p, ": Mean Runs Value of PAs Passing Through Different Counts")
  plt <- ggplot(S, aes(N.Pitches, Runs, label = Count)) +
    geom_point() +
    path_layers +
    xlab("Number of Pitch") +
    ylab("Runs Value") +
    ggtitle(the_title) +
    geom_hline(yintercept = 0, color = "red") +
    geom_label()
  print(plt)
  S
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment