Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created July 16, 2015 13:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/93cb1c92510c41bea134 to your computer and use it in GitHub Desktop.
Save bayesball/93cb1c92510c41bea134 to your computer and use it in GitHub Desktop.
pitch count transitions
# loads in the Retrosheet data
load("~/OneDriveBusiness/Retrosheet/pbp.2014.Rdata")
# build pseq from PITCH_SEQ_TX in three passes, innermost first:
#   1. drop every character that is not an actual pitch
#   2. recode the ball-type pitch codes to b
#   3. recode the strike-type pitch codes to s
# any other code (e.g. a ball put in play) is left as it is
pbp.14$pseq <- gsub(
  "[CFKLMOQRST]", "s",
  gsub(
    "[BIPV]", "b",
    gsub("[.>123N+*]", "", pbp.14$PITCH_SEQ_TX)
  )
)
# Turns one pitch string (made of b, s and terminal codes) into the sequence
# of count transitions it produces.
#   ex: a single string of pitch outcomes, e.g. "bssX"
# Returns an unnamed list of two equal-length character vectors: the count
# before each pitch and the count after it, where the last "after" value is
# always the terminal state "X".
one.string <- function(ex) {
  # a final s or b (strikeout or walk) is rewritten as the terminal marker X
  ex <- gsub("b$", "X", gsub("s$", "X", ex))
  # individual pitches, leaving off the last one (the terminal marker)
  pitches <- head(unlist(strsplit(ex, "")), -1)
  # running ball and strike totals; strikes are capped at 2 so that extra
  # strike-type pitches with two strikes leave the count unchanged
  balls <- cumsum(pitches == "b")
  strikes <- pmin(cumsum(pitches == "s"), 2)
  # full path of states: start at 0-0, pass through each count, end in X
  path <- c("0-0", paste(balls, strikes, sep = "-"), "X")
  # pair every state with its successor
  list(path[-length(path)], path[-1])
}
# applies this function to all strings
# vapply() (instead of sapply()) always returns a 2 x N list-matrix, even for
# zero rows: row 1 holds the "before" counts, row 2 the "after" counts.
# USE.NAMES = FALSE avoids attaching every pitch string as a column name.
S <- vapply(pbp.14$pseq, one.string, FUN.VALUE = vector("list", 2),
            USE.NAMES = FALSE)
from.count <- unlist(S[1, ], use.names = FALSE)
to.count <- unlist(S[2, ], use.names = FALSE)
# the 12 legal counts (balls 0-3, strikes 0-2) plus the absorbing state X
valid.counts <- paste(rep(0:3, each = 3), rep(0:2, times = 4), sep = "-")
states <- c(valid.counts, "X")
# creates the matrix of transition counts and probabilities
# Fixing the factor levels makes TR a 13 x 13 table with a known layout no
# matter which counts occur in the data; a transition that involves a count
# outside `states` (e.g. four balls from a malformed sequence) becomes NA
# and is left out of the table.
TR <- table(factor(from.count, levels = states),
            factor(to.count, levels = states))
# P[i, j] = probability of moving from state i to state j on one pitch.
# Rows are selected by name, so the layout never depends on table order.
P <- matrix(0, nrow = length(states), ncol = length(states),
            dimnames = list(states, states))
P[valid.counts, ] <- prop.table(TR[valid.counts, ], 1)
# X is absorbing: once the plate appearance is over it stays over
P["X", "X"] <- 1
# plots probability of adding strike to count
# R[i] is a starting count and C[i] is the same count with one more strike,
# so the diagonal of P[R, C] pairs each starting count with its own target
R <- c("0-0", "0-1", "1-0", "1-1", "2-0", "2-1", "3-0", "3-1")
C <- c("0-1", "0-2", "1-1", "1-2", "2-1", "2-2", "3-1", "3-2")
Prob <- diag(P[R, C])
d <- data.frame(Count=R, Probability=Prob)
library(ggplot2)
# print() explicitly: a bare ggplot object is only drawn by top-level
# auto-printing, which does not happen when this file is run via source()
print(
  ggplot(d, aes(Probability, Count)) +
    geom_point(size=4, color="blue") +
    ggtitle("Probability of Adding a Strike to the Count")
)
# plots probability of adding ball to count
# R[i] is a starting count and C[i] is the same count with one more ball,
# so the diagonal of P[R, C] pairs each starting count with its own target
R <- c("0-0", "0-1", "0-2", "1-0", "1-1", "1-2", "2-0", "2-1", "2-2")
C <- c("1-0", "1-1", "1-2", "2-0", "2-1", "2-2", "3-0", "3-1", "3-2")
Prob <- diag(P[R, C])
d <- data.frame(Count=R, Probability=Prob)
library(ggplot2)
# print() explicitly: a bare ggplot object is only drawn by top-level
# auto-printing, which does not happen when this file is run via source()
print(
  ggplot(d, aes(Probability, Count)) +
    geom_point(size=4, color="blue") +
    ggtitle("Probability of Adding a Ball to the Count")
)
# plots probability of keeping the count at 2 strikes
# R and C are identical, so the diagonal of P[R, C] holds the probability
# that a two-strike count is followed by the very same count
R <- c("0-2", "1-2", "2-2", "3-2")
C <- c("0-2", "1-2", "2-2", "3-2")
Prob <- diag(P[R, C])
d <- data.frame(Count=R, Probability=Prob)
library(ggplot2)
# print() explicitly: a bare ggplot object is only drawn by top-level
# auto-printing, which does not happen when this file is run via source()
print(
  ggplot(d, aes(Probability, Count)) +
    geom_point(size=4, color="blue") +
    ggtitle("Probability of Keeping the Count")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment