Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created October 26, 2021 19:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/19dff0093c3dbd2ad4c1a345b562e041 to your computer and use it in GitHub Desktop.
Save bayesball/19dff0093c3dbd2ad4c1a345b562e041 to your computer and use it in GitHub Desktop.
Function for finding all perfect-k games from Retrosheet data for a particular season
perfect <- function(d, n_innings){
  # Find all "perfect-k" games in one season of Retrosheet play-by-play data.
  #
  # One half of a game (top or bottom) qualifies when, in each of innings
  # 1..n_innings, every batting event is an out (EVENT_CD 2 or 3, the
  # Retrosheet codes for a generic out and a strikeout) and every one of
  # those batters was faced by the pitcher who started the inning, with the
  # same pitcher starting the first and the last of those innings.
  #
  # Inputs:
  #   d          Retrosheet play-by-play data frame. Columns used: GAME_ID,
  #              PIT_ID, INN_CT, BAT_HOME_ID, BAT_EVENT_FL, EVENT_CD.
  #              Rows are assumed to be in play order within each game,
  #              because the inning's pitcher is taken from its first row.
  #   n_innings  number of innings k that must be perfect
  # Output:
  #   data frame with one row per qualifying pitcher-game and the columns
  #   GAME_ID, Pitcher_First, N_Innings and Season (characters 4-7 of the
  #   first GAME_ID in d). Zero rows when no game qualifies.

  # library() rather than require(): a missing dplyr must stop here with a
  # clear error instead of returning FALSE and failing later on `%>%`.
  library(dplyr)

  needed <- c("GAME_ID", "PIT_ID", "INN_CT",
              "BAT_HOME_ID", "BAT_EVENT_FL", "EVENT_CD")
  missing_cols <- setdiff(needed, names(d))
  if (length(missing_cols) > 0) {
    stop("d is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # IDs may arrive as factors; compare and return them as plain strings.
  d <- d %>%
    mutate(PIT_ID = as.character(PIT_ID),
           GAME_ID = as.character(GAME_ID))
  season <- substr(d$GAME_ID[1], 4, 7)

  # Perfect-k pitchers for one half of the inning:
  # home_flag = 0 is the top (visitors bat), home_flag = 1 is the bottom.
  perfect_half <- function(home_flag) {
    # Per game and inning: batters faced, outs recorded, and how many of
    # the batters were faced by the pitcher who started the inning.
    by_inning <- d %>%
      filter(BAT_HOME_ID == home_flag,
             BAT_EVENT_FL == TRUE) %>%
      group_by(GAME_ID, INN_CT) %>%
      summarize(N = n(),
                S = sum(EVENT_CD %in% c(2, 3)),
                Perfect = as.integer(N == S),
                PIT_ID_first = first(PIT_ID),
                Pit_N = sum(PIT_ID == PIT_ID_first),
                .groups = "drop")

    # Keep games where all of the first n_innings innings are perfect and
    # the 3 * n_innings batters were all faced by the starting pitcher.
    by_inning %>%
      filter(INN_CT <= n_innings) %>%
      group_by(GAME_ID) %>%
      summarize(Pitcher_First = first(PIT_ID_first),
                Pitcher_Last = last(PIT_ID_first),
                N_Perfect = sum(Perfect),
                N_Pitcher = sum(Pit_N),
                .groups = "drop") %>%
      filter(Pitcher_First == Pitcher_Last,
             N_Perfect == n_innings,
             N_Pitcher == 3 * n_innings) %>%
      select(GAME_ID, Pitcher_First)
  }

  # Top-of-inning results first, then bottom. mutate() recycles the scalar
  # labels correctly even when the result has zero rows.
  bind_rows(perfect_half(0), perfect_half(1)) %>%
    mutate(N_Innings = n_innings,
           Season = season)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment