Skip to content

Instantly share code, notes, and snippets.

@blackerby
Last active April 9, 2022 19:52
Show Gist options
  • Save blackerby/e8641cbd7bff5821a1edc252919515ec to your computer and use it in GitHub Desktop.
Save blackerby/e8641cbd7bff5821a1edc252919515ec to your computer and use it in GitHub Desktop.
cfbfastR workflow script for one week, all plays, all teams, for LS 590 Linked Data
## William Blackerby
## LS 590 Linked Data
## University of Alabama SLIS
## experiment in using cfbfastR to wrangle play-by-play data from
## http://collegefootballdata.com API, adapted to wrangle every play
## from one week of a season
# copied and pasted (with unused packages removed) from
# https://cfbfastr.sportsdataverse.org/articles/intro.html
if (!requireNamespace('pacman', quietly = TRUE)){
install.packages('pacman')
}
pacman::p_load(tidyverse, cfbfastR, lubridate)
# Read the three positional command-line arguments:
#   Rscript <script> <year> <season_type> <week>
# year and week are parsed as integers; season_type is passed through as text.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3) {
  # fail early with a usage hint instead of passing NA values to the API call
  stop(
    "Expected 3 arguments: <year> <season_type> <week> (got ",
    length(args), ").",
    call. = FALSE
  )
}
year <- strtoi(args[1])
season_type <- args[2]
week <- strtoi(args[3])
# strtoi() returns NA (without warning) for text that is not a whole number
if (is.na(year) || is.na(week)) {
  stop(
    "<year> and <week> must be whole numbers; got '",
    args[1], "' and '", args[3], "'.",
    call. = FALSE
  )
}
# Lookup table (named character vector): play-type label -> Q code.
# It is spliced into recode_factor() during the wrangling step later on.
replacements <- c(
  "End Period" = "Q827",
  "Pass Incompletion" = "Q828",
  "Pass Completion" = "Q829",
  "Rush" = "Q830",
  "Pass Interception" = "Q831",
  "Sack" = "Q832",
  "Penalty" = "Q833",
  "Fumble Recovery (Own)" = "Q834",
  "Kickoff Return (Offense)" = "Q835",
  "Kickoff Return (Defense)" = "Q836",
  "Punt Return" = "Q837",
  "Two Point Pass" = "Q838",
  "Two Point Rush" = "Q839",
  "Blocked Punt" = "Q840",
  "Blocked Field Goal" = "Q841",
  "Safety" = "Q842",
  "Timeout" = "Q843",
  "Pass Reception" = "Q844",
  "Pass Interception Return" = "Q845",
  "Fumble Recovery (Opponent)" = "Q846",
  "Kickoff Return Touchdown" = "Q847",
  "Punt Return Touchdown" = "Q848",
  "Interception Return Touchdown" = "Q849",
  "Blocked Punt Touchdown" = "Q850",
  "Blocked Field Goal Touchdown" = "Q851",
  "Fumble Return Touchdown" = "Q852",
  "Missed Field Goal Return" = "Q853",
  "Missed Field Goal Return Touchdown" = "Q854",
  "Blocked PAT" = "Q855",
  "Pass" = "Q856",
  "Punt" = "Q857",
  "Kickoff" = "Q858",
  "2pt Conversion" = "Q859",
  "Defensive 2pt Conversion" = "Q860",
  "Field Goal Good" = "Q861",
  "Field Goal Missed" = "Q862",
  "Extra Point Good" = "Q863",
  "Extra Point Missed" = "Q864",
  "Interception" = "Q865",
  "End of Half" = "Q866",
  "End of Game" = "Q867",
  "Passing Touchdown" = "Q868",
  "Rushing Touchdown" = "Q869",
  "placeholder" = "Q870",
  "Offensive 1pt Safety" = "Q871",
  "Uncategorized" = "Q872"
)
# School-name/abbreviation lookups read from Teams.csv. The same two columns
# are exposed twice, renamed once for the offense and once for the defense,
# so each can be joined onto the play-by-play data for yard_line wrangling.
teams <- select(
  read_csv("Teams.csv", show_col_types = FALSE),
  school,
  abbreviation
)
offense <- rename(teams, offense_play = school, offense_abbrev = abbreviation)
defense <- rename(teams, defense_play = school, defense_abbrev = abbreviation)
# Fetch play-by-play data for the requested year / season type / week,
# reshape it into the qid/Len/Den/P-code column layout used for quick
# statements, and write the result to wrangled_week.csv.
# write_csv() returns its input, so week1 holds the table that was written.
week1 <- cfbfastR::cfbd_pbp_data(
  year = year,
  season_type = season_type,
  week = week
) %>%
  # keep only the run of columns from id_play through wallclock
  select(id_play:wallclock) %>%
  # attach team abbreviations for yard_line wrangling
  # NOTE(review): inner joins drop plays whose offense or defense school is
  # not present in the lookup tables -- confirm that is intended
  inner_join(offense, by = "offense_play") %>%
  inner_join(defense, by = "defense_play") %>%
  # wrangle existing columns
  mutate(
    # temporary clock column: minutes, ":", seconds left-padded to 2 digits
    clock = str_c(
      clock.minutes,
      str_pad(clock.seconds, 2, "left", "0"),
      sep = ":"
    ),
    # values above 50 are mirrored to 100 - value
    yard_line = ifelse(yard_line > 50, 100 - yard_line, yard_line),
    # prefix with the offense abbreviation when yards_to_goal is above 50,
    # the defense abbreviation when below 50, otherwise leave the bare number
    yard_line = case_when(
      yards_to_goal > 50 ~ str_c(offense_abbrev, yard_line, sep = " "),
      yards_to_goal < 50 ~ str_c(defense_abbrev, yard_line, sep = " "),
      TRUE ~ as.character(yard_line)
    ),
    # down: cardinal -> ordinal, used only to build down_and_distance
    down = case_when(
      down == 1 ~ "1st",
      down == 2 ~ "2nd",
      down == 3 ~ "3rd",
      down == 4 ~ "4th",
      TRUE ~ as.character(down)
    ),
    down_and_distance = str_c(down, distance, sep = " and "),
    # play_type label -> Q code (labels without a mapping are kept as-is)
    play_type = recode_factor(play_type, !!!replacements),
    # down: ordinal -> Q code, now that down_and_distance has been built
    down = case_when(
      down == "1st" ~ "Q303",
      down == "2nd" ~ "Q304",
      down == "3rd" ~ "Q305",
      down == "4th" ~ "Q306",
      TRUE ~ as.character(down)
    )
  ) %>%
  # brand-new columns
  mutate(
    qid = "",
    # label: play text followed by the play id in parentheses
    Len = str_c(play_text, str_c("(", id_play, ")"), sep = " "),
    # description; year() here is the function applied to wallclock
    Den = str_glue("Game action from the {year(wallclock)} {home} versus {away} Football Game"),
    P1 = "Q1585"
  ) %>%
  # format strings for quick statements
  # NOTE(review): the template has four leading quotes and one trailing quote;
  # presumably tuned to how the downstream import unescapes CSV -- confirm
  # before changing
  mutate(
    id_play = str_glue('""""{id_play}"'),
    yard_line = str_glue('""""{yard_line}"'),
    clock = str_glue('""""{clock}"'),
    play_text = str_glue('""""{play_text}"'),
    down_and_distance = str_glue('""""{down_and_distance}"'),
    wallclock = str_glue('""""{wallclock}"')
  ) %>%
  # final column set, renamed to the corresponding P codes
  select(
    qid,
    Len,
    Den,
    P1, # "Q1585"
    P54 = id_play,
    P16 = offense_play,
    P17 = defense_play,
    P18 = game_id,
    P19 = drive_id,
    P21 = period,
    P23 = yard_line,
    P22 = clock,
    P24 = down,
    P25 = distance,
    P55 = scoring,
    P56 = yards_gained,
    P26 = play_type,
    P57 = play_text,
    P31 = down_and_distance,
    P78 = wallclock
  ) %>%
  write_csv("wrangled_week.csv")
@blackerby
Copy link
Author

Revision 2 incorporates simple command line functionality. Needs to be built out more.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment