-
-
Save blackerby/e8641cbd7bff5821a1edc252919515ec to your computer and use it in GitHub Desktop.
cfbfastR workflow script for one week, all plays, all teams, for LS 590 Linked Data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## William Blackerby
## LS 590 Linked Data
## University of Alabama SLIS
## experiment in using cfbfastR to wrangle play-by-play data from
## http://collegefootballdata.com API, adapted to wrangle every play
## from one week of a season
# copied and pasted (with unused packages removed) from | |
# https://cfbfastr.sportsdataverse.org/articles/intro.html | |
# Bootstrap pacman if it is missing, then use it to load the packages this
# script relies on.
if (!requireNamespace("pacman", quietly = TRUE)) {
  local({
    # A non-interactive Rscript session cannot prompt for a CRAN mirror, so
    # install.packages() errors when none is configured. Keep any mirror the
    # user has already set and only fall back when it is unset.
    repos <- getOption("repos")
    if (is.null(repos) || identical(unname(repos["CRAN"]), "@CRAN@")) {
      repos <- c(CRAN = "https://cloud.r-project.org")
    }
    install.packages("pacman", repos = repos)
  })
}
pacman::p_load(tidyverse, cfbfastR, lubridate)
# Command-line interface: Rscript <script> <year> <season_type> <week>
#   year         parsed as a base-10 integer
#   season_type  passed through unchanged to cfbfastR::cfbd_pbp_data()
#   week         parsed as a base-10 integer
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3) {
  stop(
    "Expected 3 arguments: <year> <season_type> <week>; got ", length(args),
    call. = FALSE
  )
}
year <- strtoi(args[1], base = 10L)
season_type <- args[2]
week <- strtoi(args[3], base = 10L)
# strtoi() yields NA for input it cannot read as an integer; stop here with a
# clear message rather than passing NA on to the play-by-play request.
if (is.na(year) || is.na(week)) {
  stop(
    "<year> and <week> must be whole numbers; got '", args[1], "' and '",
    args[3], "'",
    call. = FALSE
  )
}
# Lookup table: play-type label -> Q code. Spliced into recode_factor() during
# the wrangling step to replace each play_type label with its Q code.
# Entry order is kept as-is because recode_factor() orders levels by it.
replacements <- c(
  "End Period" = "Q827",
  "Pass Incompletion" = "Q828",
  "Pass Completion" = "Q829",
  "Rush" = "Q830",
  "Pass Interception" = "Q831",
  "Sack" = "Q832",
  "Penalty" = "Q833",
  "Fumble Recovery (Own)" = "Q834",
  "Kickoff Return (Offense)" = "Q835",
  "Kickoff Return (Defense)" = "Q836",
  "Punt Return" = "Q837",
  "Two Point Pass" = "Q838",
  "Two Point Rush" = "Q839",
  "Blocked Punt" = "Q840",
  "Blocked Field Goal" = "Q841",
  "Safety" = "Q842",
  "Timeout" = "Q843",
  "Pass Reception" = "Q844",
  "Pass Interception Return" = "Q845",
  "Fumble Recovery (Opponent)" = "Q846",
  "Kickoff Return Touchdown" = "Q847",
  "Punt Return Touchdown" = "Q848",
  "Interception Return Touchdown" = "Q849",
  "Blocked Punt Touchdown" = "Q850",
  "Blocked Field Goal Touchdown" = "Q851",
  "Fumble Return Touchdown" = "Q852",
  "Missed Field Goal Return" = "Q853",
  "Missed Field Goal Return Touchdown" = "Q854",
  "Blocked PAT" = "Q855",
  "Pass" = "Q856",
  "Punt" = "Q857",
  "Kickoff" = "Q858",
  "2pt Conversion" = "Q859",
  "Defensive 2pt Conversion" = "Q860",
  "Field Goal Good" = "Q861",
  "Field Goal Missed" = "Q862",
  "Extra Point Good" = "Q863",
  "Extra Point Missed" = "Q864",
  "Interception" = "Q865",
  "End of Half" = "Q866",
  "End of Game" = "Q867",
  "Passing Touchdown" = "Q868",
  "Rushing Touchdown" = "Q869",
  "placeholder" = "Q870",
  "Offensive 1pt Safety" = "Q871",
  "Uncategorized" = "Q872"
)
# Team lookup tables used by the yard_line step: the school/abbreviation pairs
# from Teams.csv, once with offense_* column names and once with defense_*
# names so each can be joined on its own key.
teams <- select(
  read_csv("Teams.csv", show_col_types = FALSE),
  school, abbreviation
)
offense <- rename(teams, offense_play = school, offense_abbrev = abbreviation)
defense <- rename(teams, defense_play = school, defense_abbrev = abbreviation)
# Fetch every play for the requested year / season type / week, reshape it
# into columns named after their P codes, and write the result to
# wrangled_week.csv. The written table is also kept in `week1`.
week1 <- cfbfastR::cfbd_pbp_data(
  year = year,
  season_type = season_type,
  week = week
) %>%
  # keep only the run of columns from id_play through wallclock
  select(id_play:wallclock) %>%
  # attach team abbreviations for the yard_line step; being inner joins, these
  # drop any play whose offense or defense has no row in the lookup tables
  inner_join(offense, by = "offense_play") %>%
  inner_join(defense, by = "defense_play") %>%
  mutate(
    # game clock as "minutes:seconds", seconds zero-padded to two digits
    clock = str_c(
      clock.minutes,
      str_pad(clock.seconds, width = 2, side = "left", pad = "0"),
      sep = ":"
    ),
    # fold yard lines above 50 back to 100 - yard_line, then prefix with the
    # offense abbreviation when yards_to_goal > 50 and the defense abbreviation
    # when yards_to_goal < 50; any other row gets the bare number
    yard_line = ifelse(yard_line > 50, 100 - yard_line, yard_line),
    yard_line = case_when(
      yards_to_goal > 50 ~ str_c(offense_abbrev, yard_line, sep = " "),
      yards_to_goal < 50 ~ str_c(defense_abbrev, yard_line, sep = " "),
      TRUE ~ as.character(yard_line)
    ),
    # down as an ordinal label, needed for the "<down> and <distance>" text;
    # values outside 1-4 are kept as their character form
    down = case_when(
      down == 1 ~ "1st",
      down == 2 ~ "2nd",
      down == 3 ~ "3rd",
      down == 4 ~ "4th",
      TRUE ~ as.character(down)
    ),
    down_and_distance = str_c(down, distance, sep = " and "),
    # play type label -> Q code
    play_type = recode_factor(play_type, !!!replacements),
    # ordinal down -> Q code (must run after down_and_distance is built)
    down = case_when(
      down == "1st" ~ "Q303",
      down == "2nd" ~ "Q304",
      down == "3rd" ~ "Q305",
      down == "4th" ~ "Q306",
      TRUE ~ as.character(down)
    ),
    # columns that do not exist in the fetched data
    qid = "",
    Len = str_c(play_text, " (", id_play, ")"),
    Den = str_glue(
      "Game action from the {year(wallclock)} {home} versus {away} Football Game"
    ),
    P1 = "Q1585"
  ) %>%
  # wrap string-valued columns in literal double quotes for quick statements
  # NOTE(review): the template has four leading quotes and one trailing quote;
  # reproduced as-is -- confirm this is what the importer expects
  mutate(
    across(
      c(id_play, yard_line, clock, play_text, down_and_distance, wallclock),
      function(value) str_glue('""""{value}"')
    )
  ) %>%
  # final column set, renamed to the corresponding P codes
  select(
    qid,
    Len,
    Den,
    P1, # "Q1585"
    P54 = id_play,
    P16 = offense_play,
    P17 = defense_play,
    P18 = game_id,
    P19 = drive_id,
    P21 = period,
    P23 = yard_line,
    P22 = clock,
    P24 = down,
    P25 = distance,
    P55 = scoring,
    P56 = yards_gained,
    P26 = play_type,
    P57 = play_text,
    P31 = down_and_distance,
    P78 = wallclock
  ) %>%
  write_csv("wrangled_week.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Revision 2 incorporates simple command line functionality. Needs to be built out more.