Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
library(baseballr)
library(dplyr)
# Load pitch-level data from one of several sources.
#
# Args:
#   type:       source selector: "postgres", "rds", "csv" or "scrape".
#   start_date: first day to scrape (anything as.Date() accepts);
#               only used when type == "scrape".
#   end_date:   last day to scrape, inclusive; only used when type == "scrape".
#   infile:     path handed to readRDS() / read.csv(); required when type is
#               "rds" or "csv".
#
# Returns:
#   "rds"    -> the object stored in `infile`.
#   "csv"    -> a data.frame read with stringsAsFactors = FALSE.
#   "scrape" -> one data frame: the per-day results of
#               baseballr::scrape_statcast_savant() row-bound together.
#   "postgres" is an unfilled template and returns NULL until the connection
#   code below is supplied; an unrecognised `type` also returns NULL.
obtain_data = function(type="postgres",
                       start_date="2017-03-29",
                       end_date="2017-10-03", infile=NULL) {
  # Fail with a readable message instead of the low-level reader error.
  require_infile = function() {
    if (is.null(infile)) {
      stop("`infile` must be supplied when type is \"", type, "\"",
           call. = FALSE)
    }
  }

  if (type == "postgres") {
    # postgres db connection here
    # e.g.
    # library(RPostgres)
    # library(DBI)
    # conn <- dbConnect(RPostgres::Postgres(),
    # password=SOMEPASS, user=SOME_USER, port=SOME_PORT_PROBABLY_5432, dbname=SOME_NAME)
    # df1 = dbGetQuery(conn, "select * from SOME_TABLE_NAME where SOME_SELECTION_CRITERIA")
  } else if (type == "rds") {
    require_infile()
    readRDS(infile)
  } else if (type == "csv") {
    require_infile()
    read.csv(infile, stringsAsFactors = FALSE)
  } else if (type == "scrape") {
    # One request per calendar day in the inclusive range.
    date_seq = seq(as.Date(start_date), as.Date(end_date), by = 1)
    # The function must be lapply()'s FUN argument; bind_rows() then receives
    # the resulting list of daily data frames.
    daily = lapply(as.character(date_seq), function(d) {
      baseballr::scrape_statcast_savant(d, d)
    })
    dplyr::bind_rows(daily)
  }
}
# Derive the lookup table of frequently repeated (launch angle, launch speed)
# combinations and write it to a CSV file.
#
# Rows with a missing launch_speed are dropped; launch_angle and launch_speed
# are scaled by `inverse_precision` and rounded so that readings can be matched
# as whole numbers (columns `ila` and `ils`). Combinations of
# (ila, ils, bb_type, events) occurring at least `min_count` times are kept.
# NOTE(review): the premise appears to be that exact values repeated this often
# are imputed rather than measured -- the cut-off is a heuristic, see below.
#
# Args:
#   statcast_df:       data frame with columns launch_angle, launch_speed,
#                      bb_type, events.
#   inverse_precision: multiplier applied before rounding (10000 keeps four
#                      decimal places).
#   min_count:         minimum number of occurrences for a combination to be
#                      written out.
#   outfile:           path of the CSV file to write.
#
# Returns:
#   The filtered table (columns ila, ils, bb_type, events), invisibly.
statcast_impute_derive = function(statcast_df, inverse_precision=10000,
                                  min_count=5,
                                  outfile="CSV_FILE_TO_LOAD_LATER.csv") {
  # statcast_df must have columns launch_angle, launch_speed, bb_type, events
  needed = c("launch_angle", "launch_speed", "bb_type", "events")
  absent = setdiff(needed, names(statcast_df))
  if (length(absent) > 0) {
    stop("statcast_df is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  aa = statcast_df %>%
    filter(!is.na(launch_speed)) %>%
    mutate(ila = round(launch_angle * inverse_precision),
           ils = round(launch_speed * inverse_precision))

  # ungroup() so the later verbs operate on a plain table rather than on the
  # residual grouping summarise() leaves behind.
  la_ls_count = aa %>%
    group_by(ila, ils, bb_type, events) %>%
    summarise(n = n()) %>%
    ungroup()

  # use n >= 5 here? some other number? 99.X percentile? this is why it's a heuristic
  # filter() has to run before select(): once `n` is dropped, the name `n`
  # resolves to the function dplyr::n and the comparison errors.
  la_ls_filtered = la_ls_count %>%
    filter(n >= min_count) %>%
    select(ila, ils, bb_type, events)

  write.csv(la_ls_filtered, outfile, row.names = FALSE)
  invisible(la_ls_filtered)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment