Skip to content

Instantly share code, notes, and snippets.

View bayesball's full-sized avatar

Jim Albert bayesball

View GitHub Profile
@bayesball
bayesball / baseball_scores.R
Last active March 26, 2023 14:17
Code to download Retrosheet game log data with the focus on studying game scores.
# Main function to get Retrosheet game scores
get_scores <- function(season){
require(dplyr)
require(readr)
load_gamelog <- function(season) {
glheaders <- read_csv("https://raw.githubusercontent.com/beanumber/baseball_R/master/data/game_log_header.csv")
remote <- paste0("http://www.retrosheet.org/gamelogs/gl",
season, ".zip")
local <- paste0("gl", season, ".zip")
@bayesball
bayesball / baseballr_examples.R
Created March 7, 2023 16:51
Examples of data acquisition functions using the baseballr package
# reference
# https://billpetti.github.io/baseballr/articles/baseballr.html#follow-the-sportsdataverse-on-twitter-and-star-this-repo
library(baseballr)
# -----------------------------------------------------
# Retrosheet data
# -----------------------------------------------------
# acquire retro play-by-play data for seasons 2020:2022
@bayesball
bayesball / two_astros.R
Created February 4, 2023 21:41
R script for comparing two Astros pitchers post
# load in required packages
library(dplyr)
library(ggplot2)
library(CalledStrike)
library(janitor)
library(ShinyBaseball)
library(readr)
# collect mlb ids from two pitchers
@bayesball
bayesball / RunMe.R
Created November 13, 2022 22:58
R code to fit nonnested multilevel model to compare the roles of offense and defense in baseball run scoring
# load in required packages
library(purrr)
library(ggplot2)
library(tidyr)
library(dplyr)
# read in modeling functions
source("fit_model.R")
@bayesball
bayesball / app.R
Last active November 8, 2022 21:08
Shiny app to compare career trajectories of HOF candidates with contemporary players already in the HOF.
library(dplyr)
library(ggplot2)
library(geomtextpath)
library(readr)
# datasets are read from a GitHub repository
fg_batting <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/fgbatting_complete.csv")
hof <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/hofdata.csv")
hof_candidates <- read_csv("https://raw.githubusercontent.com/bayesball/HomeRuns2021/main/hofdata_candidates.csv")
hof_cand_batting <- filter(hof_candidates,
@bayesball
bayesball / estimate_batting.R
Last active November 5, 2022 19:43
R work for multinomial post -- main file is multinomial_setup.R
estimate_batting <- function(retro_final_PA_1990_2020d,
season,
s_woba = 0.5){
require(dplyr)
require(LearnBayes)
retro_final_PA_1990_2020d %>%
filter(YEAR == season) -> retroseason
retroseason %>%
group_by(BAT_ID) %>%
summarize(PA = n(),
@bayesball
bayesball / get_hr_data.R
Last active September 26, 2022 00:43
R work for the Predicting Home Runs using a Multilevel Model post
get_hr_data <- function(pred_season,
retro_data,
n_prev_seasons = 4,
mPA = 1000,
mPA_season = 200){
# n_prev_seasons is number of previous seasons
# mPA is the minimum number of cumulative PA
# retro_data - Retrosheet data for current season
# mPA_season - minimum number of PA in both
@bayesball
bayesball / prediction_work.R
Created September 16, 2022 12:29
R function to compare five methods in predicting future home run rates
prediction_work <- function(seasons,
mPA = 1000,
retrodata,
mPA_season = 200){
# seasons is a vector of previous seasons
# mPA is the minimum number of cumulative PA
# retrodata - Retrosheet data for current season
# mPA_season - minimum number of PA in both
# halves of current season
@bayesball
bayesball / sac_fly_work.R
Created August 19, 2022 20:04
R function to implement computations for sacrifice flies blog post
sac_fly_work <- function(sc, season){
# load required packages
require(dplyr)
require(ggplot2)
require(metR)
require(mgcv)
require(CalledStrike)
# define location, distance, and spray angle vars
@bayesball
bayesball / trout_ofer_work.R
Created June 6, 2022 13:48
R script to compute and graph lengths of ofers for Mike Trout
# load in Retrosheet pbp data frames
# for seasons 2011 through 2021
library(Lahman)
library(dplyr)
library(ggplot2)
# get Mike Trout's retro id from the People
# data frame in Lahman package