Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created March 7, 2023 16:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/c4ddc6540ecf550087fcc36c20f5d878 to your computer and use it in GitHub Desktop.
Save bayesball/c4ddc6540ecf550087fcc36c20f5d878 to your computer and use it in GitHub Desktop.
Examples of data acquisition functions using the baseballr package
# reference
# https://billpetti.github.io/baseballr/articles/baseballr.html#follow-the-sportsdataverse-on-twitter-and-star-this-repo
library(baseballr)
# -----------------------------------------------------
# Retrosheet data
# -----------------------------------------------------
# Download Retrosheet play-by-play data for the 2020-2022 seasons.
# Prerequisite (per the original notes): this relies on the Chadwick
# CLI tools already being installed on the machine.
d <- retrosheet_data(
  years_to_acquire = 2020:2022
)
# d is a list of three elements, one per season: "2020", "2021", "2022"
# -----------------------------------------------------
# Baseball Reference data
# -----------------------------------------------------
# Daily batter and pitcher tables from Baseball Reference, both requested
# with the same t1/t2 date arguments (2021-05-10 and 2021-06-20).
brdata <- bref_daily_batter(
  t1 = "2021-05-10",
  t2 = "2021-06-20"
)
brdata2 <- bref_daily_pitcher(
  t1 = "2021-05-10",
  t2 = "2021-06-20"
)
# -----------------------------------------------------
# FanGraphs data
# -----------------------------------------------------
# Batter game logs for a known FanGraphs player id (6184) in 2017.
fg_data <- fg_batter_game_logs(playerid = 6184, year = 2017)

# When the FanGraphs id is not known, look the player up by
# (last name, first name) and take the fangraphs_id column.
# Example: Mike Trout's 2022 batting game logs.
id <- playerid_lookup("Trout", "Mike")
fg_id <- id[["fangraphs_id"]]
fg_data_trout <- fg_batter_game_logs(playerid = fg_id, year = 2022)

# Pitcher game logs work the same way: Aaron Nola, 2022 season.
# Note that id and fg_id are reused, so they now refer to Nola.
id <- playerid_lookup("Nola", "Aaron")
fg_id <- id[["fangraphs_id"]]
fg_data_nola <- fg_pitcher_game_logs(playerid = fg_id, year = 2022)

# Batter leaderboards.
# NOTE(review): the two arguments are passed positionally; what they map
# to depends on the installed baseballr version -- confirm before reuse.
b_leaders <- fg_batter_leaders(2021, 2022)
# -----------------------------------------------------
# MLB data
# -----------------------------------------------------
# Pitch-by-pitch data for one game, selected by its game_pk
# (per the original notes, available for Major and Minor League games).
d1 <- mlb_pbp(game_pk = 632970)

# Trying the same call on a 2023 spring training game; this game_pk
# was found by downloading Statcast data first.
d2 <- mlb_pbp(game_pk = 719263)

# Game lookup for a single date.
game_pks <- mlb_game_pks(date = "2019-04-29")
# -----------------------------------------------------
# Statcast data
# -----------------------------------------------------
# Statcast data for a single day, 2016-04-06 (the original notes report
# 3846 pitches for this date). Stored under its own name so the result
# is not discarded by the second query below.
sc_data_2016 <- statcast_search(
  start_date = "2016-04-06",
  end_date = "2016-04-06",
  player_type = "batter"
)

# Same single-day query for 2023-03-05 (presumably a spring training
# date -- confirm). The result stays in sc_data, as in the original
# script, so any later code that reads sc_data is unaffected.
sc_data <- statcast_search(
  start_date = "2023-03-05",
  end_date = "2023-03-05",
  player_type = "batter"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment