Created
November 13, 2022 22:58
-
-
Save bayesball/3595f541b57eed6759326d0aa27ce8b0 to your computer and use it in GitHub Desktop.
R code to fit nonnested multilevel model to compare the roles of offense and defense in baseball run scoring
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fit_model <- function(season){ | |
# required packages | |
require(dplyr) | |
require(readr) | |
require(lme4) | |
# read in retrosheet game logs for that season | |
# from my hard drive | |
# you can obtain these files from retrosheet.org | |
name1 <- "~/Dropbox/Google Drive/gamelogs/gamelogs/gl" | |
file_name <- paste(name1, season, ".txt", sep = "") | |
gl_season <- read_csv(file_name) | |
# create response and off, def team variables | |
gl_season %>% | |
mutate(RootRuns = sqrt(HomeRunsScore), | |
OffensiveTeam = HomeTeam, | |
DefensiveTeam = VisitingTeam) %>% | |
select(RootRuns, OffensiveTeam, | |
DefensiveTeam) -> d1 | |
# again for visiting runs scored | |
gl_season %>% | |
mutate(RootRuns = sqrt(VisitorRunsScored), | |
OffensiveTeam = VisitingTeam, | |
DefensiveTeam = HomeTeam) %>% | |
select(RootRuns, OffensiveTeam, | |
DefensiveTeam) -> d2 | |
# row merge two datasets | |
d12 <- rbind(d1, d2) | |
# random effects model fit | |
fit <- lmer(RootRuns ~ (1 | OffensiveTeam) + | |
(1 | DefensiveTeam), | |
data = d12) | |
# collect sd estimates | |
vcomp <- VarCorr(fit) | |
sd_off <- sqrt(vcomp$OffensiveTeam[1, 1]) | |
sd_def <- sqrt(vcomp$DefensiveTeam[1, 1]) | |
# return data frame | |
data.frame(Season = season, | |
Offensive = sd_off, | |
Defense = sd_def) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fit_model_home <- function(season){ | |
# required packages | |
require(dplyr) | |
require(readr) | |
require(lme4) | |
# read in retrosheet game logs for that season | |
# from my hard drive | |
# you can obtain these files from retrosheet.org | |
name1 <- "~/Dropbox/Google Drive/gamelogs/gamelogs/gl" | |
file_name <- paste(name1, season, ".txt", sep = "") | |
gl_season <- read_csv(file_name) | |
# create response and off, def team variables | |
gl_season %>% | |
mutate(RootRuns = sqrt(HomeRunsScore), | |
OffensiveTeam = HomeTeam, | |
DefensiveTeam = VisitingTeam, | |
Home = "yes") %>% | |
select(RootRuns, OffensiveTeam, | |
DefensiveTeam, Home) -> d1 | |
# again for visiting runs scored | |
gl_season %>% | |
mutate(RootRuns = sqrt(VisitorRunsScored), | |
OffensiveTeam = VisitingTeam, | |
DefensiveTeam = HomeTeam, | |
Home = "no") %>% | |
select(RootRuns, OffensiveTeam, | |
DefensiveTeam, Home) -> d2 | |
# row merge two datasets | |
d12 <- rbind(d1, d2) | |
# random effects model fit | |
fit <- lmer(RootRuns ~ (1 | OffensiveTeam) + | |
(1 | DefensiveTeam) + | |
Home, | |
data = d12) | |
# collect sd estimates | |
vcomp <- VarCorr(fit) | |
sd_off <- sqrt(vcomp$OffensiveTeam[1, 1]) | |
sd_def <- sqrt(vcomp$DefensiveTeam[1, 1]) | |
beta <- coef(fit) | |
# return data frame | |
data.frame(Season = season, | |
Offensive = sd_off, | |
Defense = sd_def, | |
Home = beta$OffensiveTeam[1, 2]) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load in required packages | |
library(purrr) | |
library(ggplot2) | |
library(tidyr) | |
library(dplyr) | |
# read in modeling functions | |
source("fit_model.R") | |
source("fit_model_home.R") | |
# run this function for seasons 1970-2021 (omitting 2020) | |
# collecting sd estimates | |
out <- map_df(c(1970:2019, 2021), fit_model) | |
# one plot -- plot of offensive and defensive estimates | |
# first pivot longer the output data frame | |
out %>% | |
pivot_longer( | |
cols = Offensive:Defense, | |
names_to = "Type", | |
values_to = "SD" | |
) -> out1 | |
# here is the plot | |
ggplot(out1, aes(Season, SD, | |
color = Type)) + | |
geom_point() + | |
geom_smooth(se = FALSE, | |
method = "loess", | |
formula = "y ~ x") + | |
theme(text=element_text(size=18)) + | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, | |
hjust = 0.5, | |
vjust = 0.8, angle = 0)) + | |
ggtitle("Random Effect SD Estimates: 1970-2019, 2021") | |
# another plot - plot of ratio | |
# of sd estimates | |
ggplot(out, aes(Season, | |
Defense / Offensive)) + | |
geom_point() + | |
geom_smooth(se = FALSE, | |
method = "loess", | |
formula = "y ~ x") + | |
geom_hline(yintercept = 1, color = "red") + | |
theme(text=element_text(size=18)) + | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, | |
hjust = 0.5, | |
vjust = 0.8, angle = 0)) + | |
ggtitle("Ratio of Defensive to Offensive SD Estimates: 1970-2019, 2021") | |
###### look at home effects | |
out_home <- map_df(c(1970:2019, 2021), fit_model_home) | |
ggplot(out_home, aes(Season, Home)) + | |
geom_point() + | |
geom_smooth(se = FALSE, | |
method = "loess", | |
formula = "y ~ x") + | |
theme(text=element_text(size=18)) + | |
ylab("Home Coefficient") + | |
theme(plot.title = element_text(colour = "blue", | |
size = 18, | |
hjust = 0.5, | |
vjust = 0.8, angle = 0)) + | |
ggtitle("Home Park Effects: 1970-2019, 2021") | |
Author
bayesball
commented
Nov 17, 2022
via email
George:
That particular header file for the gl files can be found here:
https://github.com/beanumber/baseball_R/blob/master/data/game_log_header.csv
Jim
[image: Bowling Green State University] Jim Albert
Emeritus Professor,
Mathematics and Statistics
Bowling Green State University
***@***.***
…On Thu, Nov 17, 2022 at 10:20 AM George McGinn ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
I ran your code and it fails with ! object 'HomeRunsScore' not found. I
am assuming that in your datasets in ~/Dropbox/Google
Drive/gamelogs/gamelogs/gl you've added the headers for your .csv files.
I am wondering if you can post that here as a file (I plan to add a
col_names = pull( ...` to my code to add the header names so the objects in
the source can be found. Thanks.
—
Reply to this email directly, view it on GitHub
<https://gist.github.com/3595f541b57eed6759326d0aa27ce8b0#gistcomment-4372756>
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AAYE55DHY4VFIHN4OVJIHJTWIZENLBFKMF2HI4TJMJ2XIZLTSKBKK5TBNR2WLJDHNFZXJJDOMFWWLK3UNBZGKYLEL52HS4DFQKSXMYLMOVS2I5DSOVS2I3TBNVS3W5DIOJSWCZC7OBQXE5DJMNUXAYLOORPWCY3UNF3GS5DZVRZXKYTKMVRXIX3UPFYGLK2HNFZXIQ3PNVWWK3TUUZ2G64DJMNZZDAVEOR4XAZNEM5UXG5FFOZQWY5LFVEYTCOJTGMZDCMJYU52HE2LHM5SXFJTDOJSWC5DF>
.
You are receiving this email because you authored a thread.
Triage notifications on the go with GitHub Mobile for iOS
<https://apps.apple.com/app/apple-store/id1477376905?ct=notification-email&mt=8&pt=524675>
or Android
<https://play.google.com/store/apps/details?id=com.github.android&referrer=utm_campaign%3Dnotification-email%26utm_medium%3Demail%26utm_source%3Dgithub>
.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment