Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Exploring values of home runs
# load in two packages
library(tidyverse)
library(WinProbability)
# Build the 2018 play-by-play data. The file all2018.csv is expected
# to be in the current working directory. The two helpers compute the
# runs expectancies and then the WPA values.
d2018 <- compute.runs.expectancy(2018) %>%
  compute.win.probs()

# Keep only the home run plays (EVENT_CD equal to 23).
hr <- d2018 %>%
  filter(EVENT_CD == 23)
# Bar graph of the runs values of the home runs in `hr`.
hr %>%
  ggplot(aes(x = RUNS.VALUE)) +
  geom_bar(width = 0.05)
# Per-batter summary over all players:
#   N = number of home runs, R = total runs value of those home runs.
# Scatterplot of the mean runs value per home run (R / N) against N.
hr %>%
  group_by(BAT_ID) %>%
  summarise(
    N = n(),
    R = sum(RUNS.VALUE)
  ) %>%
  ggplot(aes(x = N, y = R / N)) +
  geom_point()
# Per-batter summary over all players:
#   N = number of home runs, WPA = total of abs(WPA) over those home runs.
# Scatterplot of the mean abs(WPA) per home run (WPA / N) against N.
hr %>%
  group_by(BAT_ID) %>%
  summarise(
    N = n(),
    WPA = sum(abs(WPA))
  ) %>%
  ggplot(aes(x = N, y = WPA / N)) +
  geom_point()
# Keep the per-batter summaries (home run count N and total abs(WPA))
# in a data frame S for the steps that follow.
S <- hr %>%
  group_by(BAT_ID) %>%
  summarise(
    N = n(),
    WPA = sum(abs(WPA))
  )
# Attach player names from the Lahman Master table to S and store the
# result in data frame S2 with columns Name, N and WPA.
# Only batters whose BAT_ID matches a retroID in Master are kept
# (inner join).
# NOTE(review): newer Lahman releases may expose this table as `People`
# rather than `Master` -- confirm against the installed version.
library(Lahman)
library(ggrepel)
S2 <- S %>%
  inner_join(
    select(Master, retroID, nameFirst, nameLast),
    by = c("BAT_ID" = "retroID")
  ) %>%
  mutate(Name = paste(nameFirst, nameLast)) %>%
  select(Name, N, WPA)
# Reusable theme fragment that restyles the plot title:
# blue, size 18, horizontally centered, not rotated.
TH <- theme(
  plot.title = element_text(
    size = 18, colour = "blue",
    hjust = 0.5, vjust = 0.8, angle = 0
  )
)
# Scatterplot of home run count (N) against mean abs(WPA) per home run
# (WPA / N) for every player in S2, with the interesting points labeled.
# A player gets a label when any of the following holds:
#   * mean WPA per home run above 0.4,
#   * more than 40 home runs,
#   * more than 14.5 home runs AND mean WPA per home run above 0.2.
# The title names the 2018 season, which is the season S2 is built from.
ggplot(S2, aes(N, WPA / N, label = Name)) +
  geom_point() +
  geom_label_repel(
    data = filter(
      S2,
      WPA / N > 0.4 |
        N > 40 |
        (N > 14.5 & WPA / N > 0.2)
    )
  ) +
  xlab("HR") +
  ylab("AVERAGE WPA") +
  TH +
  ggtitle("Scatterplot of HR and Average WPA for 2018 Players")
# Focus on comparing two players.
# Retrosheet ids for the two players:
jd <- "martj006"  # J.D. Martinez
kd <- "davik003"  # Khris Davis

# hr2 holds only the home runs hit by these two players.
hr2 <- filter(hr, BAT_ID %in% c(jd, kd))

# Recode BAT_ID as a factor with readable player names.
# The levels are given explicitly so each label is tied to its id by
# construction rather than by the sort order of the ids, and so the
# recoding still works if one of the players has no rows in hr2.
hr2$BAT_ID <- factor(
  hr2$BAT_ID,
  levels = c(kd, jd),
  labels = c("Khris Davis", "J.D. Martinez")
)
# Dotplots of abs(WPA) for the two players, one panel per player
# stacked in a single column so the two distributions line up.
ggplot(hr2, aes(x = abs(WPA))) +
  geom_dotplot() +
  facet_wrap(~ BAT_ID, ncol = 1) +
  ggtitle("Values of Home Runs for Two Sluggers") +
  theme(text = element_text(size = 16)) +
  TH
# Dotplots of Runs Value for the two players, one panel per player
# stacked in a single column.
# Uses hr2 (same rows as filtering hr on the two ids) so the panels
# are labeled with the player names, matching the abs(WPA) dotplot,
# instead of the raw Retrosheet ids.
ggplot(hr2, aes(RUNS.VALUE)) +
  geom_dotplot(dotsize = 0.6) +
  facet_wrap(~ BAT_ID, ncol = 1)
# Table for the two players: home run count (N), average Runs Value
# and average abs(WPA) of their home runs.
hr2 %>%
  group_by(BAT_ID) %>%
  summarise(
    N = n(),
    Mean_Runs = mean(RUNS.VALUE),
    Mean_WPA = mean(abs(WPA))
  )
# Out of curiosity, repeat the comparison for the 2017 season.
# The file all2017.csv is expected to be in the current working
# directory.
d2017 <- compute.runs.expectancy(2017) %>%
  compute.win.probs()
# 2017 home runs (EVENT_CD equal to 23) hit by the two players whose
# Retrosheet ids are stored in jd and kd. This overwrites the 2018 hr2.
hr2 <- filter(
  d2017,
  EVENT_CD == 23,
  BAT_ID %in% c(jd, kd)
)

# Recode BAT_ID as a factor with readable player names.
# The levels are given explicitly so each label is tied to its id by
# construction rather than by the sort order of the ids, and so the
# recoding still works if one of the players has no rows in hr2.
hr2$BAT_ID <- factor(
  hr2$BAT_ID,
  levels = c(kd, jd),
  labels = c("Khris Davis", "J.D. Martinez")
)
# Table for the two players in hr2: home run count (N), average
# Runs Value and average abs(WPA) of their home runs.
hr2 %>%
  group_by(BAT_ID) %>%
  summarise(
    N = n(),
    Mean_Runs = mean(RUNS.VALUE),
    Mean_WPA = mean(abs(WPA))
  )
# The same three summaries computed over every 2017 home run
# (EVENT_CD equal to 23), with no grouping by batter.
d2017 %>%
  filter(EVENT_CD == 23) %>%
  summarise(
    N = n(),
    Mean_Runs = mean(RUNS.VALUE),
    Mean_WPA = mean(abs(WPA))
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.