Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Plotting distributions of batting averages
# Load the packages used below.
# BApredict must be installed before it can be loaded, e.g. from GitHub:
# devtools::install_github("bayesball/BApredict")
# The Batting data frame from the Lahman package has most of the batting data.
library(tidyverse)
library(Lahman)
library(ggridges)
library(BApredict)
# Scrape 2018 season hitting data from the SI site (helper from BApredict).
Batting18 <- collect_hitting_data()

# Season totals per player for every third season from 1995 through 2016.
# H and AB are summed within (yearID, playerID), so a player who appears on
# several Batting rows in one season (presumably one row per stint/team --
# confirm against the Lahman docs) ends up with a single row.
# ungroup() explicitly drops the yearID grouping that summarize() leaves
# behind and returns a plain tibble; it replaces the deprecated as.tibble().
S <- Batting %>%
  filter(yearID %in% seq(1995, 2016, by = 3)) %>%
  group_by(yearID, playerID) %>%
  summarize(H = sum(H), AB = sum(AB)) %>%
  ungroup()

# Keep player-seasons with at least 428 at-bats and compute batting average.
S <- S %>%
  filter(AB >= 428) %>%
  mutate(AVG = H / AB) %>%
  select(yearID, H, AB, AVG)

# Same four columns for the scraped 2018 data.
# NOTE(review): no AB >= 428 filter is applied to 2018; presumably
# collect_hitting_data() already returns only qualifying hitters -- confirm.
S18 <- Batting18 %>%
  mutate(AVG = H / AB, yearID = 2018) %>%
  select(yearID, H, AB, AVG)

# Stack the historical seasons and 2018 into the data frame used by all plots.
Sall <- bind_rows(S, S18)
# ---- Graph 1: scatterplots ----
# Title styling (blue, size 18, horizontally centered) stored as TH so the
# later plots in this script can add the same theme.
TH <- theme(
  plot.title = element_text(colour = "blue", size = 18, hjust = 0.5)
)

# One point per row of Sall; coord_flip() puts the season on the vertical
# axis and the batting average on the horizontal axis.
ggplot(data = Sall, mapping = aes(x = yearID, y = AVG)) +
  geom_point() +
  coord_flip() +
  labs(
    x = "Season",
    y = "Batting Average",
    title = "Scatterplots of AVGs for Nine Seasons"
  ) +
  TH
# ---- Graph 2: jittered scatterplots ----
# Same display as the plain scatterplot, but geom_jitter() is given
# width = 0.15 so points within a season are spread apart instead of
# overplotting.
ggplot(data = Sall, mapping = aes(x = yearID, y = AVG)) +
  geom_jitter(width = 0.15) +
  coord_flip() +
  labs(
    x = "Season",
    y = "Batting Average",
    title = "Jittered Scatterplots of AVGs for Nine Seasons"
  ) +
  TH
# ---- Graph 3: error bar plots ----
# Per-season summary of AVG: mean (M), standard deviation (S) and number of
# rows (N). MS is also used by the "violin plots plus" graph further down.
MS <- summarize(
  group_by(Sall, yearID),
  M = mean(AVG),
  S = sd(AVG),
  N = n()
)

# A point at each season's mean with bars reaching one standard deviation
# either side. The value axis is limited to [0.16, 0.38].
ggplot(data = MS, mapping = aes(x = yearID, y = M)) +
  geom_point() +
  geom_errorbar(mapping = aes(ymin = M - S, ymax = M + S)) +
  ylim(0.16, 0.38) +
  coord_flip() +
  labs(
    x = "Season",
    y = "Batting Average",
    title = "Error Bar Plots of AVGs for Nine Seasons"
  ) +
  TH
# ---- Graph 4: boxplots ----
# yearID is converted to a factor so that each season gets its own box;
# coord_flip() lays the boxes out horizontally.
ggplot(data = Sall, mapping = aes(x = as.factor(yearID), y = AVG)) +
  geom_boxplot() +
  coord_flip() +
  labs(
    x = "Season",
    y = "Batting Average",
    title = "Boxplots of AVGs for Nine Seasons"
  ) +
  TH
# ---- Graph 5: violin plots ----
# One violin per season (yearID as a factor), drawn horizontally via
# coord_flip().
ggplot(data = Sall, mapping = aes(x = as.factor(yearID), y = AVG)) +
  geom_violin() +
  coord_flip() +
  labs(
    x = "Season",
    y = "Batting Average",
    title = "Violin Plots of AVGs for Nine Seasons"
  ) +
  TH
# ---- Graph 6: violin plots plus ----
# Violin plots as before, with a red point (size 3) drawn on top of each
# violin at the season mean M taken from the MS summary table.
ggplot(data = Sall, mapping = aes(x = as.factor(yearID), y = AVG)) +
  geom_violin() +
  geom_point(
    mapping = aes(x = as.factor(yearID), y = M),
    data = MS,
    color = "red",
    size = 3
  ) +
  coord_flip() +
  labs(
    x = "Season",
    y = "Batting Average",
    title = "Violin Plots Plus of AVGs for Nine Seasons"
  ) +
  TH
# ---- Graph 7: ridgeline plots ----
# geom_density_ridges() (from ggridges) draws one density curve per season.
# AVG is mapped directly to x and the season factor to y, so no coord_flip()
# is needed here.
ggplot(data = Sall, mapping = aes(x = AVG, y = as.factor(yearID))) +
  geom_density_ridges(fill = "orange", color = "white") +
  labs(
    x = "Batting Average",
    y = "Season",
    title = "Ridgeline Plots of AVGs for Nine Seasons"
  ) +
  TH
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.