Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created March 20, 2018 11:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bayesball/0d1cd460a0902ecf3d3802328d91dd5d to your computer and use it in GitHub Desktop.
Save bayesball/0d1cd460a0902ecf3d3802328d91dd5d to your computer and use it in GitHub Desktop.
Exploring three seasons of Jake Arrieta's pitches
# Read in data and a ggplot2 theme for the title
library(tidyverse)
# Load one Statcast CSV per season from the working directory.
sc15 <- read_csv(file = "statcast2015.csv")
sc16 <- read_csv(file = "statcast2016.csv")
sc17 <- read_csv(file = "statcast2017.csv")

# Shared plot-title styling; added to the plots below with `+ TH`.
TH <- theme(
  plot.title = element_text(
    angle = 0,
    vjust = 0.8,
    hjust = 0.5,
    size = 16,
    colour = "red"
  )
)
# Keep only Arrieta's pitches from each season.
# The pitcher id 453562 was found with:
#   library(baseballr)
#   playerid_lookup(last_name = "Arrieta")
j15 <- sc15 %>% filter(pitcher == "453562")
j16 <- sc16 %>% filter(pitcher == "453562")
j17 <- sc17 %>% filter(pitcher == "453562")

# Stack the three seasons into one data frame, tagging each row with
# the season it came from.
jake <- rbind(
  data.frame(j15, Season = 2015),
  data.frame(j16, Season = 2016),
  data.frame(j17, Season = 2017)
)
# what pitches did Jake throw each season?
library(cowplot)
# Bar chart of relative pitch-type frequencies for the five listed
# pitch types, with one facet per season stacked in a single column.
jake %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  ggplot(mapping = aes(x = pitch_type,
                       y = (..count..) / sum(..count..))) +
  geom_bar() +
  facet_wrap(~ Season, ncol = 1) +
  TH +
  ggtitle("Frequencies of Pitch Types")
# Boxplots of release speed for each of the five listed pitch types,
# coloured by season and drawn horizontally (coord_flip).
jake %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  ggplot(aes(pitch_type, release_speed,
             color = factor(Season))) +
  geom_boxplot() +
  coord_flip() +
  # Limit the release_speed scale to the 70-100 range.
  ylim(70, 100) +
  TH +
  # The title must be a single-line literal: a line break inside the
  # quotes becomes part of the rendered title.
  ggtitle("Release Speeds for Each Pitch Type") +
  # Overlay the picture stored in jake.jpg (path is relative to the
  # working directory).
  draw_image("jake.jpg", x = 3, y = 75,
             scale = 15)
# Horizontal break (pfx_x) by pitch type, coloured by season.
p1 <- jake %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  ggplot(aes(x = pitch_type, y = pfx_x,
             colour = factor(Season))) +
  geom_boxplot() +
  coord_flip() +
  ggtitle("Horizontal Break for Each Pitch Type") +
  TH

# Vertical break (pfx_z) by pitch type, coloured by season.
p2 <- jake %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  ggplot(aes(x = pitch_type, y = pfx_z,
             colour = factor(Season))) +
  geom_boxplot() +
  coord_flip() +
  ggtitle("Vertical Break for Each Pitch Type") +
  TH

# Show the two break plots one above the other.
plot_grid(p1, p2, ncol = 1)
# Horizontal boxplots of release_spin_rate by pitch type, coloured by
# season (this plot has no title).
jake %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  ggplot(aes(x = pitch_type, y = release_spin_rate,
             colour = factor(Season))) +
  geom_boxplot() +
  coord_flip()
####### swing and miss rates
# Add two 0/1 indicator columns to `jake`, derived from `description`:
#   swing - 1 when the description is one of the foul, hit-into-play,
#           missed-bunt or swinging-strike outcomes listed below
#   miss  - 1 when the description is a missed bunt or swinging strike
# `%in%` never returns NA, so both columns are always 0 or 1.
jake <- jake %>%
  mutate(
    swing = ifelse(
      description %in% c(
        "foul", "foul_bunt",
        "foul_tip", "hit_into_play",
        "hit_into_play_no_out",
        "hit_into_play_score",
        "missed_bunt", "swinging_strike",
        "swinging_strike_blocked"
      ),
      1, 0
    ),
    miss = ifelse(
      description %in% c(
        "missed_bunt", "swinging_strike",
        "swinging_strike_blocked"
      ),
      1, 0
    )
  )

# Number of swings (S) and misses (M) for each pitch type, pooled over
# all three seasons.
# NOTE(review): the original comment said "for each season"; add Season
# to group_by() if a per-season table is what was intended.
jake %>%
  group_by(pitch_type) %>%
  summarize(S = sum(swing),
            M = sum(miss))
# Miss rate = mean of `miss` over swings, computed for every season and
# pitch type among the five listed pitch types, shown as one point per
# season/pitch-type pair.
jake %>%
  filter(swing == 1) %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  group_by(Season, pitch_type) %>%
  summarize(Miss_Rate = mean(miss, na.rm = TRUE)) %>%
  ggplot(aes(x = pitch_type, y = Miss_Rate,
             colour = factor(Season))) +
  geom_point(size = 3) +
  coord_flip() +
  ggtitle("Miss Rates for Each Pitch Type") +
  TH
# Boxplots of launch_speed for swings that were not misses, by pitch
# type and coloured by season.
jake %>%
  filter(swing == 1) %>%
  filter(miss == 0) %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  group_by(Season, pitch_type) %>%
  ggplot(aes(x = pitch_type, y = launch_speed,
             colour = factor(Season))) +
  geom_boxplot() +
  coord_flip() +
  ggtitle("Exit Velocities for Each Pitch Type") +
  TH
# Boxplots of launch_angle for swings that were not misses, by pitch
# type and coloured by season.
jake %>%
  filter(swing == 1) %>%
  filter(miss == 0) %>%
  filter(pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  group_by(Season, pitch_type) %>%
  ggplot(aes(x = pitch_type, y = launch_angle,
             colour = factor(Season))) +
  geom_boxplot() +
  coord_flip() +
  ggtitle("Launch Angles for Each Pitch Type") +
  TH
# hr rates
# HR is 1 when `events` is "home_run" and 0 otherwise. `%in%` is used
# rather than `==` because `==` returns NA for rows whose `events` value
# is missing, and a single NA would turn the sum() and mean() below
# into NA.
jake <- jake %>%
  mutate(HR = ifelse(events %in% "home_run", 1, 0))

# For rows with type "X" (presumably balls put in play - confirm against
# the Statcast data dictionary) and the five listed pitch types, compute
# per season and pitch type:
#   N        - number of such rows
#   HR_count - number of home runs
#   HR_Rate  - share of those rows that were home runs
# Each point is labelled with its home-run count.
jake %>%
  filter(type == "X",
         pitch_type %in% c("CH", "CU", "FF", "SI", "SL")) %>%
  group_by(Season, pitch_type) %>%
  summarize(N = n(),
            HR_count = sum(HR),
            HR_Rate = mean(HR)) %>%
  ggplot(aes(pitch_type, HR_Rate,
             color = factor(Season),
             label = HR_count)) +
  ggtitle("HR Rates by Pitch Type and Season") +
  geom_point(size = 3) +
  coord_flip() +
  TH +
  geom_label()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment