Last active
December 29, 2015 17:11
-
-
Save bayesball/d051a9b88faaf4bf2e27 to your computer and use it in GitHub Desktop.
Plots trajectories of strikeout rates, home run rates, and hit-in-play rates for players with similar batting averages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# requires packages | |
# dplyr, Lahman, ggplot2 | |
# some preliminary work | |
library(dplyr) | |
library(Lahman) | |
# Season-adjusted birth year for one or more players.
#
# Looks each id up in the Lahman `Master` table and returns that player's
# birthYear, bumped by one when birthMonth is July or later. With this
# adjustment `yearID - get.birthyear(id)` is the player's age as of
# June 30 of season `yearID`.
#
# Arguments:
#   player.id  vector of playerID values (any length, any order)
#
# Returns a vector the same length as `player.id`; entries are NA when the
# id is not found in `Master` or its birth month / year is missing.
get.birthyear <- function(player.id) {
  # match() gives one row index per requested id. The earlier
  # `playerID == player.id` comparison recycled element-wise and was only
  # right for a single id or for the complete Master$playerID column.
  row_index <- match(player.id, Master$playerID)
  birth_month <- Master$birthMonth[row_index]
  birth_year <- Master$birthYear[row_index]
  ifelse(birth_month >= 7, birth_year + 1, birth_year)
}
# Master plus the adjusted birth year computed by get.birthyear().
Master1 <- Master %>%
  mutate(BirthYear = get.birthyear(playerID))

# One row per player-season: totals over all Batting rows that share a
# playerID and yearID, with the player's age in that season attached.
Batting1 <- Batting %>%
  group_by(playerID, yearID) %>%
  summarize(AB = sum(AB),
            H = sum(H),
            SO = sum(SO),
            HR = sum(HR)) %>%
  inner_join(select(Master1, playerID, BirthYear), by = "playerID") %>%
  mutate(Age = yearID - BirthYear)

# One row per player: first/last season, their midpoint, and career totals.
# CareerAVG is rounded to three decimals.
S <- Batting1 %>%
  group_by(playerID) %>%
  summarize(MinYear = min(yearID),
            MaxYear = max(yearID),
            MidCareer = (MinYear + MaxYear) / 2,
            CareerAB = sum(AB),
            CareerH = sum(H),
            CareerAVG = round(CareerH / CareerAB, 3))

# Attach the career-level summaries to every player-season row.
Batting1 <- Batting1 %>%
  inner_join(select(S, playerID, MidCareer, CareerAB, CareerAVG),
             by = "playerID")
# look at players with similar career batting averages and a minimum
# number of career AB in a particular era -- look at trajectories of
# SO Rate = SO / AB, HR Rate = HR / (AB - SO), and
# HIP Rate = (H - HR) / (AB - SO - HR)
# idea is that the SO Rates should be more stable over time than | |
# HIP rates | |
# by default find players with at least 3000 AB and career AVG of .300 | |
# target midcareer year that is input | |
# allow errors of 4 for midcareer and .002 for target career AVG | |
# Plot age trajectories of strikeout, home run and hit-in-play rates for
# players with similar career batting averages from the same era.
#
# Arguments:
#   target.year    mid-career year to match against S$MidCareer
#   Career_eps     largest allowed |MidCareer - target.year|
#   AVG_target     career batting average to match against S$CareerAVG
#   AVG_eps        largest allowed |CareerAVG - AVG_target|
#   Career_target  minimum career at-bats (S$CareerAB)
#
# Reads the global tables S, Batting1 and Master.
#
# Called for its side effects: prints three ggplot2 charts, each faceted
# by player name, or writes "No players matched the criteria." with cat()
# when no player satisfies the filters.
compare_rates <- function(target.year, Career_eps = 4,
                          AVG_target = .300,
                          AVG_eps = .002,
                          Career_target = 3000) {
  # library() stops with a clear error if ggplot2 is missing; require()
  # would only return FALSE and let the first ggplot() call fail later.
  library(ggplot2)

  # CareerAVG is rounded to three decimals, but differences such as
  # 0.302 - 0.300 evaluate to 0.0020000000000000018 in double precision,
  # so a bare `<= AVG_eps` silently drops players sitting exactly on the
  # edge of the window. A small tolerance keeps the bound inclusive.
  float_tol <- sqrt(.Machine$double.eps)

  S_select <- S %>%
    filter(abs(MidCareer - target.year) <= Career_eps,
           abs(CareerAVG - AVG_target) <= AVG_eps + float_tol,
           CareerAB >= Career_target)

  Batting_select <- Batting1 %>%
    filter(playerID %in% S_select$playerID) %>%
    inner_join(select(Master, playerID, nameFirst, nameLast),
               by = "playerID") %>%
    mutate(Name = paste(nameFirst, nameLast))

  if (nrow(Batting_select) == 0) {
    cat("No players matched the criteria.")
    return(invisible(NULL))
  }

  # Scatter plus smooth of one rate against Age, one panel per player.
  rate_plot <- function(mapping, title) {
    ggplot(Batting_select, mapping) +
      geom_point(color = "red") +
      geom_smooth(se = FALSE) +
      ggtitle(title) +
      facet_wrap(~ Name, ncol = 2)
  }

  plot1 <- rate_plot(aes(Age, SO / AB), "Strikeout Rates")
  plot2 <- rate_plot(aes(Age, HR / (AB - SO)), "Home Run Rates")
  plot3 <- rate_plot(aes(Age, (H - HR) / (AB - SO - HR)),
                     "Hit-In-Play Rates")

  print(plot1)
  print(plot2)
  print(plot3)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment