bayesball/component_average.R

## component_average.R
#######################################################################################
# R functions for Paper "Improved Component Predictions of Batting and Pitching Measures"
# Journal of Quantitative Analysis of Sports (2016)
# Jim Albert, albert@bgsu.edu
# functions fit_component_average, plot_avg_results
#           fit_component_obp, plot_obp_results
#           fit_component_fip, plot_fip_results
# require installation of packages Lahman, dplyr, ggplot2, and LearnBayes
########################################################################################

fit_component_average <- function(season){
  # implements component method for estimating group of batting averages for nonpitchers
  # for a particular baseball season
  #
  # input:  season
  # output:  a list with components
  #   component -- gives values of K and eta for all component fits
  #   shrinkage -- gives value of K and eta for shrinkage fit
  #   S -- gives raw stats and component and shrinkage estimates for all players
  require(Lahman)
  require(dplyr)
  require(LearnBayes)
  ##---------------------------------------------------------------------
  get.data <- function(season){
    B <- summarize(group_by(filter(Batting, yearID==season), playerID),
                   AB=sum(AB), SO=sum(SO), H=sum(H), HR=sum(HR))
    B <- filter(B, AB > 0)
    NonPitchers <- unique(select(filter(Fielding,
                                        yearID==season, POS!="P"), playerID))
    merge(B, NonPitchers, by="playerID")
  }

  ## ------------------------------------------------------------------------
  fit.model <- function(data){
    mode <- laplace(betabinexch, c(1, 1),
                    cbind(data$y, data$n))$mode
    eta <- exp(mode[1]) / (1 + exp(mode[1]))
    K <- exp(mode[2])
    list(eta=eta, K=K,
         d=data.frame(data, est=(data$y + K * eta) / (data$n + K)))
  }
  ## ------------------------------------------------------------------------
  d <- get.data(season)

  ## ------------------------------------------------------------------------
  S.SO <- fit.model(data.frame(playerID=d$playerID,
                               y=d$SO, n=d$AB))
  S.HR <- fit.model(data.frame(playerID=d$playerID,
                               y=d$HR, n=d$AB - d$SO))
  S.H <- fit.model(data.frame(playerID=d$playerID,
                              y=d$H - d$HR,
                              n=d$AB - d$HR - d$SO))
  S <- merge(S.SO$d, S.HR$d, by="playerID")
  S <- merge(S, S.H$d, by="playerID")
  names(S) <- c("playerID", "SO", "AB", "SO.Rate",
                "HR", "AB.SO", "HR.Rate",
                "H.HR", "AB.SO.HR", "H.Rate")

  component.fit <- data.frame(eta=c(S.SO$eta, S.HR$eta, S.H$eta),
                              K=c(S.SO$K, S.HR$K, S.H$K))
  row.names(component.fit) <- c("SO", "HR", "H")

  ## ------------------------------------------------------------------------
  S$Est <- with(S,
                (1 - SO.Rate) * (HR.Rate + (1 - HR.Rate) * H.Rate))

  ## ------------------------------------------------------------------------
  S2 <- fit.model(data.frame(playerID=d$playerID,
                             y=d$H, n=d$AB))
  shrinkage.fit <- c(eta=S2$eta, K=S2$K)

  S <- merge(S, S2$d, by="playerID")
  names(S)[c(11:14)] <- c("Comp.Est", "H", "AB1", "Shrinkage.Est")
  S$Season <- season
  list(S=S, component=component.fit, shrinkage=shrinkage.fit)
}

plot_avg_results <- function(fitwork){
  # function constructs plot of observed and predicted one.minus.strikeout.rates and his.in.non.so.ab rates
  # from output of fit_component_average function
  require(ggplot2)
  require(dplyr)
  S <- fitwork$S
  myf <- function(x, p) p / x
  AVG1 <- data.frame(P=.25, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .25))
  AVG2 <- data.frame(P=.3, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .3))
  AVG3 <- data.frame(P=.2, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .2))
  AVG4 <- data.frame(P=.35, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .35))
  AVG5 <- data.frame(P=.4, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .4))
  AVG <- rbind(AVG1, AVG2, AVG3, AVG4, AVG5)
  AVG <- mutate(AVG, BA=factor(P))
  d1 <- data.frame(Type="Observed BA",
                   One.Minus.SO.Rate=1 - S$SO / S$AB,
                   H.Rate=S$H / S$AB.SO,
                   AB=S$AB)
  d2 <- data.frame(Type="Predicted BA",
                   One.Minus.SO.Rate=1 - S$SO.Rate,
                   H.Rate=S$HR.Rate + (1 - S$HR.Rate) * S$H.Rate,
                   AB=S$AB)
  dd <- rbind(d1, d2)
  dd1 <- filter(dd, AB >= 300)
  Xlow <- range(dd1$One.Minus.SO.Rate)
  Ylow <- range(dd1$H.Rate)
  p <- ggplot(dd1, aes(One.Minus.SO.Rate, H.Rate)) +
    geom_point() +
    facet_wrap(~ Type, ncol=1) +
    geom_line(data=AVG, aes(x, y, color=BA)) +
    ylim(Ylow[1], Ylow[2]) + xlim(Xlow[1], Xlow[2]) +
    ggtitle(paste("Hitters with at least 300 AB in",S$Season,"Season" )) +
    labs(y = "Hits in Non-SO-AB Rate")
  print(p)
}

fit_component_obp <- function(season){
  # implement component method for estimating group of on-base percentages for nonpitchers
  # for a particular baseball season
  #
  # input:  season
  # output:  a list with components
  #   component -- gives values of K and eta for all component fits
  #   shrinkage -- gives value of K and eta for shrinkage fit
  #   ST -- gives raw stats and component and shrinkage estimates for all players
  require(Lahman)
  require(dplyr)
  require(LearnBayes)

  get.data <- function(season){
    B <- summarize(group_by(filter(Batting,
                                   yearID==season), playerID),
                   PA=sum(AB + BB + HBP),
                   AB=sum(AB), BB=sum(BB + HBP), H=sum(H),
                   SO=sum(SO), HR=sum(HR))
    NonPitchers <- unique(select(filter(Fielding,
                                        yearID==season, POS!="P"),
                                 playerID))
    merge(B, NonPitchers, by="playerID")
  }
  ## ------------------------------------------------------------------------
  fit.model <- function(data){
    mode <- laplace(betabinexch, c(1, 1),
                    cbind(data$y, data$n))$mode
    eta <- exp(mode[1]) / (1 + exp(mode[1]))
    K <- exp(mode[2])
    list(eta=eta, K=K,
         d=data.frame(data, est=(data$y + K * eta) / (data$n + K)))
  }

  ## ------------------------------------------------------------------------
  d <- get.data(season)

  ## ------------------------------------------------------------------------
  S.BB <- fit.model(data.frame(playerID=d$playerID,
                               y=d$BB, n=d$PA))
  S.SO <- fit.model(data.frame(playerID=d$playerID,
                               y=d$SO, n=d$AB))
  S.HR <- fit.model(data.frame(playerID=d$playerID,
                               y=d$HR, n=d$AB - d$SO))
  S.H <- fit.model(data.frame(playerID=d$playerID,
                              y=d$H - d$HR,
                              n=d$AB - d$HR - d$SO))
  S <- merge(S.BB$d, S.SO$d, by="playerID")
  S <- merge(S, S.HR$d, by="playerID")
  names(S) <- c("playerID", "BB", "PA", "BB.Rate",
                "SO", "AB", "SO.Rate",
                "HR", "AB.SO", "HR.Rate")
  S <- merge(S, S.H$d, by="playerID")
  names(S)[11:13] <- c("HIP", "IP", "HIP.Rate")

  S$BAEst <- with(S,
                  (1 - SO.Rate) * (HR.Rate + (1 - HR.Rate) * HIP.Rate))
  S$Est <- with(S,
                BB.Rate + (1 - BB.Rate) * BAEst)

  component.fit <- data.frame(eta=c(S.BB$eta, S.SO$eta,
                                    S.HR$eta, S.H$eta),
                              K=c(S.BB$K, S.SO$K, S.HR$K, S.H$K))
  row.names(component.fit) <- c("BB", "SO", "HR", "H")

  ## ------------------------------------------------------------------------
  S2 <- fit.model(data.frame(playerID=d$playerID,
                             y=(d$H + d$BB),
                             n=d$PA))
  shrinkage.fit <- c(eta=S2$eta, K=S2$K)
  ST <- merge(S, S2$d, by="playerID")
  names(ST)[c(15, 18)] <- c("Combo.Est", "Shrinkage.Est")

  ST$Season <- season
  list(ST=ST, component=component.fit, shrinkage=shrinkage.fit)
}

plot_obp_results <- function(fitwork){
  # function constructs plot of observed and predicted walk and hit rates
  # from output of fit_component_obp function
  require(ggplot2)
  require(dplyr)
  ST <- fitwork$ST
  myf <- function(x, p) (p - x) / (1 - x)
  AVG1 <- data.frame(P=.25, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .25))
  AVG2 <- data.frame(P=.3, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .3))
  AVG3 <- data.frame(P=.35, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .35))
  AVG4 <- data.frame(P=.4, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .4))
  AVG5 <- data.frame(P=.45, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .45))
  AVG <- rbind(AVG1, AVG2, AVG3, AVG4, AVG5)
  AVG <- mutate(AVG, OBP=factor(P))
  d1 <- data.frame(Type="Observed OBP",
                   Walk.Rate=ST$BB / ST$PA,
                   H.Rate=(ST$HIP + ST$HR) / ST$AB,
                   PA=ST$PA)
  d2 <- data.frame(Type="Predicted OBP",
                   Walk.Rate=ST$BB.Rate,
                   H.Rate=ST$BAEst,
                   PA=ST$PA)
  dd <- rbind(d1, d2)
  dd1 <- filter(dd, PA >= 300)
  Xlim <- range(dd1$Walk.Rate); Ylim <- range(dd1$H.Rate)
  p <- ggplot(dd1, aes(Walk.Rate, H.Rate)) +
    geom_point() +
    facet_wrap(~ Type, ncol=1) +
    geom_line(data=AVG, aes(x, y, color=OBP)) +
    ylim(Ylim[1], Ylim[2]) + xlim(Xlim[1], Xlim[2]) +
    ggtitle(paste("Hitters with at least 300 PA in",ST$Season,"Season")) +
    labs(x = "Walk Rate", y = "Hit Rate")
  print(p)
}

fit_component_fip <- function(season){
  # implement component method for estimating group of FIPs for pitchers in a particular season
  #
  # input:  season
  # output:
  #   S -- gives raw stats and component and shrinkage estimates for all pitches who face at least 50 batters
require(Lahman)
require(dplyr)
require(LearnBayes)
get.data <- function(season){
  summarize(group_by(filter(Pitching, yearID==season), playerID),
            PA=sum(BFP),
            AB=sum(BFP - BB - HBP, na.rm=TRUE),
            SO=sum(SO, na.rm=TRUE),
            BB=sum(BB + HBP, na.rm=TRUE),
            HR=sum(HR),
            H=sum(H),
            IP=sum(IPouts) / 3)
}
fit.model <- function(data){
  mode <- laplace(betabinexch, c(1, 1),
                  cbind(data$y, data$n))$mode
  eta <- exp(mode[1]) / (1 + exp(mode[1]))
  K <- exp(mode[2])
  list(eta=eta, K=K,
       d=data.frame(data, est=(data$y + K * eta) / (data$n + K)))
}
d <- filter(get.data(season), PA >= 50)

S.BB <- fit.model(data.frame(playerID=d$playerID,
                             y=d$BB, n=d$PA))
names(S.BB$d) <- c("playerID", "BB", "PA", "BB.Rate")
S.SO <- fit.model(data.frame(playerID=d$playerID,
                             y=d$SO, n=d$AB))
names(S.SO$d) <- c("playerID", "SO", "AB", "SO.Rate")
S.HR <- fit.model(data.frame(playerID=d$playerID,
                             y=d$HR, n=d$AB - d$SO))
names(S.HR$d) <- c("playerID", "HR", "AB.minus.SO", "HR.Rate")
S.H <- fit.model(data.frame(playerID=d$playerID,
                            y=d$H - d$HR,
                            n=d$AB - d$SO - d$HR))
names(S.H$d) <- c("playerID", "HIP", "OIP", "HIP.Rate")
S <- merge(S.BB$d, S.SO$d, by="playerID")
S <- merge(S, S.HR$d, by="playerID")
S <- merge(S, S.H$d, by="playerID")

S$Est <- with(S,
              (39 * (1 - BB.Rate) * (1 - SO.Rate) * HR.Rate +
                 9 * BB.Rate - 6 * (1 - BB.Rate) * SO.Rate) /
                ((1 - BB.Rate) * (SO.Rate +
                                    (1 - SO.Rate) * (1 - HR.Rate) * (1 - HIP.Rate))))
S$Obs <- with(S,
              ((13 * HR) + (3 * BB) - (2 * SO))) / d$IP

vary <- 5.1 ^ 2 / d$IP
dd <- cbind(S$Obs, vary)
dd <- dd[complete.cases(dd), ]
fit <- laplace(normnormexch, c(0, 0), dd)
mn <- fit$mode[1]
tau <- exp(fit$mode[2])
S$shrink <- (S$Obs / vary + mn / tau ^ 2) /
  (1 / vary + 1 / tau ^ 2)
S
}

plot_fip_results <- function(S){
  # function constructs plot of observed and predicted FIP measures
  # from output of fit_component_fip function
library(ggplot2)
d1 <- data.frame(PA=S$PA, FIP=S$Obs, Type="Observed")
d2 <- data.frame(PA=S$PA, FIP=S$Est, Type="Estimated")
d3 <- data.frame(PA=S$PA, FIP=S$shrink, Type="Shrinkage")
d <- rbind(d1, d2, d3)
p1 <- ggplot(filter(d, Type == "Observed"  | Type == "Shrinkage"),
       aes(PA, FIP, color=Type)) + geom_point()
p2 <- ggplot(filter(d, Type == "Estimated"  | Type == "Shrinkage"),
       aes(PA, FIP, color=Type)) + geom_point()
print(p1)
print(p2)
}
	#######################################################################################
	# R functions for Paper "Improved Component Predictions of Batting and Pitching Measures"
	# Journal of Quantitative Analysis of Sports (2016)
	# Jim Albert, albert@bgsu.edu
	# functions fit_component_average, plot_avg_results
	# fit_component_obp, plot_obp_results
	# fit_component_fip, plot_fip_results
	# require installation of packages Lahman, dplyr, ggplot2, and LearnBayes
	########################################################################################

	fit_component_average <- function(season){
	# implements component method for estimating group of batting averages for nonpitchers
	# for a particular baseball season
	#
	# input: season
	# output: a list with components
	# component -- gives values of K and eta for all component fits
	# shrinkage -- gives value of K and eta for shrinkage fit
	# S -- gives raw stats and component and shrinkage estimates for all players
	require(Lahman)
	require(dplyr)
	require(LearnBayes)
	##---------------------------------------------------------------------
	get.data <- function(season){
	B <- summarize(group_by(filter(Batting, yearID==season), playerID),
	AB=sum(AB), SO=sum(SO), H=sum(H), HR=sum(HR))
	B <- filter(B, AB > 0)
	NonPitchers <- unique(select(filter(Fielding,
	yearID==season, POS!="P"), playerID))
	merge(B, NonPitchers, by="playerID")
	}

	## ------------------------------------------------------------------------
	fit.model <- function(data){
	mode <- laplace(betabinexch, c(1, 1),
	cbind(data$y, data$n))$mode
	eta <- exp(mode[1]) / (1 + exp(mode[1]))
	K <- exp(mode[2])
	list(eta=eta, K=K,
	d=data.frame(data, est=(data$y + K * eta) / (data$n + K)))
	}
	## ------------------------------------------------------------------------
	d <- get.data(season)

	## ------------------------------------------------------------------------
	S.SO <- fit.model(data.frame(playerID=d$playerID,
	y=d$SO, n=d$AB))
	S.HR <- fit.model(data.frame(playerID=d$playerID,
	y=d$HR, n=d$AB - d$SO))
	S.H <- fit.model(data.frame(playerID=d$playerID,
	y=d$H - d$HR,
	n=d$AB - d$HR - d$SO))
	S <- merge(S.SO$d, S.HR$d, by="playerID")
	S <- merge(S, S.H$d, by="playerID")
	names(S) <- c("playerID", "SO", "AB", "SO.Rate",
	"HR", "AB.SO", "HR.Rate",
	"H.HR", "AB.SO.HR", "H.Rate")

	component.fit <- data.frame(eta=c(S.SO$eta, S.HR$eta, S.H$eta),
	K=c(S.SO$K, S.HR$K, S.H$K))
	row.names(component.fit) <- c("SO", "HR", "H")

	## ------------------------------------------------------------------------
	S$Est <- with(S,
	(1 - SO.Rate) * (HR.Rate + (1 - HR.Rate) * H.Rate))

	## ------------------------------------------------------------------------
	S2 <- fit.model(data.frame(playerID=d$playerID,
	y=d$H, n=d$AB))
	shrinkage.fit <- c(eta=S2$eta, K=S2$K)

	S <- merge(S, S2$d, by="playerID")
	names(S)[c(11:14)] <- c("Comp.Est", "H", "AB1", "Shrinkage.Est")
	S$Season <- season
	list(S=S, component=component.fit, shrinkage=shrinkage.fit)
	}

	plot_avg_results <- function(fitwork){
	# function constructs plot of observed and predicted one.minus.strikeout.rates and his.in.non.so.ab rates
	# from output of fit_component_average function
	require(ggplot2)
	require(dplyr)
	S <- fitwork$S
	myf <- function(x, p) p / x
	AVG1 <- data.frame(P=.25, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .25))
	AVG2 <- data.frame(P=.3, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .3))
	AVG3 <- data.frame(P=.2, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .2))
	AVG4 <- data.frame(P=.35, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .35))
	AVG5 <- data.frame(P=.4, x=seq(.6, 1, .01), y=myf(seq(.6, 1, .01), .4))
	AVG <- rbind(AVG1, AVG2, AVG3, AVG4, AVG5)
	AVG <- mutate(AVG, BA=factor(P))
	d1 <- data.frame(Type="Observed BA",
	One.Minus.SO.Rate=1 - S$SO / S$AB,
	H.Rate=S$H / S$AB.SO,
	AB=S$AB)
	d2 <- data.frame(Type="Predicted BA",
	One.Minus.SO.Rate=1 - S$SO.Rate,
	H.Rate=S$HR.Rate + (1 - S$HR.Rate) * S$H.Rate,
	AB=S$AB)
	dd <- rbind(d1, d2)
	dd1 <- filter(dd, AB >= 300)
	Xlow <- range(dd1$One.Minus.SO.Rate)
	Ylow <- range(dd1$H.Rate)
	p <- ggplot(dd1, aes(One.Minus.SO.Rate, H.Rate)) +
	geom_point() +
	facet_wrap(~ Type, ncol=1) +
	geom_line(data=AVG, aes(x, y, color=BA)) +
	ylim(Ylow[1], Ylow[2]) + xlim(Xlow[1], Xlow[2]) +
	ggtitle(paste("Hitters with at least 300 AB in",S$Season,"Season" )) +
	labs(y = "Hits in Non-SO-AB Rate")
	print(p)
	}

	fit_component_obp <- function(season){
	# implement component method for estimating group of on-base percentages for nonpitchers
	# for a particular baseball season
	#
	# input: season
	# output: a list with components
	# component -- gives values of K and eta for all component fits
	# shrinkage -- gives value of K and eta for shrinkage fit
	# ST -- gives raw stats and component and shrinkage estimates for all players
	require(Lahman)
	require(dplyr)
	require(LearnBayes)

	get.data <- function(season){
	B <- summarize(group_by(filter(Batting,
	yearID==season), playerID),
	PA=sum(AB + BB + HBP),
	AB=sum(AB), BB=sum(BB + HBP), H=sum(H),
	SO=sum(SO), HR=sum(HR))
	NonPitchers <- unique(select(filter(Fielding,
	yearID==season, POS!="P"),
	playerID))
	merge(B, NonPitchers, by="playerID")
	}
	## ------------------------------------------------------------------------
	fit.model <- function(data){
	mode <- laplace(betabinexch, c(1, 1),
	cbind(data$y, data$n))$mode
	eta <- exp(mode[1]) / (1 + exp(mode[1]))
	K <- exp(mode[2])
	list(eta=eta, K=K,
	d=data.frame(data, est=(data$y + K * eta) / (data$n + K)))
	}

	## ------------------------------------------------------------------------
	d <- get.data(season)

	## ------------------------------------------------------------------------
	S.BB <- fit.model(data.frame(playerID=d$playerID,
	y=d$BB, n=d$PA))
	S.SO <- fit.model(data.frame(playerID=d$playerID,
	y=d$SO, n=d$AB))
	S.HR <- fit.model(data.frame(playerID=d$playerID,
	y=d$HR, n=d$AB - d$SO))
	S.H <- fit.model(data.frame(playerID=d$playerID,
	y=d$H - d$HR,
	n=d$AB - d$HR - d$SO))
	S <- merge(S.BB$d, S.SO$d, by="playerID")
	S <- merge(S, S.HR$d, by="playerID")
	names(S) <- c("playerID", "BB", "PA", "BB.Rate",
	"SO", "AB", "SO.Rate",
	"HR", "AB.SO", "HR.Rate")
	S <- merge(S, S.H$d, by="playerID")
	names(S)[11:13] <- c("HIP", "IP", "HIP.Rate")

	S$BAEst <- with(S,
	(1 - SO.Rate) * (HR.Rate + (1 - HR.Rate) * HIP.Rate))
	S$Est <- with(S,
	BB.Rate + (1 - BB.Rate) * BAEst)

	component.fit <- data.frame(eta=c(S.BB$eta, S.SO$eta,
	S.HR$eta, S.H$eta),
	K=c(S.BB$K, S.SO$K, S.HR$K, S.H$K))
	row.names(component.fit) <- c("BB", "SO", "HR", "H")

	## ------------------------------------------------------------------------
	S2 <- fit.model(data.frame(playerID=d$playerID,
	y=(d$H + d$BB),
	n=d$PA))
	shrinkage.fit <- c(eta=S2$eta, K=S2$K)
	ST <- merge(S, S2$d, by="playerID")
	names(ST)[c(15, 18)] <- c("Combo.Est", "Shrinkage.Est")

	ST$Season <- season
	list(ST=ST, component=component.fit, shrinkage=shrinkage.fit)
	}

	plot_obp_results <- function(fitwork){
	# function constructs plot of observed and predicted walk and hit rates
	# from output of fit_component_obp function
	require(ggplot2)
	require(dplyr)
	ST <- fitwork$ST
	myf <- function(x, p) (p - x) / (1 - x)
	AVG1 <- data.frame(P=.25, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .25))
	AVG2 <- data.frame(P=.3, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .3))
	AVG3 <- data.frame(P=.35, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .35))
	AVG4 <- data.frame(P=.4, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .4))
	AVG5 <- data.frame(P=.45, x=seq(.02, .3, .01), y=myf(seq(.02, .3, .01), .45))
	AVG <- rbind(AVG1, AVG2, AVG3, AVG4, AVG5)
	AVG <- mutate(AVG, OBP=factor(P))
	d1 <- data.frame(Type="Observed OBP",
	Walk.Rate=ST$BB / ST$PA,
	H.Rate=(ST$HIP + ST$HR) / ST$AB,
	PA=ST$PA)
	d2 <- data.frame(Type="Predicted OBP",
	Walk.Rate=ST$BB.Rate,
	H.Rate=ST$BAEst,
	PA=ST$PA)
	dd <- rbind(d1, d2)
	dd1 <- filter(dd, PA >= 300)
	Xlim <- range(dd1$Walk.Rate); Ylim <- range(dd1$H.Rate)
	p <- ggplot(dd1, aes(Walk.Rate, H.Rate)) +
	geom_point() +
	facet_wrap(~ Type, ncol=1) +
	geom_line(data=AVG, aes(x, y, color=OBP)) +
	ylim(Ylim[1], Ylim[2]) + xlim(Xlim[1], Xlim[2]) +
	ggtitle(paste("Hitters with at least 300 PA in",ST$Season,"Season")) +
	labs(x = "Walk Rate", y = "Hit Rate")
	print(p)
	}

	fit_component_fip <- function(season){
	# implement component method for estimating group of FIPs for pitchers in a particular season
	#
	# input: season
	# output:
	# S -- gives raw stats and component and shrinkage estimates for all pitches who face at least 50 batters
	require(Lahman)
	require(dplyr)
	require(LearnBayes)
	get.data <- function(season){
	summarize(group_by(filter(Pitching, yearID==season), playerID),
	PA=sum(BFP),
	AB=sum(BFP - BB - HBP, na.rm=TRUE),
	SO=sum(SO, na.rm=TRUE),
	BB=sum(BB + HBP, na.rm=TRUE),
	HR=sum(HR),
	H=sum(H),
	IP=sum(IPouts) / 3)
	}
	fit.model <- function(data){
	mode <- laplace(betabinexch, c(1, 1),
	cbind(data$y, data$n))$mode
	eta <- exp(mode[1]) / (1 + exp(mode[1]))
	K <- exp(mode[2])
	list(eta=eta, K=K,
	d=data.frame(data, est=(data$y + K * eta) / (data$n + K)))
	}
	d <- filter(get.data(season), PA >= 50)

	S.BB <- fit.model(data.frame(playerID=d$playerID,
	y=d$BB, n=d$PA))
	names(S.BB$d) <- c("playerID", "BB", "PA", "BB.Rate")
	S.SO <- fit.model(data.frame(playerID=d$playerID,
	y=d$SO, n=d$AB))
	names(S.SO$d) <- c("playerID", "SO", "AB", "SO.Rate")
	S.HR <- fit.model(data.frame(playerID=d$playerID,
	y=d$HR, n=d$AB - d$SO))
	names(S.HR$d) <- c("playerID", "HR", "AB.minus.SO", "HR.Rate")
	S.H <- fit.model(data.frame(playerID=d$playerID,
	y=d$H - d$HR,
	n=d$AB - d$SO - d$HR))
	names(S.H$d) <- c("playerID", "HIP", "OIP", "HIP.Rate")
	S <- merge(S.BB$d, S.SO$d, by="playerID")
	S <- merge(S, S.HR$d, by="playerID")
	S <- merge(S, S.H$d, by="playerID")

	S$Est <- with(S,
	(39 * (1 - BB.Rate) * (1 - SO.Rate) * HR.Rate +
	9 * BB.Rate - 6 * (1 - BB.Rate) * SO.Rate) /
	((1 - BB.Rate) * (SO.Rate +
	(1 - SO.Rate) * (1 - HR.Rate) * (1 - HIP.Rate))))
	S$Obs <- with(S,
	((13 * HR) + (3 * BB) - (2 * SO))) / d$IP

	vary <- 5.1 ^ 2 / d$IP
	dd <- cbind(S$Obs, vary)
	dd <- dd[complete.cases(dd), ]
	fit <- laplace(normnormexch, c(0, 0), dd)
	mn <- fit$mode[1]
	tau <- exp(fit$mode[2])
	S$shrink <- (S$Obs / vary + mn / tau ^ 2) /
	(1 / vary + 1 / tau ^ 2)
	S
	}

	plot_fip_results <- function(S){
	# function constructs plot of observed and predicted FIP measures
	# from output of fit_component_fip function
	library(ggplot2)
	d1 <- data.frame(PA=S$PA, FIP=S$Obs, Type="Observed")
	d2 <- data.frame(PA=S$PA, FIP=S$Est, Type="Estimated")
	d3 <- data.frame(PA=S$PA, FIP=S$shrink, Type="Shrinkage")
	d <- rbind(d1, d2, d3)
	p1 <- ggplot(filter(d, Type == "Observed" \| Type == "Shrinkage"),
	aes(PA, FIP, color=Type)) + geom_point()
	p2 <- ggplot(filter(d, Type == "Estimated" \| Type == "Shrinkage"),
	aes(PA, FIP, color=Type)) + geom_point()
	print(p1)
	print(p2)
	}