# statwonk/simple_multilevel.R

## simple_multilevel.R
library(tidyverse)
library(brms)
library(tidybayes)
library(ggthemes)

# Simulation settings ----
# The simulated data set has games * innings rows (one per inning).
innings <- 9
games <- 162
# Purposely set low to illustrate variation between pitchers.
number_of_opposing_pitchers <- 20
# One simulated effect per opposing pitcher, drawn from Normal(0, 0.5).
opposing_pitchers <- rnorm(
  n = number_of_opposing_pitchers,
  mean = 0,
  sd = 0.5
)

# Create a simulated design matrix ----
# One row per inning; each inning is assigned an opposing pitcher chosen
# uniformly at random, and the runs scored are Poisson draws whose mean
# depends on that pitcher's simulated effect.
design_matrix <- tibble(
  inning = seq_len(games * innings)
) %>%
  mutate(
    # A single vectorised draw with replacement replaces a per-row
    # sample(..., 1) loop; the result is still one integer index per inning.
    opposing_pitcher = sample.int(
      number_of_opposing_pitchers,
      size = n(),
      replace = TRUE
    ),
    # Look up the effect while opposing_pitcher is still an integer index,
    # then convert it to a factor so it can serve as a grouping variable.
    opposing_pitcher_effect = opposing_pitchers[opposing_pitcher],
    opposing_pitcher = factor(opposing_pitcher),
    intercept = -1,
    # Despite its name, this column is on the response scale: it is
    # exp(intercept + effect), i.e. the Poisson mean passed to rpois().
    linear_predictor = exp(intercept + opposing_pitcher_effect),
    runs = rpois(n(), linear_predictor)
  )

# Model the "oracle" known opposing pitcher effects on runs ----
# Poisson model of runs with a varying intercept for each opposing pitcher.
bfit <- brm(
  # + (1 | umpires) ... would expand further into multilevel space.
  formula = runs ~ (1 | opposing_pitcher),
  data = design_matrix,
  family = "poisson",
  cores = 4
)

# A view of opposing pitcher effects ----
# Draws fitted values from the model for four randomly chosen pitchers and
# plots one density of those draws per pitcher.
# NOTE(review): newer tidybayes releases deprecate add_fitted_draws() in
# favour of add_epred_draws() (which names its column .epred) -- confirm
# against the installed version before switching.
design_matrix %>%
  distinct(opposing_pitcher, linear_predictor) %>%
  sample_n(4) %>% # let's take a look at some opposing pitchers
  tidybayes::add_fitted_draws(bfit) %>%
  ungroup() %>%
  mutate(
    opposing_pitcher = factor(paste("Pitcher", as.character(opposing_pitcher)))
  ) %>%
  ggplot(aes(x = .value)) +
  geom_density(
    aes(fill = opposing_pitcher, color = opposing_pitcher),
    alpha = 0.4,
    size = 1
  ) +
  ggtitle("Opposing pitcher effects on runs in an inning") +
  labs(
    x = "Expected runs in an inning given opposing pitcher",
    y = "Density of posterior beliefs"
  ) +
  scale_fill_few(name = "Opposing pitchers") +
  # The colour legend would duplicate the fill legend, so suppress it.
  scale_color_few(guide = "none") +
  scale_x_continuous(breaks = seq(0, 10, 0.25)) +
  # theme_bw() is a complete theme and replaces every theme element set
  # before it, so it must come first; the legend tweak is applied on top.
  theme_bw(25) +
  theme(legend.position = "top")

# Using this type of model we could:
# - compare opposing pitchers and their effect on team runs
# - assess how opposing pitchers affect runs
# - compare the size of pitcher effect to other sources of variability (umpires?)
# - estimate expected runs per inning, predict runs per inning
	library(tidyverse)
	library(brms)
	library(tidybayes)
	library(ggthemes)

	# Simulation settings ----
	# The simulated data set has games * innings rows (one per inning).
	innings <- 9
	games <- 162
	# Purposely set low to illustrate variation between pitchers.
	number_of_opposing_pitchers <- 20
	# One simulated effect per opposing pitcher, drawn from Normal(0, 0.5).
	opposing_pitchers <- rnorm(
	  n = number_of_opposing_pitchers,
	  mean = 0,
	  sd = 0.5
	)

	# Create a simulated design matrix ----
	# One row per inning; each inning is assigned an opposing pitcher chosen
	# uniformly at random, and the runs scored are Poisson draws whose mean
	# depends on that pitcher's simulated effect.
	design_matrix <- tibble(
	  inning = seq_len(games * innings)
	) %>%
	  mutate(
	    # A single vectorised draw with replacement replaces a per-row
	    # sample(..., 1) loop; the result is still one integer index per inning.
	    opposing_pitcher = sample.int(
	      number_of_opposing_pitchers,
	      size = n(),
	      replace = TRUE
	    ),
	    # Look up the effect while opposing_pitcher is still an integer index,
	    # then convert it to a factor so it can serve as a grouping variable.
	    opposing_pitcher_effect = opposing_pitchers[opposing_pitcher],
	    opposing_pitcher = factor(opposing_pitcher),
	    intercept = -1,
	    # Despite its name, this column is on the response scale: it is
	    # exp(intercept + effect), i.e. the Poisson mean passed to rpois().
	    linear_predictor = exp(intercept + opposing_pitcher_effect),
	    runs = rpois(n(), linear_predictor)
	  )

	# Model the "oracle" known opposing pitcher effects on runs ----
	# Poisson model of runs with a varying intercept for each opposing pitcher.
	# The grouping bar must be a plain `|`; a backslash-escaped bar does not
	# parse as R.
	bfit <- brm(
	  # + (1 | umpires) ... would expand further into multilevel space.
	  formula = runs ~ (1 | opposing_pitcher),
	  data = design_matrix,
	  family = "poisson",
	  cores = 4
	)

	# A view of opposing pitcher effects ----
	# Draws fitted values from the model for four randomly chosen pitchers and
	# plots one density of those draws per pitcher.
	# NOTE(review): newer tidybayes releases deprecate add_fitted_draws() in
	# favour of add_epred_draws() (which names its column .epred) -- confirm
	# against the installed version before switching.
	design_matrix %>%
	  distinct(opposing_pitcher, linear_predictor) %>%
	  sample_n(4) %>% # let's take a look at some opposing pitchers
	  tidybayes::add_fitted_draws(bfit) %>%
	  ungroup() %>%
	  mutate(
	    opposing_pitcher = factor(paste("Pitcher", as.character(opposing_pitcher)))
	  ) %>%
	  ggplot(aes(x = .value)) +
	  geom_density(
	    aes(fill = opposing_pitcher, color = opposing_pitcher),
	    alpha = 0.4,
	    size = 1
	  ) +
	  ggtitle("Opposing pitcher effects on runs in an inning") +
	  labs(
	    x = "Expected runs in an inning given opposing pitcher",
	    y = "Density of posterior beliefs"
	  ) +
	  scale_fill_few(name = "Opposing pitchers") +
	  # The colour legend would duplicate the fill legend, so suppress it.
	  scale_color_few(guide = "none") +
	  scale_x_continuous(breaks = seq(0, 10, 0.25)) +
	  # theme_bw() is a complete theme and replaces every theme element set
	  # before it, so it must come first; the legend tweak is applied on top.
	  theme_bw(25) +
	  theme(legend.position = "top")

	# Using this type of model we could:
	# - compare opposing pitchers and their effect on team runs
	# - assess how opposing pitchers affect runs
	# - compare the size of pitcher effect to other sources of variability (umpires?)
	# - estimate expected runs per inning, predict runs per inning