bayesball/BTsimulation.R

## BTsimulation.R
library(baseballr)
library(tidyverse)
NL <- standings_on_date_bref("2018-04-15", "NL Overall")
AL <- standings_on_date_bref("2018-04-15", "AL Overall")

#save(AL, NL, file="Standings")
load("Standings")

library(ggrepel)
ggplot(data.frame(Team = c(AL[[1]]$Tm, NL[[1]]$Tm),
                Wins = c(AL[[1]]$W, NL[[1]]$W),
                  Y = 1),
       aes(Wins, Y, label=Team)) +
  geom_text_repel(angle = 60, color="blue") +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  ggtitle("Number of Games Won by MLB Teams: April 15, 2018")

# get schedule?

teams <- unique(c(AL[[1]]$Tm, NL[[1]]$Tm))
get_schedule <- function(team){
  d <- team_results_bref(team, 2018)
  d[complete.cases(d),]
}
sch <- map_df(teams, get_schedule)
#save(sch, file="Schedule")

load("Schedule")
sch %>% group_by(Attendance) %>%
  summarize(Date = first(Date),
            Tm = first(Tm),
            Opp = first(Opp),
            R = first(R),
            RA = first(RA)) %>%
  mutate(Win = ifelse(R > RA, 1, 0)) -> sch2

# here is the function that implements the simulation

one.simulation.18a <- function(teams, schedule,
                              s.talent = 0.25){
  # simulate talents
  TAL <- data.frame(Team = teams,
                    Talent = rnorm(30, 0, s.talent))
  TAL$Team <- as.character(TAL$Team)
  # merge talents and win probs with schedule data frame
  schedule %>%
    inner_join(TAL, by=c("Tm"="Team")) %>%
    inner_join(TAL, by=c("Opp"="Team")) -> SCH
  # play season
  N <- nrow(SCH)
  SCH %>%
    mutate(prob.Tm = exp(Talent.x) /
            (exp(Talent.x) + exp(Talent.y)),
       outcome = rbinom(nrow(SCH), 1, prob.Tm),
       winner = ifelse(outcome, Tm, Opp)) -> SCH
  # compute sd of number of games won for all teams
  SCH %>% group_by(winner) %>%
    count() -> Results
  # output sd
  sd(Results$n)
}

one.simulation.18a(teams, select(sch2, Tm, Opp))
W <- replicate(1000,
   one.simulation.18a(teams, select(sch2, Tm, Opp)))

observed_sd <- sd(c(AL[[1]]$W, NL[[1]]$W))
ggplot(data.frame(SD = W), aes(W)) +
  geom_histogram(bins = 10,
                 color="black",
                 fill="orange") +
  geom_vline(xintercept = observed_sd,
             color="red", size=2) +
  xlab("Standard Deviation") +
  ggtitle("Histogram of Predictive Standard Deviation
        of Number of Games Won from Bradley-Terry Model")
	library(baseballr)
	library(tidyverse)
	NL <- standings_on_date_bref("2018-04-15", "NL Overall")
	AL <- standings_on_date_bref("2018-04-15", "AL Overall")

	#save(AL, NL, file="Standings")
	load("Standings")

	library(ggrepel)
	ggplot(data.frame(Team = c(AL[[1]]$Tm, NL[[1]]$Tm),
	Wins = c(AL[[1]]$W, NL[[1]]$W),
	Y = 1),
	aes(Wins, Y, label=Team)) +
	geom_text_repel(angle = 60, color="blue") +
	theme(axis.title.y=element_blank(),
	axis.text.y=element_blank(),
	axis.ticks.y=element_blank()) +
	ggtitle("Number of Games Won by MLB Teams: April 15, 2018")

	# get schedule?

	teams <- unique(c(AL[[1]]$Tm, NL[[1]]$Tm))
	get_schedule <- function(team){
	d <- team_results_bref(team, 2018)
	d[complete.cases(d),]
	}
	sch <- map_df(teams, get_schedule)
	#save(sch, file="Schedule")

	load("Schedule")
	sch %>% group_by(Attendance) %>%
	summarize(Date = first(Date),
	Tm = first(Tm),
	Opp = first(Opp),
	R = first(R),
	RA = first(RA)) %>%
	mutate(Win = ifelse(R > RA, 1, 0)) -> sch2

	# here is the function that implements the simulation

	one.simulation.18a <- function(teams, schedule,
	s.talent = 0.25){
	# simulate talents
	TAL <- data.frame(Team = teams,
	Talent = rnorm(30, 0, s.talent))
	TAL$Team <- as.character(TAL$Team)
	# merge talents and win probs with schedule data frame
	schedule %>%
	inner_join(TAL, by=c("Tm"="Team")) %>%
	inner_join(TAL, by=c("Opp"="Team")) -> SCH
	# play season
	N <- nrow(SCH)
	SCH %>%
	mutate(prob.Tm = exp(Talent.x) /
	(exp(Talent.x) + exp(Talent.y)),
	outcome = rbinom(nrow(SCH), 1, prob.Tm),
	winner = ifelse(outcome, Tm, Opp)) -> SCH
	# compute sd of number of games won for all teams
	SCH %>% group_by(winner) %>%
	count() -> Results
	# output sd
	sd(Results$n)
	}

	one.simulation.18a(teams, select(sch2, Tm, Opp))
	W <- replicate(1000,
	one.simulation.18a(teams, select(sch2, Tm, Opp)))

	observed_sd <- sd(c(AL[[1]]$W, NL[[1]]$W))
	ggplot(data.frame(SD = W), aes(W)) +
	geom_histogram(bins = 10,
	color="black",
	fill="orange") +
	geom_vline(xintercept = observed_sd,
	color="red", size=2) +
	xlab("Standard Deviation") +
	ggtitle("Histogram of Predictive Standard Deviation
	of Number of Games Won from Bradley-Terry Model")