Last active
July 13, 2018 18:02
-
-
Save turgeonmaxime/b593efd933c696c4ec5a9caa8d2c3308 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
# 1. Theoretical answer
# A tie happens when both teams score the same number of goals, so the
# probability of a tie is the probability that both teams score no goal,
# plus the probability they both score one goal,
# plus ... plus the probability they both score five goals.
#
# If we assume each team's score is a binomial random variable
# (5 shots, each scoring with probability 0.75) and that the two scores
# are independent, P(both score k) = P(score = k)^2, so the answer is
# simply a sum of squared binomial probabilities.
true_prob <- sum(dbinom(x = 0:5, size = 5, prob = 0.75)^2)
# 2. Simulation answer
# Simulate B shootouts; in each one, both teams take 5 shots that each score
# with probability 0.75, matching the assumptions of the theoretical answer.
set.seed(12345)
B <- 50000

# Draw all 2 * B scores in a single vectorised call instead of building one
# tibble per game. Filling the matrix row by row keeps the draw order
# home, away, home, away, ... so game b uses draws 2b - 1 and 2b.
scores <- matrix(
  rbinom(n = 2 * B, size = 5, prob = 0.75),
  ncol = 2,
  byrow = TRUE
)

results <- tibble::tibble(
  index = seq_len(B),
  home = scores[, 1],
  away = scores[, 2]
) %>%
  mutate(
    tied = as.numeric(home == away),
    # Running estimate of the tie probability after the first `index` games
    cum_prob_est = cumsum(tied) / index
  )

# The estimate is the mean of the variable tied
results %>% summarise(est_prob = mean(tied))
# We can also visualize how quickly the running estimate converges.
# The dotted horizontal line marks the theoretical probability `true_prob`
# (the original code referred to an undefined object named `true`).
# NOTE: ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept here so the script also runs on older versions.
results %>%
  ggplot(aes(index, cum_prob_est)) +
  geom_line() +
  geom_hline(
    yintercept = true_prob,
    linetype = "dotted",
    size = 1
  ) +
  theme_bw() +
  # Zoom in around the expected value without dropping any data points
  coord_cartesian(ylim = c(0.25, 0.35)) +
  scale_x_continuous(labels = scales::comma)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment