Last active
October 1, 2019 12:33
-
-
Save StewSchrieff/d336b17650b8199f619a7313d5839ca3 to your computer and use it in GitHub Desktop.
Riddler League Baseball Simulation (https://fivethirtyeight.com/features/which-baseball-team-will-win-the-riddler-fall-classic/)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
# Note that each inning is independent, so we only need to simulate one inning at a time | |
# Each team is described by the chance that a batter avoids a strikeout
# ('avoidStrikeout') and by how many bases every successful at-bat is worth
# ('numBasesOnHit', where four bases is a home run).
moonwalkers = dict(
    name="Mississippi Moonwalkers",
    color='blue',
    avoidStrikeout=0.4,
    numBasesOnHit=1,
)
doubloons = dict(
    name="Delaware Doubloons",
    color='red',
    avoidStrikeout=0.2,
    numBasesOnHit=2,
)
taters = dict(
    name="Tennessee Taters",
    color='green',
    avoidStrikeout=0.1,
    numBasesOnHit=4,
)

# Every team in the league, in the order they are plotted.
teams = [moonwalkers, doubloons, taters]
def moveBases(num_bases, bases, runs):
    """Advance every runner, and then the batter, after a hit.

    Args:
        num_bases: Number of bases the hit is worth. A value larger than
            the number of bases (four or more for a normal diamond) is a
            home run.
        bases: List of 0s and 1s, one entry per base (first, second,
            third), where 1 means a runner is standing on that base. The
            list is updated in place.
        runs: Runs scored so far in the inning.

    Returns:
        A ``(bases, runs)`` tuple: the same list object that was passed in
        and the updated run count.

    Raises:
        ValueError: If ``num_bases`` is smaller than 1.
    """
    if num_bases < 1:
        # A non-positive hit would index the list from the wrong end.
        raise ValueError("num_bases must be at least 1")

    last_base = len(bases) - 1
    # Walk from the last base back to first so that a runner is always
    # moved onto a base that has already been vacated.
    for i in range(last_base, -1, -1):
        if bases[i] != 1:
            continue
        bases[i] = 0  # Move the runner off of the base he was on
        new_base = i + num_bases
        if new_base > last_base:
            runs += 1
        else:
            bases[new_base] = 1

    # Move the batter: a single, double or triple leaves him on base,
    # anything longer brings him all the way home.
    if num_bases <= len(bases):
        bases[num_bases - 1] = 1
    else:
        runs += 1
    return bases, runs
### Simulate an inning | |
def simulate_inning(team):
    """Play one half-inning for ``team`` and return the runs it scores.

    Batters come up one after another until three of them have struck
    out. Each at-bat is a single Bernoulli draw with success probability
    ``team['avoidStrikeout']``; a success is a hit worth
    ``team['numBasesOnHit']`` bases.
    """
    hit_chance = team['avoidStrikeout']
    diamond = [0, 0, 0]
    score = 0
    strikeouts = 0
    while strikeouts < 3:
        made_contact = bool(np.random.binomial(1, hit_chance))
        if not made_contact:
            strikeouts += 1
            continue
        diamond, score = moveBases(team['numBasesOnHit'], diamond, score)
    return score
def plot_inning_statistics(num_innings):
    """Simulate innings for every team and plot the runs-per-inning histograms.

    For each entry in ``teams`` this simulates ``num_innings`` independent
    innings, prints the average number of runs per inning, and overlays a
    normalised histogram of the results on one shared set of axes. The
    figure is shown once all teams have been drawn.

    Args:
        num_innings: Number of innings to simulate per team (at least 1).

    Raises:
        ValueError: If ``num_innings`` is smaller than 1.
    """
    if num_innings < 1:
        raise ValueError("num_innings must be at least 1")

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for team in teams:
        # Simulate the team being plotted, exactly num_innings times.
        scores = [simulate_inning(team) for _ in range(num_innings)]
        print(f'{team["name"]}: The average runs scored in an inning is : {sum(scores) / len(scores)}')
        # hist() needs at least one bin, even when no runs were scored.
        bins = max(max(scores), 1)
        # Filled, translucent bars in the team colour ...
        ax.hist(scores, bins=bins, density=True, edgecolor="None", alpha=0.2, color=team['color'])
        # ... plus a dashed outline of the same distribution.
        ax.hist(scores, bins=bins, density=True, ls='dashed', lw=3, facecolor="None")
    plt.show()
def simulate_game(teamA, teamB):
    """Simulate a single game between two teams.

    Nine innings are always played; after that, extra innings are added
    one at a time for as long as the score is tied. In every inning
    ``teamA`` bats first and ``teamB`` second.

    Returns:
        A ``(winner, win_margin)`` tuple, where ``winner`` is whichever of
        the two team objects scored more runs and ``win_margin`` is the
        (positive) difference in runs.
    """
    score_a = score_b = 0
    innings_played = 0
    # Regulation innings first, then keep going while the game is tied.
    while innings_played < 9 or score_a == score_b:
        score_a += simulate_inning(teamA)
        score_b += simulate_inning(teamB)
        innings_played += 1

    if score_a > score_b:
        return teamA, score_a - score_b
    return teamB, score_b - score_a
def _average_margin(total_margin, games):
    """Return ``total_margin / games``, or ``'n/a'`` when ``games`` is zero."""
    return total_margin / games if games else 'n/a'


def simulate_season(num_rounds):
    """Simulate a round-robin season and print a summary for every team.

    Each round consists of three games, played in this order: Doubloons
    vs. Moonwalkers, Taters vs. Moonwalkers, Taters vs. Doubloons. For
    every team the function tracks wins, losses, the overall point
    differential, the summed margin of its victories and the summed
    (negative) margin of its losses, then prints the totals and averages.

    An average that cannot be computed because a team has no wins (or no
    losses) is reported as ``n/a`` instead of raising ZeroDivisionError.

    Args:
        num_rounds: Number of round-robin rounds to play.

    Returns:
        None. The report is written to standard output.
    """
    # Display labels, in the order the report lists the teams.
    labels = ('Moonwalkers', 'Doubloons', 'Taters')
    league = dict(zip(labels, (moonwalkers, doubloons, taters)))
    # (teamA, teamB) pairings in the order they are played each round.
    schedule = (
        ('Doubloons', 'Moonwalkers'),
        ('Taters', 'Moonwalkers'),
        ('Taters', 'Doubloons'),
    )
    stats = {
        label: {
            'wins': 0,
            'losses': 0,
            'differential': 0,
            'victory_margin': 0,
            'loss_margin': 0,
        }
        for label in labels
    }

    num_games = 0
    for _ in range(num_rounds):
        for label_a, label_b in schedule:
            num_games += 1
            winner, win_margin = simulate_game(league[label_a], league[label_b])
            # simulate_game hands back one of the two objects it was given.
            if winner is league[label_a]:
                won, lost = stats[label_a], stats[label_b]
            else:
                won, lost = stats[label_b], stats[label_a]
            won['wins'] += 1
            won['victory_margin'] += win_margin
            won['differential'] += win_margin
            lost['losses'] += 1
            # Loss margins are accumulated as negative numbers.
            lost['loss_margin'] -= win_margin
            lost['differential'] -= win_margin

    sections = [
        [f"The {label} have {stats[label]['wins']} wins." for label in labels],
        [
            f"The {label} point differential is: {stats[label]['differential']}"
            for label in labels
        ],
        [
            f"The {label} average margin of victory is: "
            f"{_average_margin(stats[label]['victory_margin'], stats[label]['wins'])}"
            for label in labels
        ],
        [
            f"The {label} average margin of loss is: "
            f"{_average_margin(stats[label]['loss_margin'], stats[label]['losses'])}"
            for label in labels
        ],
    ]

    print(f"In a season with {num_games} games:")
    # Every section except the last is followed by two blank lines.
    for section in sections[:-1]:
        print("\n".join(section) + "\n\n")
    print("\n".join(sections[-1]))
if __name__ == "__main__":
    # Play 30,000 round-robin rounds and print the season report.
    # To plot the per-inning run distributions instead, call
    # plot_inning_statistics(3000000).
    simulate_season(num_rounds=30000)
It would seem that the Moonwalkers are more likely to blow opponents out of the water, while the Taters are more likely to "squeak out" wins.
That's what I was thinking! For what it's worth I did a write-up as well, which might help clarify my snippets above.
Nice investigation!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ah, I see what you mean with classifying this as a negative binomial problem. Thanks for the clear explanation! Interesting that the numpy line (line 67) I call is the binomial distribution. That is, I used statistics to get the probability of successes, and then used code to handle the failures, while I could have used statistics to model the failures as well. Guess that's why I'm a software engineer rather than a statistician :)
Here's another writeup that uses the negative binomial distribution as a statistical approach to this problem. I'm not very familiar with R, so I don't quite understand everything, but it appears that Stephen Penrice is able to use the negative binomial distribution to model the Moonwalkers as well.
Looks like his explanation is that the Taters are less likely to get shut out in a game:
This conclusion would contradict my original conclusion that the only statistic that matters is the expected runs per inning. It appears that, because baseball games are so "short" (only 9 innings, rather than thousands of innings), the clustering of the runs is detrimental to the Moonwalkers. It would seem that the Moonwalkers are more likely to blow opponents out of the water, while the Taters are more likely to "squeak out" wins.