Last active
September 27, 2023 15:32
-
-
Save SteveDaulton/0f0e3966c96dc5f5af3319a7af7b0610 to your computer and use it in GitHub Desktop.
Birthday Paradox - a Monte Carlo experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Explore the surprising probabilities of the "Birthday Paradox". | |
Idea inspired by The Big Book of Small Python Projects by Al Sweigart. | |
https://inventwithpython.com/bigbookpython/project2.html | |
This version runs much faster than the version in the book, and so | |
can handle much larger numbers of people. | |
The main optimisations are: | |
1. `get_birthdays_till_match` exits early when a match is found. | |
2. A cache list is used for fast lookups of random dates. | |
It also considers leap years, for a more accurate distribution of dates. | |
""" | |
from datetime import datetime, timedelta | |
from random import choice | |
from collections import Counter | |
def get_number() -> int: | |
"""Prompt for an integer and return the number.""" | |
prompt = True | |
while prompt: | |
try: | |
number = int(input()) | |
except ValueError: | |
print('Please enter a number.') | |
else: | |
if number < 2: | |
print("Don't be silly. We need at least 2") | |
elif number > 10000: | |
print('Enter a smaller number (max 10,000') | |
else: | |
prompt = False | |
return number | |
def get_birthdays(num: int, cache: list[str]) -> list[str]: | |
"""Return a list of 'num' birthdays.""" | |
return [choice(cache) for _ in range(num)] | |
def print_birthdays(bdays: list[str]) -> None: | |
"""Print a formatted list of birthdays.""" | |
count = 0 | |
for bd in bdays: | |
print(bd, end=', ') | |
count += 1 | |
if count % 10 == 0: | |
print() | |
print() | |
def find_duplicates(bdays: list[str]) -> dict: | |
"""Return a dict of duplicate birthdays.""" | |
bctr = Counter(bdays) | |
dups = {key: val for key, val in bctr.items() if val > 1} | |
return dups | |
def report_duplicates(dups: dict) -> None: | |
"""Report results from a single run.""" | |
print('In this simulation, multiple people have birtdays on') | |
for key, _ in list(dups.items())[:-1]: | |
print(key, end=', ') | |
print(f'and {list(dups.keys())[-1]}.') | |
def get_birthdays_till_match(num_bdays: int, cache: list[str]) -> bool: | |
"""Return True if match found in birthdays, else False.""" | |
bdays: set = set() | |
for _ in range(num_bdays): | |
bday = choice(cache) | |
if bday in bdays: | |
# Early termination is much quicker | |
# when there's a large numbers of birthdays. | |
return True | |
bdays.add(bday) | |
return False | |
def sim_loop(num_bdays: int, cache: list[str]) -> int: | |
"""Run simulation 1000,000 times.""" | |
matches = 0 | |
for i in range(10): | |
for _ in range(10000): | |
if get_birthdays_till_match(num_bdays, cache): | |
matches += 1 | |
print(f'{(i + 1) * 10000} simulations run.') | |
return matches | |
def print_summary(bdays: int, dups: int) -> None: | |
"""Print summary of 100,000 simulations.""" | |
print(f"""Out of 100,000 simulations of {bdays} people, there was a | |
matching birthday in that group {dups} times. This means | |
that {bdays} people have a {(dups / 1000):.2f} % chance of | |
having a matching birthday in their group. | |
That's probably more than you would think!""") | |
def generate_dates_cache(): | |
"""Return a tuple of dates for fast lookup.""" | |
today = datetime.now() | |
# Generate enough dates to achieve a reasonable | |
# distribution of "Feb 29" birthdays. | |
return [(today - timedelta(days=delta)).strftime('%b %d') | |
for delta in range(1, 36500)] # approx 100 years. | |
if __name__ == '__main__': | |
CACHE = generate_dates_cache() | |
print('How many birthdays shall I generate? (Max 10,000)') | |
number_of_birthdays = get_number() | |
birthdays = get_birthdays(number_of_birthdays, CACHE) | |
print(f'Here are {number_of_birthdays} birthdays:') | |
print_birthdays(birthdays) | |
duplicates = find_duplicates(birthdays) | |
if len(duplicates) > 0: | |
report_duplicates(duplicates) | |
# Run through 100,000 simulations: | |
print(f'Generating {number_of_birthdays} ' | |
f'random birthdays 100,000 times...') | |
input('Press Enter to begin...') | |
DUP_COUNT = sim_loop(number_of_birthdays, CACHE) | |
print_summary(number_of_birthdays, DUP_COUNT) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment