Skip to content

Instantly share code, notes, and snippets.

@SteveDaulton
Last active September 27, 2023 15:32
Show Gist options
  • Save SteveDaulton/0f0e3966c96dc5f5af3319a7af7b0610 to your computer and use it in GitHub Desktop.
Save SteveDaulton/0f0e3966c96dc5f5af3319a7af7b0610 to your computer and use it in GitHub Desktop.
Birthday Paradox - a Monte Carlo experiment
"""Explore the surprising probabilities of the "Birthday Paradox".
Idea inspired by The Big Book of Small Python Projects by Al Sweigart.
https://inventwithpython.com/bigbookpython/project2.html
This version runs much faster than the version in the book, and so
can handle much larger numbers of people.
The main optimisations are:
1. `get_birthdays_till_match` exits early when a match is found.
2. A cache list is used for fast lookups of random dates.
It also considers leap years, for a more accurate distribution of dates.
"""
from datetime import datetime, timedelta
from random import choice
from collections import Counter
def get_number() -> int:
"""Prompt for an integer and return the number."""
prompt = True
while prompt:
try:
number = int(input())
except ValueError:
print('Please enter a number.')
else:
if number < 2:
print("Don't be silly. We need at least 2")
elif number > 10000:
print('Enter a smaller number (max 10,000')
else:
prompt = False
return number
def get_birthdays(num: int, cache: list[str]) -> list[str]:
"""Return a list of 'num' birthdays."""
return [choice(cache) for _ in range(num)]
def print_birthdays(bdays: list[str]) -> None:
"""Print a formatted list of birthdays."""
count = 0
for bd in bdays:
print(bd, end=', ')
count += 1
if count % 10 == 0:
print()
print()
def find_duplicates(bdays: list[str]) -> dict:
"""Return a dict of duplicate birthdays."""
bctr = Counter(bdays)
dups = {key: val for key, val in bctr.items() if val > 1}
return dups
def report_duplicates(dups: dict) -> None:
"""Report results from a single run."""
print('In this simulation, multiple people have birtdays on')
for key, _ in list(dups.items())[:-1]:
print(key, end=', ')
print(f'and {list(dups.keys())[-1]}.')
def get_birthdays_till_match(num_bdays: int, cache: list[str]) -> bool:
"""Return True if match found in birthdays, else False."""
bdays: set = set()
for _ in range(num_bdays):
bday = choice(cache)
if bday in bdays:
# Early termination is much quicker
# when there's a large numbers of birthdays.
return True
bdays.add(bday)
return False
def sim_loop(num_bdays: int, cache: list[str]) -> int:
"""Run simulation 1000,000 times."""
matches = 0
for i in range(10):
for _ in range(10000):
if get_birthdays_till_match(num_bdays, cache):
matches += 1
print(f'{(i + 1) * 10000} simulations run.')
return matches
def print_summary(bdays: int, dups: int) -> None:
"""Print summary of 100,000 simulations."""
print(f"""Out of 100,000 simulations of {bdays} people, there was a
matching birthday in that group {dups} times. This means
that {bdays} people have a {(dups / 1000):.2f} % chance of
having a matching birthday in their group.
That's probably more than you would think!""")
def generate_dates_cache():
"""Return a tuple of dates for fast lookup."""
today = datetime.now()
# Generate enough dates to achieve a reasonable
# distribution of "Feb 29" birthdays.
return [(today - timedelta(days=delta)).strftime('%b %d')
for delta in range(1, 36500)] # approx 100 years.
if __name__ == '__main__':
CACHE = generate_dates_cache()
print('How many birthdays shall I generate? (Max 10,000)')
number_of_birthdays = get_number()
birthdays = get_birthdays(number_of_birthdays, CACHE)
print(f'Here are {number_of_birthdays} birthdays:')
print_birthdays(birthdays)
duplicates = find_duplicates(birthdays)
if len(duplicates) > 0:
report_duplicates(duplicates)
# Run through 100,000 simulations:
print(f'Generating {number_of_birthdays} '
f'random birthdays 100,000 times...')
input('Press Enter to begin...')
DUP_COUNT = sim_loop(number_of_birthdays, CACHE)
print_summary(number_of_birthdays, DUP_COUNT)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment