A Python script to estimate the probabilities of the birthday problem (http://en.wikipedia.org/wiki/Birthday_problem) for subgroups of size 2, 3, 4, 5, and 6 that have the same birthday.
import random | |
maxBirthdays = 6 | |
numDays = 365 | |
numTrials = 100000 | |
maxGroupSize = 1000 | |
def BirthdayExperiment(): | |
birthdays = [0] * numDays | |
numToDuplicates = {} | |
r = random.Random() | |
numPeople = 0 | |
while True: | |
numPeople += 1 | |
nextBirthday = r.randint(0,numDays-1) | |
birthdays[nextBirthday] += 1 | |
numBirthdays = birthdays[nextBirthday] | |
if (numBirthdays > 1) and (not numBirthdays in numToDuplicates): | |
numToDuplicates[numBirthdays] = numPeople | |
if numBirthdays == maxBirthdays: | |
break | |
return numToDuplicates | |
random.seed() | |
cumDistFunctions = {} | |
for i in range(2,maxBirthdays+1): | |
cumDistFunctions[i] = [0] * (maxGroupSize+1) | |
for trial in range(numTrials): | |
numToDuplicates= BirthdayExperiment() | |
for i in range(2,maxBirthdays+1): | |
for j in range(numToDuplicates[i],maxGroupSize+1): | |
cumDistFunctions[i][j] += 1 | |
line = "# People,2 analytic" | |
for i in range(2,maxBirthdays+1): | |
line = "%s,%d same" % (line, i) | |
print line | |
analytic = 1.0 | |
for numPeople in range(1,maxGroupSize+1): | |
if numPeople <= numDays: | |
analytic *= float(numDays-numPeople+1)/numDays | |
line = "%d,%f" % (numPeople, 1-analytic) | |
for i in range(2,maxBirthdays+1): | |
line = "%s,%f" % (line, float(cumDistFunctions[i][numPeople])/numTrials) | |
print line |
This comment has been minimized.
This comment has been minimized.
Here's my code: birthdays.py |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
We were discussing this problem on an internet forum and came across your solution. Just FYI, the probability can be computed exactly, using a recursive counting function that computes the exact number of suitable combinations, and dividing that by 365**numPeople. I can post code later. For lots of people (e.g., > 100) it becomes slow due to the sheer size of the numbers involved. We were solving it for 60 people, where it finishes in a second or two.