Skip to content

Instantly share code, notes, and snippets.

@teamdandelion
Created April 3, 2015 00:46
Show Gist options
  • Save teamdandelion/9de65947f812b2ef49a2 to your computer and use it in GitHub Desktop.
Save teamdandelion/9de65947f812b2ef49a2 to your computer and use it in GitHub Desktop.
Given you have 1000 people in a room, what is expected # of countries represented?
1369060000
1269140000
320711000
255461700
204105000
189397000
183523000
158093000
146267288
126910000
121005815
101232000
90730000
90076012
88275000
80925000
78231400
77695904
71246000
66109000
64871000
64105654
60788845
54002000
51419420
51342881
48064700
47421786
46749000
46464053
43131966
42928900
39500000
38484000
38435252
36004552
35702707
34856813
33337529
31521418
31151643
30620404
30539100
30492800
28037904
27043093
26556800
25956000
25727911
25155000
24383301
23796400
23445534
23128376
22671331
21842167
21143237
20675000
19942642
19268000
18450494
18006407
17439300
16896300
16310431
16259000
15964000
15806675
15473905
15405157
13606000
13508715
13061239
11892934
11410651
11239755
11210064
11123000
10996891
10992589
10982754
10911819
10628972
10528477
10477800
10378267
10315244
9849000
9823827
9753627
9593000
9577000
9481000
8725111
8579747
8354000
8320100
8211700
7398500
7264100
7245677
7171000
7146759
7003406
6802000
6738000
6708760
6401240
6319000
6317000
6134270
5895100
5659715
5475526
5469700
5421034
5165802
4803000
4773130
4751120
4682467
4671000
4609600
4572330
4503000
4490500
4267558
4146558
4104000
3791622
3764166
3631775
3557600
3548397
3404189
3268431
3013900
3000000
2919306
2893005
2717991
2334029
2120000
2113077
2065769
2066154
2024904
1986700
1882450
1827231
1788000
1751000
1430000
1328019
1316500
1312252
1261208
1212107
1119375
900000
859178
858000
844994
763952
758920
746900
636200
620029
581344
549700
534189
518467
510713
505153
425384
405739
393372
381326
368390
358899
341256
329100
294906
285000
268767
268270
264652
240705
239648
212645
187820
187356
185000
159358
154843
109000
107394
106461
106405
103328
103252
101351
99000
89949
86295
84497
76949
71293
64237
63085
56086
55984
55691
55519
55000
53883
51547
48724
37429
37370
36950
35742
32789
31458
30001
28875
28054
23296
20901
14974
13452
13135
11323
10084
9131
6069
4922
4000
3000
2562
2302
2072
1613
1411
839
550
56
import random
def sortedIndex(x, arr):
# returns insertion index
low = -1
high = len(arr)
while low+1 < high:
guess = int((high-low)/2) + low
value = arr[guess]
if value == x:
return guess+1
elif value < x:
low = guess
elif value > x:
high = guess
return low+1
assert sortedIndex(0, [1,2,3]) == 0
assert sortedIndex(0.5, [1,2,3]) == 0
assert sortedIndex(1, [1,2,3]) == 1
assert sortedIndex(1.5, [1,2,3]) == 1
assert sortedIndex(2, [1,2,3]) == 2
assert sortedIndex(3, [1,2,3]) == 3
assert sortedIndex(4, [1,2,3]) == 3
assert sortedIndex(0, [0]) == 1
assert sortedIndex(-1, [0]) == 0
assert sortedIndex(-1, []) == 0
assert sortedIndex(1, []) == 0
def cumulativeProbability(populations):
total = sum(populations)
accumulatedProbability = 0
accumulated = [None]*len(populations)
for i, x in enumerate(populations):
accumulatedProbability += float(x)/total
accumulated[i] = accumulatedProbability
return accumulated
def numberDistinctFromCumulativeDist(cumulative, nTrials):
s = set()
for t in range(nTrials):
x = random.random()
i = sortedIndex(x, cumulative)
s.add(i)
return len(s)
assert cumulativeProbability([0,1,1,2]) == [0, 0.25, 0.5, 1.0]
def main():
population = []
with open("data.txt", "r") as f:
for line in f:
population.append(int(line))
# just throw value errors if they come up
cumulative = cumulativeProbability(population)
outerTrials = 1000
outerSum = 0
for i in range(outerTrials):
outerSum += numberDistinctFromCumulativeDist(cumulative, 1000)
print float(outerSum) / outerTrials
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment