Created
June 6, 2017 19:03
-
-
Save loisaidasam/b76cd56938863d88e0900b054a0f2783 to your computer and use it in GitHub Desktop.
Sampling n items with replacement from a population of n items covers ~63.2% (1 - 1/e) of the source data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Empirically estimate what fraction of a population is hit at least once
# when drawing len(population) samples uniformly *with replacement*.
# Each run prints the number of distinct items drawn plus the running
# average over all runs so far, as an absolute count and as a percentage.
import random

import numpy as np

try:
    xrange  # Python 2: lazy integer range
except NameError:
    xrange = range  # Python 3: the built-in range is already lazy

SIZE = 1000000  # population size, and number of draws per run
RUNS = 100      # number of independent runs


def count_distinct_draws(population, draws=None):
    """Return how many distinct items show up in `draws` random draws.

    Items are drawn one at a time with `random.choice`, i.e. uniformly and
    with replacement.

    population -- indexable sequence to draw from.
    draws      -- number of draws; defaults to len(population).

    Raises IndexError (from `random.choice`) if `population` is empty and
    `draws` is positive.
    """
    if draws is None:
        draws = len(population)
    # Only the number of distinct items matters, so a set is enough;
    # per-item hit counts are never needed.
    return len({random.choice(population) for _ in xrange(draws)})


def run_trials(size=SIZE, runs=RUNS):
    """Run the sampling experiment `runs` times and report each run.

    size -- population size; the population is the integers 0..size-1 and
            each run draws `size` samples from it.
    runs -- number of independent runs.

    Prints one line per run and returns the list of distinct-item counts,
    one entry per run.

    Raises ValueError if `size` is not positive (the percentage would
    otherwise divide by zero).
    """
    if size < 1:
        raise ValueError("size must be a positive integer")

    data = range(size)
    results = []
    for run in xrange(1, runs + 1):
        distinct = count_distinct_draws(data, size)
        results.append(distinct)
        avg = np.average(results)  # running mean, computed once per run
        # Single parenthesized argument: valid as both a Python 2 print
        # statement and a Python 3 print() call.
        print("Run %s: %s (avg %s, %s%%)" % (run, distinct, avg, 100.0 * avg / size))
    return results


if __name__ == "__main__":
    results = run_trials()
Run 1: 632137 (avg 632137.0, 63.2137%) | |
Run 2: 632020 (avg 632078.5, 63.20785%) | |
Run 3: 631915 (avg 632024.0, 63.2024%) | |
Run 4: 631839 (avg 631977.75, 63.197775%) | |
Run 5: 632021 (avg 631986.4, 63.19864%) | |
Run 6: 631660 (avg 631932.0, 63.1932%) | |
Run 7: 632454 (avg 632006.571429, 63.2006571429%) | |
Run 8: 632607 (avg 632081.625, 63.2081625%) | |
Run 9: 631580 (avg 632025.888889, 63.2025888889%) | |
Run 10: 631856 (avg 632008.9, 63.20089%) | |
Run 11: 632433 (avg 632047.454545, 63.2047454545%) | |
Run 12: 632293 (avg 632067.916667, 63.2067916667%) | |
Run 13: 632303 (avg 632086.0, 63.2086%) | |
Run 14: 632373 (avg 632106.5, 63.21065%) | |
Run 15: 631974 (avg 632097.666667, 63.2097666667%) | |
Run 16: 632207 (avg 632104.5, 63.21045%) | |
Run 17: 632368 (avg 632120.0, 63.212%) | |
Run 18: 632355 (avg 632133.055556, 63.2133055556%) | |
Run 19: 631806 (avg 632115.842105, 63.2115842105%) | |
Run 20: 631754 (avg 632097.75, 63.209775%) | |
Run 21: 631686 (avg 632078.142857, 63.2078142857%) | |
Run 22: 632017 (avg 632075.363636, 63.2075363636%) | |
Run 23: 632130 (avg 632077.73913, 63.207773913%) | |
Run 24: 632147 (avg 632080.625, 63.2080625%) | |
Run 25: 632068 (avg 632080.12, 63.208012%) | |
Run 26: 631961 (avg 632075.538462, 63.2075538462%) | |
Run 27: 631866 (avg 632067.777778, 63.2067777778%) | |
Run 28: 632078 (avg 632068.142857, 63.2068142857%) | |
Run 29: 632384 (avg 632079.034483, 63.2079034483%) | |
Run 30: 631649 (avg 632064.7, 63.20647%) | |
Run 31: 632279 (avg 632071.612903, 63.2071612903%) | |
Run 32: 632408 (avg 632082.125, 63.2082125%) | |
Run 33: 632218 (avg 632086.242424, 63.2086242424%) | |
Run 34: 632392 (avg 632095.235294, 63.2095235294%) | |
Run 35: 631973 (avg 632091.742857, 63.2091742857%) | |
Run 36: 632245 (avg 632096.0, 63.2096%) | |
Run 37: 632449 (avg 632105.540541, 63.2105540541%) | |
Run 38: 632963 (avg 632128.105263, 63.2128105263%) | |
Run 39: 631538 (avg 632112.974359, 63.2112974359%) | |
Run 40: 632874 (avg 632132.0, 63.2132%) | |
Run 41: 631764 (avg 632123.02439, 63.212302439%) | |
Run 42: 631965 (avg 632119.261905, 63.2119261905%) | |
Run 43: 632068 (avg 632118.069767, 63.2118069767%) | |
Run 44: 632347 (avg 632123.272727, 63.2123272727%) | |
Run 45: 631758 (avg 632115.155556, 63.2115155556%) | |
Run 46: 631820 (avg 632108.73913, 63.210873913%) | |
Run 47: 631881 (avg 632103.893617, 63.2103893617%) | |
Run 48: 632107 (avg 632103.958333, 63.2103958333%) | |
Run 49: 632383 (avg 632109.653061, 63.2109653061%) | |
Run 50: 632392 (avg 632115.3, 63.21153%) | |
Run 51: 632253 (avg 632118.0, 63.2118%) | |
Run 52: 632051 (avg 632116.711538, 63.2116711538%) | |
Run 53: 631981 (avg 632114.150943, 63.2114150943%) | |
Run 54: 631867 (avg 632109.574074, 63.2109574074%) | |
Run 55: 631482 (avg 632098.163636, 63.2098163636%) | |
Run 56: 632318 (avg 632102.089286, 63.2102089286%) | |
Run 57: 631949 (avg 632099.403509, 63.2099403509%) | |
Run 58: 632139 (avg 632100.086207, 63.2100086207%) | |
Run 59: 632003 (avg 632098.440678, 63.2098440678%) | |
Run 60: 632210 (avg 632100.3, 63.21003%) | |
Run 61: 632058 (avg 632099.606557, 63.2099606557%) | |
Run 62: 631439 (avg 632088.951613, 63.2088951613%) | |
Run 63: 632453 (avg 632094.730159, 63.2094730159%) | |
Run 64: 631209 (avg 632080.890625, 63.2080890625%) | |
Run 65: 632046 (avg 632080.353846, 63.2080353846%) | |
Run 66: 632121 (avg 632080.969697, 63.2080969697%) | |
Run 67: 631738 (avg 632075.850746, 63.2075850746%) | |
Run 68: 632580 (avg 632083.264706, 63.2083264706%) | |
Run 69: 632087 (avg 632083.318841, 63.2083318841%) | |
Run 70: 631803 (avg 632079.314286, 63.2079314286%) | |
Run 71: 631761 (avg 632074.830986, 63.2074830986%) | |
Run 72: 631486 (avg 632066.652778, 63.2066652778%) | |
Run 73: 632648 (avg 632074.616438, 63.2074616438%) | |
Run 74: 631955 (avg 632073.0, 63.2073%) | |
Run 75: 632000 (avg 632072.026667, 63.2072026667%) | |
Run 76: 632128 (avg 632072.763158, 63.2072763158%) | |
Run 77: 632871 (avg 632083.12987, 63.208312987%) | |
Run 78: 632008 (avg 632082.166667, 63.2082166667%) | |
Run 79: 632240 (avg 632084.164557, 63.2084164557%) | |
Run 80: 631747 (avg 632079.95, 63.207995%) | |
Run 81: 631926 (avg 632078.049383, 63.2078049383%) | |
Run 82: 632082 (avg 632078.097561, 63.2078097561%) | |
Run 83: 631800 (avg 632074.746988, 63.2074746988%) | |
Run 84: 632386 (avg 632078.452381, 63.2078452381%) | |
Run 85: 631943 (avg 632076.858824, 63.2076858824%) | |
Run 86: 631361 (avg 632068.534884, 63.2068534884%) | |
Run 87: 632003 (avg 632067.781609, 63.2067781609%) | |
Run 88: 632132 (avg 632068.511364, 63.2068511364%) | |
Run 89: 631978 (avg 632067.494382, 63.2067494382%) | |
Run 90: 632086 (avg 632067.7, 63.20677%) | |
Run 91: 632281 (avg 632070.043956, 63.2070043956%) | |
Run 92: 632428 (avg 632073.934783, 63.2073934783%) | |
Run 93: 632104 (avg 632074.258065, 63.2074258065%) | |
Run 94: 632120 (avg 632074.744681, 63.2074744681%) | |
Run 95: 632108 (avg 632075.094737, 63.2075094737%) | |
Run 96: 632093 (avg 632075.28125, 63.207528125%) | |
Run 97: 632396 (avg 632078.587629, 63.2078587629%) | |
Run 98: 632454 (avg 632082.418367, 63.2082418367%) | |
Run 99: 632610 (avg 632087.747475, 63.2087747475%) | |
Run 100: 632485 (avg 632091.72, 63.209172%) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment