Created
January 11, 2020 08:09
-
-
Save gvdr/341fde4b6dbf9993451c300f4500ac03 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using StatsBase, Random, Distributions | |
using Plots | |
# parameters for the simulations | |
blocks = 70000; # Number of ticket blocks | |
tickets = 7000000; # Number of tickets | |
draws = 200; # Number of prizes | |
K = 100000; # Number of lotteries to simulate at each replication | |
R = 100; # Number of replications | |
# building blocks | |
tick_per_block = Int(tickets / blocks); # How many tickets per block? | |
## build the population of tickets | |
## block i is composed of `tick_per_block` reps of Integer i (32 bits is the smallest encoding I can thing about) | |
lottery = repeat(convert.(Int32, [1:blocks;]), outer = tick_per_block); | |
## counting functions (this is a computational bottleneck, I fear) | |
function count_two(sim) | |
length(filter(p-> p.second > 1, StatsBase.countmap(sim))) | |
end; | |
function count_three(sim) | |
length(filter(p-> p.second > 2, StatsBase.countmap(sim))) | |
end; | |
# simulate a K lotteries, with `D` draws, and a `L` population of tickets | |
function sim_lotteries_extractions(L, D, K) | |
# we sample D tickets from L, K times | |
sims = [sample(L, D, replace = false) for i in 1:K] | |
# we count all entries in the simulations | |
# and keep only those entries with at least 2 repeated blocks | |
# get how many of these | |
pairs_sims = [count_two(sim) for sim in sims] | |
percent_pairs = sum(pairs_sims .> 0) * 100 / K | |
# we count all entries in the simulations | |
# and keep only those entries with at least 3 repeated blocks | |
# get how many of these | |
triplets_sims = [count_three(sim) for sim in sims] | |
percent_triplets = sum(triplets_sims .> 0) * 100 / K | |
return (percent_pairs, percent_triplets) | |
end; | |
# we repeat the experiment a 100 times to get a distribution on the two quantities | |
result = [sim_lotteries_extractions(lottery, draws, K) for i in 1:100] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment