Last active
February 18, 2017 17:03
-
-
Save nandanrao/f62e52589797b85d5f2ff67141d402bc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Distributions | |
using Gadfly | |
using StatsBase | |
using KernelDensity | |
using DataFrames | |
function generate_population(n::Int, d::Sampleable) | |
# Create population, with criminals distributed according to | |
# the provided distribution (d) | |
x = rand(n) * 20 - 10 | |
y = [pdf(d, i) > rand() ? 1 : 0 for i in x] | |
collect(zip(x,y)) | |
end | |
function catcher(population, estimate, unif::Bool) | |
# If we are sampling uniformly, pick 1/10 of the population at random, | |
# otherwise, use the previous estimated density function to sample. | |
sampler = !unif ? x -> pdf(estimate, x) > rand() : x -> 1/10 > rand() | |
# Stop people according to our sampling strategy. If we find | |
# a 1, that indicates a criminal, and we catch them. | |
stopped = [(x,y) for (x,y) in population if sampler(x)] | |
caught = [x for (x,y) in stopped if y == 1] | |
# Restimate a probability density based on those we caught | |
estimate = InterpKDE(kde(caught)) | |
caught, estimate | |
end | |
function simulate(n::Int, K::Int, unif::Bool, d::Sampleable = Normal()) | |
# Generate our initial population | |
pop = generate_population(n, d) | |
# Run the catcher K times | |
caught, _ = reduce((t, _) -> catcher(pop, t[2], unif), ([], d), 1:K) | |
# Return the original criminals as well, to compare | |
true_criminals = [x for (x,y) in pop if y == 1] | |
caught, true_criminals | |
end | |
function create_df(caught, badguys) | |
[DataFrame(x = caught, Distribution = "Caught"); | |
DataFrame(x = badguys, Distribution = "True Criminals")] | |
end | |
function resample(n::Int, K::Int, unif::Bool, d::Sampleable, geom, coords) | |
caught, badguys = simulate(n, K, unif, d) | |
df = create_df(caught, badguys) | |
plot(df, x = "x", color="Distribution", geom, coords) | |
end | |
to_svg(plots) = [draw(SVG(n, 7inch, 5inch), p) for (n, p) in plots] | |
d = Cauchy(0, 10) | |
geom = Geom.density(bandwidth = 3) | |
coords = Coord.cartesian(xmin=-10, xmax=10) | |
to_svg([("day-one.svg", plot(x = simulate(1000000, 1, true, d)[2], geom, coords)), | |
("uniformly-50.svg", resample(100000, 50, true, d, geom, coords)), | |
("efficient-one.svg", resample(100000, 1, false, d, geom, coords)), | |
("efficient-100.svg", resample(100000, 100, false, d, geom, coords))]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment