Skip to content

Instantly share code, notes, and snippets.

@nandanrao
Last active February 18, 2017 17:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nandanrao/f62e52589797b85d5f2ff67141d402bc to your computer and use it in GitHub Desktop.
Save nandanrao/f62e52589797b85d5f2ff67141d402bc to your computer and use it in GitHub Desktop.
using Distributions
using Gadfly
using StatsBase
using KernelDensity
using DataFrames
function generate_population(n::Int, d::Sampleable)
# Create population, with criminals distributed according to
# the provided distribution (d)
x = rand(n) * 20 - 10
y = [pdf(d, i) > rand() ? 1 : 0 for i in x]
collect(zip(x,y))
end
function catcher(population, estimate, unif::Bool)
# If we are sampling uniformly, pick 1/10 of the population at random,
# otherwise, use the previous estimated density function to sample.
sampler = !unif ? x -> pdf(estimate, x) > rand() : x -> 1/10 > rand()
# Stop people according to our sampling strategy. If we find
# a 1, that indicates a criminal, and we catch them.
stopped = [(x,y) for (x,y) in population if sampler(x)]
caught = [x for (x,y) in stopped if y == 1]
# Restimate a probability density based on those we caught
estimate = InterpKDE(kde(caught))
caught, estimate
end
function simulate(n::Int, K::Int, unif::Bool, d::Sampleable = Normal())
# Generate our initial population
pop = generate_population(n, d)
# Run the catcher K times
caught, _ = reduce((t, _) -> catcher(pop, t[2], unif), ([], d), 1:K)
# Return the original criminals as well, to compare
true_criminals = [x for (x,y) in pop if y == 1]
caught, true_criminals
end
function create_df(caught, badguys)
[DataFrame(x = caught, Distribution = "Caught");
DataFrame(x = badguys, Distribution = "True Criminals")]
end
function resample(n::Int, K::Int, unif::Bool, d::Sampleable, geom, coords)
caught, badguys = simulate(n, K, unif, d)
df = create_df(caught, badguys)
plot(df, x = "x", color="Distribution", geom, coords)
end
to_svg(plots) = [draw(SVG(n, 7inch, 5inch), p) for (n, p) in plots]
d = Cauchy(0, 10)
geom = Geom.density(bandwidth = 3)
coords = Coord.cartesian(xmin=-10, xmax=10)
to_svg([("day-one.svg", plot(x = simulate(1000000, 1, true, d)[2], geom, coords)),
("uniformly-50.svg", resample(100000, 50, true, d, geom, coords)),
("efficient-one.svg", resample(100000, 1, false, d, geom, coords)),
("efficient-100.svg", resample(100000, 100, false, d, geom, coords))])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment