Weighted random sample talk notes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pond = {salmon:1, crucian:3, carp:4, herring:6, sturgeon:8, gudgeon:10, minnow:50} | |
pond.map { |_, weight| 0.01 ** (1.0 / weight) } | |
# Fractional exponents are the same as a radical, so | |
# crucian is the cube-root of 0.01, carp is the 4th-root of 0.01... | |
# https://www.desmos.com/calculator/zjgygztibi | |
pond.map { |_, weight| 0.99 ** (1.0 / weight) } | |
# crucian is the cube-root of 0.99, carp is the 4th-root of 0.99... | |
pond.map { |_, weight| rand ** (1.0 / weight) } | |
# crucian is the cube-root of rand, carp is the 4th-root of a different rand... | |
# This function by Efraimidis and Spirakis (2005) | |
# Values will be >= the random number and < 1 | |
pond.max_by { |_, weight| rand ** (1.0 / weight) }.first | |
# wrs = -> (freq) { freq.max_by { |_, weight| rand ** (1.0 / weight) }.first } | |
def wrs(freq) | |
freq.max_by { |_, weight| rand ** (1.0 / weight) }.first | |
end | |
wrs(pond) | |
wrs(pond) | |
wrs(pond) | |
20.times.map { wrs(pond) } | |
num_catches = 250_000 | |
catches = num_catches.times.each_with_object(Hash.new(0)) { |_, freq| freq[wrs(pond)] += 1 } | |
catch_distribution = catches.map { |fish, catch_count| [fish, catch_count / num_catches.to_f] }.to_h | |
total_fish = pond.values.reduce(:+) | |
pond_distribution = pond.map { |fish, stock_count| [fish, stock_count / total_fish.to_f] }.to_h | |
require 'pp' | |
pp catch_distribution.map {|fish, catch_ratio| [fish, catch_ratio, pond_distribution[fish]] } | |
# Someone asked if this is computationally any better than calculating their ratios before | |
# sampling. Let's find out! | |
def naive_wrs(freq) | |
total = freq.values.reduce(:+) | |
ratios = freq.map { |fish, stock_count| [fish, stock_count / total.to_f] }.to_h | |
accumulator = 0.0 | |
ranges = Hash[ratios.map{ |v, p| [accumulator += p, v] }] | |
r = rand | |
ranges.find{ |p, _| p > r }.last | |
end | |
require 'benchmark' | |
n = 250_000 | |
Benchmark.bm(8) do |x| | |
x.report("naïve:") { n.times { naive_wrs(pond) } } | |
x.report("radical:") { n.times { wrs(pond) } } | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment