Skip to content

Instantly share code, notes, and snippets.

@jiahao
Last active March 23, 2021 02:22
Show Gist options
  • Save jiahao/7033758 to your computer and use it in GitHub Desktop.
Save jiahao/7033758 to your computer and use it in GitHub Desktop.
Julia implementation of the Bag of Little Bootstraps (BLB) method (documented as, e.g., Algorithm 1 in arXiv:1112.5016)
using Distributions
export Measure, DiscreteMeasure, Estimator, EstimatorQuality,
blb, randsubset
abstract Measure
type DiscreteMeasure{S<:Number,T<:Number} <: Measure
points :: Vector{S}
weights :: Vector{T}
function DiscreteMeasure{S<:Number, T<:Number}(points::Vector{S}, weights::Vector{T})
length(points) == length(weights) ?
new(points, weights)
: error("number of point masses and weights don't add up")
end
end
#If no explicit weights are given, assume uniform weight
DiscreteMeasure{S<:Number}(points::Vector{S}) =
DiscreteMeasure(points, ones(length(points))/length(points))
typealias Estimator Function
typealias EstimatorQuality Function
#Inputs:
# Data X1,...,Xn
# θ̂: estimator
# ξ : estimator quality assessment
# b : subset size
# s : number of sampled subsets
# r : number of Monte Carlo iterations
# Pn = sum([delta(X[i]) for i=1:n)/n #empirical distribution
function blb(Data, θ̂::Estimator, ξ::EstimatorQuality, b::Int, s::Int, r::Int)
n = length(Data)
Z = Multinomial(n, ones(b)/b)
ξ = zeros(s)
for j=1:s #Iterate over subsets
#Subsample the data
I = randsubset(n, b)
#Approximate ξ(Qn(P(j)nb))
θ̂⭒n = zeros(r)
for k=1:r #Iterate over Monte Carlo iterations
nn = rand(Z)
P⭒nk = DiscreteMeasure([X[I[a]] for a=1:b], nn[a]/n)
θ̂⭒n[k] = θ̂(P⭒nk)
end
Q⭒nj = DiscreteMeasure(θ̂⭒n)
ξ⭒n[j] = ξ(Q⭒nj)
end
#Average values of ξ(Qn(P(j)nb)) computed for different data subsets
return sum([ξ⭒n[j] for j=1:s])/s
end
#Uses Floyd's algorithm for determining a random subset of size M
#from 1:N
#Citation:
# J. Bentley, B. Floyd, "Programming pearls: a sample of brilliance",
# Commun. ACM 30 (9), 1987, 754-757
# doi:10.1145/30401.315746
function randsubset(N::Int, M::Int)
M > N ? error("Subset cannot be larger than original set") : none
S = Set{Int}()
for J = (N-M+1):N
T = 1 + int(rand()*(J-1))
add!(S, (T in S) ? J : T)
end
S
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment