jiahao/BagOfLittleBootstraps.jl

## BagOfLittleBootstraps.jl
using Distributions

export Measure, DiscreteMeasure, Estimator, EstimatorQuality,
       blb, randsubset

abstract Measure

type DiscreteMeasure{S<:Number,T<:Number} <: Measure
  points :: Vector{S}
  weights :: Vector{T}
  function DiscreteMeasure{S<:Number, T<:Number}(points::Vector{S}, weights::Vector{T})
    length(points) == length(weights) ?
      new(points, weights)
    : error("number of point masses and weights don't add up")
  end
end

#If no explicit weights are given, assume uniform weight
DiscreteMeasure{S<:Number}(points::Vector{S}) =
DiscreteMeasure(points, ones(length(points))/length(points))

typealias Estimator Function
typealias EstimatorQuality Function

#Inputs:
#  Data X1,...,Xn
#  θ̂: estimator
#  ξ : estimator quality assessment
#  b : subset size
#  s : number of sampled subsets
#  r : number of Monte Carlo iterations
#  Pn = sum([delta(X[i]) for i=1:n)/n #empirical distribution
function blb(Data, θ̂::Estimator, ξ::EstimatorQuality, b::Int, s::Int, r::Int)
  n = length(Data)
  Z = Multinomial(n, ones(b)/b)
  ξ = zeros(s)
  for j=1:s #Iterate over subsets
    #Subsample the data
    I = randsubset(n, b)
    #Approximate ξ(Qn(P(j)nb))
    θ̂⭒n = zeros(r)
    for k=1:r #Iterate over Monte Carlo iterations
      nn = rand(Z)
      P⭒nk = DiscreteMeasure([X[I[a]] for a=1:b], nn[a]/n)
      θ̂⭒n[k] = θ̂(P⭒nk)
    end
    Q⭒nj = DiscreteMeasure(θ̂⭒n)
    ξ⭒n[j] = ξ(Q⭒nj)
  end
  #Average values of ξ(Qn(P(j)nb)) computed for different data subsets
  return sum([ξ⭒n[j] for j=1:s])/s
end


#Uses Floyd's algorithm for determining a random subset of size M
#from 1:N
#Citation:
# J. Bentley, B. Floyd, "Programming pearls: a sample of brilliance",
# Commun. ACM 30 (9), 1987, 754-757
# doi:10.1145/30401.315746
function randsubset(N::Int, M::Int)
  M > N ? error("Subset cannot be larger than original set") : none
  S = Set{Int}()
  for J = (N-M+1):N
    T = 1 + int(rand()*(J-1))
    add!(S, (T in S) ? J : T)
  end
  S
end
	using Distributions

	export Measure, DiscreteMeasure, Estimator, EstimatorQuality,
	blb, randsubset

	abstract Measure

	type DiscreteMeasure{S<:Number,T<:Number} <: Measure
	points :: Vector{S}
	weights :: Vector{T}
	function DiscreteMeasure{S<:Number, T<:Number}(points::Vector{S}, weights::Vector{T})
	length(points) == length(weights) ?
	new(points, weights)
	: error("number of point masses and weights don't add up")
	end
	end

	#If no explicit weights are given, assume uniform weight
	DiscreteMeasure{S<:Number}(points::Vector{S}) =
	DiscreteMeasure(points, ones(length(points))/length(points))

	typealias Estimator Function
	typealias EstimatorQuality Function

	#Inputs:
	# Data X1,...,Xn
	# θ̂: estimator
	# ξ : estimator quality assessment
	# b : subset size
	# s : number of sampled subsets
	# r : number of Monte Carlo iterations
	# Pn = sum([delta(X[i]) for i=1:n)/n #empirical distribution
	function blb(Data, θ̂::Estimator, ξ::EstimatorQuality, b::Int, s::Int, r::Int)
	n = length(Data)
	Z = Multinomial(n, ones(b)/b)
	ξ = zeros(s)
	for j=1:s #Iterate over subsets
	#Subsample the data
	I = randsubset(n, b)
	#Approximate ξ(Qn(P(j)nb))
	θ̂⭒n = zeros(r)
	for k=1:r #Iterate over Monte Carlo iterations
	nn = rand(Z)
	P⭒nk = DiscreteMeasure([X[I[a]] for a=1:b], nn[a]/n)
	θ̂⭒n[k] = θ̂(P⭒nk)
	end
	Q⭒nj = DiscreteMeasure(θ̂⭒n)
	ξ⭒n[j] = ξ(Q⭒nj)
	end
	#Average values of ξ(Qn(P(j)nb)) computed for different data subsets
	return sum([ξ⭒n[j] for j=1:s])/s
	end


	#Uses Floyd's algorithm for determining a random subset of size M
	#from 1:N
	#Citation:
	# J. Bentley, B. Floyd, "Programming pearls: a sample of brilliance",
	# Commun. ACM 30 (9), 1987, 754-757
	# doi:10.1145/30401.315746
	function randsubset(N::Int, M::Int)
	M > N ? error("Subset cannot be larger than original set") : none
	S = Set{Int}()
	for J = (N-M+1):N
	T = 1 + int(rand()*(J-1))
	add!(S, (T in S) ? J : T)
	end
	S
	end