@binarybana · Last active August 29, 2015 14:06
# Test script: repeatedly reloads the module file (presumably uniquename2.jl,
# the module MPM below) and runs a small classification to exercise it.
data = rand(0:1000, 48, 18000)

function test()
    reload("uniquename2.jl")
    normfac = vec(mapslices(x->quantile(x,0.75), data, 2))
    cls = MPM.mpm_classifier(rand(0:100,4,3), rand(0:100,4,3))
    MPM.sample(cls,1)
end

for i=1:100
    println(i)
    test()
end
# uniquename2.jl
module MPM

using Distributions
using Iterators # provides product(), required by gen_grid() below
#import Distributions.sample
export
    Sampler,
    MHRecord,
    AMWGRecord,
    sample,
    #propose,
    #energy,
    #reject,
    logsum,
    get_bbox,
    gen_grid,
    MPMPrior, MPMParams, MPMSampler, calc_g, gen_points,
    gen_posterior_points, calc_pvals, error_points, predict, error_moments_cube,
    var_error_hists, e_error_hists, e_error_eff, mpm_classifier
    #set_energy_limits,
abstract Sampler
abstract MCMC
type BinaryClassifier{T<:Sampler,S<:MCMC}
    cls1 :: T
    cls2 :: T
    mcmc1 :: S
    mcmc2 :: S
end
type AMWGRecord <: MCMC
    obj :: Sampler
    blocks :: Int
    batchsize :: Int
    sigmas :: Vector{Float64}
    target :: Float64
    mapvalue :: Sampler
    mapenergy :: Float64
    db :: Vector{Any}
    block_accept :: Vector{Int}
    iterations :: Int
    burn :: Int
    thin :: Int
end
import Base: length
length(x::AMWGRecord) = length(x.db)
AMWGRecord(obj::Sampler, blocks; burn=0, thin=1) = AMWGRecord(deepcopy(obj),
    blocks,            # Gibbs blocks
    50,                # batchsize
    ones(blocks),      # sigmas
    0.44,              # target acceptance rate
    deepcopy(obj),     # mapvalue
    Inf,               # mapenergy
    Any[],             # db
    zeros(Int,blocks), # block_accept
    1,                 # iterations
    burn, thin)
function sample(rec::AMWGRecord, iters::Int; verbose=false, adjust="burnin")
    oldenergy = energy(rec.obj)
    if verbose
        println("Initial energy: $oldenergy")
    end
    for current_iter = rec.iterations:(rec.iterations+iters-1)
        for i in 1:rec.blocks
            save!(rec.obj)
            # propose new object in Gibbs block i, with scale rec.sigmas[i]
            propose!(rec.obj, i, rec.sigmas[i])
            newenergy = energy(rec.obj, i)
            if newenergy < rec.mapenergy
                rec.mapvalue = deepcopy(rec.obj)
                rec.mapenergy = newenergy
            end
            r = oldenergy - newenergy
            ### Acceptance of new moves ###
            if rand() < exp(r)
                rec.block_accept[i] += 1
                oldenergy = newenergy
            else
                reject!(rec.obj)
            end
        end
        # Adjust proposal scales
        if current_iter % rec.batchsize == 0 && ((adjust == "burnin" && current_iter <= rec.burn) || adjust == "always")
            delta = min(0.3, (current_iter / rec.batchsize)^-0.9)
            verbose && println("Sigmas before tuning: $(rec.sigmas)")
            verbose && println("Delta: $delta")
            verbose && println("Block_accept: $(rec.block_accept)")
            for i in 1:length(rec.sigmas)
                rec.sigmas[i] *= rec.block_accept[i] / rec.batchsize < rec.target ? exp(-delta) : exp(delta)
                rec.block_accept[i] = 0 # FIXME: is this right? I couldn't find it in Mamba, but otherwise the above doesn't make sense
            end
            verbose && println("Sigmas after tuning: $(rec.sigmas)")
        end
        if current_iter >= rec.burn && current_iter % rec.thin == 0
            push!(rec.db, deepcopy(rec.obj.curr))
        end
        #if (rec.iterations) % 1000 == 0 && verbose
            #@printf "Iteration: %8d, best energy: %7f, current energy: %7f\n" rec.iterations rec.mapenergy oldenergy
        #end
        rec.iterations += 1
    end
    #if verbose
        #println("Accepted samples: $(rec.block_accept)")
        #println("Acceptance ratios: $(rec.block_accept./(rec.iterations-rec.burn))")
    #end
end
# To be overridden by concrete Sampler implementations (signatures match the
# calls made in sample() above)
energy(x::Sampler, block::Int=0) = error("energy() must be implemented for your sampler type")
propose!(x::Sampler, block::Int, sigma::Float64) = error("propose!() must be implemented for your sampler type")
reject!(x::Sampler) = error("reject!() must be implemented for your sampler type")
save!(x::Sampler) = error("save!() must be implemented for your sampler type")
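# A minimal sketch (not part of the original gist) of a concrete Sampler that
# satisfies the interface above: a single Gaussian location parameter with one
# Gibbs block. The name ToySampler and its fields are hypothetical.
type ToySampler <: Sampler
    curr :: Float64
    old :: Float64
    data :: Vector{Float64}
end
# negative log-likelihood of N(curr, 1), up to an additive constant
energy(s::ToySampler, block::Int=0) = 0.5*sum((s.data .- s.curr).^2)
propose!(s::ToySampler, block::Int, sigma::Float64) = (s.curr += randn()*sigma; nothing)
reject!(s::ToySampler) = (s.curr = s.old; nothing)
save!(s::ToySampler) = (s.old = s.curr; nothing)
# Usage: rec = AMWGRecord(ToySampler(0.0, 0.0, randn(100)), 1); sample(rec, 1000)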
function gelman_rubin(samplers)
    # parameters in db must be scalars, vectors or matrices
    m = length(samplers)
    n = length(samplers[1])
    @assert all(map(length, samplers) .== n) # equal chain lengths... for now
    paramnames = names(samplers[1].db[1])
    dnostics = Dict()
    for p in paramnames
        p == :energy && continue
        ex = getfield(samplers[1].db[1], p)
        extype = typeof(ex)
        if extype <: Number
            # vector of vectors
            posts = [convert(Vector{extype}, map(y->getfield(y,p), x.db)) for x in samplers]
        elseif ndims(ex) == 1
            # vector of matrices
            posts = [vcat(map(y->getfield(y,p)', x.db)...) for x in samplers]
        elseif ndims(ex) == 2
            # vector of matrices
            posts = [vcat(map(y->vec(getfield(y,p))', x.db)...) for x in samplers]
        end
        # Dims: [chain] X [iteration X params]
        vars = vcat(map(x->var(x,1), posts)...)
        means = vcat(map(x->mean(x,1), posts)...)
        # Dims: [chain X params]
        W = mean(vars,1)    # within-chain variance
        B_jk = var(means,1) # variance of the chain means
        # Dims: [params]
        dnostics[string(p)] = vec(sqrt(((n-1)/n .* W .+ B_jk) ./ W))
    end
    return dnostics
end
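# Usage sketch (hypothetical): run several independent chains, then compare
# within- to between-chain variance; values near 1.0 suggest convergence:
#   chains = [mpm_model(data) for k=1:4]
#   for c in chains; sample(c, 10000); end
#   gelman_rubin(chains)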
function logsum(x::AbstractArray)
    if length(x) == 1
        return x[1]
    else
        a = maximum(x)
        return a .+ log(sum_kbn(exp(x.-a)))
    end
end

function logsum(x::AbstractArray, w::AbstractArray)
    if length(x) == 1 && w[1] == 1.0
        return x[1]
    else
        a = maximum(x)
        return a .+ log(sum_kbn(w.*exp(x.-a)))
    end
end

logsum(x::Float64, y::Float64) = x > y ? x+log1p(exp(y-x)) : y+log1p(exp(x-y))
function get_bbox(data; factor=1.5, positive=true)
    D = size(data,2)
    #maxs = vec(maximum(data, 1))
    maxs = vec(mapslices(x->quantile(x,0.75), data, 1))
    maxs .+= 3*maximum(maxs) / 4
    if positive
        mins = zeros(D)
    else
        #mins = vec(minimum(data,1))
        mins = vec(mapslices(x->quantile(x,0.25), data, 1))
    end
    spreads = maxs .- mins
    if !positive
        mins .-= spreads .* factor
    end
    maxs .+= spreads .* factor
    mins, maxs
end
function get_grid(data; factor=1.5, N=30, positive=true)
    mins, maxs = get_bbox(data, factor=factor, positive=positive)
    mins, maxs, gen_grid(mins, maxs, N)
end

gen_unit_grid(mins,maxs) = gen_grid(mins,maxs,typemax(Int))

function gen_grid(mins, maxs, N=30)
    D = length(mins)
    stepsizes = ceil(float(maxs .- mins) ./N)
    ranges = [mins[i]:stepsizes[i]:maxs[i] for i=1:D]
    grid = hcat(map(collect, product(ranges...))...) # product() from Iterators.jl
    return ranges, stepsizes, grid # three values, as bee_e_data's (lens, steps, points) expects
end
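# Usage sketch (dimensions illustrative): build an evaluation grid over the
# data's bounding box; grid holds one column per grid point:
#   mins, maxs = get_bbox(rand(0:100, 50, 2))
#   ranges, steps, grid = gen_grid(mins, maxs, 30)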
######################################################################
######################################################################
######################################################################
######################################################################
type MPMPrior
    D :: Int
    mu :: Vector{Float64}
    mu_sigmas :: Vector{Float64} # Univariate variances on mu
    kappa :: Float64
    S :: Matrix{Float64}
end
function MPMPrior(;D=2, args...)
    kappa = 6.0
    p = MPMPrior(D, zeros(D), ones(D)*30, kappa, (kappa-1-D)*eye(D,D))
    for (sym, val) in args
        if sym in MPMPrior.names
            setfield!(p, sym, val)
        else
            error("Quantity $sym not recognized in MPMPrior constructor")
        end
    end
    p
end
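# Usage sketch: fields are overridden by keyword; unrecognized names error:
#   prior = MPMPrior(D=2, kappa=10.0, S=eye(2)*0.5)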
######################################################################
######################################################################
######################################################################
######################################################################
type MPMParams
    mu :: Vector{Float64}
    sigma :: Matrix{Float64}
    sigpre :: Matrix{Float64}
    lam :: Matrix{Float64}
    energy :: Float64 # cached energy of this state; does this belong here?
    # d :: Vector{Float64}
    function MPMParams(mu, sigpre, lam)
        @assert size(sigpre) == (size(mu,1), size(mu,1))
        @assert size(lam,1) == size(mu,1)
        new(mu, sigpre'*sigpre, sigpre, lam, -Inf)
    end
end
import Base: copy!
function copy!(to::MPMParams, from::MPMParams)
    to.mu[:] = from.mu
    to.sigma[:] = from.sigma
    to.sigpre[:] = from.sigpre
    to.lam[:] = from.lam
    to.energy = from.energy
end
######################################################################
######################################################################
######################################################################
######################################################################
type MPMSampler <: Sampler
    curr :: MPMParams
    old :: MPMParams
    prior :: MPMPrior
    data :: Matrix{Int}
    usepriors :: Bool
    d :: Vector{Float64}
end

function MPMSampler(prior::MPMPrior, data::Matrix, obj::MPMParams, d::Float64)
    MPMSampler(prior, data, obj, d*ones(size(data,1)))
end

function MPMSampler(prior::MPMPrior, data::Matrix, obj::MPMParams, d::Vector{Float64})
    @assert size(data,1) > size(data,2) # samples in rows, dimensions in columns
    if !(eltype(data) <: Integer)
        warn("Data is being converted from type $(eltype(data)) to Int; data loss may occur")
    end
    MPMSampler(deepcopy(obj), deepcopy(obj), prior, int(data), true, d)
end
const blocks = 3

function propose!(obj::MPMSampler, block::Int, sigma::Float64)
    curr = obj.curr
    prior = obj.prior
    if block == 1 # Modify means
        curr.mu[:] += randn(prior.D) * sigma*0.1
    elseif block == 2 # Modify covariances
        #temp = curr.sigma .* (0.5 .+ rand(prior.D, prior.D)) #.* curr.sigma ./ mean(curr.sigma)
        #temp = curr.sigma .+ diagm(abs(clamp(diag(curr.sigma),0.001,Inf)) .* (rand(prior.D).-0.5))
        #temp = (temp'*temp).^(1/2)
        #if isposdef(temp) && false #rand(1:100) > 2
            #curr.sigma = temp
        #else
        for i=1:prior.D*prior.D
            curr.sigpre[i] += rand(Normal(0.0, sigma*0.1))
        end
        At_mul_B!(curr.sigma, curr.sigpre, curr.sigpre) # sigma = sigpre'*sigpre
        #curr.sigma = rand(InverseWishart(propmoves.priorkappa,
            #curr.sigma*(propmoves.priorkappa-1-prior.D)))
        #end
    elseif block == 3 # Modify lambdas
        for i in 1:length(curr.lam)
            if randbool()
                curr.lam[i] += randn() * sigma*0.1
            end
        end
    #elseif block == 4 # Modify di's
        #curr.d = clamp(curr.d + randn(self.n) * 0.2, 8,12) #FIXME
    end
    nothing
end
function energy(obj::MPMSampler, block::Int=0) # block currently unused
    accum = 0.0
    # likelihoods
    for i in 1:size(obj.data,1), j in 1:size(obj.data,2)
        accum -= logpdf(Poisson(obj.d[i] * exp(obj.curr.lam[j,i])), obj.data[i,j])
    end
    # lambdas
    accum -= sum(logpdf(MultivariateNormal(obj.curr.mu, obj.curr.sigma), obj.curr.lam))
    if obj.usepriors
        # Sigma
        #accum -= sum(logpdf(Normal(0.0, obj.propmoves.premove), obj.sigpre))
        accum -= logpdf(InverseWishart(obj.prior.kappa, obj.prior.S), obj.curr.sigma)
        # mu
        accum -= sum(logpdf(DiagNormal(obj.prior.mu, obj.prior.mu_sigmas), obj.curr.mu))
    end
    obj.curr.energy = accum
    return accum
end
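# energy() above is the negative unnormalized log-posterior of the hierarchical
# model: data[i,j] ~ Poisson(d[i] * exp(lam[j,i])), lam[:,i] ~ MvNormal(mu, sigma),
# with priors sigma ~ InverseWishart(kappa, S) and mu ~ N(prior.mu, diag(mu_sigmas)).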
reject!(obj::MPMSampler) = copy!(obj.curr, obj.old)
save!(obj::MPMSampler) = copy!(obj.old, obj.curr)
function gen_points(n, dmean, pt)
    D = size(pt.lam,1)
    newpts = Array(Float64, D, n)
    for i=1:n
        tempmv = rand(MultivariateNormal(pt.mu, pt.sigma))
        for j=1:D
            rate = dmean*exp(tempmv[j])
            if rate > 1000 # normal approximation to the Poisson for large rates
                newpts[j,i] = rand(Normal(rate, sqrt(rate)))
            else
                try
                    newpts[j,i] = rand(Poisson(rate))
                catch x
                    @show newpts[j,i]
                    @show rate
                    throw(x)
                end
            end
        end
    end
    # Or using lams below is another way to go, but it is not ideal for getting
    # a true 'feel' for the posterior fit:
    #for (i,lamind)=enumerate(sample(1:size(pt.lam,2), n))
        #newpts[:,i] = map(x->rand(Poisson(d*exp(x))), pt.lam[:,lamind])
    #end
    return newpts
end
function gen_posterior_points(n, dmean, posterior)
    npost = length(posterior)
    nper = iceil(n/npost)
    pts = Array(Float64, size(posterior[1].lam, 1), nper*npost)
    for i in 1:npost
        pts[:, (i-1)*nper+1:i*nper] = gen_points(nper, dmean, posterior[i])
    end
    return pts
end
function calc_pvals(Ts, points, db, d=10.0)
    numpoints = size(points, 2) # FIXME Which direction are we going again?
    ndims = size(points, 1)
    tvals = Array(Float64, ndims, length(Ts), length(db))
    tru_ts = Array(Float64, ndims, length(Ts))
    pvals = similar(tru_ts)
    for i in 1:length(db)
        newpts = gen_points(numpoints, d, db[i])
        for dim=1:ndims
            for (j,T) in enumerate(Ts)
                tvals[dim,j,i] = T(vec(newpts[dim,:]))
            end
        end
    end
    for dim=1:ndims
        for (j,T) in enumerate(Ts)
            tru_ts[dim,j] = T(vec(points[dim,:]))
            pvals[dim,j] = mean(tru_ts[dim,j] .< tvals[dim,j,:]) + 0.5*mean(tru_ts[dim,j] .== tvals[dim,j,:])
        end
    end
    return pvals, tru_ts, tvals
end
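# The posterior-predictive p-values above use the mid-p convention:
# p = P(T_rep > T_obs) + 0.5 * P(T_rep == T_obs), estimated over posterior draws.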
function calc_g(points, db, numlam; dmean=10.0)
    # Note: dmean here could actually be d if we knew valid values of d for
    # each point we are calculating the effective density for
    numpts = size(points, 2)
    dims = size(points, 1)
    dblen = length(db)
    res = zeros(numpts)
    lams = Array(Float64, dims, numlam)
    llams = Array(Float64, dims, numlam)
    rlams = Array(Float64, dims, numlam)
    points = floor(points)
    lgpoints = lgamma(points .+ 1)
    for i in 1:dblen # each draw of theta
        curr = db[i]
        rand!(MultivariateNormal(curr.mu, curr.sigma), lams)
        for k=1:numlam*dims
            rlams[k] = dmean * exp(lams[k])
            llams[k] = log(dmean) + lams[k]
        end
        for j in 1:numpts # each point
            accumlam = 0.0
            for s in 1:numlam # each lambda
                accumD = 0.0
                for d in 1:dims # each dimension
                    accumD += points[d,j]*llams[d,s] - rlams[d,s] - lgpoints[d,j]
                end
                accumlam += exp(accumD)
            end
            res[j] += (accumlam/numlam)
        end
    end
    return log(res / length(db))
end
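# calc_g estimates, for each point x, the log of the posterior-averaged
# ("effective") density:
#   g(x) = log( 1/|db| * sum over theta in db of
#               1/numlam * sum over s of prod over d of Poisson(x[d]; dmean*exp(lam[d,s])) )
# where the lam draws come from MvNormal(theta.mu, theta.sigma).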
# Utility convenience function
function mpm_model(data; burn=1000, thin=50, d=100.0, usepriors=true)
    D = size(data, 2)
    cov = eye(D,D) .* 0.1
    mu = ones(D)
    prior = MPM.MPMPrior(D=D, kappa=10., S=eye(D)*0.05*10)
    start = MPM.MPMParams(mu, cov,
        clamp(log(data'/d), -8.0, Inf)) #lam
    obj = MPM.MPMSampler(prior, data, deepcopy(start), d)
    obj.usepriors = usepriors
    #mymh_a = MPM.MHRecord(obj_a,burn=burn,thin=thin)
    return MPM.AMWGRecord(obj, blocks, burn=burn, thin=thin)
end
# Everything dealing with TWO classifier objects
function mpm_classifier(data1, data2; burn=1000, thin=50, d1=100.0, d2=100.0, kappa=10.0, usepriors=true)
    @assert size(data1,2) == size(data2,2) # both classes must share dimensionality
    D = size(data1, 2)
    cov = eye(D,D) .* 0.1
    mu = ones(D)
    prior = MPM.MPMPrior(D=D, kappa=kappa, S=eye(D)*0.05*10)
    # Class 1
    start = MPM.MPMParams(mu, cov,
        clamp(log(data1./d1)', -8.0, Inf)) #lam
    obj_a = MPM.MPMSampler(prior, data1, deepcopy(start), d1)
    obj_a.usepriors = usepriors
    #mymh_a = MPM.MHRecord(obj_a,burn=burn,thin=thin)
    mymh_a = MPM.AMWGRecord(obj_a, blocks, burn=burn, thin=thin)
    # Class 2
    start.lam = clamp(log(data2./d2)', -8.0, Inf) #lam
    obj_b = MPM.MPMSampler(prior, data2, deepcopy(start), d2)
    obj_b.usepriors = usepriors
    #mymh_b = MPM.MHRecord(obj_b,burn=burn,thin=thin)
    mymh_b = MPM.AMWGRecord(obj_b, blocks, burn=burn, thin=thin)
    BinaryClassifier(obj_a, obj_b, mymh_a, mymh_b)
end
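# Usage sketch (mirrors the test script at the top of this gist; sizes are
# illustrative, with samples in rows and features in columns):
#   cls = mpm_classifier(rand(0:100, 40, 3), rand(0:100, 40, 3))
#   sample(cls, 10000)
#   acceptance_rates(cls)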
function sample(cls::BinaryClassifier, iters=10000; verbose=false)
    t0 = time()
    sample(cls.mcmc1, iters, verbose=verbose)
    t1 = time()
    sample(cls.mcmc2, iters, verbose=verbose)
    t2 = time()
    return t1-t0, t2-t1 # per-class wall-clock times
end
import Base: vcat
function vcat(xs::BinaryClassifier...)
    # Note: mutates (and returns) the first argument by appending the others' dbs
    x = xs[1]
    for y in xs[2:end]
        append!(x.mcmc1.db, y.mcmc1.db)
        append!(x.mcmc2.db, y.mcmc2.db)
    end
    return x
end
function gelman_rubin(samplers::Vector{BinaryClassifier})
    return [gelman_rubin([x.mcmc1 for x in samplers]), gelman_rubin([x.mcmc2 for x in samplers])]
end
function acceptance_rates(cls::BinaryClassifier)
    accs = Dict{String,Any}()
    accs["1"] = cls.mcmc1.block_accept
    accs["2"] = cls.mcmc2.block_accept
    return accs
end
function bee_moments(cls::BinaryClassifier; dmean=10.0, numlam=20, numpts=20)
    bee_moments(cls, (dmean,dmean), numlam=numlam, numpts=numpts)
end

function bee_moments(cls::BinaryClassifier, dmeans::NTuple{2,Float64}; numlam=20, numpts=20)
    # FIXME This is only valid for c=0.5
    dmean1, dmean2 = dmeans
    db1 = cls.mcmc1.db
    db2 = cls.mcmc2.db
    @assert length(db1) == length(db2)
    dblen = length(db1)
    dim = length(db1[1].mu)
    points = Array(Int, dim, numpts*2)
    lamsx1 = Array(Float64, dim, numpts)
    lamsx2 = Array(Float64, dim, numpts)
    lams1 = Array(Float64, dim, numlam)
    lams2 = Array(Float64, dim, numlam)
    llams1 = Array(Float64, dim, numlam)
    llams2 = Array(Float64, dim, numlam)
    rlams1 = Array(Float64, dim, numlam)
    rlams2 = Array(Float64, dim, numlam)
    beem1 = beem2 = 0.0
    for i in 1:dblen
        dbpass = 0.0
        curr1 = db1[i]
        curr2 = db2[i]
        # Generate x values (from lams or from new lams?)
        rand!(MultivariateNormal(curr1.mu, curr1.sigma), lamsx1)
        rand!(MultivariateNormal(curr2.mu, curr2.sigma), lamsx2)
        if any(lamsx1 .> 35) || any(lamsx2 .> 35)
            #warn("Encountered lambda value greater than 35, skipping iteration")
            continue
        end
        for d=1:dim, j=1:numpts # loop variable renamed from i: it shadowed the db index above
            points[d,j] = rand(Poisson(dmean1*exp(lamsx1[d,j])))
            points[d,j+numpts] = rand(Poisson(dmean2*exp(lamsx2[d,j])))
        end
        g1 = calc_g(points, db1, numlam, dmean=dmean1)
        g2 = calc_g(points, db2, numlam, dmean=dmean2)
        g1 = clamp(g1, -10_000_000.0, Inf)
        g2 = clamp(g2, -10_000_000.0, Inf)
        # Now compute p(y|x,\theta)
        rand!(MultivariateNormal(curr1.mu, curr1.sigma), lams1)
        rand!(MultivariateNormal(curr2.mu, curr2.sigma), lams2)
        for k=1:numlam*dim
            rlams1[k] = dmean1 * exp(lams1[k])
            llams1[k] = log(dmean1) + lams1[k]
            rlams2[k] = dmean2 * exp(lams2[k])
            llams2[k] = log(dmean2) + lams2[k]
        end
        for j in 1:numpts*2 # each point
            accumlam1 = 0.0
            accumlam2 = 0.0
            #accumlam1 = -Inf
            #accumlam2 = -Inf
            for s in 1:numlam # each lambda
                accumD1 = 0.0
                accumD2 = 0.0
                for d in 1:dim # each dimension
                    accumD1 += points[d,j]*llams1[d,s] - rlams1[d,s] - lgamma(points[d,j]+1)
                    accumD2 += points[d,j]*llams2[d,s] - rlams2[d,s] - lgamma(points[d,j]+1)
                end
                accumlam1 += exp(accumD1)
                accumlam2 += exp(accumD2)
                #accumlam1 = accumlam1 == -Inf ? accumD1 : logsum(accumlam1, accumD1)
                #accumlam2 = accumlam2 == -Inf ? accumD2 : logsum(accumlam2, accumD2)
            end
            temp = g1[j] < g2[j] ? accumlam1 : accumlam2 #FIXME c!=0.5
            z = accumlam1 + accumlam2 #FIXME c!=0.5
            if z != 0.0
                dbpass += temp/z #FIXME c!=0.5
            end
            #z = logsum(accumlam1, accumlam2)
            #@show exp(temp-z)
            #dbpass = dbpass == -Inf ? temp-z : logsum(dbpass,temp-z) #FIXME c!=0.5
            #dbpass += exp(temp-z)
        end
        temp = dbpass/numpts/2 #FIXME c!=0.5 AND CHECK 4 constant
        beem1 += temp
        beem2 += temp^2
    end
    beem1 /= dblen
    beem2 /= dblen
    return beem1, beem2
end
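# bee_moments returns the first and second moments of the Bayesian error
# estimate across posterior draws; its variance is beem2 - beem1^2.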
function bee_e_data(data, db1, db2, numlam; dmean=10.0, maxtry=10)
    # FIXME This is only valid for c=0.5
    local volume, points, g1, g2
    g1sum = g2sum = 0.0
    factor = 0.0
    D = size(data,2)
    maxN = iround(20_000^(1/D))
    trycount = 1
    numpts = 0
    while max(abs(g1sum-1.0), abs(g2sum-1.0)) > 0.1
        mins, maxs = get_bbox(data, factor=factor)
        lens, steps, points = gen_grid(mins, maxs, maxN)
        volume = prod(steps)
        g1 = calc_g(points, db1, numlam, dmean=dmean)
        g2 = calc_g(points, db2, numlam, dmean=dmean)
        g1sum = exp(logsum(g1 .+ log(volume)))
        g2sum = exp(logsum(g2 .+ log(volume)))
        println("g1 sum: $g1sum")
        println("g2 sum: $g2sum")
        numpts += length(points)
        #println("steps: $steps")
        #println(maxs, " ", size(points), " ", factor)
        factor += 1.0
        if trycount > maxtry
            return -min(g1sum, g2sum)
        else
            trycount += 1
        end
    end
    println("bee_e_data used $trycount iterations, and $numpts * numclasses evaluations")
    bee_e_eff(g1, g2, volume) # NOTE: bee_e_eff is not defined in this gist (cf. the e_error_eff export)
end
function bee_e_mc(cls::BinaryClassifier; dmean=10.0, numpts=100)
    bee_e_mc(cls, (dmean,dmean), numpts=numpts)
end

function bee_e_mc(cls::BinaryClassifier, dmeans::NTuple{2,Float64}; numpts=100)
    #FIXME only c=0.5
    # Generate points from each class
    dmean1, dmean2 = dmeans
    pts1 = gen_posterior_points(numpts, dmean1, cls.mcmc1.db)
    pts2 = gen_posterior_points(numpts, dmean2, cls.mcmc2.db)
    acc_numpts = size(pts1,2) + size(pts2,2) # requested != generated
    points = hcat(pts1, pts2)
    g0 = calc_g(points, cls.mcmc1.db, 20, dmean=dmean1)
    g1 = calc_g(points, cls.mcmc2.db, 20, dmean=dmean2)
    g0 = clamp(g0, -10_000_000.0, Inf)
    g1 = clamp(g1, -10_000_000.0, Inf)
    z = mapslices(logsum, hcat(g0,g1), 2)
    res = exp(logsum(min(g0,g1) .- z .- log(acc_numpts)))
    return res
end
bee_moments_2sample(cls::BinaryClassifier; dmean=10.0, numpts=100) = bee_moments_2sample(cls, (dmean,dmean), numpts=numpts)

function bee_moments_2sample(cls::BinaryClassifier, dmeans::NTuple{2,Float64}; numpts=100)
    #FIXME only c=0.5
    # Generate points from each class
    dmean1, dmean2 = dmeans
    @assert length(cls.mcmc1.db) == length(cls.mcmc2.db)
    dbsize = length(cls.mcmc1.db)
    bee = 0.0
    bee2 = 0.0
    numpts = div(numpts,2)
    for i=1:dbsize
        #pts1x = gen_points(numpts, dmean1, cls.mcmc1.db[i])
        #pts1z = gen_points(numpts, dmean1, cls.mcmc1.db[i])
        #pts2x = gen_points(numpts, dmean2, cls.mcmc2.db[i])
        #pts2z = gen_points(numpts, dmean2, cls.mcmc2.db[i])
        #pointsx = hcat(pts1x,pts2x)
        #pointsz = hcat(pts1z,pts2z)
        #g1x = calc_g(pointsx, [cls.mcmc1.db[i]], 20, dmean=dmean1)
        #g1z = calc_g(pointsz, [cls.mcmc1.db[i]], 20, dmean=dmean1)
        #g2x = calc_g(pointsx, [cls.mcmc2.db[i]], 20, dmean=dmean2)
        #g2z = calc_g(pointsz, [cls.mcmc2.db[i]], 20, dmean=dmean2)
        #g1x = clamp(g1x, -10_000_000.0, Inf)
        #g1z = clamp(g1z, -10_000_000.0, Inf)
        #g2x = clamp(g2x, -10_000_000.0, Inf)
        #g2z = clamp(g2z, -10_000_000.0, Inf)
        pts1 = gen_points(numpts*2, dmean1, cls.mcmc1.db[i]) # x and z
        pts2 = gen_points(numpts*2, dmean2, cls.mcmc2.db[i]) # x and z
        allpts = hcat(pts1, pts2)
        g1 = clamp(calc_g(allpts, [cls.mcmc1.db[i]], 20, dmean=dmean1), -10_000_000.0, Inf)
        g2 = clamp(calc_g(allpts, [cls.mcmc2.db[i]], 20, dmean=dmean2), -10_000_000.0, Inf)
        g1eff = clamp(calc_g(allpts, cls.mcmc1.db, 20, dmean=dmean1), -10_000_000.0, Inf)
        g2eff = clamp(calc_g(allpts, cls.mcmc2.db, 20, dmean=dmean2), -10_000_000.0, Inf)
        # Could inline and optimize calc_g here
        g1x = g1[[1:numpts, numpts*2+1:numpts*3]]
        g1z = g1[[numpts+1:numpts*2, numpts*3+1:end]]
        g2x = g2[[1:numpts, numpts*2+1:numpts*3]]
        g2z = g2[[numpts+1:numpts*2, numpts*3+1:end]]
        g1xeff = g1eff[[1:numpts, numpts*2+1:numpts*3]]
        g1zeff = g1eff[[numpts+1:numpts*2, numpts*3+1:end]]
        g2xeff = g2eff[[1:numpts, numpts*2+1:numpts*3]]
        g2zeff = g2eff[[numpts+1:numpts*2, numpts*3+1:end]]
        #agree = (g1x .< g2x) .== (g1z .< g2z)
        labelx = (g1xeff .< g2xeff)
        labelz = (g1zeff .< g2zeff)
        zx = Float64[logsum(g1x[j],g2x[j]) for j=1:numpts*2]
        zz = Float64[logsum(g1z[j],g2z[j]) for j=1:numpts*2]
        lpyx = min(g1x,g2x) .- zx
        lpyz = min(g1z,g2z) .- zz
        ## BEE calculation
        temp = exp(logsum(vcat(lpyx,lpyz)))
        @show exp(minimum(lpyx))
        @show exp(minimum(lpyz))
        #@show temp/numpts/2
        @show sum(exp(lpyx))/numpts/2
        @show size(lpyx)
        @show numpts
        #@show exp(lpyx[1:3])
        #@show exp(lpyz[1:3])
        bee += exp(logsum(vcat(lpyx,lpyz)))
        for j=1:numpts
            if labelx[j] == labelz[j]
                bee2 += exp(lpyx[j] + lpyz[j])
            end
        end
    end
    bee /= (numpts*2 * dbsize)
    bee2 /= (numpts * dbsize) # FIXME not sure about this 2 constant
    return bee, bee2, bee2 - bee^2
end
function predict(db0, db1, points; dmean=10.0)
    g0 = calc_g(points, db0, 20, dmean=dmean)
    g1 = calc_g(points, db1, 20, dmean=dmean)
    return vec((g0 .- g1) .< 0) * 1.0
end
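# Usage sketch: predict() returns 1.0 for points whose effective density under
# db1 (the second class) exceeds that under db0 (i.e., g0 < g1), else 0.0:
#   yhat = predict(cls.mcmc1.db, cls.mcmc2.db, testpoints)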
function error_points(cls::BinaryClassifier, points, labels; dmean=10.0)
    return sum(abs(predict(cls.mcmc1.db, cls.mcmc2.db, points, dmean=dmean) - labels))/size(labels,1)
end
end