Attempt to factor a binary matrix.
Because of non-identifiability problems, we use a consistent
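
The model implied by the code below (read off expectation!, generate!, and logposterior!) is a Bayesian logistic matrix factorization; written out once in the code's own symbols, with D the N_r x N_c binary data matrix:

\[
P_{ij} = \operatorname{invlogit}\Bigl(\mu + a_i + b_j + \sum_{d=1}^{k} X_{di} Y_{dj}\Bigr),
\qquad
D_{ij} \sim \operatorname{Bernoulli}(P_{ij}),
\]
\[
\mu,\; a_i,\; b_j,\; X_{di},\; Y_{dj} \overset{\text{iid}}{\sim} \operatorname{Normal}(0, \sigma).
\]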
# basics.jl: shared dependencies and numerically safe logistic transforms.
using Distributions
using Optim
using Calculus

# Inverse logit, clamped away from exactly 0.0 and 1.0 so that downstream
# log() calls stay finite.
function invlogit(z::Float64)
    clamp(1 / (1 + exp(-z)), sqrt(eps(0.0)), 1.0 - sqrt(eps(1.0)))
end

# Logit, with the input clamped into the open unit interval for the same reason.
function logit(p::Float64)
    p = clamp(p, sqrt(eps(0.0)), 1.0 - sqrt(eps(1.0)))
    log(p / (1 - p))
end
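
A quick sanity check of the clamping (hypothetical REPL calls; the values are approximate):

invlogit(-800.0)  # ≈ 2.2e-162 rather than 0.0, so log(invlogit(-800.0)) is finite
logit(0.0)        # ≈ -372.3 rather than -Inf
logit(1.0)        # ≈ 18.0 rather than Inf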
# inference.jl: MAP estimation of the packed model by L-BFGS.
function inference(
    D::Matrix{Float64},
    k::Int,
    sigma::Float64
)
    N_r, N_c = size(D)
    E = Array(Float64, N_r, N_c)
    v0 = randn(@n_params)

    # Optim minimizes, so we hand it the negative log posterior and the
    # matching gradient (gr! already computes the negated gradient).
    neglp(v::Vector{Float64}) = -logposterior!(D, E, k, sigma, v)
    function grneglp!(v::Vector{Float64}, neggrad::Vector{Float64})
        gr!(neggrad, D, E, k, sigma, v)
        return
    end

    # Initialize the easy model parameters to better starting values:
    # mu from the grand mean, a[i] and b[j] from row and column means.
    v0[1] = logit(mean(D))
    for i in 1:N_r
        tmp = 0.0
        for j in 1:N_c
            tmp += D[i, j]
        end
        v0[@ind_a_i] = logit(tmp / N_c)
    end
    for j in 1:N_c
        tmp = 0.0
        for i in 1:N_r
            tmp += D[i, j]
        end
        v0[@ind_b_j] = logit(tmp / N_r)
    end

    res = optimize(
        neglp,
        grneglp!,
        v0,
        method = :l_bfgs,
        iterations = 100,
        show_trace = true
    )

    # In this version of Optim, res.minimum holds the minimizing parameter vector.
    return unpack(res.minimum, N_r, N_c, k)
end
# packed_model.jl: the packed parameterization. All parameters live in one
# flat vector v laid out as [mu, a, b, vec(X), vec(Y)]. The index macros
# below compute positions in v; esc() is required so that i, j, d, N_r,
# N_c, and k resolve in the calling scope rather than being hygienized.
macro ind_mu()
    :(1)
end
macro ind_a_i()
    esc(:(1 + i))
end
macro ind_b_j()
    esc(:(1 + N_r + j))
end
macro ind_x_di()
    esc(:(1 + N_r + N_c + (i - 1) * k + d))
end
macro ind_y_dj()
    esc(:(1 + N_r + N_c + k * N_r + (j - 1) * k + d))
end
macro n_params()
    esc(:(1 + N_r + N_c + k * N_r + k * N_c))
end
# Copy the individual parameters into the flat vector v.
function pack!(v, mu, a, b, X, Y)
    k, N_r = size(X)
    k, N_c = size(Y)
    v[@ind_mu] = mu
    for i in 1:N_r
        v[@ind_a_i] = a[i]
    end
    for j in 1:N_c
        v[@ind_b_j] = b[j]
    end
    for i in 1:N_r
        for d in 1:k
            v[@ind_x_di] = X[d, i]
        end
    end
    for j in 1:N_c
        for d in 1:k
            v[@ind_y_dj] = Y[d, j]
        end
    end
    return
end
function unpack(v, N_r, N_c, k)
    mu = v[@ind_mu]
    a = Array(Float64, N_r)
    b = Array(Float64, N_c)
    X = Array(Float64, k, N_r)
    Y = Array(Float64, k, N_c)
    for i in 1:N_r
        a[i] = v[@ind_a_i]
    end
    for j in 1:N_c
        b[j] = v[@ind_b_j]
    end
    for i in 1:N_r
        for d in 1:k
            X[d, i] = v[@ind_x_di]
        end
    end
    for j in 1:N_c
        for d in 1:k
            Y[d, j] = v[@ind_y_dj]
        end
    end
    return mu, a, b, X, Y
end
# Fill E with P(D[i, j] = 1) under the packed parameter vector v.
function expectation!(
    E::Matrix{Float64},
    k::Int,
    v::Vector{Float64}
)
    N_r, N_c = size(E)
    mu = v[@ind_mu]
    for i in 1:N_r
        a_i = v[@ind_a_i]
        for j in 1:N_c
            b_j = v[@ind_b_j]
            XiYj = 0.0
            for d in 1:k
                X_di = v[@ind_x_di]
                Y_dj = v[@ind_y_dj]
                XiYj += X_di * Y_dj
            end
            E[i, j] = invlogit(mu + a_i + b_j + XiYj)
        end
    end
    return
end
function generate!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, k, v)
    for i in 1:N_r
        for j in 1:N_c
            D[i, j] = rand(Bernoulli(E[i, j]))
        end
    end
    return
end
# Bernoulli log likelihood. Since D[i, j] is 0 or 1, p reduces to
# E[i, j] when D[i, j] == 1 and to 1 - E[i, j] otherwise.
function loglikelihood!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, k, v)
    ll = 0.0
    for i in 1:N_r
        for j in 1:N_c
            p = D[i, j] * E[i, j] + (1 - D[i, j]) * (1 - E[i, j])
            ll += log(p)
        end
    end
    return ll
end
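
Because each D[i, j] is 0 or 1, the loop above computes the standard Bernoulli log likelihood:

\[
\ell = \sum_{i=1}^{N_r} \sum_{j=1}^{N_c} \Bigl[ D_{ij} \log E_{ij} + (1 - D_{ij}) \log(1 - E_{ij}) \Bigr].
\]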
# Unnormalized log posterior: independent Normal(0, sigma) log priors on
# every parameter plus the Bernoulli log likelihood.
function logposterior!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    sigma::Float64,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    lp = 0.0
    lp += logpdf(Normal(0, sigma), v[@ind_mu])
    for i in 1:N_r
        lp += logpdf(Normal(0, sigma), v[@ind_a_i])
    end
    for j in 1:N_c
        lp += logpdf(Normal(0, sigma), v[@ind_b_j])
    end
    for i in 1:N_r
        for d in 1:k
            lp += logpdf(Normal(0, sigma), v[@ind_x_di])
        end
    end
    for j in 1:N_c
        for d in 1:k
            lp += logpdf(Normal(0, sigma), v[@ind_y_dj])
        end
    end
    ll = loglikelihood!(D, E, k, v)
    return lp + ll
end
# Gradient of the *negative* log posterior, written into gr; this matches
# neglp in inference(), which Optim minimizes.
function gr!(
    gr::Vector{Float64},
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    sigma::Float64,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    # Negative log-prior gradient: each Normal(0, sigma) prior contributes
    # theta / sigma^2 for its parameter theta.
    gr[@ind_mu] = v[@ind_mu] / sigma^2
    for i in 1:N_r
        gr[@ind_a_i] = v[@ind_a_i] / sigma^2
    end
    for j in 1:N_c
        gr[@ind_b_j] = v[@ind_b_j] / sigma^2
    end
    for i in 1:N_r
        for d in 1:k
            gr[@ind_x_di] = v[@ind_x_di] / sigma^2
        end
    end
    for j in 1:N_c
        for d in 1:k
            gr[@ind_y_dj] = v[@ind_y_dj] / sigma^2
        end
    end
    # Negative log-likelihood gradient: delta = D[i, j] - P[i, j] is the
    # usual logistic residual, negated in the accumulations below.
    ind_mu = @ind_mu
    for i in 1:N_r
        ind_a_i = @ind_a_i
        for j in 1:N_c
            ind_b_j = @ind_b_j
            dotprod = 0.0
            for d in 1:k
                dotprod += v[@ind_x_di] * v[@ind_y_dj]
            end
            P_ij = invlogit(v[ind_mu] + v[ind_a_i] + v[ind_b_j] + dotprod)
            delta = D[i, j] - P_ij
            gr[ind_mu] += delta * (-1)
            gr[ind_a_i] += delta * (-1)
            gr[ind_b_j] += delta * (-1)
            for d in 1:k
                gr[@ind_x_di] += delta * -v[@ind_y_dj]
                gr[@ind_y_dj] += delta * -v[@ind_x_di]
            end
        end
    end
    return
end
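
For reference, the likelihood terms in gr! follow the standard logistic-regression identity. With \(\eta_{ij} = \mu + a_i + b_j + \sum_d X_{di} Y_{dj}\) and \(\delta_{ij} = D_{ij} - P_{ij}\),

\[
\frac{\partial \ell}{\partial \mu} = \sum_{i,j} \delta_{ij}, \quad
\frac{\partial \ell}{\partial a_i} = \sum_{j} \delta_{ij}, \quad
\frac{\partial \ell}{\partial b_j} = \sum_{i} \delta_{ij}, \quad
\frac{\partial \ell}{\partial X_{di}} = \sum_{j} \delta_{ij} Y_{dj}, \quad
\frac{\partial \ell}{\partial Y_{dj}} = \sum_{i} \delta_{ij} X_{di},
\]

while each Normal(0, sigma) prior contributes \(-\theta / \sigma^2\) for its parameter \(\theta\). gr! negates all of these because the optimizer minimizes the negative log posterior.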
include("basics.jl") | |
include("unpacked_model.jl") | |
include("packed_model.jl") | |
include("inference.jl") | |
sigma = 1.0 | |
N_r = 250 | |
N_c = 100 | |
k = 2 | |
srand(1) | |
mu, a, b, X, Y = random_parameters(sigma, N_r, N_c, k) | |
D = Array(Float64, N_r, N_c) | |
E = Array(Float64, N_r, N_c) | |
generate!(D, E, mu, a, b, X, Y) | |
ll = loglikelihood!(D, E, mu, a, b, X, Y) | |
lp = logposterior!(D, E, mu, a, b, X, Y, sigma) | |
mu_hat, a_hat, b_hat, X_hat, Y_hat = inference(D, k, sigma) | |
ll = loglikelihood!(D, E, mu, a, b, X, Y) | |
lp = logposterior!(D, E, mu, a, b, X, Y, sigma) | |
@printf "True parameters:\n\tLL: %f\n\tLP: %f\n" ll lp | |
ll = loglikelihood!(D, E, mu_hat, a_hat, b_hat, X_hat, Y_hat) | |
lp = logposterior!(D, E, mu_hat, a_hat, b_hat, X_hat, Y_hat, sigma) | |
@printf "Fitted parameters:\n\tLL: %f\n\tLP: %f\n" ll lp |
include("basics.jl") | |
include("unpacked_model.jl") | |
include("packed_model.jl") | |
include("inference.jl") | |
sigma = 1.0 | |
N_r = 25 | |
N_c = 10 | |
k = 2 | |
srand(1) | |
mu1, a1, b1, X1, Y1 = random_parameters(sigma, N_r, N_c, k) | |
n_params = 1 + N_r + N_c + k * N_r + k * N_c | |
v = Array(Float64, n_params) | |
gr = Array(Float64, n_params) | |
pack!(v, mu1, a1, b1, X1, Y1) | |
mu2, a2, b2, X2, Y2 = unpack(v, N_r, N_c, k) | |
@assert mu1 == mu2 | |
@assert all(a1 .== a2) | |
@assert all(b1 .== b2) | |
@assert all(X1 .== X2) | |
@assert all(Y1 .== Y2) | |
D1 = Array(Float64, N_r, N_c) | |
D2 = Array(Float64, N_r, N_c) | |
E = Array(Float64, N_r, N_c) | |
srand(1) | |
generate!(D1, E, mu1, a1, b1, X1, Y1) | |
srand(1) | |
generate!(D2, E, k, v) | |
@assert all(D1 .== D2) | |
ll1 = loglikelihood!(D1, E, mu1, a1, b1, X1, Y1) | |
ll2 = loglikelihood!(D2, E, k, v) | |
@assert ll1 == ll2 | |
lp1 = logposterior!(D1, E, mu1, a1, b1, X1, Y1, sigma) | |
lp2 = logposterior!(D2, E, k, sigma, v) | |
@assert lp1 == lp2 | |
f(v::Vector{Float64}) = -logposterior!(D2, E, k, sigma, v) | |
function g(v::Vector{Float64}) | |
mygr = similar(v) | |
gr!(mygr, D1, E, k, sigma, v) | |
return mygr | |
end | |
@printf "Maximum gradient error: %.24f\n" check_gradient(f, g, v) | |
print([g(v) Calculus.finite_difference(f, v)][1:5, :]) |
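
check_gradient and finite_difference come from the Calculus package; the comparison is against a numerical difference quotient, shown here in its central form as a reminder (assuming that is the variant this version of Calculus uses), per coordinate d with basis vector \(e_d\):

\[
\frac{\partial f}{\partial v_d} \approx \frac{f(v + h e_d) - f(v - h e_d)}{2h}.
\]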
# unpacked_model.jl: the same model with the parameters kept as separate
# arguments (mu, a, b, X, Y) rather than one packed vector.

# Draw every parameter i.i.d. from its Normal(0, sigma) prior.
function random_parameters(
    sigma::Float64,
    N_r::Int,
    N_c::Int,
    k::Int
)
    mu = rand(Normal(0, sigma))
    a = rand(Normal(0, sigma), N_r)
    b = rand(Normal(0, sigma), N_c)
    X = rand(Normal(0, sigma), k, N_r)
    Y = rand(Normal(0, sigma), k, N_c)
    return mu, a, b, X, Y
end
function expectation!(
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64}
)
    N_r, N_c = size(E)
    for i in 1:N_r
        for j in 1:N_c
            E[i, j] = invlogit(mu + a[i] + b[j] + dot(X[:, i], Y[:, j]))
        end
    end
    return
end
function generate!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, mu, a, b, X, Y)
    for i in 1:N_r
        for j in 1:N_c
            D[i, j] = rand(Bernoulli(E[i, j]))
        end
    end
    return
end
function loglikelihood!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, mu, a, b, X, Y)
    ll = 0.0
    for i in 1:N_r
        for j in 1:N_c
            p = D[i, j] * E[i, j] + (1 - D[i, j]) * (1 - E[i, j])
            ll += log(p)
        end
    end
    return ll
end
function logposterior!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64},
    sigma::Float64
)
    N_r, N_c = size(D)
    k = size(X, 1)
    lp = 0.0
    lp += logpdf(Normal(0, sigma), mu)
    for i in 1:N_r
        lp += logpdf(Normal(0, sigma), a[i])
    end
    for j in 1:N_c
        lp += logpdf(Normal(0, sigma), b[j])
    end
    for i in 1:N_r
        for d in 1:k
            lp += logpdf(Normal(0, sigma), X[d, i])
        end
    end
    for j in 1:N_c
        for d in 1:k
            lp += logpdf(Normal(0, sigma), Y[d, j])
        end
    end
    ll = loglikelihood!(D, E, mu, a, b, X, Y)
    return lp + ll
end