Bernoulli matrix factorization

BinaryMF

Attempt to factor a binary matrix, $D$, under the assumption that its entries are IID Bernoulli draws, conditional on the value of a link-transformed linear predictor, $\mathbb{E}[D_{i, j}] = I(\mu + a_i + b_j + X_{i}^{T} Y_{j})$, where $I$ is the inverse logit link function and $X_i, Y_j \in \mathbb{R}^k$ are the latent factors for row $i$ and column $j$.
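Concretely, writing $P_{i,j} = I(\mu + a_i + b_j + X_i^T Y_j)$ for the success probability of entry $(i, j)$, the log likelihood that `loglikelihood!` below evaluates is the usual Bernoulli sum

$$\log p(D \mid \theta) = \sum_{i=1}^{N_r} \sum_{j=1}^{N_c} \left[ D_{i,j} \log P_{i,j} + (1 - D_{i,j}) \log (1 - P_{i,j}) \right].$$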

Because of non-identifiability problems, we place a common $\mathcal{N}(0, \sigma^2)$ prior on all parameters in the model (even $\mu$ for now) to try to resolve translation invariance in the log likelihood function. Even with this prior, the model still suffers from invariance under orthogonal transformations of $X$ and $Y$. We just hope for the best when doing hill-climbing.
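Under that prior, the objective climbed below is the unnormalized log posterior

$$\log p(\theta \mid D) = \log p(D \mid \theta) - \frac{1}{2\sigma^2} \sum_m \theta_m^2 + \mathrm{const},$$

whose partial derivatives take the familiar logistic-regression form in terms of the residuals $\delta_{i,j} = D_{i,j} - P_{i,j}$, e.g.

$$\frac{\partial}{\partial \mu} = \sum_{i,j} \delta_{i,j} - \frac{\mu}{\sigma^2}, \qquad \frac{\partial}{\partial X_{d,i}} = \sum_{j} \delta_{i,j} Y_{d,j} - \frac{X_{d,i}}{\sigma^2}.$$

These are exactly what `gr!` accumulates, with signs flipped because the optimizer minimizes the negative log posterior.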

using Distributions
using Optim
using Calculus

# Inverse logit, clamped away from 0 and 1 so that log() of the
# resulting probability is always finite.
function invlogit(z::Float64)
    clamp(1 / (1 + exp(-z)), sqrt(eps(0.0)), 1.0 - sqrt(eps(1.0)))
end

# Logit of a probability, clamped into the same interior interval.
function logit(p::Float64)
    p = clamp(p, sqrt(eps(0.0)), 1.0 - sqrt(eps(1.0)))
    log(p / (1 - p))
end
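Note that both link functions clamp their results strictly inside $(0, 1)$ (to roughly $[10^{-162}, 1 - 10^{-8}]$, via `sqrt(eps(0.0))` and `sqrt(eps(1.0))`), so the $\log(p)$ terms in the likelihood stay finite even when the linear predictor saturates.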
function inference(
    D::Matrix{Float64},
    k::Int,
    sigma::Float64
)
    N_r, N_c = size(D)
    E = Array(Float64, N_r, N_c)
    v0 = randn(@n_params)
    neglp(v::Vector{Float64}) = -logposterior!(D, E, k, sigma, v)
    function grneglp!(v::Vector{Float64}, neggrad::Vector{Float64})
        gr!(neggrad, D, E, k, sigma, v)
        return
    end
    # Initialize the easy model parameters to better starting values:
    # the grand mean and the row/column means on the logit scale.
    v0[1] = logit(mean(D))
    for i in 1:N_r
        tmp = 0.0
        for j in 1:N_c
            tmp += D[i, j]
        end
        v0[@ind_a_i] = logit(tmp / N_c)
    end
    for j in 1:N_c
        tmp = 0.0
        for i in 1:N_r
            tmp += D[i, j]
        end
        v0[@ind_b_j] = logit(tmp / N_r)
    end
    res = optimize(
        neglp,
        grneglp!,
        v0,
        method = :l_bfgs,
        iterations = 100,
        show_trace = true
    )
    return unpack(res.minimum, N_r, N_c, k)
end
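All of the parameters are packed into a single flat vector `v` so that Optim can do the hill-climbing over one argument; the macros below encode its layout,

v = [ mu | a[1] ... a[N_r] | b[1] ... b[N_c] | X[:, 1] ... X[:, N_r] | Y[:, 1] ... Y[:, N_c] ]

and `@n_params` gives its total length.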
# Each macro body is wrapped in esc() so that the index expression picks
# up the caller's local variables (i, j, d, N_r, N_c, k) rather than
# being hygienized into globals.
macro ind_mu()
    :(1)
end
macro ind_a_i()
    esc(:(1 + i))
end
macro ind_b_j()
    esc(:(1 + N_r + j))
end
macro ind_x_di()
    esc(:(1 + N_r + N_c + (i - 1) * k + d))
end
macro ind_y_dj()
    esc(:(1 + N_r + N_c + k * N_r + (j - 1) * k + d))
end
macro n_params()
    esc(:(1 + N_r + N_c + k * N_r + k * N_c))
end
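For example, with N_r = 2, N_c = 3, and k = 2, `@ind_b_j` expands to 1 + 2 + j (so j = 1 maps to index 4), and `@n_params` gives 1 + 2 + 3 + 4 + 6 = 16.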
# Copy (mu, a, b, X, Y) into the packed vector v.
function pack!(v, mu, a, b, X, Y)
    k, N_r = size(X)
    k, N_c = size(Y)
    v[@ind_mu] = mu
    for i in 1:N_r
        v[@ind_a_i] = a[i]
    end
    for j in 1:N_c
        v[@ind_b_j] = b[j]
    end
    for i in 1:N_r
        for d in 1:k
            v[@ind_x_di] = X[d, i]
        end
    end
    for j in 1:N_c
        for d in 1:k
            v[@ind_y_dj] = Y[d, j]
        end
    end
    return
end

# Recover (mu, a, b, X, Y) from the packed vector v.
function unpack(v, N_r, N_c, k)
    mu = v[@ind_mu]
    a = Array(Float64, N_r)
    b = Array(Float64, N_c)
    X = Array(Float64, k, N_r)
    Y = Array(Float64, k, N_c)
    for i in 1:N_r
        a[i] = v[@ind_a_i]
    end
    for j in 1:N_c
        b[j] = v[@ind_b_j]
    end
    for i in 1:N_r
        for d in 1:k
            X[d, i] = v[@ind_x_di]
        end
    end
    for j in 1:N_c
        for d in 1:k
            Y[d, j] = v[@ind_y_dj]
        end
    end
    return mu, a, b, X, Y
end
# Fill E with the entry-wise success probabilities implied by v.
function expectation!(
    E::Matrix{Float64},
    k::Int,
    v::Vector{Float64}
)
    N_r, N_c = size(E)
    mu = v[@ind_mu]
    for i in 1:N_r
        a_i = v[@ind_a_i]
        for j in 1:N_c
            b_j = v[@ind_b_j]
            XiYj = 0.0
            for d in 1:k
                X_di = v[@ind_x_di]
                Y_dj = v[@ind_y_dj]
                XiYj += X_di * Y_dj
            end
            E[i, j] = invlogit(mu + a_i + b_j + XiYj)
        end
    end
    return
end

# Simulate a binary data matrix D from the model implied by v.
function generate!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, k, v)
    for i in 1:N_r
        for j in 1:N_c
            D[i, j] = rand(Bernoulli(E[i, j]))
        end
    end
    return
end

# Bernoulli log likelihood of D given the packed parameters v.
function loglikelihood!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, k, v)
    ll = 0.0
    for i in 1:N_r
        for j in 1:N_c
            # p is E[i, j] when D[i, j] == 1 and 1 - E[i, j] otherwise.
            p = D[i, j] * E[i, j] + (1 - D[i, j]) * (1 - E[i, j])
            ll += log(p)
        end
    end
    return ll
end

# Unnormalized log posterior: Gaussian log prior plus log likelihood.
function logposterior!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    sigma::Float64,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    lp = 0.0
    lp += logpdf(Normal(0, sigma), v[@ind_mu])
    for i in 1:N_r
        lp += logpdf(Normal(0, sigma), v[@ind_a_i])
    end
    for j in 1:N_c
        lp += logpdf(Normal(0, sigma), v[@ind_b_j])
    end
    for i in 1:N_r
        for d in 1:k
            lp += logpdf(Normal(0, sigma), v[@ind_x_di])
        end
    end
    for j in 1:N_c
        for d in 1:k
            lp += logpdf(Normal(0, sigma), v[@ind_y_dj])
        end
    end
    ll = loglikelihood!(D, E, k, v)
    return lp + ll
end
# Gradient of the *negative* log posterior with respect to v, written
# into gr in place. This sign convention matches the objective that
# Optim minimizes in inference().
function gr!(
    gr::Vector{Float64},
    D::Matrix{Float64},
    E::Matrix{Float64},
    k::Int,
    sigma::Float64,
    v::Vector{Float64}
)
    N_r, N_c = size(D)
    # Negative log prior gradient: d/dv [v^2 / (2 sigma^2)] = v / sigma^2.
    gr[@ind_mu] = v[@ind_mu] / sigma^2
    for i in 1:N_r
        gr[@ind_a_i] = v[@ind_a_i] / sigma^2
    end
    for j in 1:N_c
        gr[@ind_b_j] = v[@ind_b_j] / sigma^2
    end
    for i in 1:N_r
        for d in 1:k
            gr[@ind_x_di] = v[@ind_x_di] / sigma^2
        end
    end
    for j in 1:N_c
        for d in 1:k
            gr[@ind_y_dj] = v[@ind_y_dj] / sigma^2
        end
    end
    # Negative log likelihood gradient: each cell contributes the
    # residual delta = D[i, j] - P[i, j] through the chain rule.
    ind_mu = @ind_mu
    for i in 1:N_r
        ind_a_i = @ind_a_i
        for j in 1:N_c
            ind_b_j = @ind_b_j
            dotprod = 0.0
            for d in 1:k
                dotprod += v[@ind_x_di] * v[@ind_y_dj]
            end
            P_ij = invlogit(v[ind_mu] + v[ind_a_i] + v[ind_b_j] + dotprod)
            delta = D[i, j] - P_ij
            gr[ind_mu] -= delta
            gr[ind_a_i] -= delta
            gr[ind_b_j] -= delta
            for d in 1:k
                gr[@ind_x_di] -= delta * v[@ind_y_dj]
                gr[@ind_y_dj] -= delta * v[@ind_x_di]
            end
        end
    end
    return
end
include("basics.jl")
include("unpacked_model.jl")
include("packed_model.jl")
include("inference.jl")
sigma = 1.0
N_r = 250
N_c = 100
k = 2
srand(1)
mu, a, b, X, Y = random_parameters(sigma, N_r, N_c, k)
D = Array(Float64, N_r, N_c)
E = Array(Float64, N_r, N_c)
generate!(D, E, mu, a, b, X, Y)
ll = loglikelihood!(D, E, mu, a, b, X, Y)
lp = logposterior!(D, E, mu, a, b, X, Y, sigma)
mu_hat, a_hat, b_hat, X_hat, Y_hat = inference(D, k, sigma)
ll = loglikelihood!(D, E, mu, a, b, X, Y)
lp = logposterior!(D, E, mu, a, b, X, Y, sigma)
@printf "True parameters:\n\tLL: %f\n\tLP: %f\n" ll lp
ll = loglikelihood!(D, E, mu_hat, a_hat, b_hat, X_hat, Y_hat)
lp = logposterior!(D, E, mu_hat, a_hat, b_hat, X_hat, Y_hat, sigma)
@printf "Fitted parameters:\n\tLL: %f\n\tLP: %f\n" ll lp
include("basics.jl")
include("unpacked_model.jl")
include("packed_model.jl")
include("inference.jl")
sigma = 1.0
N_r = 25
N_c = 10
k = 2
srand(1)
mu1, a1, b1, X1, Y1 = random_parameters(sigma, N_r, N_c, k)
n_params = 1 + N_r + N_c + k * N_r + k * N_c
v = Array(Float64, n_params)
gr = Array(Float64, n_params)
pack!(v, mu1, a1, b1, X1, Y1)
mu2, a2, b2, X2, Y2 = unpack(v, N_r, N_c, k)
@assert mu1 == mu2
@assert all(a1 .== a2)
@assert all(b1 .== b2)
@assert all(X1 .== X2)
@assert all(Y1 .== Y2)
D1 = Array(Float64, N_r, N_c)
D2 = Array(Float64, N_r, N_c)
E = Array(Float64, N_r, N_c)
srand(1)
generate!(D1, E, mu1, a1, b1, X1, Y1)
srand(1)
generate!(D2, E, k, v)
@assert all(D1 .== D2)
ll1 = loglikelihood!(D1, E, mu1, a1, b1, X1, Y1)
ll2 = loglikelihood!(D2, E, k, v)
@assert ll1 == ll2
lp1 = logposterior!(D1, E, mu1, a1, b1, X1, Y1, sigma)
lp2 = logposterior!(D2, E, k, sigma, v)
@assert lp1 == lp2
f(v::Vector{Float64}) = -logposterior!(D2, E, k, sigma, v)
function g(v::Vector{Float64})
mygr = similar(v)
gr!(mygr, D1, E, k, sigma, v)
return mygr
end
@printf "Maximum gradient error: %.24f\n" check_gradient(f, g, v)
print([g(v) Calculus.finite_difference(f, v)][1:5, :])
# Unpacked parameterization: the same model with parameters kept as
# (mu, a, b, X, Y) rather than a packed vector. Used to sanity-check
# the packed implementation.
function random_parameters(
    sigma::Float64,
    N_r::Int,
    N_c::Int,
    k::Int
)
    mu = rand(Normal(0, sigma))
    a = rand(Normal(0, sigma), N_r)
    b = rand(Normal(0, sigma), N_c)
    X = rand(Normal(0, sigma), k, N_r)
    Y = rand(Normal(0, sigma), k, N_c)
    return mu, a, b, X, Y
end

function expectation!(
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64}
)
    N_r, N_c = size(E)
    for i in 1:N_r
        for j in 1:N_c
            E[i, j] = invlogit(mu + a[i] + b[j] + dot(X[:, i], Y[:, j]))
        end
    end
    return
end

function generate!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, mu, a, b, X, Y)
    for i in 1:N_r
        for j in 1:N_c
            D[i, j] = rand(Bernoulli(E[i, j]))
        end
    end
    return
end

function loglikelihood!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64}
)
    N_r, N_c = size(D)
    expectation!(E, mu, a, b, X, Y)
    ll = 0.0
    for i in 1:N_r
        for j in 1:N_c
            p = D[i, j] * E[i, j] + (1 - D[i, j]) * (1 - E[i, j])
            ll += log(p)
        end
    end
    return ll
end

function logposterior!(
    D::Matrix{Float64},
    E::Matrix{Float64},
    mu::Float64,
    a::Vector{Float64},
    b::Vector{Float64},
    X::Matrix{Float64},
    Y::Matrix{Float64},
    sigma::Float64
)
    N_r, N_c = size(D)
    k = size(X, 1)
    lp = 0.0
    lp += logpdf(Normal(0, sigma), mu)
    for i in 1:N_r
        lp += logpdf(Normal(0, sigma), a[i])
    end
    for j in 1:N_c
        lp += logpdf(Normal(0, sigma), b[j])
    end
    for i in 1:N_r
        for d in 1:k
            lp += logpdf(Normal(0, sigma), X[d, i])
        end
    end
    for j in 1:N_c
        for d in 1:k
            lp += logpdf(Normal(0, sigma), Y[d, j])
        end
    end
    ll = loglikelihood!(D, E, mu, a, b, X, Y)
    return lp + ll
end