Skip to content

Instantly share code, notes, and snippets.

@Nosferican
Last active Oct 3, 2018
Embed
What would you like to do?
Draft of Multinomial Logistic Regression
# This is the draft code for generalizing GLM to vector generalized linear models (VGLM)
# Compared to Stata's mlogit (13.1) and R's multinom {nnet}, need to verify and correct mlogit vcov
# Code tested with Julia v"0.7.0-DEV.4993"
# Special Matrices
using LinearAlgebra: Hermitian, LowerTriangular, UpperTriangular, Diagonal
# Factorizations
using LinearAlgebra: Factorization, qrfact, cholfact!, QRPivoted, norm
# Functions
using LinearAlgebra: mul!, diag, copytri!
# LAPACK
using LinearAlgebra.LAPACK.potri!
# StatsBase
using StatsBase: ConvergenceException, StatisticalModel, RegressionModel,
FrequencyWeights, model_response
# Distributions
using Distributions: Distribution, Normal, Multinomial, Poisson, logpdf
# StatsFuns
using StatsFuns: xlogy, logit, softmax
# DataFrames
using DataFrames: DataFrame, categorical!
# CategoricalArrays
using CategoricalArrays: AbstractCategoricalVector, CategoricalVector,
CategoricalValue, levels
# StatsModels
using StatsModels: @formula, ModelFrame, ModelMatrix, DummyCoding
# StatsBase
import StatsBase: fit!, deviance, loglikelihood, coef, informationmatrix,
isfitted, islinear, vcov, response, weights, stderror
const AcceptedValues = Union{AbstractFloat,Integer,AbstractString}
const AcceptedDistributions = Union{Normal,Poisson,Multinomial}
"""
AbstractLink
Abstract type for link functions.
"""
abstract type AbstractLink end
"""
IdentityLink <: AbstractLink
Return a contrete type of the identity link function.
"""
struct IdentityLink <: AbstractLink end
"""
LogitLink <: AbstractLink
Return a contrete type of the logit link function.
"""
struct LogitLink <: AbstractLink end
"""
LogLink <: AbstractLink
Return a contrete type of the log link function.
"""
struct LogLink <: AbstractLink end
"""
RandomComponent(obj::AbstractVector{<:AcceptedValues})
Fields:
Response::AbstractVecOrMat{<:AcceptedValues}
Distribution::AcceptedDistributions
Return a composite type of a response vector and a suitable distribution.
For continous values (AbstractFloat) the distribution is: Normal.
For integer values (Integer) the distribution is: Poisson.
For categorical values (AbstractString,CategoricalValue) the distribution is Multinomial.
"""
mutable struct RandomComponent{R<:AbstractVecOrMat{<:AcceptedValues},
D<:AcceptedDistributions}
Response::R
Distribution::D
Reference::Int64
Offset::Vector{Float64}
function RandomComponent(obj::AbstractVector{<:AbstractFloat})
return new{Vector{Float64},Normal}(obj, Normal(), zero(Int64), zeros(0))
end
function RandomComponent(obj::AbstractVector{<:Integer},
offset::AbstractVector{<:Real})
all(obj .≥ zero(eltype(obj))) || throw(ArgumentError("response vector should be non-negative."))
return new{Vector{Int64},Poisson}(obj, Poisson(), zero(Int64), offset)
end
function RandomComponent(obj::AbstractVector{<:T},
base::V) where {T<:Union{AbstractString,CategoricalValue}, V<:Any}
vals = levels(obj)
base vals || throw(ArgumentError("base category must be a level of the response"))
length(vals) > one(Int64) || throw(ArgumentError("response must have more than one value."))
return new{BitMatrix,Multinomial}(mapreduce(val -> obj .== val, hcat, levels(obj)),
Multinomial(0,[1]), findfirst(isequal(base), vals),
zeros(0))
end
end
"""
LinearPredictor(obj::AbstractMatrix{<:Real})
Fields:
ModelMatrix::Matrix{Float64}
Factorization::QRPivoted{Float64,Matrix{Float64}}
Rank::BitVector
Return a composite type with the model matrix promoted to full rank by dropping
columns which have a diagonal absolute value less than `sqrt(eps())` in the R
matrix from a pivoted QR factorization.
"""
mutable struct LinearPredictor
ModelMatrix::Matrix{Float64}
Factorization::QRPivoted{Float64,Matrix{Float64}}
Rank::BitVector
function LinearPredictor(obj::AbstractMatrix{<:Real})
F = qrfact(obj, Val(true))
Rank = abs.(diag(F.R)) .≥ sqrt(eps())
if any(elem -> !elem, Rank)
obj = obj[:,Rank]
F = qrfact(obj, Val(true))
end
return new(obj, F, Rank)
end
end
"""
GeneralizedLinearModel <: RegressionModel
GeneralizedLinearModel(linearpredictor::AbstractMatrix{<:Real},
response::AbstractVector{<:Union{Real,AbstractString,CategoricalValue}};
weights::FrequencyWeights = FrequencyWeights(Vector{Int64}()))
Generalized linear model for countinous, count, and categorical models.
"""
mutable struct GeneralizedLinearModel{RC<:RandomComponent,
LP<:LinearPredictor,
LF<:AbstractLink,
θ<:AbstractVecOrMat{<:Real}} <: RegressionModel
RandomComponent::RC
LinearPredictor::LP
LinkFunction::LF
Weights::FrequencyWeights
Fit::Bool
β::θ
Ψ::Vector{Hermitian{Float64,Matrix{Float64}}}
Deviance::Float64
LogLikelihood::Float64
function GeneralizedLinearModel(linearpredictor::AbstractMatrix{<:Real},
response::AbstractVector{<:Integer};
weights::FrequencyWeights = FrequencyWeights(Vector{Int64}()),
offset::AbstractVector{<:Real} = zeros(0))
length(response) == size(linearpredictor, 1) ||
throw(DimensionMismatch("number of observations in response does not match those in the linear predictor."))
rc = RandomComponent(response, offset)
lp = LinearPredictor(linearpredictor)
lf = canlink(rc)
Fit = false
β = zeros(size(lp.ModelMatrix, 2))
Ψ = Vector{Hermitian{Float64,Matrix{Float64}}}(0)
D = Inf
= Inf
return new{typeof(rc),typeof(lp),typeof(lf),typeof(β)}(rc, lp, lf, weights, Fit, β, Ψ, D, ℓ)
end
function GeneralizedLinearModel(linearpredictor::AbstractMatrix{<:Real},
response::AbstractVector{<:Union{AbstractString,CategoricalValue}};
weights::FrequencyWeights = FrequencyWeights(Vector{Int64}()),
base::Any = first(levels(response)))
length(response) == size(linearpredictor, 1) ||
throw(DimensionMismatch("number of observations in response does not match those in the linear predictor."))
rc = RandomComponent(response, base)
lp = LinearPredictor(linearpredictor)
lf = canlink(rc)
Fit = false
β = zeros(size(lp.ModelMatrix, 2), length(levels(response)) - 1)
Ψ = Vector{Hermitian{Float64,Matrix{Float64}}}(0)
D = Inf
= Inf
return new{typeof(rc),typeof(lp),typeof(lf),typeof(β)}(rc, lp, lf, weights, Fit, β, Ψ, D, ℓ)
end
function GeneralizedLinearModel(linearpredictor::AbstractMatrix{<:Real},
response::AbstractVector{<:AbstractFloat};
weights::FrequencyWeights = FrequencyWeights(Vector{Int64}()))
length(response) == size(linearpredictor, 1) ||
throw(DimensionMismatch("number of observations in response does not match those in the linear predictor."))
rc = RandomComponent(response)
lp = LinearPredictor(linearpredictor)
lf = canlink(rc)
Fit = false
k = size(lp.ModelMatrix, 2)
β = zeros(size(lp.ModelMatrix, 2))
Ψ = Vector{Hermitian{Float64,Matrix{Float64}}}(0)
D = Inf
= Inf
return new{typeof(rc),typeof(lp),typeof(lf),typeof(β)}(rc, lp, lf, weights, Fit, β, Ψ, D, ℓ)
end
end
"""
link(::GeneralizedLinearModel)
Return the link.
"""
link(obj::GeneralizedLinearModel) = obj.LinkFunction
"""
distribution(::GeneralizedLinearModel)
Return the distribution.
"""
distribution(obj::GeneralizedLinearModel) = obj.RandomComponent.Distribution
"""
canonicallink(::Distribution)
Return the canonical link function for the distribution.
"""
function canlink(obj::Distribution)
throw(error("canlink is not defined for $(typeof(obj))"))
end
canlink(::Multinomial) = LogitLink()
canlink(::Normal) = IdentityLink()
canlink(::Poisson) = LogLink()
canlink(::RandomComponent{<:AbstractMatrix{<:Bool},Multinomial}) = LogitLink()
canlink(::RandomComponent{<:AbstractVector{<:AbstractFloat},Normal}) = IdentityLink()
canlink(::RandomComponent{<:AbstractVector{<:Integer},Poisson}) = LogLink()
"""
linkfunc(::AbstractLink, obj::T) where T<:Union{AbstractVector{<:Bool},Real}
Return the value of the link function for `obj`.
"""
function linkfunc(l::AbstractLink, obj::T) where T<:Union{Real,AbstractVector{<:Bool}}
throw(error("linkfunc is not defined for $(typeof(l))"))
end
linkfunc(::LogitLink, obj::AbstractVector{<:Bool}) = softmax(obj)
linkfunc(::IdentityLink, obj::Real) = obj
linkfunc(::LogLink, obj::Real) = log(obj)
"""
η₀(distribution::Distribution, obj::T) where T<:Union{AbstractVector{<:Bool},Real}
Return suitable initiation for η given the distribution.
"""
η₀(distribution::Distribution, obj::Real) = linkfunc(canlink(distribution), obj)
η₀(distribution::Multinomial, obj::AbstractVector{<:Bool}) = softmax(obj)
"""
g(::AbstractLink, η::T) where T<:Union{AbstractVector{<:Bool},Real}
Return μ.
"""
function g end
g(::IdentityLink, η::Real) = η
g(::LogitLink, η::AbstractVector{<:Real}) = softmax(η)
g(::LogLink, η::Real) = exp(η)
"""
g′(::AbstractLink, μ::T) where T<:Union{AbstractVector{<:Bool},Real}
Return μ′ which is w for models with canonical link and `eps()` if the value is less.
"""
function g′ end
g′(::IdentityLink, μ::Real) = one(Float64)
function g′(::LogitLink, μ::AbstractVector{<:Real})
return map(elem -> max(eps(), elem * (1 - elem)), μ)
end
g′(::LogLink, μ::Real) = max(eps(), μ)
"""
wz!(w::AbstractVector{<:Real},
z::AbstractVector{<:Real},
η::AbstractVector{<:Real},
model::GeneralizedLinearModel)
In-place update for variables and return the loglikelihood.
"""
function wz!(w::AbstractVector{<:AbstractFloat},
z::AbstractVector{<:AbstractFloat},
η::AbstractVector{<:AbstractFloat},
model::GeneralizedLinearModel)
Link = link(model)
Dist = distribution(model)
b = response(model)
ω = weights(model)
ℓℓ = zero(Float64)
offset = model.RandomComponent.Offset
if isempty(ω)
if isempty(offset)
@inbounds for idx eachindex(η, w, z)
μ = g(Link, η[idx])
w[idx] = g′(Link, μ)
z[idx] = η[idx] + (b[idx] - μ) / w[idx]
ℓℓ += unitloglik(Dist, b[idx], μ)
end
else
@inbounds for idx eachindex(η, w, z, offset)
μ = g(Link, η[idx])
w[idx] = g′(Link, μ)
z[idx] = η[idx] + (b[idx] - μ) / w[idx] - offset[idx]
ℓℓ += unitloglik(Dist, b[idx], μ)
end
end
else
if isempty(offset)
@inbounds for idx eachindex(η, w, z, ω)
μ = g(Link, η[idx])
w[idx] = g′(Link, μ)
z[idx] = η[idx] + (b[idx] - μ) / w[idx]
w[idx] *= ω[idx]
ℓℓ += ω[idx] * unitloglik(Dist, b[idx], μ)
end
else
@inbounds for idx eachindex(η, w, z, ω, offset)
μ = g(Link, η[idx])
w[idx] = g′(Link, μ)
z[idx] = η[idx] + (b[idx] - μ) / w[idx] - offset[idx]
w[idx] *= ω[idx]
ℓℓ += ω[idx] * unitloglik(Dist, b[idx], μ)
end
end
end
return ℓℓ
end
function wz!(w::AbstractMatrix{<:AbstractFloat},
z::AbstractMatrix{<:AbstractFloat},
η::AbstractMatrix{<:AbstractFloat},
model::GeneralizedLinearModel)
Link = link(model)
Dist = distribution(model)
b = response(model)
ω = weights(model)
ℓℓ = zero(Float64)
if isempty(ω)
@inbounds for idx 1:size(b, 1)
μ = g(Link, η[idx,:])
w[idx,:] = g′(Link, μ)
z[idx,:] = η[idx,:] .+ (b[idx,:] .- μ) ./ w[idx,:]
ℓℓ += unitloglik(Dist, b[idx,:], μ)
end
else
@inbounds for idx eachindex(η, w, z, ω)
μ = g(Link, η[idx,:])
w[idx,:] = g′(Link, μ)
z[idx,:] = η[idx,:] + (b[idx,:] .- μ) ./ w[idx,:]
w[idx,:] .*= ω[idx]
ℓℓ += ω[idx] * unitloglik(Dist, b[idx,:], μ)
end
end
return ℓℓ
end
"""
initialvalues(obj::GeneralizedLinearModel)
Return A, b, F, Q, distribution, link, x, w, η, μ, μ′, σ², ω, v, categories, ℓℓ, lastchange, tol.
"""
function initialvalues end
function initialvalues(obj::GeneralizedLinearModel{<:RandomComponent{<:AbstractVector{<:Real},D},
<:LinearPredictor,
<:AbstractLink}) where D<:Distribution
m, n = size(obj.LinearPredictor.ModelMatrix)
b = obj.RandomComponent.Response
Dist = obj.RandomComponent.Distribution
offset = obj.RandomComponent.Offset
x = zeros(n)
w = Vector{Float64}(undef, m)
z = Vector{Float64}(undef, m)
if isempty(offset)
η = η₀.(Ref(Dist), b)
else
η = η₀.(Ref(Dist), b + offset)
end
return x, w, z, η
end
function initialvalues(obj::GeneralizedLinearModel{<:RandomComponent{<:AbstractMatrix{<:Bool},Multinomial},
<:LinearPredictor,
<:LogitLink})
m, n = size(obj.LinearPredictor.ModelMatrix)
b = obj.RandomComponent.Response
k = size(b, 2)
x = zeros(n, k)
w = Matrix{Float64}(undef, m, k)
z = Matrix{Float64}(undef, m, k)
η = mapslices(softmax, b, 2)
return x, w, z, η
end
"""
update_xη!(x::AbstractVector{<:AbstractFloat},
η::AbstractVector{<:AbstractFloat},
w::AbstractVector{<:Real},
z::AbstractVector{<:Real},
model::GeneralizedLinearModel)
update_xη!(x::AbstractMatrix{<:AbstractFloat},
η::AbstractMatrix{<:AbstractFloat},
w::AbstractMatrix{<:Real},
z::AbstractMatrix{<:Real},
model::GeneralizedLinearModel)
In-place update of x and η.
"""
function update_xη! end
function update_xη!(x::AbstractVector{<:AbstractFloat},
η::AbstractVector{<:AbstractFloat},
w::AbstractVector{<:Real},
z::AbstractVector{<:Real},
model::GeneralizedLinearModel)
Q = Matrix(model.LinearPredictor.Factorization.Q)
offset = model.RandomComponent.Offset
C = cholfact!(Hermitian(transpose(Q) * Diagonal(w) * Q)).factors
x[:] = LowerTriangular(transpose(C)) \ (transpose(Q) * Diagonal(w) * z)
x[:] = UpperTriangular(C) \ x
η[:] = muladd(Q, x, offset)
end
function update_xη!(x::AbstractMatrix{<:AbstractFloat},
η::AbstractMatrix{<:AbstractFloat},
w::AbstractMatrix{<:Real},
z::AbstractMatrix{<:Real},
model::GeneralizedLinearModel)
Q = Matrix(model.LinearPredictor.Factorization.Q)
categories = find(1:size(obj.RandomComponent.Response, 2) .!=
obj.RandomComponent.Reference)
for idx categories
C = cholfact!(Hermitian(transpose(Q) * Diagonal(w[:,idx]) * Q)).factors
x[:,idx] = LowerTriangular(transpose(C)) \ (transpose(Q) * Diagonal(w[:,idx]) * z[:,idx])
x[:,idx] = UpperTriangular(C) \ x[:,idx]
end
mul!(η, Q, x)
end
"""
update!(obj::GeneralizedLinearModel,
w::AbstractVector{<:Real},
η::AbstractVecOrMat{<:Real},
ℓℓ::Real)
In-place update of the model with results from fit.
"""
function update! end
function update!(obj::GeneralizedLinearModel,
w::AbstractVector{<:Real},
η::AbstractVector{<:Real},
ℓℓ::Real)
A = obj.LinearPredictor.ModelMatrix
F = obj.LinearPredictor.Factorization
ω = obj.Weights
b = obj.RandomComponent.Response
offset = obj.RandomComponent.Offset
Dist = obj.RandomComponent.Distribution
Link = obj.LinkFunction
if isempty(offset)
setfield!(obj, , F \ η)
else
setfield!(obj, , F \- offset))
end
setfield!(obj, :LogLikelihood, ℓℓ)
setfield!(obj, :Deviance, deviance(Dist, b, g.(Ref(Link), η), ω))
setfield!(obj, , [Hermitian(inv(cholfact!(Hermitian(transpose(A) *
Diagonal(w) * A))))])
setfield!(obj, :Fit, true)
return nothing
end
function update!(obj::GeneralizedLinearModel,
w::AbstractMatrix{<:Real},
η::AbstractMatrix{<:Real},
ℓℓ::Real)
A = obj.LinearPredictor.ModelMatrix
F = obj.LinearPredictor.Factorization
categories = find(1:size(η, 2) .!= obj.RandomComponent.Reference)
setfield!(obj, , (F \ η)[:,categories])
setfield!(obj, :LogLikelihood, ℓℓ)
setfield!(obj, :Deviance, -2ℓℓ)
setfield!(obj, , map(idx -> Hermitian(inv(cholfact!(
Hermitian(transpose(A) *
Diagonal(w[:,idx]) *
A)))), categories))
setfield!(obj, :Fit, true)
return nothing
end
function fit!(obj::GeneralizedLinearModel{<:RandomComponent{<:AbstractVector{<:AbstractFloat},
<:Normal},
<:LinearPredictor,
<:IdentityLink}; iterations = 50, tol = 1e-9)
A = obj.LinearPredictor.ModelMatrix
F = obj.LinearPredictor.Factorization
ω = obj.Weights
b = obj.RandomComponent.Response
Dist = obj.RandomComponent.Distribution
offset = obj.RandomComponent.Offset
if isempty(ω)
x = F \ b
Ψ = Hermitian(copytri!(potri!('U', copy(F.R)), 'U', true)[F.p, F.p])
else
C = cholfact!(Hermitian(transpose(Q) * Diagonal(w) * Q)).factors
x = LowerTriangular(transpose(C)) \ (transpose(Q) * Diagonal(w) * y)
x .= F \ (Q * (UpperTriangular(C) \ x))
Ψ = Hermitian(inv(cholfact!(transpose(A) * Diagonal(w) * A)))
end
μ = A * x
ℓℓ = loglikelihood(Dist, b, μ, ω)
setfield!(obj, , x)
setfield!(obj, :LogLikelihood, ℓℓ)
setfield!(obj, :Deviance, deviance(Dist, b, μ, ω))
setfield!(obj, , [Ψ])
setfield!(obj, :Fit, true)
return nothing
end
function fit!(obj::GeneralizedLinearModel; iterations = 50, tol = 1e-9)
x, w, z, η = initialvalues(obj)
F = obj.LinearPredictor.Factorization
ℓℓ = -Inf
lastchange = Inf
for itt 1:iterations
ℓℓ₀ = ℓℓ
ℓℓ = wz!(w, z, η, obj)
update_xη!(x, η, w, z, obj)
lastchange = abs((ℓℓ - ℓℓ₀) / ℓℓ₀)
if lastchange < tol || iszero(ℓℓ)
update!(obj, w, η, ℓℓ)
return nothing
end
end
throw(ConvergenceException(iterations, lastchange, tol))
end
"""
unitdeviance(::Distribution, y, μ)
Return the unit deviance.
"""
function unitdeviance end
unitdeviance(::Multinomial, y::AbstractVector{<:Bool}, μ::AbstractVector{<:Real}) =
-2 * log(μ[findfirst(y)])
unitdeviance(::Normal, y::Real, μ::Real) = abs2(y - μ)
unitdeviance(::Poisson, y::Real, μ::Real) = 2 * (xlogy(y, y / μ) - (y - μ))
"""
unitloglik(::Distribution, y, μ)
Return the unit log-likelihood.
"""
unitloglik(::Multinomial, y::AbstractVector{<:Bool}, μ::AbstractVector{<:Real}) = logpdf(Multinomial(1, μ), y)
unitloglik(::Normal, y::Real, μ::Real) = logpdf(Normal(μ), y)
unitloglik(::Poisson, y::Real, μ::Real) = logpdf(Poisson(μ), y)
function loglikelihood(distribution::Distribution,
y::AbstractVector{<:Real},
μ::AbstractVector{<:Real},
ω::FrequencyWeights)
ℓℓ = zero(Float64)
if isempty(ω)
for idx eachindex(y, μ)
@inbounds ℓℓ += unitloglik(distribution, y[idx], μ[idx])
end
else
for idx eachindex(y, μ, ω)
@inbounds ℓℓ += ω[idx] * unitloglik(distribution, y[idx], μ[idx])
end
end
return ℓℓ
end
function loglikelihood(distribution::Multinomial,
y::AbstractMatrix{<:Bool},
μ::AbstractMatrix{<:Real},
ω::FrequencyWeights)
ℓℓ = zero(Float64)
if isempty(ω)
for idx 1:size(y, 1)
@inbounds ℓℓ += unitloglik(distribution, y[idx,:], μ[idx,:])
end
else
for idx 1:size(y, 1)
@inbounds ℓℓ += ω[idx] * unitloglik(distribution, y[idx,:], μ[idx,:])
end
end
return ℓℓ
end
function deviance(distribution::Distribution,
y::AbstractVector{<:Real},
μ::AbstractVector{<:Real},
ω::FrequencyWeights)
dev = zero(Float64)
if isempty(ω)
for idx eachindex(y, μ)
@inbounds dev += unitdeviance(distribution, y[idx], μ[idx])
end
else
for idx eachindex(y, μ, ω)
@inbounds dev += ω[idx] * unitdeviance(distribution, y[idx], μ[idx])
end
end
return dev
end
# These methods are part of the StatsBase StatisticalModel API
response(obj::GeneralizedLinearModel) = obj.RandomComponent.Response
weights(obj::GeneralizedLinearModel) = obj.Weights
deviance(obj::GeneralizedLinearModel) = obj.Deviance
loglikelihood(obj::GeneralizedLinearModel) = obj.LogLikelihood
coef(obj::GeneralizedLinearModel) = obj.β
informationmatrix(obj::GeneralizedLinearModel) = obj.Ψ
isfitted(obj::GeneralizedLinearModel) = obj.Fit
islinear(obj::GeneralizedLinearModel) = isa(obj.LinkFunction, IdentityLink)
function vcov(obj::GeneralizedLinearModel)
im = informationmatrix(obj)
if isone(length(im))
im = first(im)
end
return im
end
function stderror(obj::GeneralizedLinearModel)
V = vcov(obj)
if isa(V, AbstractMatrix{<:Real})
return sqrt.(diag(V))
else
return mapreduce(im -> sqrt.(diag(im)), hcat, V)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment