Created
September 16, 2021 14:26
-
-
Save phipsgabler/7af6a80c7106af18fcce3fbfae690e36 to your computer and use it in GitHub Desktop.
Rough sketch of a mixed-effects beta regression allowing to use MixedModels.jl formula syntax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CategoricalArrays | |
using DataFrames | |
using Distributions | |
using DynamicPPL | |
using MixedModels | |
using Plots | |
using StatsFuns | |
using StatsModels | |
using StatsPlots | |
using Turing | |
function modelsize(X, Z) | |
N, K = size(X) | |
@assert size(Z, 1) == N | |
M = size(Z, 2) | |
return N, K, M | |
end | |
# See https://arxiv.org/pdf/1201.2375.pdf for mixed-effects beta regression | |
@model function mixed_effects_beta( | |
X, | |
Z | |
) | |
N, K, M = modelsize(X, Z) | |
y = similar(X, N) | |
# priors | |
# TODO: use TDist priors for random effects -- outlier users! | |
θ_fixed ~ MvNormal(zeros(K), ones(K) * 30) | |
θ_random ~ MvNormal(zeros(M), M == 0 ? ones(0, 0) : ones(M) * 30) | |
# conditional to circumevent PDMats bug: https://github.com/JuliaStats/PDMats.jl/issues/138 | |
γ_fixed ~ MvNormal(zeros(K), ones(K) * 30) | |
γ_random ~ MvNormal(zeros(M), M == 0 ? ones(0, 0) : ones(M) * 30) | |
# observations | |
η = X * θ_fixed .+ Z * θ_random | |
μ = logistic.(η) | |
ψ = X * γ_fixed .+ Z * γ_random | |
ϕ = exp.(ψ) | |
α = μ .* ϕ | |
β = (1 .- μ) .* ϕ | |
ϵ = eps(eltype(α)) | |
y .~ Beta.(clamp.(α, ϵ, Inf), clamp.(β, ϵ, Inf)) # prevent numerically zero α and β | |
end | |
apply_formula(f, data) = apply_schema(f, schema(f, data), MixedModel) | |
function get_modelcols(f, data) | |
f = apply_formula(f, data) | |
_, M = modelcols(f, data) | |
if M isa Tuple | |
return M | |
else | |
# no random effects -> no Z matrix | |
return M, similar(M, size(M, 1), 0) | |
end | |
end | |
function dummy_data(n_observations, n_stimuli, n_authors) | |
stimuli = [Symbol("stimulus_", i) => rand(n_observations) for i = 1:n_stimuli] | |
return DataFrame(stimuli..., | |
:author => categorical(rand(1:n_authors, n_observations))) | |
end | |
function construct_model(f, data, conditions) | |
X, Z = get_modelcols(f, data) | |
N, K, M = modelsize(X, Z) | |
haskey(conditions, :θ_fixed) && @assert size(conditions.θ_fixed) == (K,) | |
haskey(conditions, :γ_fixed) && @assert size(conditions.γ_fixed) == (K,) | |
haskey(conditions, :θ_random) && @assert size(conditions.θ_random) == (M,) | |
haskey(conditions, :γ_random) && @assert size(conditions.γ_random) == (M,) | |
haskey(conditions, :y) && @assert size(conditions.y) == (N,) | |
model = mixed_effects_beta(X, Z) | conditions | |
return model | |
end | |
function simulate(f, n_observations, n_authors, n_samples; conditions...) | |
data = simulate_data(n_observations, n_authors) | |
return simulate(f, data, n_samples; conditions...) | |
end | |
function simulate(f, data, n_samples; conditions...) | |
model = construct_model(f, data, NamedTuple(conditions)) | |
return [model() for _ in 1:n_samples] | |
end | |
infer(f::FormulaTerm, data, n_samples; conditions...) = infer(f, data, NUTS(0.65), n_samples; conditions...) | |
function infer(f::FormulaTerm, data, sampler, n_samples; conditions...) | |
model = construct_model(f, data, NamedTuple(conditions)) | |
return Turing.sample(model, sampler, n_samples) | |
end | |
## Example: | |
# data = dummy_data(10, 2, 2); | |
# data.response = simulate( | |
# @formula(0 ~ 1 + stimulus_1 + stimulus_2 + (1|author)), data, 1, | |
# θ_fixed = [-5, 5, 0], γ_fixed = [2, 0, 0], θ_random = zeros(2), γ_random = zeros(2) | |
# )[1]; | |
# s = infer(@formula(0 ~ 1 + stimulus_1 + stimulus_2 + (1|author)), data, NUTS(0.65), 5000; | |
# y = data.response, θ_random = zeros(2), γ_random = zeros(2)); | |
# plot(s) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment