@phipsgabler
Created September 16, 2021 14:26
Rough sketch of a mixed-effects beta regression that allows using MixedModels.jl formula syntax
using CategoricalArrays
using DataFrames
using Distributions
using DynamicPPL
using MixedModels
using Plots
using StatsFuns
using StatsModels
using StatsPlots
using Turing
function modelsize(X, Z)
    # number of observations N, fixed-effects columns K, random-effects columns M
    N, K = size(X)
    @assert size(Z, 1) == N
    M = size(Z, 2)
    return N, K, M
end
# See https://arxiv.org/pdf/1201.2375.pdf for mixed-effects beta regression
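# The Beta likelihood below uses the mean/precision parameterization:
#     μ = logistic(η),  ϕ = exp(ψ),  α = μ * ϕ,  β = (1 - μ) * ϕ,
# so that E[y] = μ and Var[y] = μ * (1 - μ) / (1 + ϕ); η and ψ are the linear predictors
# built from the fixed- and random-effects design matrices X and Z.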
@model function mixed_effects_beta(
    X,
    Z
)
    N, K, M = modelsize(X, Z)
    y = similar(X, N)

    # priors
    # TODO: use TDist priors for random effects -- outlier users!
    θ_fixed ~ MvNormal(zeros(K), ones(K) * 30)
    θ_random ~ MvNormal(zeros(M), M == 0 ? ones(0, 0) : ones(M) * 30)
    # conditional to circumvent PDMats bug: https://github.com/JuliaStats/PDMats.jl/issues/138
    γ_fixed ~ MvNormal(zeros(K), ones(K) * 30)
    γ_random ~ MvNormal(zeros(M), M == 0 ? ones(0, 0) : ones(M) * 30)

    # observations
    η = X * θ_fixed .+ Z * θ_random   # linear predictor for the mean
    μ = logistic.(η)
    ψ = X * γ_fixed .+ Z * γ_random   # linear predictor for the precision
    ϕ = exp.(ψ)
    α = μ .* ϕ
    β = (1 .- μ) .* ϕ
    ϵ = eps(eltype(α))
    y .~ Beta.(clamp.(α, ϵ, Inf), clamp.(β, ϵ, Inf)) # prevent numerically zero α and β
end
apply_formula(f, data) = apply_schema(f, schema(f, data), MixedModel)
function get_modelcols(f, data)
    # returns the fixed-effects matrix X and the random-effects matrix Z
    # (an empty N × 0 matrix if the formula contains no random effects)
    f = apply_formula(f, data)
    _, M = modelcols(f, data)
    if M isa Tuple
        return M
    else
        # no random effects -> no Z matrix
        return M, similar(M, size(M, 1), 0)
    end
end
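## For example (a hypothetical call, using the dummy data helper defined below; shapes assume
## MixedModels returns the random-effects term for `(1|author)` as an N × n_levels matrix):
# X, Z = get_modelcols(@formula(0 ~ 1 + stimulus_1 + (1|author)), dummy_data(10, 1, 3));
# size(X)  # (10, 2): intercept and stimulus_1
# size(Z)  # (10, 3): one indicator column per author level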
function dummy_data(n_observations, n_stimuli, n_authors)
    stimuli = [Symbol("stimulus_", i) => rand(n_observations) for i = 1:n_stimuli]
    return DataFrame(stimuli...,
                     :author => categorical(rand(1:n_authors, n_observations)))
end
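## E.g. dummy_data(5, 2, 3) gives a 5-row DataFrame with columns :stimulus_1 and :stimulus_2
## (uniform on [0, 1]) and a categorical :author column drawn from 1:3.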
function construct_model(f, data, conditions)
    X, Z = get_modelcols(f, data)
    N, K, M = modelsize(X, Z)
    haskey(conditions, :θ_fixed) && @assert size(conditions.θ_fixed) == (K,)
    haskey(conditions, :γ_fixed) && @assert size(conditions.γ_fixed) == (K,)
    haskey(conditions, :θ_random) && @assert size(conditions.θ_random) == (M,)
    haskey(conditions, :γ_random) && @assert size(conditions.γ_random) == (M,)
    haskey(conditions, :y) && @assert size(conditions.y) == (N,)
    model = mixed_effects_beta(X, Z) | conditions
    return model
end
function simulate(f, n_observations, n_stimuli, n_authors, n_samples; conditions...)
    # generate dummy covariates, then simulate responses from the (conditioned) model
    data = dummy_data(n_observations, n_stimuli, n_authors)
    return simulate(f, data, n_samples; conditions...)
end
function simulate(f, data, n_samples; conditions...)
    model = construct_model(f, data, NamedTuple(conditions))
    return [model() for _ in 1:n_samples]
end
infer(f::FormulaTerm, data, n_samples; conditions...) = infer(f, data, NUTS(0.65), n_samples; conditions...)
function infer(f::FormulaTerm, data, sampler, n_samples; conditions...)
    model = construct_model(f, data, NamedTuple(conditions))
    return Turing.sample(model, sampler, n_samples)
end
## Example:
# data = dummy_data(10, 2, 2);
# data.response = simulate(
#     @formula(0 ~ 1 + stimulus_1 + stimulus_2 + (1|author)), data, 1,
#     θ_fixed = [-5, 5, 0], γ_fixed = [2, 0, 0], θ_random = zeros(2), γ_random = zeros(2)
# )[1];
# s = infer(@formula(0 ~ 1 + stimulus_1 + stimulus_2 + (1|author)), data, NUTS(0.65), 5000;
#           y = data.response, θ_random = zeros(2), γ_random = zeros(2));
# plot(s)
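## Optionally inspect the posterior numerically (a hedged suggestion; `describe` and `group`
## come from MCMCChains, which Turing loads -- an explicit `using MCMCChains` may be needed):
# describe(s)
# mean(group(s, :θ_fixed))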