@torfjelde
Last active November 18, 2023 22:26
Using `loo` in Turing.jl
julia> using ArviZ, Turing
julia> J = 8;
julia> y = [28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0];
julia> σ = [15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0];
julia> schools = [
           "Choate",
           "Deerfield",
           "Phillips Andover",
           "Phillips Exeter",
           "Hotchkiss",
           "Lawrenceville",
           "St. Paul's",
           "Mt. Hermon"
       ];
julia> nwarmup, nsamples, nchains = 1000, 1000, 4;
julia> @model function model(J, y, σ, ::Type{TV} = Vector{Float64}) where {TV}
           μ ~ Normal(0, 5)
           τ ~ truncated(Cauchy(0, 5), 0, Inf)
           θ = TV(undef, J)
           θ .~ Normal(μ, τ)
           # If we want the POINT-WISE likelihoods, we need to refer to each
           # observation by a different symbol, i.e. we cannot use `MvNormal`.
           for j = 1:J
               y[j] ~ Normal(θ[j], σ[j])
           end
       end;
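For contrast, here is a sketch (not part of the original transcript) of the `MvNormal` formulation the comment above rules out; with it, `y` contributes a single joint log-likelihood rather than one value per school, which is useless for `loo`:

    using LinearAlgebra  # for Diagonal

    # NOT suitable for point-wise likelihoods: `y` is a single multivariate
    # observation, so only one joint log-likelihood is recorded for it.
    @model function model_joint(J, y, σ, ::Type{TV} = Vector{Float64}) where {TV}
        μ ~ Normal(0, 5)
        τ ~ truncated(Cauchy(0, 5), 0, Inf)
        θ = TV(undef, J)
        θ .~ Normal(μ, τ)
        y ~ MvNormal(θ, Diagonal(σ .^ 2))
    end;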
julia> param_mod = model(J, y, σ);
julia> sampler = NUTS(nwarmup, 0.8);
julia> turing_chns = sample(
           param_mod,
           sampler,
           nwarmup + nsamples;
           progress = false,
       );
┌ Info: Found initial step size
└ ϵ = 0.4
julia> # Extract the log-likelihoods and convert them into a form which ArviZ.jl accepts
ℓ = DynamicPPL.elementwise_loglikelihoods(param_mod, turing_chns)
Dict{DynamicPPL.VarName,Array{Float64,1}} with 8 entries:
y[3] => [-3.72618, -3.7039, -3.70514, -3.84801, -3.95597, -3.73676, -3.70973, -3.70654, -3.69412, -3.78229 … -3.73184, -3.72254, -3.97129, -3.81096, -3.73516, -3.7398, -3.77288, -3.7099, -3.69716, -3.69155]
y[1] => [-5.16847, -5.37776, -5.33898, -4.71719, -4.4846, -5.20381, -5.38348, -5.52579, -5.52824, -4.96575 … -4.82861, -4.82866, -4.65692, -4.99942, -4.61389, -5.07198, -5.26182, -5.36327, -5.89517, -6.03677]
y[4] => [-3.51839, -3.50616, -3.52167, -3.32892, -3.33368, -3.3899, -3.46509, -3.62935, -3.56666, -3.34411 … -3.31696, -3.31689, -3.64576, -3.49002, -3.31885, -3.31966, -3.37919, -3.69016, -3.47888, -3.57988]
y[5] => [-3.13272, -3.19451, -3.14482, -3.3349, -3.76755, -3.18067, -3.12868, -3.11639, -3.11626, -3.26104 … -3.16744, -3.16209, -3.42108, -3.11728, -3.22843, -3.41312, -3.20184, -3.2123, -3.13486, -3.39949]
y[6] => [-3.32513, -3.31684, -3.31948, -3.40729, -3.5989, -3.32907, -3.31691, -3.35456, -3.33646, -3.35431 … -3.33286, -3.3366, -3.49421, -3.3826, -3.36658, -3.32905, -3.50545, -3.42483, -3.47718, -3.44098]
y[7] => [-4.53587, -4.90501, -5.01885, -3.89998, -3.68522, -4.53326, -4.84988, -5.1454, -4.98813, -4.195 … -4.00933, -3.9955, -3.98242, -3.91085, -4.58724, -4.00131, -5.01847, -4.40975, -4.14685, -3.79092]
y[8] => [-4.01703, -4.03218, -4.01876, -3.87603, -3.8295, -3.95203, -4.01166, -4.07744, -4.04852, -3.89826 … -3.9914, -4.0061, -3.81249, -3.82682, -4.09209, -3.86704, -4.13824, -3.96275, -4.17168, -4.22254]
y[2] => [-3.46757, -3.50013, -3.45111, -3.24411, -3.23219, -3.35582, -3.45173, -3.70297, -3.59883, -3.31984 … -3.256, -3.27523, -3.25873, -3.33843, -3.23371, -3.34246, -3.30696, -3.87759, -3.40094, -3.53952]
julia> ℓ_mat = reduce(hcat, values(ℓ));
julia> ℓ_arr = reshape(ℓ_mat, 1, size(ℓ_mat)...); # (chain_idx, sample_idx, parameter_idx)
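Note that `values(ℓ)` iterates in the `Dict`'s arbitrary internal order (here `y[3], y[1], y[4], …`), so the columns of `ℓ_mat` need not line up with `y[1], …, y[J]`. `loo` itself is invariant to the ordering of data points, but if you want the columns to match the `school` coordinate, one possible fix is a sketch like the following (sorting the `VarName`s via `string` is an assumption, and lexicographic order only works for `J ≤ 9`):

    # Sort the keys as strings ("y[1]", "y[2]", ...) before concatenating.
    ks = sort(collect(keys(ℓ)); by = string)
    ℓ_mat = reduce(hcat, [ℓ[k] for k in ks])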
julia> idata = ArviZ.from_mcmcchains(
           turing_chns,
           coords = Dict("school" => schools),
           dims = Dict(
               "y" => ["school"],
               "σ" => ["school"],
               "θ" => ["school"],
           ),
           library = "Turing",
           log_likelihood = Dict("y" => ℓ_arr),
       )
InferenceData with groups:
> posterior
> log_likelihood
> sample_stats
julia> ArviZ.loo(idata)
1×7 DataFrame
│ Row │ loo │ loo_se │ p_loo │ n_samples │ n_data_points │ warning │ loo_scale │
│ │ Float64 │ Float64 │ Float64 │ Int64 │ Int64 │ Bool │ String │
├─────┼─────────┼─────────┼──────────┼───────────┼───────────────┼─────────┼───────────┤
│ 1 │ -30.717 │ 1.38963 │ 0.781578 │ 1000 │ 8 │ 0 │ log │
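Here `loo` is the estimated expected log point-wise predictive density (on the scale given by `loo_scale`), `loo_se` its standard error, and `p_loo` the effective number of parameters; `warning = 0` means the Pareto-k diagnostics did not flag any observations.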
@mgmverburg

Hey @torfjelde, this is super useful for me actually! The only thing I wonder is what it should look like when you have multiple chains (because of using MCMCThreads, for example). So far I figure it should maybe be something like:

    ℓ_mat = reduce(hcat, [vcat(collect(values(ℓ))[i]...) for i in 1:length(values(ℓ))])
    ℓ_arr = reshape(ℓ_mat, 4, length(chain), size(ℓ_mat)[2:end]...);

I am sure there is a cleaner, more robust way of writing what I am trying to achieve, but it seems like you would want to end up with n_chains x n_samples x n_parameters as the dimensions of the ℓ_arr variable, right?

@torfjelde
Author

Hey! Glad to hear :)
So I actually ended up adding this to the ArviZ.jl documentation (we'll include examples of this in the Turing tutorials soon too): https://arviz-devs.github.io/ArviZ.jl/stable/quickstart/#Additional-information-in-Turing.jl. That example uses multiple chains, so it should hopefully provide you with an answer!
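For reference, a rough sketch of the multi-chain case, under the assumption that each value of `ℓ` then comes back as an `nsamples × nchains` matrix (worth checking against the DynamicPPL version you are on):

    # Sample `nchains` chains in parallel.
    chains = sample(
        param_mod,
        sampler,
        MCMCThreads(),
        nwarmup + nsamples,
        nchains;
        progress = false,
    )
    ℓ = DynamicPPL.elementwise_loglikelihoods(param_mod, chains)

    # Assuming each ℓ[k] is (nsamples, nchains): stack the observations along a
    # third dimension, then permute to (nchains, nsamples, nparams) for ArviZ.jl.
    ks = sort(collect(keys(ℓ)); by = string)
    ℓ_arr = permutedims(cat((ℓ[k] for k in ks)...; dims = 3), (2, 1, 3))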
