@devmotion
Created December 28, 2019 00:05

using Turing
using Turing.RandomMeasures
using Random
using Statistics: mean
function stickbreaking(rpm = DirichletProcess(0.25))
    # Data
    data = [-2, 2, -1.5, 1.5]

    # Base distribution
    mu_0 = mean(data)
    sigma_0 = 4
    sigma_1 = 0.5
    tau0 = 1 / sigma_0^2  # precision of the base distribution (not used below)
    tau1 = 1 / sigma_1^2  # precision of the observation noise (not used below)

    # Stick-breaking process based on Papaspiliopoulos and Roberts (2008).
    @model sbimm(y, rpm, trunc) = begin
        # Base distribution.
        H = Normal(mu_0, sigma_0)

        # Latent assignments.
        N = length(y)
        z = tzeros(Int, N)

        # Truncated (finite approximation of the infinite) collection of breaking
        # points on the unit stick.
        v = tzeros(Float64, trunc)

        # Cluster locations.
        x = tzeros(Float64, trunc)

        # Draw breaking proportions and cluster locations.
        for k in 1:trunc
            v[k] ~ StickBreakingProcess(rpm)
            x[k] ~ H
        end

        # Weights.
        w = vcat(v[1], v[2:end] .* cumprod(1 .- v[1:end-1]))

        # Normalize weights to ensure they sum exactly to one.
        # This is required by the Categorical distribution in Distributions.
        w ./= sum(w)

        for i in 1:N
            # Draw cluster assignment.
            z[i] ~ Categorical(w)

            # Draw observation.
            y[i] ~ Normal(x[z[i]], sigma_1)
        end
    end

    # Compute empirical posterior distribution over partitions.
    Random.seed!(1234)
    mf = sbimm(data, rpm, 10)
    samples = sample(mf, SMC(), 10000)

    nothing
end
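
# A standalone sketch (not part of the original gist) of the truncated stick-breaking
# construction used above: for a Dirichlet process with concentration `alpha`, the
# breaking proportions follow Beta(1, alpha) (Sethuraman's construction), and the
# truncated weights sum to one only approximately, which is why `sbimm` normalizes
# `w` before passing it to `Categorical`. `Beta` is available here because Turing
# re-exports Distributions, as for `Normal` and `Categorical` above.
function stickbreaking_weights_demo(alpha = 0.25, trunc = 10)
    v = rand(Beta(1, alpha), trunc)
    w = vcat(v[1], v[2:end] .* cumprod(1 .- v[1:end-1]))
    return w, sum(w)  # sum(w) is close to, but in general not exactly, 1
end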
function sizebased(rpm = DirichletProcess(0.25))
    # Data
    data = [-2, 2, -1.5, 1.5]

    # Base distribution
    mu_0 = mean(data)
    sigma_0 = 4
    sigma_1 = 0.5
    tau0 = 1 / sigma_0^2  # precision of the base distribution (not used below)
    tau1 = 1 / sigma_1^2  # precision of the observation noise (not used below)

    # Size-biased sampling process.
    # Note: the `trunc` argument is not used in this construction; at most
    # `length(y)` clusters can be instantiated.
    @model sbsimm(y, rpm, trunc) = begin
        # Base distribution.
        H = Normal(mu_0, sigma_0)

        # Latent assignments.
        N = length(y)
        z = tzeros(Int, N)

        # Cluster locations and size-biased weights.
        x = tzeros(Float64, N)
        J = tzeros(Float64, N)

        # Number of instantiated clusters and remaining stick length.
        k = 0
        surplus = 1.0

        for i in 1:N
            # Assign the observation to an existing cluster or to the surplus stick.
            ps = vcat(J[1:k], surplus)
            z[i] ~ Categorical(ps)

            # Instantiate a new cluster if the surplus stick was chosen.
            if z[i] > k
                k += 1
                J[k] ~ SizeBiasedSamplingProcess(rpm, surplus)
                x[k] ~ H
                surplus -= J[k]
            end

            # Draw observation.
            y[i] ~ Normal(x[z[i]], sigma_1)
        end
    end

    # Compute empirical posterior distribution over partitions.
    Random.seed!(1234)
    mf = sbsimm(data, rpm, 100)
    samples = sample(mf, SMC(), 1000)

    nothing
end
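
# Hypothetical entry point (not part of the original gist): run both constructions
# with the default `DirichletProcess(0.25)` mixing measure. Each function seeds the
# RNG, draws SMC samples internally, and returns `nothing`.
stickbreaking()
sizebased()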