torfjelde/prophet-using-Turing.jl

## prophet-using-Turing.jl
# Partial implementation of linear-trend model described in [1] using Turing.jl.
#
# # References
# [1] Taylor, S. J., & Letham, B., Forecasting at scale, PeerJ Preprints, 5(), 3190–2 (2017).  http://dx.doi.org/10.7287/peerj.preprints.3190v2

using DataFrames, CSV
using Turing
using MCMCChain, Plots, StatsPlots

pyplot()

# seems to be fastest in simple case, and reverse-mode completely fails in full case due to memory-usage
setadbackend(:forward_diff)

"""
    changepoint_matrix(t::AbstractArray, t_change::AbstractArray)

Computes matrix `A`, the change-point matrix.
"""
function changepoint_matrix(t::AbstractArray, t_change::AbstractArray)
    T = size(t, 1)
    S = size(t_change, 1)

    A = zeros(T, S)
    a_row = zeros(S)  # will be copied on each t
    cp_idx = 1

    for i = 1:T
        while (cp_idx ≤ S) && (t[i] ≥ t_change[cp_idx])
            a_row[cp_idx] = 1
            cp_idx = cp_idx + 1
        end

        A[i, :] = a_row
    end

    A
end

# linear trend with switch-points
function linear_trend(k::Real,
                      m::Real,
                      δ::AbstractVector,
                      t::AbstractVector,
                      A::AbstractArray,
                      t_change::AbstractVector)
    # makes it continuous
    γ = (- t_change .* δ)

    return (k .+ A * δ) .* t + (m .+ A * γ)
end

# linear trend without switch-points
function linear_trend(k::Real,
                      m::Real,
                      t::AbstractVector)
    return (k .* t) .+ m
end

"""
    seasonal(ts, n::Integer; period = 365.25)

Constructs `n` Fourier coefficients for the times `ts` with period `period`.
"""
function seasonal(ts, n::Integer; period = 365.25)
    T = size(ts, 1)
    X = zeros(T, 2 * n)
    for tᵢ = 1:T
        for i = 1:n
            X[tᵢ, i] = cos(2π * i * ts[tᵢ] / period)
            X[tᵢ, i + n] = sin(2π * i * ts[tᵢ] / period)
        end
    end

    X
end

function seasonal(ts, steps::AbstractArray; period = 365.25)
    T = size(ts, 1)
    K = size(steps, 1)

    X = zeros(T, 2 * K)
    for tᵢ = 1:T
        for i = 1:K
            X[tᵢ, i] = cos(2π * steps[i] * ts[tᵢ] / period)
            X[tᵢ, i + K] = sin(2π * steps[i] * ts[tᵢ] / period)
        end
    end

    X
end

# A handy helper function to rescale our dataset.
function standardize(x)
    return (x .- mean(x, dims=1)) ./ std(x, dims=1), x
end

# Another helper function to unstandardize our datasets.
function unstandardize(x, orig)
    return x .* std(orig, dims=1) .+ mean(orig, dims=1)
end

# source: https://github.com/facebook/prophet/blob/master/examples/example_wp_log_peyton_manning.csv
d = DataFrame(CSV.read("example_wp_log_peyton_manning.csv"));

# standardize the output
y_true_orig = collect(skipmissing(d[:y]));
(y_true, y_true_orig) = standardize(y_true_orig)

# contstructing the "features"
T = size(d[:ds], 1);                     # number of time-steps
ts = collect(1:T);                       # time-steps

X = seasonal(ts, 10, period = 365.25)    # yearly seasonal component; N = 10 as suggested
X = hcat(X, seasonal(ts, 3, period = 7)) # weekly seasonal component; N = 3 as suggested

K = size(X, 2)

σ_s = 10 .* ones(K);                     # scale on seasonality prior

                                         # change point once every month and then use a sparse prior
t_change = [ts[i] for i = 1:30:T];       # times of change-points
S = size(t_change, 1);                   # number of change-pointsa

A = changepoint_matrix(ts, t_change);    # construct the change-point matrix

s_a = ones(K)                            # all additive contributions
s_m = zeros(K)                           # no multiplicative contribution

τ = 1.0;

# working model
@model prophet(y) = begin
    # variance of
    σ_obs ~ TruncatedNormal(0,100, 0, Inf)

    # σ should be of length `K`
    β = Array{Real}(undef, K)
    for i = 1:K
        β[i] ~ Normal(0, σ_s[i])
    end

    μ = X * (β .* s_a)

    for tᵢ = 1:T
        y[tᵢ] ~ Normal(
            μ[tᵢ],
            σ_obs
        )
    end
end

# sample posterior
iterations = 500
chain = sample(prophet(y_true), NUTS(iterations, 200, 0.65))

# compute estimate of μ
β_chain = chain[:β].value
μ_chain = zeros(iterations, T);

for i = 1:iterations
    μ_chain[i, :] = X * (β_chain[i, :] .* s_a)
end
μ = collect(transpose(mean(μ_chain, dims=1)))
μ_variance = transpose(var(μ_chain, dims=1))
μ_std = sqrt.(μ_variance)

# visualize prediction vs. true
scatter(y_true, label = "true", alpha = .8, markersize = 2, markerstrokewidth = 0.1, size=(1200, 500))
plot!(μ, label = "pred", ribbon = hcat(μ - μ_std, μ + μ_std))

# failing model
@model prophet(y) = begin
    # priors
    k ~ Laplace(0, 5);
    m ~ Normal(0, 5);

    # weights for the change-points
    δ = Array{Real}(undef, S)
    δ ~ [Laplace(0, τ)]

    # variance of observations
    σ_obs ~ Normal(0, 0.5)

    # σ should be of length `K`
    β = Array{Real}(undef, K)
    for i = 1:K
        β[i] ~ Normal(0, σ_s[i])
    end

    # output
    μ = (
        linear_trend(k, m, δ, ts, A, t_change)
#         .* (1.0 .+ X * (β .* s_m))  # multiplicative
        + X * (β .* s_a)  # additive
    )

    for tᵢ = 1:T
        y[tᵢ] ~ Normal(
            μ[tᵢ],
            σ_obs^2
        )
    end
end
	# Partial implementation of linear-trend model described in [1] using Turing.jl.
	#
	# # References
	# [1] Taylor, S. J., & Letham, B., Forecasting at scale, PeerJ Preprints, 5(), 3190–2 (2017). http://dx.doi.org/10.7287/peerj.preprints.3190v2

	using DataFrames, CSV
	using Turing
	using MCMCChain, Plots, StatsPlots

	pyplot()

	# seems to be fastest in simple case, and reverse-mode completely fails in full case due to memory-usage
	setadbackend(:forward_diff)

	"""
	changepoint_matrix(t::AbstractArray, t_change::AbstractArray)

	Computes matrix `A`, the change-point matrix.
	"""
	function changepoint_matrix(t::AbstractArray, t_change::AbstractArray)
	T = size(t, 1)
	S = size(t_change, 1)

	A = zeros(T, S)
	a_row = zeros(S) # will be copied on each t
	cp_idx = 1

	for i = 1:T
	while (cp_idx ≤ S) && (t[i] ≥ t_change[cp_idx])
	a_row[cp_idx] = 1
	cp_idx = cp_idx + 1
	end

	A[i, :] = a_row
	end

	A
	end

	# linear trend with switch-points
	function linear_trend(k::Real,
	m::Real,
	δ::AbstractVector,
	t::AbstractVector,
	A::AbstractArray,
	t_change::AbstractVector)
	# makes it continuous
	γ = (- t_change .* δ)

	return (k .+ A * δ) .* t + (m .+ A * γ)
	end

	# linear trend without switch-points
	function linear_trend(k::Real,
	m::Real,
	t::AbstractVector)
	return (k .* t) .+ m
	end

	"""
	seasonal(ts, n::Integer; period = 365.25)

	Constructs `n` Fourier coefficients for the times `ts` with period `period`.
	"""
	function seasonal(ts, n::Integer; period = 365.25)
	T = size(ts, 1)
	X = zeros(T, 2 * n)
	for tᵢ = 1:T
	for i = 1:n
	X[tᵢ, i] = cos(2π * i * ts[tᵢ] / period)
	X[tᵢ, i + n] = sin(2π * i * ts[tᵢ] / period)
	end
	end

	X
	end

	function seasonal(ts, steps::AbstractArray; period = 365.25)
	T = size(ts, 1)
	K = size(steps, 1)

	X = zeros(T, 2 * K)
	for tᵢ = 1:T
	for i = 1:K
	X[tᵢ, i] = cos(2π * steps[i] * ts[tᵢ] / period)
	X[tᵢ, i + K] = sin(2π * steps[i] * ts[tᵢ] / period)
	end
	end

	X
	end

	# A handy helper function to rescale our dataset.
	function standardize(x)
	return (x .- mean(x, dims=1)) ./ std(x, dims=1), x
	end

	# Another helper function to unstandardize our datasets.
	function unstandardize(x, orig)
	return x .* std(orig, dims=1) .+ mean(orig, dims=1)
	end

	# source: https://github.com/facebook/prophet/blob/master/examples/example_wp_log_peyton_manning.csv
	d = DataFrame(CSV.read("example_wp_log_peyton_manning.csv"));

	# standardize the output
	y_true_orig = collect(skipmissing(d[:y]));
	(y_true, y_true_orig) = standardize(y_true_orig)

	# contstructing the "features"
	T = size(d[:ds], 1); # number of time-steps
	ts = collect(1:T); # time-steps

	X = seasonal(ts, 10, period = 365.25) # yearly seasonal component; N = 10 as suggested
	X = hcat(X, seasonal(ts, 3, period = 7)) # weekly seasonal component; N = 3 as suggested

	K = size(X, 2)

	σ_s = 10 .* ones(K); # scale on seasonality prior

	# change point once every month and then use a sparse prior
	t_change = [ts[i] for i = 1:30:T]; # times of change-points
	S = size(t_change, 1); # number of change-pointsa

	A = changepoint_matrix(ts, t_change); # construct the change-point matrix

	s_a = ones(K) # all additive contributions
	s_m = zeros(K) # no multiplicative contribution

	τ = 1.0;

	# working model
	@model prophet(y) = begin
	# variance of
	σ_obs ~ TruncatedNormal(0,100, 0, Inf)

	# σ should be of length `K`
	β = Array{Real}(undef, K)
	for i = 1:K
	β[i] ~ Normal(0, σ_s[i])
	end

	μ = X * (β .* s_a)

	for tᵢ = 1:T
	y[tᵢ] ~ Normal(
	μ[tᵢ],
	σ_obs
	)
	end
	end

	# sample posterior
	iterations = 500
	chain = sample(prophet(y_true), NUTS(iterations, 200, 0.65))

	# compute estimate of μ
	β_chain = chain[:β].value
	μ_chain = zeros(iterations, T);

	for i = 1:iterations
	μ_chain[i, :] = X * (β_chain[i, :] .* s_a)
	end
	μ = collect(transpose(mean(μ_chain, dims=1)))
	μ_variance = transpose(var(μ_chain, dims=1))
	μ_std = sqrt.(μ_variance)

	# visualize prediction vs. true
	scatter(y_true, label = "true", alpha = .8, markersize = 2, markerstrokewidth = 0.1, size=(1200, 500))
	plot!(μ, label = "pred", ribbon = hcat(μ - μ_std, μ + μ_std))

	# failing model
	@model prophet(y) = begin
	# priors
	k ~ Laplace(0, 5);
	m ~ Normal(0, 5);

	# weights for the change-points
	δ = Array{Real}(undef, S)
	δ ~ [Laplace(0, τ)]

	# variance of observations
	σ_obs ~ Normal(0, 0.5)

	# σ should be of length `K`
	β = Array{Real}(undef, K)
	for i = 1:K
	β[i] ~ Normal(0, σ_s[i])
	end

	# output
	μ = (
	linear_trend(k, m, δ, ts, A, t_change)
	# .* (1.0 .+ X * (β .* s_m)) # multiplicative
	+ X * (β .* s_a) # additive
	)

	for tᵢ = 1:T
	y[tᵢ] ~ Normal(
	μ[tᵢ],
	σ_obs^2
	)
	end
	end