# ============================================================
# First gist file: parameter inference for a 1d SDE via
# backward filtering / forward guiding and random-walk MCMC
# ============================================================
using LinearAlgebra
using Random
using GaussianDistributions
using GaussianDistributions: logpdf
using Parameters

pair(u) = u[1], u[2]
pair(p::Gaussian) = p.μ, p.Σ
skiplast(r) = r[1:end-1]

# model dX = b(X)dt + σ(X)dW
# argument M contains the model parameters, see below
b(x, M) = -0.1x + inv(1 + 0.5*abs(M.θ))*sin(M.θ*x*2pi) + 0.2
σ(x, M) = M.σ̃

# linear approximation of b and constant approximation of σ
b̃(x, M) = M.B*x + M.β
σ̃(M) = M.σ̃

# time grid
dt = 0.005
T = 30.0
s = 0:dt:T

# observation times t: every 50th grid point, i.e. every 0.25 time units
ti = 1:50:length(s)
t = s[ti]
@with_kw struct Model{R} @deftype R # in 1d all parameters can be of the same type R
    # unknown parameter
    θ = NaN
    # parameters for linear approximation of b and constant approximation of σ
    B = -0.1
    β = 0.2
    σ̃ = 1.0
    # observation scheme Y ∼ N(L*X, σϵ^2)
    L = 1.0
    σϵ = 0.1
    Σ = σϵ*σϵ'
end
# Kalman correction step, https://en.wikipedia.org/wiki/Kalman_filter#Update
"""
    correct(u, v, H, c = 0.0)

Correction step of a Kalman filter with `u = (x, P)` the prediction with uncertainty
covariance `P`, `v = (y, R)` the observation with uncertainty covariance `R`,
and `H` the observation operator. See https://en.wikipedia.org/wiki/Kalman_filter#Update.
"""
function correct(u, v, H, c = 0.0)
    x, Ppred = pair(u)
    y, R = pair(v)
    yres = y - H*x # innovation residual
    S = H*Ppred*H' + R # innovation covariance
    K = Ppred*H'*inv(S) # Kalman gain
    x = x + K*yres
    P = (I - K*H)*Ppred*(I - K*H)' + K*R*K' # Joseph form, stable under non-optimal K
    c = c - logpdf(Gaussian(zero(y), R), y)
    (x, P), c, yres, S
end
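
# Usage sketch (illustrative numbers, not part of the original gist): a single
# scalar correction step pulls the prediction x = 0, P = 1 toward the
# observation y = 0.5 with R = 0.1^2 and shrinks the variance.
let
    (xc, Pc), _, _, _ = correct((0.0, 1.0), (0.5, 0.1^2), 1.0)
    @assert 0.0 < xc < 0.5 && Pc < 1.0
end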
# Sample the model
"""
    forwardsample(M, s, ti, x)

Simulate a trajectory on the time grid `s` and observations at times `s[ti]`
using the Euler-Maruyama scheme.
"""
function forwardsample(M, s, ti, x)
    @unpack L, σϵ = M
    xs = typeof(x)[]
    ys = typeof(L*x)[]
    for i in skiplast(eachindex(s))
        dt = s[i+1] - s[i]
        if i in ti
            push!(ys, L*x + σϵ*randn())
        end
        push!(xs, x)
        x = x + b(x, M)*dt + σ(x, M)*sqrt(dt)*randn()
    end
    push!(xs, x)
    if lastindex(s) in ti
        push!(ys, L*x + σϵ*randn())
    end
    xs, ys
end
# Compute marginal approximate filtering distributions given data `ys`, backwards in time
"""
    backwardfilter(M, s, ti, ys, πT, c = 0.0) -> ps, p0, c

Backward filtering, starting with the `N(ν, P)` prior `πT`, assuming that `ys` contains
observations at times `t = s[ti]` with `y ∼ N(L X[t], Σ)`. Between observations, `ν` and `P`
evolve backwards along the linear dynamics, `dν = (Bν + β)dt` and `dP = (BP + PB' - σ̃σ̃')dt`.
`exp(-c)` is the integration constant from Theorem 3.3.
"""
function backwardfilter(M, s, ti, ys, πT, c = 0.0)
    @unpack L, Σ, B, β, σ̃ = M
    @assert lastindex(s) in ti
    j = length(ys)
    p, c, _, _ = correct(πT, (ys[j], Σ), L, c)
    ps = [p]
    ν, P = pair(p)
    for i in eachindex(s)[end-1:-1:1]
        dt = s[i+1] - s[i]
        H = inv(P)
        F = H*ν
        P = P - dt*(B*P + P*B' - σ̃*σ̃')
        ν = ν - dt*(B*ν + β)
        c += β'*F*dt + 0.5*F'*σ̃*σ̃'*F*dt - 0.5*sum(H .* (σ̃*σ̃'))*dt
        push!(ps, (ν, P))
        if i in ti
            j = j - 1
            p, c, _, _ = correct((ν, P), (ys[j], Σ), L, c)
            (ν, P) = pair(p)
        end
    end
    reverse!(ps), (ν, P), c
end
""" | |
forwardguiding(M, s, x, ps, Z) -> xs, ll | |
Forward sample a guided trajectory `xs` starting in `x` and compute it's | |
log-likelihood `ll` with innovations `Z = randn(length(s))`. | |
""" | |
function forwardguiding(M, s, x, ps, Z=randn(length(s))) | |
llstep(x, r, dt, P) = dot(b(x, M) - b̃(x, M), r)*dt - 0.5*tr((σ(x, M)*σ(x, M)' - σ̃(M)*σ̃(M)')*(inv(P) - r*r'))*dt | |
xs = typeof(x)[] | |
ll = 0.0 | |
for i in skiplast(eachindex(s)) | |
dt = s[i+1] - s[i] | |
push!(xs, x) | |
ν, P = pair(ps[i]) | |
r = inv(P)*(ν - x) | |
ll += llstep(x, r, dt, P) # accumulate log-likelihood | |
x = x + b(x, M)*dt + σ(x, M)*σ(x, M)'*r*dt + σ(x, M)*sqrt(dt)*Z[i] # evolution guided by observations | |
end | |
push!(xs, x) | |
xs, ll | |
end | |
""" | |
randomwalkmcmc(s, ti, ys, θ0, iters, ρ = 0.9, σθ = 0.01) | |
Infer parameter θ using Metropolis-Hastings with joint update of | |
innovations (Crank Nicolson with parameter ρ) and parameter θ (Gaussian random walk | |
with stepsize σθ) | |
""" | |
function randomwalkmcmc(s, ti, ys, θ0, iters, ρ = 0.9, σθ = 0.01) | |
θ = θ0 | |
Mᵒ = Model(θ = θ) | |
θs = [θ] | |
# sample initial latent path | |
ps, p0, c = backwardfilter(M, s, ti, ys, πT) | |
ν0, P0 = p0 | |
x0 = rand(Gaussian(p0...)) | |
Z = randn(length(s)) | |
x̂, ll = forwardguiding(M, s, x0, ps, Z) | |
ll += logpdf(π0, x̂[1]) - logpdf(πT, x̂[end]) + logpdf(πθ, θ) | |
ll += (-0.5*x0' + ν0')*inv(P0)*x0 | |
acc = 0 | |
for iter in 1:iters | |
# random walk proposal for parameter | |
θᵒ = θ + σθ* randn() | |
# independent proposal for starting point | |
# x0ᵒ = rand(Gaussian(p0...)) | |
# random walk for starting point | |
x0ᵒ = x0 + 0.01*rand(Gaussian(0*p0[1], p0[2])) # | |
# compute filtering density for guiding | |
Mᵒ = Model(θ = θᵒ) | |
ps, p0, c = backwardfilter(Mᵒ, s, ti, ys, πT) | |
ν0, P0 = p0 | |
# random walk proposal for innovations | |
Zᵒ = ρ*Z + sqrt(1 - ρ^2)*randn(length(s)) | |
# compute latent path | |
x̂ᵒ, llᵒ = forwardguiding(Mᵒ, s, x0ᵒ, ps, Zᵒ) | |
llᵒ += logpdf(π0, x̂ᵒ[1]) - logpdf(πT, x̂ᵒ[end]) + logpdf(πθ, θᵒ) | |
#llᵒ += -c | |
llᵒ += (-0.5*x0ᵒ' + ν0')*inv(P0)*x0ᵒ # constant may change if σ depends on parameter | |
# Metropolis-Hastings accept/reject for joint proposal of starting point, path, parameter | |
if rand() < exp(llᵒ - ll) | |
θ = θᵒ | |
ll = llᵒ | |
x0 = x0ᵒ | |
x̂ = x̂ᵒ | |
Z = Zᵒ | |
acc += 1 | |
end | |
push!(θs, θ) | |
end | |
θs, acc/iters, x̂ | |
end | |
Random.seed!(123)

# Set true model
θtrue = 0.9
M = Model(θ = θtrue)

# First generate data from the model for illustration
πθ = Gaussian(0.0, 1.0)
π0 = Gaussian(0.0, 1.0)
x0 = rand(π0)
xs, ys = forwardsample(M, s, ti, x0) # sample trajectory

# run the backward filter given the observations ys
πT = Gaussian(0.0, 10.0) # prior for the backward filter
ps, p0, c = backwardfilter(M, s, ti, ys, πT)

# sample trajectories and their importance weights
K = 10
x̂s = Vector(undef, K)
ll = zeros(K)
for k in 1:K
    local x0 = rand(Gaussian(p0...)) # sample from p0
    x̂s[k], ll[k] = forwardguiding(M, s, x0, ps)
    ll[k] += logpdf(π0, x̂s[k][1]) - logpdf(πT, x̂s[k][end]) # correct for having used the
                                                            # backward prior πT instead of
                                                            # our actual prior π0
end
lmax = maximum(exp.(ll)) # maximum of the importance weights
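
# Sketch (not part of the original gist): self-normalized importance weights,
# used here to estimate the smoothing mean of the endpoint X_T.
w = exp.(ll .- maximum(ll)) # subtract the maximum before exponentiating, for stability
w ./= sum(w)
x̂T = sum(w[k]*x̂s[k][end] for k in 1:K)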
# inference for the parameter θ
θ = 0.7θtrue # start somewhere wrong
iters = 100000
ρ = 0.95 # random walk parameter for the innovation update (Crank-Nicolson scheme)
σθ = 0.04 # stepsize of the random walk on θ
θs, a, xᵒs = @time randomwalkmcmc(s, ti, ys, θ, iters, ρ, σθ)
println("Acceptance rate: ", a)
using Makie

pl1 = lines(s, xs)
scatter!(pl1, t, ys)
lines!(pl1, s, xᵒs, color = :red)
pl1
pl2 = Makie.lines(0:10:iters, θs[1:10:end])
Makie.lines!(pl2, 0:10:iters, fill(θtrue, length(0:10:iters)))
hbox(pl1, pl2)
# ============================================================
# Second gist file: importance-weight check of the backward
# filter constants (c1, c2)
# ============================================================
using LinearAlgebra
using Random
using GaussianDistributions
using GaussianDistributions: logpdf
using Parameters
using Statistics
using Makie

pair(u) = u[1], u[2]
pair(p::Gaussian) = p.μ, p.Σ
skiplast(r) = r[1:end-1]

# model dX = b(X)dt + σ(X)dW
# argument M contains the model parameters, see below
b(x, M) = b̃(x, M) - 1.5sin(2pi*x)
σ(x, M) = σ̃(M) - 0.2*(1 - sin(x))

# linear approximation of b and constant approximation of σ
b̃(x, M) = M.B*x + M.β
σ̃(M) = M.σ̃

@with_kw struct Model{R} @deftype R # in 1d all parameters can be of the same type R
    # parameters for linear approximation of b and constant approximation of σ
    B = -0.1
    β = 0.3
    σ̃ = 1.2
    # observation scheme Y ∼ N(L*X, σϵ^2)
    L = 1.0
    σϵ = 0.2
    Σ = σϵ*σϵ'
end
# Kalman correction step, https://en.wikipedia.org/wiki/Kalman_filter#Update
"""
    correct(u, v, H, c = (0.0, 0.0))

Correction step of a Kalman filter with `u = (x, P)` the prediction with uncertainty
covariance `P`, `v = (y, R)` the observation with uncertainty covariance `R`,
and `H` the observation operator. Also tracks the two constants `c = (c1, c2)`.
See https://en.wikipedia.org/wiki/Kalman_filter#Update.
"""
function correct(u, v, H, c = (0.0, 0.0))
    xpred, Ppred = pair(u)
    y, R = pair(v)
    yres = y - H*xpred # innovation residual
    S = H*Ppred*H' + R # innovation covariance
    K = Ppred*H'*inv(S) # Kalman gain
    x = xpred + K*yres
    P = (I - K*H)*Ppred*(I - K*H)' + K*R*K' # Joseph form
    c1, c2 = c
    c1 = c1 - logpdf(Gaussian(zero(y), R), y)
    c2 = c2 - logpdf(Gaussian(zero(x), P), x) + logpdf(Gaussian(zero(y), R), y)
    (x, P), (c1, c2), yres, S
end
# Sample the model
"""
    forwardsample(M, s, ti, x)

Simulate a trajectory on the time grid `s` and observations at times `s[ti]`
using the Euler-Maruyama scheme.
"""
function forwardsample(M, s, ti, x)
    @unpack L, σϵ = M
    xs = typeof(x)[]
    ys = typeof(L*x)[]
    for i in skiplast(eachindex(s))
        dt = s[i+1] - s[i]
        if i in ti
            push!(ys, L*x + σϵ*randn())
        end
        push!(xs, x)
        x = x + b(x, M)*dt + σ(x, M)*sqrt(dt)*randn()
    end
    push!(xs, x)
    if lastindex(s) in ti
        push!(ys, L*x + σϵ*randn())
    end
    xs, ys
end
# Compute marginal approximate filtering distributions given data `ys`, backwards in time
"""
    backwardfilter(M, s, ti, ys, πT, c = (0.0, 0.0)) -> ps, p0, (c1, c2)

Backward filtering, starting with the `N(ν, P)` prior `πT`, assuming that `ys` contains
observations at times `t = s[ti]` with `y ∼ N(L X[t], Σ)`. `exp(-c1)` is the integration
constant from Theorem 3.3.
"""
function backwardfilter(M, s, ti, ys, πT, c = (0.0, 0.0))
    @unpack L, Σ, B, β, σ̃ = M
    @assert lastindex(s) in ti
    j = length(ys)
    c1, c2 = c
    #c1 += logpdf(πT, 0*mean(πT))
    c2 += logpdf(πT, 0*mean(πT))
    p, (c1, c2), _, _ = correct(πT, (ys[j], Σ), L, (c1, c2))
    ps = [p]
    ν, P = pair(p)
    for i in eachindex(s)[end-1:-1:1]
        dt = s[i+1] - s[i]
        H = inv(P)
        F = H*ν
        c1 = c1 - dt*(β'*F + 0.5*F'*σ̃*σ̃'*F - 0.5*sum(H .* (σ̃*σ̃')))
        P = P - dt*(B*P + P*B' - σ̃*σ̃')
        ν = ν - dt*(B*ν + β)
        push!(ps, (ν, P))
        if i in ti
            j = j - 1
            c2 += logpdf(Gaussian(ν, P + dt*σ̃*σ̃'), 0*ν) # order?
            p, (c1, c2), _, _ = correct((ν, P), (ys[j], Σ), L, (c1, c2))
            (ν, P) = pair(p)
        else
            c2 += -tr(B)*dt
        end
    end
    reverse!(ps), (ν, P), (c1, c2)
end
""" | |
forwardguiding(M, s, x, ps, Z) -> xs, ll | |
Forward sample a guided trajectory `xs` starting in `x` and compute its | |
log-likelihood `ll` with innovations `Z = randn(length(s))`. | |
""" | |
function forwardguiding(M, s, x, ps, Z=randn(length(s))) | |
llstep(x, r, dt, P) = dot(b(x, M) - b̃(x, M), r)*dt - 0.5*tr((σ(x, M)*σ(x, M)' - σ̃(M)*σ̃(M)')*(inv(P) - r*r'))*dt | |
xs = typeof(x)[] | |
ll = 0.0 | |
for i in skiplast(eachindex(s)) | |
dt = s[i+1] - s[i] | |
push!(xs, x) | |
ν, P = pair(ps[i]) | |
r = inv(P)*(ν - x) | |
ll += llstep(x, r, dt, P) # accumulate log-likelihood | |
x = x + b(x, M)*dt + σ(x, M)*σ(x, M)'*r*dt + σ(x, M)*sqrt(dt)*Z[i] # evolution guided by observations | |
end | |
push!(xs, x) | |
xs, ll | |
end | |
Random.seed!(123)

# Set true model
M = Model()
ã = M.σ̃ * M.σ̃'

# time grid
dt = 0.002
T = 1.0
s = 0:dt:T

# observation times t: the first and the last grid point
ti = 1:length(s)-1:length(s)
#ti = [length(s)]
t = s[ti]

# repetitions
K = 200000

π0 = Gaussian(0.0, 0.4^2)
x0 = rand(π0)
xs, ys = forwardsample(M, s, ti, x0) # sample trajectory
p1 = lines(s, xs)
scatter!(p1, t, ys)

# Generate data from the model for illustration:
# X0 ∼ π0, dX_t = b(X_t)dt + σ(X_t)dW_t, Y = [X_0 + ϵ1, X_T + ϵ2]
Ys = Vector(undef, K)
for k in 1:K
    local x0 = rand(π0) # sample from π0
    Ys[k] = forwardsample(M, s, ti, x0)[2]
end
ms = 0.1
if length(ti) == 2
    p2 = scatter(Point2f0.(Ys[1:20:end]), markersize = ms)
else
    p2 = scatter((x -> Point2f0(rand(), x[])).(Ys[1:20:end]), markersize = ms)
end

# proposal distribution for the observations
YProp = Gaussian(mean(Ys), cov(Ys))
# run the backward filter given the observations ys
πT = Gaussian(0.0, 2.0^2) # prior for the backward filter

# sample trajectories to randomly chosen endpoints and their importance weights
Ŷs = Vector(undef, K)
ll = zeros(K)
for k in 1:K
    local x̂s, c1, c2, ps, p0
    local ys = rand(YProp)
    Ŷs[k] = ys
    ps, p0, c = backwardfilter(M, s, ti, ys, πT)
    # uses the backward prior πT instead of our actual prior π0
    c1, c2 = c # log ϖ = log ρ̃(t0, x0) - log ϕ(x0; ν0, P0)
    local x0 = rand(Gaussian(p0...))
    x̂s, ll[k] = forwardguiding(M, s, x0, ps)
    # Broken:
    #@show -c1 + (-0.5*x0' + p0[1]')*inv(p0[2])*x0 - (c2 + logpdf(Gaussian(p0...), 0*x0))
    @show c2 + c2 + logpdf(Gaussian(p0...), 0*x0)
    #@assert -c1 + (-0.5*x0' + p0[1]')*inv(p0[2])*x0 ≈ c2 + logpdf(Gaussian(p0...), x0)
    ll[k] += c2
    #ll[k] += logpdf(Gaussian(p0...), x0) # not needed as we sampled from Gaussian(p0...)
    ll[k] += -logpdf(YProp, ys)
    ll[k] += logpdf(π0, x0) - logpdf(πT, x̂s[end]) # correct for having used the backward prior
end
me = mean(exp.(ll))
@show me, std(exp.(ll))/me
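
# Diagnostic sketch (not part of the original gist): the effective sample size
# ESS = (Σ w)^2 / Σ w^2 of the importance weights; ESS ≈ K only if the weights are even.
w = exp.(ll .- maximum(ll))
@show sum(w)^2/sum(w .^ 2)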
using Colors

# keep index k with probability proportional to its weight x[k]
ss(x) = findall(rand(length(x)) .<= x ./ maximum(x))

if length(ti) == 2
    Is = ss(exp.(ll))
    @show length(Is)/K
    p3 = scatter(Point2f0.(Ŷs[Is]), markersize = ms)
    #scatter!(Point2f0.(Ŷs), markersize = ms, color = :yellow)
else
    p3 = scatter((x -> Point2f0(rand(), x[])).(Ŷs), markersize = sqrt.(exp.(ll))*ms)
end
# running mean of the importance weights
p4 = lines((cumsum(exp.(ll)) ./ (1:K))[1:10:end])
# Compute histogram estimates for the marginal density of Y[2]
N = 50 # bins
binind(r, x) = searchsortedfirst(r, x) - 1
R = maximum(abs.(last.(Ys)))
vrange = range(-R, R, length = N + 1)
vints = [(vrange[i], vrange[i+1]) for i in 1:N]
counts = zeros(N + 2) # samples from the model
for k in 1:K
    counts[binind(vrange, last(Ys[k])) + 1] += 1
end
counts /= K
counts2 = zeros(N + 2) # unweighted guided samples
for k in 1:K
    counts2[binind(vrange, last(Ŷs[k])) + 1] += 1
end
counts2 /= K
wcounts = zeros(N + 2) # guided samples weighted by exp(ll)
for k in 1:K
    wcounts[binind(vrange, last(Ŷs[k])) + 1] += exp(ll[k])
end
wcounts /= K
# compare: model histogram, weighted guided samples (blue), unweighted guided samples (green)
p5 = lines(mean.(vints), counts[2:end-1])
lines!(p5, mean.(vints), wcounts[2:end-1], color = :blue)
lines!(p5, mean.(vints), counts2[2:end-1], color = :green)
using FileIO

pl = vbox(hbox(p2, p3), hbox(p4, p5))
save("figures/histtest.png", pl)
pl
using Test
#@testset begin
ps, p0, c = backwardfilter(M, s, ti, rand(YProp), πT)
i = 5
logpdf(Gaussian(ps[i][1], M.σ̃^2*dt + ps[i][2]), 0) - logpdf(Gaussian(ps[i-1]...), 0)
det(M.B)*dt
Comments:

- In 101 and 114, correct the order of the return arguments.
- During the MCMC one can either sample x0ᵒ = rand(Gaussian(p0...)) or put
  (-0.5*x0ᵒ' + ν0')*inv(P0)*x0ᵒ into the likelihood, but not both. The ll
  before the loop was inconsistent with the one in the loop.
- Transpose missing in 110.
- Taking dt as an argument, as in
  llstep(x, r, dt, P) = dot(b(x, M) - b̃(x, M), r)*dt - 0.5*tr((σ(x, M)*σ(x, M)' - σ̃(M)*σ̃(M)')*(inv(P) - r*r'))*dt,
  speeds things up; see the sketch below.
Line 106 was moved earlier.