A working example of mixing AD with ForwardDiff.jl and ReverseDiff.jl
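The script fits a small neural network to trajectories from a random discrete-time linear system, with a regularizer that penalizes changes in the model's input Jacobian between consecutive time steps. The Jacobians inside the loss are computed with ForwardDiff, while the gradient of the whole loss with respect to the network weights is taken through a compiled ReverseDiff tape, i.e. reverse mode on the outside and forward mode on the inside. A smaller self-contained benchmark at the end nests ForwardDiff.jacobian inside ForwardDiff.gradient for comparison.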
# pyplot()
using Plots
default(grid=false) #src
plot(randn(10))
# closeall();gui()
using Parameters, LTVModelsBase, ValueHistories, DSP, ForwardDiff, ReverseDiff
using LinearAlgebra, Random #, JLD, JacProp
const Diff  = ForwardDiff
const RDiff = ReverseDiff
@with_kw struct LinearSys
    A
    B
    N  = 1000
    nx = size(A,1)
    nu = size(B,2)
    h  = 0.02
    σ0 = 0
    sind  = 1:nx
    uind  = nx+1:(nx+nu)
    s1ind = (nx+nu+1):(nx+nu+nx)
end
function LinearSys(seed; nx = 10, nu = nx, h = 0.02, kwargs...)
    Random.seed!(seed)
    A = randn(nx,nx)
    A = A - A'   # skew-symmetric = pure imaginary eigenvalues
    A = A - h*I  # Make 'slightly' stable
    A = exp(h*A) # discrete time
    B = h*randn(nx,nu)
    LinearSys(;A=A, B=B, nx=nx, nu=nu, h=h, kwargs...)
end
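# Why this construction gives a well-behaved system: A - A' is skew-symmetric, so its
# eigenvalues are purely imaginary; subtracting h*I shifts every eigenvalue's real part
# to -h, giving a marginally stable continuous-time system; exp(h*A) then maps those
# eigenvalues to points just inside the unit circle, so the discrete system decays slowly.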
function generate_data(sys::LinearSys, seed, validation=false)
    Parameters.@unpack A,B,N, nx, nu, h, σ0 = sys
    Random.seed!(seed)
    u  = DSP.filt(ones(5),[5], 10randn(N+2,nu))'
    t  = h:h:N*h+h
    x0 = randn(nx)
    x  = zeros(nx,N+1)
    x[:,1] = x0
    for i = 1:N-1
        x[:,i+1] = A*x[:,i] + B*u[:,i]
    end
    validation || (x .+= σ0 * randn(size(x)))
    u = u[:,1:N]
    @assert all(isfinite, u)
    x,u
end
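# generate_data simulates one trajectory: the input u is white noise passed through a
# length-5 moving-average filter (DSP.filt(ones(5), [5], ...)), the state is rolled out
# through the discrete linear system, and measurement noise of size σ0 is added only when
# validation=false, so validation trajectories stay noise-free.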
function true_jacobian(sys::LinearSys, x, u)
    [sys.A sys.B]
end

num_params = 30
wdecay     = 0
stepsize   = 0.02
const sys  = LinearSys(1, nx=5, N=100, h=0.02, σ0 = 0.01)
true_jacobian(x,u) = true_jacobian(sys,x,u)
nu = sys.nu
nx = sys.nx
# Generate validation data
function valdata()
    vx,vu,vy = Vector{Float64}[],Vector{Float64}[],Vector{Float64}[]
    for i = 20:60
        x,u = generate_data(sys,i, true)
        for j in 10:5:(sys.N-1)
            push!(vx, x[:,j])
            push!(vy, x[:,j+1])
            push!(vu, u[:,j])
        end
    end
    hcat(vx...),hcat(vu...),hcat(vy...)
end
vx,vu,vy = valdata()
const vt = Trajectory(vx,vu,vy)

# Generate training trajectories
const trajs = [Trajectory(generate_data(sys, i)...) for i = 1:3]

# TODO: take care of separate trajs
function todata(trajs::Vector{Trajectory})
    xu = hcat(getfield.(trajs,:xu)...)
    y  = hcat(getfield.(trajs,:y)...)
    xu, y
end

function i2m(w,i,sizes)
    s = [1; cumsum([prod.(sizes)...]) .+ 1]
    reshape(view(w,s[i]:(s[i+1]-1)), sizes[i])
end
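# i2m ("index to matrix") views a slice of the flat parameter vector w as the i:th array
# whose dimensions are listed in `sizes`, without copying. A minimal sanity check
# (a sketch; `sz` and `wtest` are throwaway names introduced only here):
let sz = ((2,3), (2,1)), wtest = collect(1.0:8.0)
    @assert i2m(wtest, 1, sz) == reshape(1.0:6.0, 2, 3)  # first 6 entries as a 2×3 weight matrix
    @assert i2m(wtest, 2, sz) == reshape(7.0:8.0, 2, 1)  # last 2 entries as a 2×1 bias
end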
function pred(w,x,sizes)
    state = copy(x)
    for i = 1:2:length(sizes)-2
        state = tanh.(i2m(w,i,sizes)*state .+ i2m(w,i+1,sizes))
    end
    return i2m(w,length(sizes)-1,sizes)*state .+ i2m(w,length(sizes),sizes) .+ x[1:nx,:]
end
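# pred is a small tanh MLP applied column-wise to xu = [x; u]: consecutive pairs of entries
# in `sizes` are the (weight, bias) of each hidden layer, the last pair is a linear output
# layer, and the current state x[1:nx,:] is added back, so the network only has to learn the
# state increment (a residual next-state predictor).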
cost(w,x,y) = sum(abs2, pred(w,x,sizes) .- y)/size(y,2)
# cost(w,data) = sum(cost(w, d...) for d in data)/length(data)
cost(w,data) = cost(w, data...)

function loss(w,x,y)
    chunk = Diff.Chunk(x[:,1])
    function lf(w)
        # println("Entering loss function, typeof(w):", typeof(w))
        model(x)    = pred(w,x,sizes)
        jcfg        = Diff.JacobianConfig(model, x[:,1], chunk)
        jacobian(x) = Diff.jacobian(model, x, jcfg)
        l  = cost(w,x,y)
        J1 = jacobian(x[:,1])
        for t = 2:size(x,2)
            J2 = jacobian(x[:,t])
            l += 2sum(abs2.(J1.-J2))
            J1 = J2
        end
        l
    end
end
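# This is where the two AD packages meet: `loss` returns the closure `lf`, whose argument w
# is later a ReverseDiff-tracked vector while the tape is recorded. Inside `lf`, ForwardDiff
# computes the Jacobian of the network output w.r.t. its *input* at every time step, and the
# squared difference between Jacobians at consecutive samples is added to the prediction
# error as a smoothness penalty. ReverseDiff then differentiates the scalar lf(w) w.r.t. w
# in `runtrain`, i.e. forward mode nested inside reverse mode.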
##
function runtrain(w, loss; epochs = 500)
    dtrn = todata(trajs)
    dtst = todata([vt])
    x,y  = dtrn
    lossfun = loss(w,x,y)
    gcfg = RDiff.GradientConfig(w)
    @time tape = RDiff.compile(RDiff.GradientTape(lossfun, w, gcfg))
    # gradient(w) = Diff.gradient(lossfun, w, gcfg)
    trace  = History(Float64)
    tracev = History(Float64)
    push!(trace, 0, lossfun(w))
    push!(tracev, 0, cost(w,dtst))
    g = similar(w)
    m = zeros(size(g))
    plot(reuse=false)
    for epoch = 1:epochs
        lossfun = loss(w,x,y)
        # gcfg = Diff.GradientConfig(lossfun, w)
        # gradient(w) = RDiff.gradient!(g,tape, w)
        # RDiff.gradient!(g, lossfun, w, gcfg)
        γ = 0.85
        RDiff.gradient!(g, tape, w .- γ.*m)
        @. m = 0.002g + γ*m
        @. w -= m
        if epoch % 10 == 0
            push!(trace, epoch, lossfun(w))
            push!(tracev, epoch, cost(w,dtst))
            plot(trace, reuse=true)
            plot!(tracev)
            gui()
            println(last(trace), " ", last(tracev))
        end
    end
    trace,tracev
end
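# The update inside the loop is Nesterov-style momentum written out by hand: the gradient is
# evaluated at the look-ahead point w - γ*m, the velocity is updated as m = 0.002*g + γ*m
# (learning rate 0.002 folded into the gradient term, momentum γ = 0.85), and w steps by -m.
# Re-calling loss(w,x,y) each epoch only rebuilds the closure used for the loss traces; the
# compiled tape built once before the loop is what actually evaluates the gradient.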
const sizes = ((num_params,nx+nu), (num_params,1), (nx,num_params), (nx,1))
tovec(w) = vcat([vec(wi) for wi in w]...)
Random.seed!(1)
w1 = [ 0.1randn(Float64,sizes[1]), zeros(Float64,sizes[2]), 0.1randn(Float64,sizes[3]), zeros(Float64,sizes[4]) ]
w1 = tovec(w1)
w2 = deepcopy(w1)

# res = runtrain(w1, (w,x,y)-> (w-> cost(w,x,y)), epochs=300)
resj = runtrain(w2, loss, epochs=1000)
# @save "res1.jld" res w1
# @save "resj.jld" resj w2

##
# Eval jac
model(x)    = pred(w2,x,sizes)
jcfg        = Diff.JacobianConfig(model, vt.xu[:,1])
jacobian(x) = Diff.jacobian(model, x, jcfg)
jacs = vcat([eigvals(jacobian(vt.xu[:,i])[1:nx,1:nx]) for i = 1:length(vt)]...)
scatter(real.(jacs), imag.(jacs))
scatter!(real.(eigvals(sys.A)), imag.(eigvals(sys.A)))
phi = range(-π/5, π/5, length=200)
plot!(real.(exp.(phi.*im)), imag.(exp.(phi.*im)), l=(:dash, :black), yaxis=[-0.5,0.5], xaxis=[0,1.2])
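# The scatter plot compares the eigenvalues of the learned model's state Jacobian (the nx×nx
# block of ∂pred/∂xu, evaluated at every validation sample) against the eigenvalues of the
# true A, with a dashed arc of the unit circle for reference; if training worked, the two
# point clouds should overlap close to the circle.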
# NB: the block below needs the JLD and JacProp packages that are commented out in the
# `using` statement above (it reloads w2 from the file written by the @save line).
@load "resj.jld"
model(x) = pred(w2,x,sizes)
jfg = ForwardDiff.JacobianConfig(model, zeros(nx+nu))
jacobian(x) = ForwardDiff.jacobian(model, x, jfg)
m = JacProp.ADDiffSystem(w2,sizes,nx,nu, 1, jacobian)
JacProp.eigvalplot(m,vt, true_jacobian)
# Simple test (note: this block re-binds `loss`, so it is easiest to run in a fresh session)
using BenchmarkTools
using ForwardDiff
const Diff = ForwardDiff
w = randn(20,20)
# x = [randn(20) for i = 1:100]
x = randn(20,100)
f(w::AbstractMatrix,x) = w*x
f(w::AbstractVector,x) = f(reshape(w, 20,20),x)
# const jcfg = Diff.JacobianConfig(model, x)
function get_loss(w,x)
    modelw(w) = [f(w, x[:,i]) for i in 1:size(x,2)]  # apply the model to every column of x
    function loss(w)
        modelx(x::AbstractVector) = f(w,x)
        jacobian(x) = Diff.jacobian(modelx, x)
        # @assert jacobian(x) == w
        # sum(sum(modelw(w))) + sum(sum(jacobian(xi) for xi in x))
        sum(sum(modelw(w))) + sum(sum(jacobian(x[:,i]) for i in 1:size(x,2)))
        # sum(modelw(w)) + sum(jacobian(x))
    end
    loss
end
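# This simple test stays entirely within ForwardDiff: the loss sums the model output over all
# data points plus the entries of the input Jacobian at every column of x, and Diff.gradient
# below differentiates that w.r.t. the (vectorized) weight matrix, i.e. forward mode nested
# inside forward mode. For the linear model f(w,x) = w*x the inner Jacobian is just w itself
# (the commented-out @assert), so @btime is mainly a measure of the nested-dual overhead.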
loss = get_loss(w,x)
grad(w) = Diff.gradient(loss, vec(w))
grad(w)
@btime grad($w)

# f(w,x) = NeuralNetwork(w,x)
# function loss(w,x)
#     ∇xf(x) = jacobian(x->f(w,x), x)
#     w -> norm(∇xf(x))
# end
#
# l = loss(w,x)
# gradient(loss,w) # Good luck with this