# Gist by @sharanry, created January 5, 2021 05:13
# using DrWatson
# @quickactivate "ParamRegNN"
@time using Revise
@time using CUDAdrv; @show CUDAdrv.name(CuDevice(0))
@time using CUDA
@time using AdvancedHMC #master
@time using Zygote
@time using Distributions
@time using Functors
@time using Flux
import Random
Random.seed!(123);
CUDA.seed!(123);
if has_cuda() # Check if CUDA is available
    @info "CUDA is on"
end
# toy data generating function f: R^3 -> {1, ..., 5} (argmax of a softmax over five logits)
function f(x)
    @assert length(x) == 3
    y = softmax([sin(x[1] + x[3]), cos(x[2] + x[1]), sin(x[1] + x[2]), cos(x[2] + x[3]), cos(x[3] + x[1])])
    argmax(y)
end
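
# (added sanity check, not part of the original gist)
# f(zeros(3)) computes argmax(softmax([0, 1, 0, 1, 1])); argmax returns the first of
# the tied maxima, i.e. class 2.
@assert f(zeros(3)) == 2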
# toy data generation
N = 1000
x = [rand(Normal(0, 4), 3) for i in 1:N]
y = f.(x);
x = gpu(hcat(x...));                     # 3 × N input matrix on the GPU
y = map(yi -> Flux.onehot(yi, 1:5), y);
y = gpu(Float32.(hcat(y...)));           # 5 × N one-hot target matrix on the GPU
abstract type ProbablisticLayer end

struct DenseProbDropout{F,S,T,P} <: ProbablisticLayer
    σ::F
    W::S
    b::T
    p::P # dropout parameters, inferred using MCMC (posterior) / SGD (MAP estimate)
end
function DenseProbDropout(in::Integer, out::Integer, σ = identity;
                          initW = Flux.glorot_uniform, initb = zeros)
    return DenseProbDropout(σ, CUDA.cu(initW(out, in)), CUDA.cu(initb(out)), CUDA.randn(out))
end
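
# (added note, not part of the original gist) the constructor places W, b and the
# dropout parameters p on the GPU; e.g. DenseProbDropout(3, 10, relu) yields a
# 10×3 W, a length-10 b of zeros and a length-10 p drawn from a standard normal.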
# Only σ, W and b are exposed as functor children (so `gpu`/`fmap` move them);
# the dropout parameters p are carried over unchanged from the original layer.
Functors.functor(a::DenseProbDropout) = ((σ=a.σ, W=a.W, b=a.b), x -> DenseProbDropout(x.σ, x.W, x.b, a.p))
function replace_probs(a::DenseProbDropout, p)
    @assert length(p) == length(a.p)
    return DenseProbDropout(a.σ, a.W, a.b, p)
end
function replace_probs(c::Chain, probs)
    i = 0
    layers = [
        (layer isa DenseProbDropout) ?
            begin
                i += 1
                replace_probs(layer, probs[i])
            end : layer
        for layer in c.layers
    ]
    return Chain(layers...)
end
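
# (added usage sketch, not part of the original gist) replace_probs rebuilds a Chain
# with new dropout parameters while leaving every other layer and weight untouched,
# e.g. for the toy model m defined below:
#   m2 = replace_probs(m, [CUDA.randn(10)])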
# Forward pass: the dropout parameters act as a multiplicative perturbation (p .+ 1)
# of the pre-activation.
function (a::DenseProbDropout)(x)
    W, b, σ, p = a.W, a.b, a.σ, a.p
    return σ.((W*x .+ b) .* (p .+ 1))
end
function dropout_params(a::DenseProbDropout)
    return [a.p]
end
function dropout_params(model::Chain)
    inf_params = Any[]
    for layer in model
        if layer isa ProbablisticLayer
            append!(inf_params, dropout_params(layer))
        end
    end
    return inf_params
end
function toy_model(;inp=3, out=5)
    return Chain(
        DenseProbDropout(inp, 10, relu),
        Dense(10, out)
    )
end
m = toy_model()
m = gpu(m)
m(CUDA.rand(3, 10)) # smoke test on a batch of 10 random inputs
# Gradient of the loss w.r.t. the dropout parameters only (W and b are not tracked here)
@time grad = gradient(() -> Flux.logitcrossentropy(m(x), y), Flux.Params(dropout_params(m)))
grad.grads
n_inf_params = sum(length.(dropout_params(m)))
prior = MvNormal(n_inf_params, 1)
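
# (added note, not part of the original gist) MvNormal(n_inf_params, 1) is a zero-mean
# isotropic Gaussian prior with unit standard deviation over all dropout parameters.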
# let us consider a single minibatch of train_data
d = (x, y)
lengths = length.(dropout_params(m))
cumsum_lengths = cumsum(lengths)
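
# (added note, not part of the original gist) for the single DenseProbDropout layer in
# the toy model, lengths == [10] and cumsum_lengths == [10], so the slicing inside
# log_pdf below reduces to [params[1:10]].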
# Unnormalized log posterior used by AdvancedHMC: negative cross-entropy loss on the
# minibatch plus the log prior over the dropout parameters.
log_pdf(params) = begin
    -Flux.logitcrossentropy(
        replace_probs(
            m,
            [params[s:e] for (s, e) in zip(cumsum_lengths .- lengths .+ 1, cumsum_lengths)]
        )(first(d)),
        last(d)
    ) + logpdf(prior, params)
end
log_pdf(gpu(randn(10)))
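
# (added note, not part of the original gist) the adjoint below is a hand-rolled Zygote
# rule for Iterators.Zip, presumably needed so Zygote can differentiate through the zip
# used in the parameter-slicing comprehension inside log_pdf on the Zygote version used here.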
Zygote.@adjoint function Iterators.Zip(xs)
    back(dy::NamedTuple{(:is,)}) = tuple(dy.is)
    back(dy::AbstractArray) = ntuple(length(xs)) do d
        dx = map(y -> y[d], dy)
        length(dx) == length(xs[d]) ? dx : vcat(dx, falses(length(xs[d]) - length(dx)))
    end |> tuple
    back(::AbstractArray{Nothing}) = nothing
    Iterators.Zip(xs), back
end
initial_θ = vcat(dropout_params(m)...)
metric = UnitEuclideanMetric(Float32, n_inf_params)
hamiltonian = Hamiltonian(metric, log_pdf, Zygote)
initial_ϵ = Float32(0.1) #find_good_stepsize(hamiltonian, initial_θ)
integrator = Leapfrog(initial_ϵ)
proposal = StaticTrajectory(integrator, 1)
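
# (added note, not part of the original gist) StaticTrajectory(integrator, 1) takes a
# single leapfrog step per proposal, so each HMC move is essentially a one-step
# (MALA-like) update; the step size is fixed at 0.1 instead of calling find_good_stepsize.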
include("ahmc_gpu.jl")
CUDA.allowscalar(false)
samples = sample(hamiltonian, proposal, initial_θ, 100; progress=true)
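
# (added note, not part of the original gist) in the AdvancedHMC API, `sample` typically
# returns both the draws and per-transition statistics, so `samples` here may be a
# (draws, stats) tuple rather than a plain vector of parameter draws.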
# CUDA.allowscalar(true)
# samples = sample(hamiltonian, proposal, initial_θ, 100; progress=true)