@lbollar
Last active April 3, 2017 18:50
Julia ReverseDiff example with gradient descent for linear regression
using ReverseDiff: GradientTape, gradient, gradient!, compile
#=
Here I am using the classical stats convention of observations as rows and
variables as columns. Julia stores arrays in column-major order, so it is
common to see the opposite layout (observations as columns, variables as
rows); for certain kinds of operations this choice can have major
performance implications. A short illustrative sketch follows this comment.

I was using Julia v0.5 at the time of writing this.
=#
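#=
A quick sketch of the column-major point above (sum_colwise and sum_rowwise
are just toy helpers for illustration): with column-major storage, an inner
loop that runs down a column touches contiguous memory, while an inner loop
that runs across a row does not, so the column-wise version is typically
much faster on large matrices.
=#
function sum_colwise(M)
    s = 0.0
    for j in 1:size(M, 2), i in 1:size(M, 1)  # inner loop runs down a column
        s += M[i, j]
    end
    return s
end

function sum_rowwise(M)
    s = 0.0
    for i in 1:size(M, 1), j in 1:size(M, 2)  # inner loop runs across a row
        s += M[i, j]
    end
    return s
end

# e.g. compare @time sum_colwise(randn(3000, 3000))
#      against @time sum_rowwise(randn(3000, 3000))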
N = 1000
nvar = 5

# Simulated data: y is an exact linear function of X plus an intercept.
X = randn(N, nvar)
actual_b = [0.5, 2.1, -1.3, 1.7, -0.6]
actual_a = [1.23]
y = X * actual_b .+ actual_a

# Mean squared error of a linear model with intercept a and weights w.
loss(a, w) = sum(abs2.(y - (X * w .+ a))) / N
# Random initial parameters.
w = randn(nvar)
a = randn(1)

# Record the loss onto a gradient tape (the inputs only need to match the
# shapes of a and w), then compile the tape so repeated gradient calls are cheap.
const f_tape = GradientTape(loss, (randn(1), randn(nvar)))
const compiled_f_tape = compile(f_tape)

# Preallocated buffers that gradient! writes into.
results = (similar(a), similar(w))
# One gradient descent step. X and y are captured by the loss closure and
# baked into the compiled tape, so only the parameters are passed in.
function train(a, w; lr=0.1)
    gradient!(results, compiled_f_tape, (a, w))
    a -= lr * results[1]
    w -= lr * results[2]
    return (a, w)
end
for _ in 1:100
    a, w = train(a, w)
    println(a)
    println(w)
end
println()
@printf("Actual Beta: %f, %f, %f, %f, %f \n", w[1], w[2], w[3], w[4], w[5])
@printf("Actual Alpha: %f\n", a[1])