@sradc
Created October 11, 2021 16:43
vectorised autodiff example
# minimal example, using code from: https://sidsite.com/posts/autodiff/
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
class Variable:
    """A node in the computation graph: stores a value, plus the local
    gradients with respect to the variables it was computed from."""
    def __init__(self, value, local_gradients=()):
        self.value = value
        self.local_gradients = local_gradients

    def __add__(self, other):
        return add(self, other)

    def __mul__(self, other):
        return mul(self, other)

    def __sub__(self, other):
        return add(self, neg(other))
def get_gradients(variable):
    """ Compute the first derivatives of `variable`
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)

    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)

    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients
def add(a, b):
    value = a.value + b.value
    local_gradients = (
        (a, 1),
        (b, 1)
    )
    return Variable(value, local_gradients)

def mul(a, b):
    value = a.value * b.value
    local_gradients = (
        (a, b.value),
        (b, a.value)
    )
    return Variable(value, local_gradients)

def neg(a):
    value = -1 * a.value
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)
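
# Illustrative sanity check (added; not part of the original gist):
# differentiate a small scalar expression and compare with derivatives
# computed by hand. For f = (a + b) * a with a=2, b=3:
# df/da = 2a + b = 7 and df/db = a = 2.
a = Variable(2.0)
b = Variable(3.0)
f = (a + b) * a
grads = get_gradients(f)
assert grads[a] == 7.0
assert grads[b] == 2.0
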
# convert NumPy array into array of Variable objects:
to_var = np.vectorize(lambda x : Variable(x))
# get values from array of Variable objects:
to_vals = np.vectorize(lambda variable : variable.value)
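
# Quick round-trip check (added for illustration; not in the original):
# to_var / to_vals act elementwise, so array shape and values are preserved.
demo = np.arange(6.0).reshape(2, 3)
assert np.array_equal(to_vals(to_var(demo)), demo)
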
# Create and train a single linear layer on random data
np.random.seed(0)
def update_weights(weights, gradients, lrate):
    """Gradient-descent step: update each weight Variable in place."""
    for _, weight in np.ndenumerate(weights):
        weight.value -= lrate * gradients[weight]
input_size = 50
output_size = 10
lrate = 0.001
x = to_var(np.random.random(input_size))
y_true = to_var(np.random.random(output_size))
weights = to_var(np.random.random((input_size, output_size)))
loss_vals = []
for i in range(100):
    y_pred = np.dot(x, weights)  # forward pass through the linear layer
    loss = np.sum((y_true - y_pred) * (y_true - y_pred))  # squared-error loss
    loss_vals.append(loss.value)
    gradients = get_gradients(loss)  # backward pass
    update_weights(weights, gradients, lrate)
plt.plot(loss_vals)
plt.xlabel("Time step")
plt.ylabel("Loss")
plt.title("Single linear layer learning")
plt.show()
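
# Optional numerical check (added for illustration; not part of the original
# gist): recompute gradients at the final weights and compare the reverse-mode
# gradient of one weight against a central finite difference.
y_pred = np.dot(x, weights)
loss = np.sum((y_true - y_pred) * (y_true - y_pred))
gradients = get_gradients(loss)

def loss_from_values(w_vals):
    # the same loss, evaluated on plain float arrays rather than Variables
    y = np.dot(to_vals(x), w_vals)
    return np.sum((to_vals(y_true) - y) ** 2)

eps = 1e-6
w_vals = to_vals(weights).astype(float)
w_plus, w_minus = w_vals.copy(), w_vals.copy()
w_plus[0, 0] += eps
w_minus[0, 0] -= eps
finite_diff = (loss_from_values(w_plus) - loss_from_values(w_minus)) / (2 * eps)
print("finite difference:", finite_diff)
print("autodiff gradient:", gradients[weights[0, 0]])  # these should closely agree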