@sradc
Created October 11, 2021 16:43
vectorised autodiff example
# minimal example, using code from: https://sidsite.com/posts/autodiff/
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
class Variable:
    """A node in the computation graph: stores a value, plus the local
    gradients with respect to the variables it was computed from."""
    def __init__(self, value, local_gradients=()):
        self.value = value
        self.local_gradients = local_gradients

    def __add__(self, other):
        return add(self, other)

    def __mul__(self, other):
        return mul(self, other)

    def __sub__(self, other):
        return add(self, neg(other))
def get_gradients(variable):
    """ Compute the first derivatives of `variable`
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)

    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)

    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients
def add(a, b):
    value = a.value + b.value
    local_gradients = (
        (a, 1),
        (b, 1)
    )
    return Variable(value, local_gradients)

def mul(a, b):
    value = a.value * b.value
    local_gradients = (
        (a, b.value),
        (b, a.value)
    )
    return Variable(value, local_gradients)

def neg(a):
    value = -1 * a.value
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)
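
# Illustrative sanity check (added; not part of the original gist):
# differentiate a small scalar expression and compare with derivatives
# computed by hand. For f = (a + b) * a with a=2, b=3:
# df/da = 2a + b = 7 and df/db = a = 2.
a = Variable(2.0)
b = Variable(3.0)
f = (a + b) * a
grads = get_gradients(f)
assert grads[a] == 7.0
assert grads[b] == 2.0
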
# convert NumPy array into array of Variable objects:
to_var = np.vectorize(lambda x : Variable(x))
# get values from array of Variable objects:
to_vals = np.vectorize(lambda variable : variable.value)
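
# Quick round-trip check (added for illustration; not in the original):
# to_var / to_vals act elementwise, so array shape and values are preserved.
demo = np.arange(6.0).reshape(2, 3)
assert np.array_equal(to_vals(to_var(demo)), demo)
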
# Create and train a single linear layer on random data
np.random.seed(0)
def update_weights(weights, gradients, lrate):
    """Gradient-descent step: update each weight Variable in place."""
    for _, weight in np.ndenumerate(weights):
        weight.value -= lrate * gradients[weight]
input_size = 50
output_size = 10
lrate = 0.001
x = to_var(np.random.random(input_size))
y_true = to_var(np.random.random(output_size))
weights = to_var(np.random.random((input_size, output_size)))
loss_vals = []
for i in range(100):
    y_pred = np.dot(x, weights)  # forward pass through the linear layer
    loss = np.sum((y_true - y_pred) * (y_true - y_pred))  # squared-error loss
    loss_vals.append(loss.value)
    gradients = get_gradients(loss)  # backward pass
    update_weights(weights, gradients, lrate)
plt.plot(loss_vals)
plt.xlabel("Time step")
plt.ylabel("Loss")
plt.title("Single linear layer learning")
plt.show()
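
# Optional numerical check (added for illustration; not part of the original
# gist): recompute gradients at the final weights and compare the reverse-mode
# gradient of one weight against a central finite difference.
y_pred = np.dot(x, weights)
loss = np.sum((y_true - y_pred) * (y_true - y_pred))
gradients = get_gradients(loss)

def loss_from_values(w_vals):
    # the same loss, evaluated on plain float arrays rather than Variables
    y = np.dot(to_vals(x), w_vals)
    return np.sum((to_vals(y_true) - y) ** 2)

eps = 1e-6
w_vals = to_vals(weights).astype(float)
w_plus, w_minus = w_vals.copy(), w_vals.copy()
w_plus[0, 0] += eps
w_minus[0, 0] -= eps
finite_diff = (loss_from_values(w_plus) - loss_from_values(w_minus)) / (2 * eps)
print("finite difference:", finite_diff)
print("autodiff gradient:", gradients[weights[0, 0]])  # these should closely agree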