Automatic differentiation in 3 languages
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
public class Value | |
{ | |
public double Data { get; set; } | |
public double Gradient { get; set; } | |
private Action _backward; | |
private readonly List<Value> _prev; | |
public Value(double data, IEnumerable<Value> children = null) | |
{ | |
Data = data; | |
Gradient = 0; | |
_prev = children != null ? new List<Value>(children) : new List<Value>(); | |
_backward = () => {}; | |
} | |
public static bool operator ==(Value a, Value b) => a.Data.Equals(b.Data); | |
public static bool operator ==(double a, Value b) => a.Equals(b.Data); | |
public static bool operator ==(Value a, double b) => a.Data.Equals(b); | |
public static bool operator !=(Value a, Value b) => !a.Data.Equals(b.Data); | |
public static bool operator !=(double a, Value b) => !a.Equals(b.Data); | |
public static bool operator !=(Value a, double b) => !a.Data.Equals(b); | |
public static Value operator +(Value a, Value b) | |
{ | |
var ret = new Value(a.Data + b.Data, new[]{a, b}); | |
void B() | |
{ | |
a.Gradient += ret.Gradient; | |
b.Gradient += ret.Gradient; | |
} | |
ret._backward = B; | |
return ret; | |
} | |
public static Value operator +(double a, Value b) => new Value(a) + b; | |
public static Value operator +(Value a, double b) => a + new Value(b); | |
public static Value operator -(Value a) => a * -1; // Unary neg | |
public static Value operator -(Value a, Value b) => a + -b; | |
public static Value operator -(double a, Value b) => new Value(a) - b; | |
public static Value operator -(Value a, double b) => a - new Value(b); | |
public static Value operator *(Value a, Value b) | |
{ | |
var ret = new Value(a.Data * b.Data, new[]{a, b}); | |
void B() | |
{ | |
a.Gradient += b.Data * ret.Gradient; | |
b.Gradient += a.Data * ret.Gradient; | |
}; | |
ret._backward = B; | |
return ret; | |
} | |
public static Value operator *(double a, Value b) => new Value(a) * b; | |
public static Value operator *(Value a, double b) => a * new Value(b); | |
public static Value operator /(Value a, Value b) => a * Pow(b, -1); | |
public static Value operator /(double a, Value b) => new Value(a) / b; | |
public static Value operator /(Value a, double b) => a / new Value(b); | |
public static Value Pow(Value a, Value b) | |
{ | |
var ret = new Value(Math.Pow(a.Data, b.Data), new[]{a}); | |
void B() | |
{ | |
a.Gradient += b.Data * Math.Pow(a.Data, b.Data - 1) * ret.Gradient; | |
}; | |
ret._backward = B; | |
return ret; | |
} | |
public static Value Pow(Value a, double b) => Pow(a, new Value(b)); | |
public static Value Pow(double a, Value b) => Pow(new Value(a), b); | |
public Value Relu() | |
{ | |
var ret = new Value(Data < 0 ? 0 : Data, new[]{this}); | |
var tmpThis = this; | |
void B() | |
{ | |
tmpThis.Gradient += (ret.Data > 0 ? 1 : 0) * ret.Gradient; | |
}; | |
ret._backward = B; | |
return ret; | |
} | |
public void Backward() | |
{ | |
// topological order all of the children in the graph | |
var topology = new List<Value>(); | |
var visited = new HashSet<Value>(); | |
Gradient = 1; | |
void BuildTopology(Value v) | |
{ | |
if (visited.Contains(v)) return; | |
visited.Add(v); | |
foreach (var child in v._prev) BuildTopology(child); | |
topology.Add(v); | |
} | |
BuildTopology(this); | |
// go one variable at a time and apply the chain rule to get its gradient | |
for (var i = topology.Count - 1; i > 0; i--) topology[i]._backward(); | |
} | |
public override string ToString() => $"Value(Data={Data}, Gradient={Gradient})"; | |
} |
package org.example | |
import kotlin.math.pow | |
class Value (var data: Float, var children: ArrayList<Value> = arrayListOf(), private var op: String = "") { | |
var grad: Float = 0f | |
private var backward: (()->Unit)? = null | |
private var prev: ArrayList<Value> = children | |
operator fun plus(other: Value): Value { | |
val out = Value(this.data + other.data, arrayListOf(this, other), "+") | |
val backward = { | |
this.grad += out.grad | |
other.grad += out.grad | |
} | |
out.backward = backward | |
return out | |
} | |
operator fun plus(other: Float): Value { | |
return this + Value(other, arrayListOf(), "+") | |
} | |
operator fun times(other: Value): Value { | |
val out = Value(this.data * other.data, arrayListOf(this, other), "*") | |
val backward = { | |
this.grad += other.data * out.grad | |
other.grad += this.data * out.grad | |
} | |
out.backward = backward | |
return out | |
} | |
operator fun times(other: Float): Value { | |
return this * Value(other, arrayListOf(), "*") | |
} | |
fun relu() : Value { | |
val out = if (this.data < 0) { | |
Value(0.0f, arrayListOf(this), "ReLU") | |
} else { | |
Value(this.data, arrayListOf(this), "ReLU") | |
} | |
val backward = { | |
val mul = if (out.data > 0.0f) 1.0f else 0.0f | |
this.grad += out.grad * mul | |
} | |
out.backward = backward | |
return out | |
} | |
fun backward() { | |
// topological order all of the children in the graph | |
val topo: ArrayList<Value> = ArrayList() | |
val visited: HashSet<Value> = HashSet() | |
fun buildTopology (v: Value) { | |
if (v !in visited) { | |
visited.add(v) | |
for (child in v.prev) { | |
buildTopology(child) | |
} | |
topo.add(v) | |
} | |
} | |
buildTopology(this) | |
// go one variable at a time and apply the chain rule to get its gradient | |
this.grad = 1.0f | |
for (v in topo.reversed()){ | |
v.backward?.invoke() | |
} | |
} | |
fun pow(value: Int) : Value { | |
val out = Value(this.data.pow(value), arrayListOf(this), "**$value") | |
val backward = { | |
this.grad += (value * this.data.pow(value-1))*out.grad | |
} | |
out.backward = backward | |
return out | |
} | |
operator fun minus(other: Value): Value { | |
return this + (-other) | |
} | |
operator fun unaryMinus(): Value { | |
return this * -1f | |
} | |
operator fun div(other: Value): Value { | |
return this.pow(-1) * other | |
} | |
operator fun div(other: Float): Value { | |
return this * other.pow(-1) | |
} | |
override fun toString(): String { | |
return "Value(data=$data, grad=$grad, backward=$backward, prev=$prev, op='$op')" | |
} | |
} |
class Value: | |
""" stores a single scalar value and its gradient """ | |
def __init__(self, data, _children=(), _op=''): | |
self.data = data | |
self.grad = 0 | |
# internal variables used for autograd graph construction | |
self._backward = lambda: None | |
self._prev = set(_children) | |
self._op = _op # the op that produced this node, for graphviz / debugging / etc | |
def __add__(self, other): | |
other = other if isinstance(other, Value) else Value(other) | |
out = Value(self.data + other.data, (self, other), '+') | |
def _backward(): | |
self.grad += out.grad | |
other.grad += out.grad | |
out._backward = _backward | |
return out | |
def __mul__(self, other): | |
other = other if isinstance(other, Value) else Value(other) | |
out = Value(self.data * other.data, (self, other), '*') | |
def _backward(): | |
self.grad += other.data * out.grad | |
other.grad += self.data * out.grad | |
out._backward = _backward | |
return out | |
def __pow__(self, other): | |
assert isinstance(other, (int, float)), "only supporting int/float powers for now" | |
out = Value(self.data**other, (self,), f'**{other}') | |
def _backward(): | |
self.grad += (other * self.data**(other-1)) * out.grad | |
out._backward = _backward | |
return out | |
def relu(self): | |
out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU') | |
def _backward(): | |
self.grad += (out.data > 0) * out.grad | |
out._backward = _backward | |
return out | |
def backward(self): | |
# topological order all of the children in the graph | |
topo = [] | |
visited = set() | |
def build_topo(v): | |
if v not in visited: | |
visited.add(v) | |
for child in v._prev: | |
build_topo(child) | |
topo.append(v) | |
build_topo(self) | |
# go one variable at a time and apply the chain rule to get its gradient | |
self.grad = 1 | |
for v in reversed(topo): | |
v._backward() | |
def __neg__(self): # -self | |
return self * -1 | |
def __radd__(self, other): # other + self | |
return self + other | |
def __sub__(self, other): # self - other | |
return self + (-other) | |
def __rsub__(self, other): # other - self | |
return other + (-self) | |
def __rmul__(self, other): # other * self | |
return self * other | |
def __truediv__(self, other): # self / other | |
return self * other**-1 | |
def __rtruediv__(self, other): # other / self | |
return other * self**-1 | |
def __repr__(self): | |
return f"Value(data={self.data}, grad={self.grad})" |
using System; | |
using System.Collections.Generic; | |
using NUnit.Framework; | |
namespace TestsEdit | |
{ | |
public class ValueTests | |
{ | |
[Test] | |
public void BasicSimplePasses() | |
{ | |
var x = new Value(-4.0); | |
Assert.AreEqual(0.0, x.Gradient); | |
var z = 2.0 * x + 2.0 + x; | |
var q = z.Relu() + z * x; | |
var h = (z * z).Relu(); | |
var y = h + q + q * x; | |
y.Backward(); | |
Assert.IsTrue(!0.0.Equals(x.Gradient)); // Failure | |
Assert.AreEqual(46.0, x.Gradient); | |
Assert.AreEqual(-20.0, y.Data); | |
} | |
} | |
} |
package org.example | |
import org.junit.Test | |
import kotlin.test.assertEquals | |
class ValueTests { | |
@Test | |
fun `Basic Simple Passes`(){ | |
var x = Value(-4.0) | |
var z = 2.0 * x + 2.0 + x | |
var q = z.relu() + z * x | |
var h = (z * z).relu() | |
var y = h + q + q * x | |
y.backward() | |
assertTrue(0.0 != x.gradient) // Success | |
assertEquals(46.0, x.gradient) | |
assertEquals(-20.0, y.data) | |
} | |
} | |
operator fun Float.div(other: Value): Float { | |
return this / other.data | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment