MXNet Autograd Experiments
@arthurp · Last active February 14, 2019 01:42
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import mxnet as mx\n",
"from mxnet import autograd, gluon\n",
"import mxnet.ndarray as nd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"a = nd.random.uniform(shape=(4, 4,))\n",
"b = nd.random.uniform(shape=(4, 4,))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"net = gluon.nn.Dense(1, use_bias=False)\n",
"net.initialize(ctx=mx.cpu())\n",
"optimizer = mx.optimizer.SGD(learning_rate=0.05)\n",
"trainer = gluon.Trainer(net.collect_params(), optimizer)\n",
"w = net.params[\"dense0_weight\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now do a series of tests with this trivial network. Each test executes the exact same operations, but compute a gradient w.r.t. a different intermediate value."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.036206085\n",
"\n",
"[[2.0128188 2.3718886 2.7977395 2.132612 ]]\n",
"<NDArray 1x4 @cpu(0)>\n"
]
}
],
"source": [
"with autograd.record():\n",
" x = net(a)\n",
" y = net(b)\n",
" z = x - y\n",
" print(z.sum().asscalar()) # To force execution of z\n",
"# Now compute the gradients w.r.t **x** (instead of z)\n",
"x.backward()\n",
"print(w.grad())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.036206085\n",
"\n",
"[[2.3944685 1.815006 1.9250624 2.441893 ]]\n",
"<NDArray 1x4 @cpu(0)>\n"
]
}
],
"source": [
"with autograd.record():\n",
" x = net(a)\n",
" y = net(b)\n",
" z = x - y\n",
" print(z.sum().asscalar()) # To force execution of z\n",
"# Now compute the gradients w.r.t **y** (instead of z)\n",
"y.backward()\n",
"print(w.grad())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.036206085\n",
"\n",
"[[-0.38164973 0.5568826 0.8726771 -0.3092811 ]]\n",
"<NDArray 1x4 @cpu(0)>\n"
]
}
],
"source": [
"with autograd.record():\n",
" x = net(a)\n",
" y = net(b)\n",
" z = x - y\n",
" print(z.sum().asscalar()) # To force execution of z\n",
"# Now compute the gradients w.r.t **z**\n",
"z.backward()\n",
"print(w.grad())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `x` and `y` gradients are different and the `z` gradient is the subtraction of the two (as one would expect). This shows that recording does not overwrite any data in the `net` object and multiple calls are treated separately."
]
}
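,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal check of the claim above (not part of the original experiment): re-run the three backward passes, keeping a copy of each gradient, and confirm that the `z` gradient equals the `x` gradient minus the `y` gradient. It assumes the same `net`, `a`, `b`, and `w` defined earlier."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Verification sketch (not in the original gist); assumes `net`, `a`, `b`, `w` from above.\n",
"with autograd.record():\n",
" x = net(a)\n",
"x.backward()\n",
"grad_x = w.grad().copy()  # copy before the next backward overwrites it\n",
"\n",
"with autograd.record():\n",
" y = net(b)\n",
"y.backward()\n",
"grad_y = w.grad().copy()\n",
"\n",
"with autograd.record():\n",
" z = net(a) - net(b)\n",
"z.backward()\n",
"grad_z = w.grad().copy()\n",
"\n",
"# grad_z should equal grad_x - grad_y up to floating-point tolerance.\n",
"print((grad_z - (grad_x - grad_y)).abs().max().asscalar())"
]
}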
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}