{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tensor creation"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch\n",
"torch.set_default_tensor_type('torch.DoubleTensor')"
]
},
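{
"cell_type": "markdown",
"metadata": {},
"source": [
"With the default tensor type set to `torch.DoubleTensor`, every tensor created below is double precision, which is why the printed tensors all report `torch.DoubleTensor`."
]
},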
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"1.00000e-77 *\n",
" -0.0000 -1.7306 0.0000\n",
" 0.0000 0.0000 0.0000\n",
" 0.0000 0.0000 0.0000\n",
"[torch.DoubleTensor of size 3x3]\n",
"\n"
]
}
],
"source": [
"x = torch.Tensor(3,3)\n",
"print(x)"
]
},
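{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that `torch.Tensor(3, 3)` allocates memory without initializing it, so the values printed above are whatever happened to be in that memory, not zeros."
]
},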
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" 0.1083 1.1638 -0.0541\n",
" 0.3239 -0.7268 -0.2202\n",
"-0.7279 -0.4752 0.4285\n",
"[torch.DoubleTensor of size 3x3]\n",
"\n",
"Shape: torch.Size([3, 3])\n"
]
}
],
"source": [
"x = torch.randn(3, 3)\n",
"print(x)\n",
"\n",
"print(\"Shape: \", x.size())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.86889969 0.45310481 0.82972542]\n",
" [ 0.27869536 0.59557206 0.83190511]\n",
" [ 0.01545269 0.34996341 0.97978432]]\n"
]
}
],
"source": [
"import numpy as np\n",
"y = np.random.random(size=(3,3))\n",
"print(y)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" 0.8689 0.4531 0.8297\n",
" 0.2787 0.5956 0.8319\n",
" 0.0155 0.3500 0.9798\n",
"[torch.DoubleTensor of size 3x3]\n",
"\n",
"[[ 0.86889969 0.45310481 0.82972542]\n",
" [ 0.27869536 0.59557206 0.83190511]\n",
" [ 0.01545269 0.34996341 0.97978432]]\n"
]
}
],
"source": [
"y = torch.from_numpy(y)\n",
"print(y)\n",
"print(y.numpy())"
]
},
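{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small added sketch: `torch.from_numpy` shares memory with the source NumPy array, so an in-place change on one side is visible on the other."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Demonstrate the shared memory between the array and the tensor\n",
"a = np.zeros(3)\n",
"t = torch.from_numpy(a)\n",
"a += 1       # in-place update of the NumPy array...\n",
"print(t)     # ...shows up in the tensor as well"
]
},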
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"z = x + y \n",
" 0.9772 1.6169 0.7756\n",
" 0.6026 -0.1312 0.6117\n",
"-0.7124 -0.1253 1.4083\n",
"[torch.DoubleTensor of size 3x3]\n",
"\n",
"torch.add(x, y) \n",
" 0.9772 1.6169 0.7756\n",
" 0.6026 -0.1312 0.6117\n",
"-0.7124 -0.1253 1.4083\n",
"[torch.DoubleTensor of size 3x3]\n",
"\n",
"x = x + y \n",
" 0.9772 1.6169 0.7756\n",
" 0.6026 -0.1312 0.6117\n",
"-0.7124 -0.1253 1.4083\n",
"[torch.DoubleTensor of size 3x3]\n",
"\n"
]
}
],
"source": [
"# Opearions\n",
"z = x + y\n",
"print(\"z = x + y\", z)\n",
"\n",
"print(\"torch.add(x, y)\", torch.add(x, y))\n",
"\n",
"x.add_(y)\n",
"print(\"x = x + y\", x)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" 0.9772\n",
" 0.6026\n",
"-0.7124\n",
"[torch.DoubleTensor of size 3]\n",
"\n"
]
}
],
"source": [
"# Numpy like indexing\n",
"print(z[:,0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Using GPU\n",
"\n",
"# z = z.cuda()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Autograd"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from torch.autograd import Variable"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable containing:\n",
" 1 1\n",
" 1 1\n",
"[torch.DoubleTensor of size 2x2]\n",
"\n"
]
}
],
"source": [
"# requires_grad is True to say that we want to compute gradients\n",
"# This is an ariifact of the focus on NN. While training NN, we do not\n",
"# compute gradients for the input wrt to the loss (usually)\n",
"x = Variable(torch.ones(2, 2), requires_grad=True)\n",
"print(x)"
]
},
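{
"cell_type": "markdown",
"metadata": {},
"source": [
"An added illustration: a `Variable` wraps a tensor, reachable as `.data`, and accumulates gradients into `.grad`, which stays `None` until a backward pass has run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(x.data)   # the wrapped tensor\n",
"print(x.grad)   # None: no backward pass has run yet"
]
},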
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable containing:\n",
" 3 3\n",
" 3 3\n",
"[torch.DoubleTensor of size 2x2]\n",
"\n"
]
}
],
"source": [
"y = x + 2\n",
"print(y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### `y` was created as a result of an operation, so it has a `grad_fn` attribute. This contain the `Function` which created `y`."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<torch.autograd.function.AddConstantBackward at 0x10df92d68>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.grad_fn"
]
},
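{
"cell_type": "markdown",
"metadata": {},
"source": [
"By contrast, `x` was created directly by the user rather than by an operation, so it is a leaf of the graph and its `grad_fn` is `None`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(x.grad_fn)   # None: x is a user-created leaf Variable"
]
},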
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable containing:\n",
" 27 27\n",
" 27 27\n",
"[torch.DoubleTensor of size 2x2]\n",
" Variable containing:\n",
" 27\n",
"[torch.DoubleTensor of size 1]\n",
"\n"
]
}
],
"source": [
"z = y * y * 3\n",
"out = z.mean()\n",
"\n",
"print(z, out)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"out.backward()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable containing:\n",
" 4.5000 4.5000\n",
" 4.5000 4.5000\n",
"[torch.DoubleTensor of size 2x2]\n",
"\n"
]
}
],
"source": [
"print(x.grad)"
]
},
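{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick check of the value above: $o = \\frac{1}{4}\\sum_i 3(x_i+2)^2$, so $\\frac{\\partial o}{\\partial x_i} = \\frac{3}{2}(x_i+2)$, which is $4.5$ at $x_i = 1$, matching the printed gradient."
]
},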
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = torch.randn(3)\n",
"x = Variable(x, requires_grad=True)\n",
"\n",
"y = x * 2\n",
"while y.data.norm() < 1000:\n",
" y = y * 2\n",
"\n",
"print(y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gradients = torch.FloatTensor([0.1, 1.0, 0.0001])\n",
"y.backward(gradients)\n",
"\n",
"print(x.grad)"
]
},
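{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because `y` is not a scalar, `backward` takes a vector argument and computes the vector-Jacobian product $v^T J$. Here `y` is just `x` scaled by a power of two, so `x.grad` is `gradients` multiplied by that same factor."
]
},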
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Neural net definition"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"import torch.nn.functional as F"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Net (\n",
" (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n",
" (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
" (fc1): Linear (400 -> 120)\n",
" (fc2): Linear (120 -> 84)\n",
" (fc3): Linear (84 -> 10)\n",
")\n"
]
}
],
"source": [
"class Net(nn.Module):\n",
"\n",
" def __init__(self):\n",
" super(Net, self).__init__()\n",
" # 1 input image channel, 6 output channels, 5x5 square convolution\n",
" # kernel\n",
" self.conv1 = nn.Conv2d(1, 6, 5)\n",
" self.conv2 = nn.Conv2d(6, 16, 5)\n",
" # an affine operation: y = Wx + b\n",
" self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
" self.fc2 = nn.Linear(120, 84)\n",
" self.fc3 = nn.Linear(84, 10)\n",
"\n",
" def forward(self, x):\n",
" # Max pooling over a (2, 2) window\n",
" x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n",
" # If the size is a square you can only specify a single number\n",
" x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n",
" x = x.view(-1, self.num_flat_features(x))\n",
" x = F.relu(self.fc1(x))\n",
" x = F.relu(self.fc2(x))\n",
" x = self.fc3(x)\n",
" return x\n",
" \n",
" def num_flat_features(self, x):\n",
" size = x.size()[1:] # all dimensions except the batch dimension\n",
" num_features = 1\n",
" for s in size:\n",
" num_features *= s\n",
" return num_features\n",
"\n",
"net = Net()\n",
"print(net)"
]
},
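{
"cell_type": "markdown",
"metadata": {},
"source": [
"An added sketch: the learnable parameters of a model are returned by `net.parameters()`. For this `Net` there are ten tensors, a weight and a bias for each of the five layers."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"params = list(net.parameters())\n",
"print(len(params))        # 10: weight + bias for each of the 5 layers\n",
"print(params[0].size())   # conv1's weight: 6 x 1 x 5 x 5"
]
},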
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable containing:\n",
"-0.0718 -0.0359 -0.0265 0.0337 -0.0266 0.1690 -0.0900 -0.0714 0.1053 -0.1049\n",
"[torch.DoubleTensor of size 1x10]\n",
"\n"
]
}
],
"source": [
"input = Variable(torch.randn(1, 1, 32, 32))\n",
"out = net(input)\n",
"print(out)"
]
},
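{
"cell_type": "markdown",
"metadata": {},
"source": [
"`torch.nn` only supports mini-batches: `nn.Conv2d` expects a 4D input of `nSamples x nChannels x Height x Width`, which is why the dummy input above has shape `(1, 1, 32, 32)` rather than `(1, 32, 32)`."
]
},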
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable containing:\n",
" 38.6093\n",
"[torch.DoubleTensor of size 1]\n",
"\n"
]
}
],
"source": [
"output = net(input)\n",
"target = Variable(torch.arange(1, 11)) # a dummy target, for example\n",
"criterion = nn.MSELoss()\n",
"\n",
"loss = criterion(output, target)\n",
"print(loss)"
]
},
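{
"cell_type": "markdown",
"metadata": {},
"source": [
"Following `loss` backwards through `grad_fn` walks the graph that produced it: input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss. The next cell takes the first few steps along this chain."
]
},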
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<torch.autograd.function.MSELossBackward object at 0x11134aa98>\n",
"<torch.autograd.function.AddmmBackward object at 0x11134a9a8>\n",
"<AccumulateGrad object at 0x111347240>\n"
]
}
],
"source": [
"print(loss.grad_fn) # MSELoss\n",
"print(loss.grad_fn.next_functions[0][0]) # Linear\n",
"print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"conv1.bias.grad before backward\n",
"None\n",
"conv1.bias.grad after backward\n",
"Variable containing:\n",
" 0.0837\n",
"-0.0079\n",
"-0.1607\n",
" 0.0878\n",
" 0.0214\n",
"-0.0931\n",
"[torch.DoubleTensor of size 6]\n",
"\n"
]
}
],
"source": [
"net.zero_grad() # zeroes the gradient buffers of all parameters\n",
"\n",
"print('conv1.bias.grad before backward')\n",
"print(net.conv1.bias.grad)\n",
"\n",
"loss.backward()\n",
"\n",
"print('conv1.bias.grad after backward')\n",
"print(net.conv1.bias.grad)"
]
},
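{
"cell_type": "markdown",
"metadata": {},
"source": [
"The simplest update rule is plain stochastic gradient descent, `weight = weight - learning_rate * gradient`; the next cell implements it by hand using the in-place `sub_`."
]
},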
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"for f in net.parameters():\n",
" f.data.sub_(f.grad.data * learning_rate)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch.optim as optim\n",
"\n",
"# create your optimizer\n",
"optimizer = optim.SGD(net.parameters(), lr=0.01)\n",
"\n",
"# in your training loop:\n",
"optimizer.zero_grad() # zero the gradient buffers\n",
"output = net(input)\n",
"loss = criterion(output, target)\n",
"loss.backward()\n",
"optimizer.step() # Does the update"
]
},
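{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch, not part of the original gist, of how the pieces above combine into a training loop; it reuses the dummy `input` and `target` from the earlier cells, so the loss should shrink as the net overfits them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for step in range(5):\n",
"    optimizer.zero_grad()             # clear accumulated gradients\n",
"    output = net(input)               # forward pass\n",
"    loss = criterion(output, target)  # MSE against the dummy target\n",
"    loss.backward()                   # backpropagate\n",
"    optimizer.step()                  # apply the SGD update\n",
"    print(step, loss.data[0])         # loss at each iteration"
]
},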
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}