Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save crcrpar/a35cd05cb01a8304b6a87da05956c69e to your computer and use it in GitHub Desktop.
Save crcrpar/a35cd05cb01a8304b6a87da05956c69e to your computer and use it in GitHub Desktop.
how-gradients-are-accumulated-in-real.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "how-gradients-are-accumulated-in-real.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyOrSG9Ui5zRqIIYlG+0V3qw",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/crcrpar/a35cd05cb01a8304b6a87da05956c69e/how-gradients-are-accumulated-in-real.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "23yq9ekpFo1p"
},
"source": [
"import torch\n",
"import torch.nn as nn"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zwem8NiTFsKx"
},
"source": [
"m = nn.Linear(16, 1)"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "aP1RncLkFvss"
},
"source": [
"def stub(m: nn.Module):\n",
"\n",
"    for i in range(5):\n",
"\n",
"        if (i + 1) % 3 == 0:\n",
"            m.zero_grad()\n",
"            print(\"Call `m.zero_grad()`\")\n",
"        x = torch.rand(32, 16, dtype=torch.float32)\n",
"        y = torch.rand(1, 1, dtype=torch.float32)\n",
"\n",
"        loss = torch.norm(y - m(x))\n",
"        loss.backward()\n",
"\n",
"        print(m.weight.grad)"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Af_RCQSGGAnY",
"outputId": "30316819-97b8-4c4f-eacf-3322d4e94652"
},
"source": [
"stub(m)"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": [
"tensor([[-10.8085, -11.0005, -12.1526, -11.1765, -12.1380, -11.3991, -11.7168,\n",
"         -11.7771, -11.1394, -12.1256, -10.6325,  -9.8322, -11.5118, -11.5470,\n",
"         -10.1624, -10.2474]])\n",
"tensor([[-13.4173, -13.6884, -14.9282, -13.7759, -15.3630, -13.7564, -14.5933,\n",
"         -14.6238, -14.3153, -14.7167, -13.6100, -12.3343, -14.1527, -14.4752,\n",
"         -12.9068, -13.4075]])\n",
"Call `m.zero_grad()`\n",
"tensor([[-2.1309, -2.7636, -2.6116, -2.5728, -3.0249, -2.1882, -2.7029, -3.1503,\n",
"         -2.5538, -2.7864, -1.7710, -2.1291, -2.5147, -2.0399, -2.1297, -2.6713]])\n",
"tensor([[-5.4099, -5.4315, -5.2920, -5.8538, -5.9617, -5.5935, -5.6534, -5.7883,\n",
"         -5.7014, -5.6990, -4.2258, -5.0843, -5.7490, -5.0534, -4.8436, -5.1192]])\n",
"tensor([[-8.4726, -8.7562, -8.1335, -8.3456, -9.2932, -8.3069, -8.7093, -8.5616,\n",
"         -8.6247, -7.9616, -7.2370, -8.0909, -8.9482, -7.5550, -7.6870, -7.2921]])\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "exBYWKXaGBNA"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment