crcrpar/how-gradients-are-accumulated-in-real.ipynb

## how-gradients-are-accumulated-in-real.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "how-gradients-are-accumulated-in-real.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyOrSG9Ui5zRqIIYlG+0V3qw",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/crcrpar/a35cd05cb01a8304b6a87da05956c69e/how-gradients-are-accumulated-in-real.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "23yq9ekpFo1p"
      },
      "source": [
        "import torch\n",
        "import torch.nn as nn"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zwem8NiTFsKx"
      },
      "source": [
        "# Seed the global RNG so the layer's initial weights, and any random\n",
        "# tensors drawn afterwards, are reproducible under Restart & Run All.\n",
        "torch.manual_seed(0)\n",
        "m = nn.Linear(16, 1)"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aP1RncLkFvss"
      },
      "source": [
        "def stub(\n",
        "    m: nn.Module,\n",
        "    num_steps: int = 5,\n",
        "    zero_every: int = 3,\n",
        "    batch_size: int = 32,\n",
        "    in_features: int = 16,\n",
        ") -> None:\n",
        "    \"\"\"Run backward passes on `m` and print how `m.weight.grad` evolves.\n",
        "\n",
        "    Each step draws a random input batch and a random target, computes\n",
        "    `torch.norm(y - m(x))` and calls `backward()` on it. Nothing clears the\n",
        "    gradients between steps, so the printed tensor keeps accumulating until\n",
        "    `m.zero_grad()` is called at the start of every `zero_every`-th step.\n",
        "\n",
        "    Args:\n",
        "        m: Module exposing a `weight` parameter and accepting inputs of\n",
        "            shape `(batch_size, in_features)`, e.g. `nn.Linear(16, 1)`.\n",
        "        num_steps: Number of forward/backward passes to run.\n",
        "        zero_every: Call `m.zero_grad()` before every `zero_every`-th step.\n",
        "        batch_size: Number of rows in each random input batch.\n",
        "        in_features: Number of columns in each random input batch.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: If `zero_every` is smaller than 1.\n",
        "    \"\"\"\n",
        "    if zero_every < 1:\n",
        "        raise ValueError(f\"zero_every must be >= 1, got {zero_every}\")\n",
        "\n",
        "    for step in range(1, num_steps + 1):\n",
        "        if step % zero_every == 0:\n",
        "            m.zero_grad()\n",
        "            print(\"Call `m.zero_grad()`\")\n",
        "\n",
        "        x = torch.rand(batch_size, in_features, dtype=torch.float32)\n",
        "        # A single (1, 1) target; the subtraction broadcasts it against m(x).\n",
        "        y = torch.rand(1, 1, dtype=torch.float32)\n",
        "\n",
        "        loss = torch.norm(y - m(x))\n",
        "        loss.backward()\n",
        "\n",
        "        # Printed after every backward pass so the accumulation is visible.\n",
        "        print(m.weight.grad)"
      ],
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Af_RCQSGGAnY",
        "outputId": "30316819-97b8-4c4f-eacf-3322d4e94652"
      },
      "source": [
        "stub(m)"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "tensor([[-10.8085, -11.0005, -12.1526, -11.1765, -12.1380, -11.3991, -11.7168,\n",
            "         -11.7771, -11.1394, -12.1256, -10.6325,  -9.8322, -11.5118, -11.5470,\n",
            "         -10.1624, -10.2474]])\n",
            "tensor([[-13.4173, -13.6884, -14.9282, -13.7759, -15.3630, -13.7564, -14.5933,\n",
            "         -14.6238, -14.3153, -14.7167, -13.6100, -12.3343, -14.1527, -14.4752,\n",
            "         -12.9068, -13.4075]])\n",
            "Call `m.zero_grad()`\n",
            "tensor([[-2.1309, -2.7636, -2.6116, -2.5728, -3.0249, -2.1882, -2.7029, -3.1503,\n",
            "         -2.5538, -2.7864, -1.7710, -2.1291, -2.5147, -2.0399, -2.1297, -2.6713]])\n",
            "tensor([[-5.4099, -5.4315, -5.2920, -5.8538, -5.9617, -5.5935, -5.6534, -5.7883,\n",
            "         -5.7014, -5.6990, -4.2258, -5.0843, -5.7490, -5.0534, -4.8436, -5.1192]])\n",
            "tensor([[-8.4726, -8.7562, -8.1335, -8.3456, -9.2932, -8.3069, -8.7093, -8.5616,\n",
            "         -8.6247, -7.9616, -7.2370, -8.0909, -8.9482, -7.5550, -7.6870, -7.2921]])\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "exBYWKXaGBNA"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "how-gradients-are-accumulated-in-real.ipynb",
	"provenance": [],
	"collapsed_sections": [],
	"authorship_tag": "ABX9TyOrSG9Ui5zRqIIYlG+0V3qw",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/crcrpar/a35cd05cb01a8304b6a87da05956c69e/how-gradients-are-accumulated-in-real.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "23yq9ekpFo1p"
	},
	"source": [
	"import torch\n",
	"import torch.nn as nn"
	],
	"execution_count": 1,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "zwem8NiTFsKx"
	},
	"source": [
	"# Seed the global RNG so the layer's initial weights, and any random\n",
	"# tensors drawn afterwards, are reproducible under Restart & Run All.\n",
	"torch.manual_seed(0)\n",
	"m = nn.Linear(16, 1)"
	],
	"execution_count": 2,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "aP1RncLkFvss"
	},
	"source": [
	"def stub(\n",
	"    m: nn.Module,\n",
	"    num_steps: int = 5,\n",
	"    zero_every: int = 3,\n",
	"    batch_size: int = 32,\n",
	"    in_features: int = 16,\n",
	") -> None:\n",
	"    \"\"\"Run backward passes on `m` and print how `m.weight.grad` evolves.\n",
	"\n",
	"    Each step draws a random input batch and a random target, computes\n",
	"    `torch.norm(y - m(x))` and calls `backward()` on it. Nothing clears the\n",
	"    gradients between steps, so the printed tensor keeps accumulating until\n",
	"    `m.zero_grad()` is called at the start of every `zero_every`-th step.\n",
	"\n",
	"    Args:\n",
	"        m: Module exposing a `weight` parameter and accepting inputs of\n",
	"            shape `(batch_size, in_features)`, e.g. `nn.Linear(16, 1)`.\n",
	"        num_steps: Number of forward/backward passes to run.\n",
	"        zero_every: Call `m.zero_grad()` before every `zero_every`-th step.\n",
	"        batch_size: Number of rows in each random input batch.\n",
	"        in_features: Number of columns in each random input batch.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: If `zero_every` is smaller than 1.\n",
	"    \"\"\"\n",
	"    if zero_every < 1:\n",
	"        raise ValueError(f\"zero_every must be >= 1, got {zero_every}\")\n",
	"\n",
	"    for step in range(1, num_steps + 1):\n",
	"        if step % zero_every == 0:\n",
	"            m.zero_grad()\n",
	"            print(\"Call `m.zero_grad()`\")\n",
	"\n",
	"        x = torch.rand(batch_size, in_features, dtype=torch.float32)\n",
	"        # A single (1, 1) target; the subtraction broadcasts it against m(x).\n",
	"        y = torch.rand(1, 1, dtype=torch.float32)\n",
	"\n",
	"        loss = torch.norm(y - m(x))\n",
	"        loss.backward()\n",
	"\n",
	"        # Printed after every backward pass so the accumulation is visible.\n",
	"        print(m.weight.grad)"
	],
	"execution_count": 7,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "Af_RCQSGGAnY",
	"outputId": "30316819-97b8-4c4f-eacf-3322d4e94652"
	},
	"source": [
	"stub(m)"
	],
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"tensor([[-10.8085, -11.0005, -12.1526, -11.1765, -12.1380, -11.3991, -11.7168,\n",
	" -11.7771, -11.1394, -12.1256, -10.6325, -9.8322, -11.5118, -11.5470,\n",
	" -10.1624, -10.2474]])\n",
	"tensor([[-13.4173, -13.6884, -14.9282, -13.7759, -15.3630, -13.7564, -14.5933,\n",
	" -14.6238, -14.3153, -14.7167, -13.6100, -12.3343, -14.1527, -14.4752,\n",
	" -12.9068, -13.4075]])\n",
	"Call `m.zero_grad()`\n",
	"tensor([[-2.1309, -2.7636, -2.6116, -2.5728, -3.0249, -2.1882, -2.7029, -3.1503,\n",
	" -2.5538, -2.7864, -1.7710, -2.1291, -2.5147, -2.0399, -2.1297, -2.6713]])\n",
	"tensor([[-5.4099, -5.4315, -5.2920, -5.8538, -5.9617, -5.5935, -5.6534, -5.7883,\n",
	" -5.7014, -5.6990, -4.2258, -5.0843, -5.7490, -5.0534, -4.8436, -5.1192]])\n",
	"tensor([[-8.4726, -8.7562, -8.1335, -8.3456, -9.2932, -8.3069, -8.7093, -8.5616,\n",
	" -8.6247, -7.9616, -7.2370, -8.0909, -8.9482, -7.5550, -7.6870, -7.2921]])\n"
	],
	"name": "stdout"
	}
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "exBYWKXaGBNA"
	},
	"source": [
	""
	],
	"execution_count": null,
	"outputs": []
	}
	]
	}