{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "pytorch_tips_yt_follow.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyP1YWlW87wmbehl+0r0EmNb",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ejmejm/1baeddbbe48f58dbced9c019c25ebf71/pytorch_tips_yt_follow.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EYpb4rz9GDFx"
},
"source": [
"# 7 PyTorch Tips You Should Know"
]
},
{
"cell_type": "code",
"metadata": {
"id": "xdIxsiZf-mAS"
},
"source": [
"import time\n",
"\n",
"import torch\n",
"from torch import nn"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "WEXFLOIkAlrF"
},
"source": [
"# 1. Create Tensors Directly on the Target Device"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3i0i1gk4-RkL",
"outputId": "81fe4095-592d-4025-88fc-c630a8534617"
},
"source": [
"start_time = time.time()\n",
"\n",
"for _ in range(100):\n",
" # Creating on the CPU, then transfering to the GPU\n",
" cpu_tensor = torch.ones((1000, 64, 64))\n",
" gpu_tensor = cpu_tensor.cuda()\n",
"\n",
"print('Total time: {:.3f}s'.format(time.time() - start_time))"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Total time: 0.584s\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "69pMZsQs-Sto",
"outputId": "5d791f8d-2659-4876-87e8-ddf15528fc2d"
},
"source": [
"start_time = time.time()\n",
"\n",
"for _ in range(100):\n",
" # Creating on GPU directly\n",
" cpu_tensor = torch.ones((1000, 64, 64), device='cuda')\n",
"\n",
"print('Total time: {:.3f}s'.format(time.time() - start_time))"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"Total time: 0.009s\n"
],
"name": "stdout"
}
]
},
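{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small device-agnostic sketch (not from the original video): pick the device once with `torch.cuda.is_available()` and pass it to every tensor constructor, so the same code also runs on machines without a GPU."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Pick the target device once, then create tensors directly on it\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n",
"direct_tensor = torch.ones((1000, 64, 64), device=device)\n",
"print(direct_tensor.device)"
],
"execution_count": null,
"outputs": []
},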
{
"cell_type": "markdown",
"metadata": {
"id": "ta-sxuoPAyJQ"
},
"source": [
"# 2. Use `Sequential` Layers When Possible"
]
},
{
"cell_type": "code",
"metadata": {
"id": "f2r4DXtL-RPP"
},
"source": [
"class ExampleModel(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
"\n",
" input_size = 2\n",
" output_size = 3\n",
" hidden_size = 16\n",
"\n",
" self.input_layer = nn.Linear(input_size, hidden_size)\n",
" self.input_activation = nn.ReLU()\n",
"\n",
" self.mid_layer = nn.Linear(hidden_size, hidden_size)\n",
" self.mid_activation = nn.ReLU()\n",
"\n",
" self.output_layer = nn.Linear(hidden_size, output_size)\n",
"\n",
" def forward(self, x):\n",
" z = self.input_layer(x)\n",
" z = self.input_activation(z)\n",
" \n",
" z = self.mid_layer(z)\n",
" z = self.mid_activation(z)\n",
" \n",
" out = self.output_layer(z)\n",
"\n",
" return out"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dOf_q_JT-V82",
"outputId": "9bc666a9-d040-4ddf-9fff-e32d6f24387c"
},
"source": [
"example_model = ExampleModel()\n",
"print(example_model)\n",
"print('Output shape:', example_model(torch.ones([100, 2])).shape)"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"ExampleModel(\n",
" (input_layer): Linear(in_features=2, out_features=16, bias=True)\n",
" (input_activation): ReLU()\n",
" (mid_layer): Linear(in_features=16, out_features=16, bias=True)\n",
" (mid_activation): ReLU()\n",
" (output_layer): Linear(in_features=16, out_features=3, bias=True)\n",
")\n",
"Output shape: torch.Size([100, 3])\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "pCHY0KtT-WGj"
},
"source": [
"class ExampleSequentialModel(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
"\n",
" input_size = 2\n",
" output_size = 3\n",
" hidden_size = 16\n",
"\n",
" self.layers = nn.Sequential(\n",
" nn.Linear(input_size, hidden_size),\n",
" nn.ReLU(),\n",
" nn.Linear(hidden_size, hidden_size),\n",
" nn.ReLU(),\n",
" nn.Linear(hidden_size, output_size))\n",
"\n",
" def forward(self, x):\n",
" out = self.layers(x)\n",
" return out"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m1PVgMcT-WNT",
"outputId": "c1c09ad7-50fe-46e9-9c7a-6d05654c1e66"
},
"source": [
"example_seq_model = ExampleSequentialModel()\n",
"print(example_seq_model)\n",
"print('Output shape:', example_seq_model(torch.ones([100, 2])).shape)"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": [
"ExampleSequentialModel(\n",
" (layers): Sequential(\n",
" (0): Linear(in_features=2, out_features=16, bias=True)\n",
" (1): ReLU()\n",
" (2): Linear(in_features=16, out_features=16, bias=True)\n",
" (3): ReLU()\n",
" (4): Linear(in_features=16, out_features=3, bias=True)\n",
" )\n",
")\n",
"Output shape: torch.Size([100, 3])\n"
],
"name": "stdout"
}
]
},
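{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an aside (not in the original notebook), `nn.Sequential` also accepts an `OrderedDict`, which keeps the readable layer names from the first version while still collapsing `forward` into a single call."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"from collections import OrderedDict\n",
"\n",
"# Named Sequential: same behavior, but the repr keeps descriptive layer names\n",
"named_seq_model = nn.Sequential(OrderedDict([\n",
"    ('input_layer', nn.Linear(2, 16)),\n",
"    ('input_activation', nn.ReLU()),\n",
"    ('mid_layer', nn.Linear(16, 16)),\n",
"    ('mid_activation', nn.ReLU()),\n",
"    ('output_layer', nn.Linear(16, 3))]))\n",
"\n",
"print(named_seq_model)\n",
"print('Output shape:', named_seq_model(torch.ones([100, 2])).shape)"
],
"execution_count": null,
"outputs": []
},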
{
"cell_type": "markdown",
"metadata": {
"id": "OLGRA4CyAztx"
},
"source": [
"# 3. Don't Make Lists of Layers"
]
},
{
"cell_type": "code",
"metadata": {
"id": "uI2xZ3EP-Xkp"
},
"source": [
"class BadListModel(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
"\n",
" input_size = 2\n",
" output_size = 3\n",
" hidden_size = 16\n",
"\n",
" self.input_layer = nn.Linear(input_size, hidden_size)\n",
" self.input_activation = nn.ReLU()\n",
"\n",
" # Fairly common when using residual layers\n",
" self.mid_layers = []\n",
" for _ in range(5):\n",
" self.mid_layers.append(nn.Linear(hidden_size, hidden_size))\n",
" self.mid_layers.append(nn.ReLU())\n",
"\n",
" self.output_layer = nn.Linear(hidden_size, output_size)\n",
"\n",
" def forward(self, x):\n",
" z = self.input_layer(x)\n",
" z = self.input_activation(z)\n",
" \n",
" for layer in self.mid_layers:\n",
" z = layer(z)\n",
" \n",
" out = self.output_layer(z)\n",
"\n",
" return out"
],
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "48GAPovU-Xe1",
"outputId": "fe2978cd-8f8e-4af0-9cee-8de88764f0cb"
},
"source": [
"bad_list_model = BadListModel()\n",
"print('Output shape:', bad_list_model(torch.ones([100, 2])).shape)"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": [
"Output shape: torch.Size([100, 3])\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 358
},
"id": "t6n32oG_-XXp",
"outputId": "4fd28d4c-0283-4922-dd4f-242088241632"
},
"source": [
"gpu_input = torch.ones([100, 2], device='cuda')\n",
"gpu_bad_list_model = bad_list_model.cuda()\n",
"print('Output shape:', bad_list_model(gpu_input).shape)"
],
"execution_count": 13,
"outputs": [
{
"output_type": "error",
"ename": "RuntimeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-e523900f19d5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mgpu_input\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'cuda'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mgpu_bad_list_model\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbad_list_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Output shape:'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbad_list_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgpu_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 887\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 889\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 890\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 891\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-11-2df20007fc89>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmid_layers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_layer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 887\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 889\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 890\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 891\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 94\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(input, weight, bias)\u001b[0m\n\u001b[1;32m 1751\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhas_torch_function_variadic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1752\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mhandle_torch_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1753\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1754\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1755\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "n9z0YF7oGZXf"
},
"source": [
"## Better Way to Do This"
]
},
{
"cell_type": "code",
"metadata": {
"id": "EnnyjZp3-Y0a"
},
"source": [
"class CorrectListModel(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
"\n",
" input_size = 2\n",
" output_size = 3\n",
" hidden_size = 16\n",
"\n",
" self.input_layer = nn.Linear(input_size, hidden_size)\n",
" self.input_activation = nn.ReLU()\n",
"\n",
" # Fairly common when using residual layers\n",
" self.mid_layers = []\n",
" for _ in range(5):\n",
" self.mid_layers.append(nn.Linear(hidden_size, hidden_size))\n",
" self.mid_layers.append(nn.ReLU())\n",
" self.mid_layers = nn.Sequential(*self.mid_layers)\n",
"\n",
" self.output_layer = nn.Linear(hidden_size, output_size)\n",
"\n",
" def forward(self, x):\n",
" z = self.input_layer(x)\n",
" z = self.input_activation(z)\n",
" z = self.mid_layers(z)\n",
" out = self.output_layer(z)\n",
"\n",
" return out"
],
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zJt9-A5a-Ys5",
"outputId": "062bf1c9-5ca1-4b50-8d9c-0761b70997b6"
},
"source": [
"correct_list_model = CorrectListModel()\n",
"gpu_input = torch.ones([100, 2], device='cuda')\n",
"gpu_correct_list_model = correct_list_model.cuda()\n",
"print('Output shape:', correct_list_model(gpu_input).shape)"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"text": [
"Output shape: torch.Size([100, 3])\n"
],
"name": "stdout"
}
]
},
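{
"cell_type": "markdown",
"metadata": {},
"source": [
"The comment above mentions residual layers; when you genuinely need the intermediate activations (so a single `Sequential` call is not enough), `nn.ModuleList` registers the layers properly while still letting you loop over them. A minimal sketch, not from the original video:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"class ResidualListModel(nn.Module):\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        hidden_size = 16\n",
"\n",
"        self.input_layer = nn.Linear(2, hidden_size)\n",
"        # ModuleList registers each layer, so .cuda() and .parameters() see them\n",
"        self.mid_layers = nn.ModuleList(\n",
"            [nn.Linear(hidden_size, hidden_size) for _ in range(5)])\n",
"        self.output_layer = nn.Linear(hidden_size, 3)\n",
"\n",
"    def forward(self, x):\n",
"        z = torch.relu(self.input_layer(x))\n",
"        for layer in self.mid_layers:\n",
"            z = z + torch.relu(layer(z))  # residual connection\n",
"        return self.output_layer(z)\n",
"\n",
"residual_model = ResidualListModel().cuda()\n",
"print('Output shape:', residual_model(torch.ones([100, 2], device='cuda')).shape)"
],
"execution_count": null,
"outputs": []
},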
{
"cell_type": "markdown",
"metadata": {
"id": "lTlaVIitAzxJ"
},
"source": [
"# 4. Make Use of Distributions"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wNgAZKGh-bgX",
"outputId": "05065134-02a3-4763-9d06-3d6cf70b1921"
},
"source": [
"# Setup\n",
"example_model = ExampleModel()\n",
"input_tensor = torch.rand(5, 2)\n",
"output = example_model(input_tensor)\n",
"print(output)"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"tensor([[ 0.1965, 0.0558, -0.2112],\n",
" [ 0.2035, 0.0650, -0.2077],\n",
" [ 0.2150, 0.0577, -0.2096],\n",
" [ 0.1957, 0.0540, -0.2117],\n",
" [ 0.2045, 0.0566, -0.2085]], grad_fn=<AddmmBackward>)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hgyPL1Gn-baX"
},
"source": [
"from torch.distributions import Categorical\n",
"from torch.distributions.kl import kl_divergence"
],
"execution_count": 17,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rCU8D-mO-bTO",
"outputId": "f43f4d62-0323-43cb-f3a9-75f8520118e3"
},
"source": [
"dist = Categorical(logits=output)\n",
"dist"
],
"execution_count": 18,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Categorical(logits: torch.Size([5, 3]))"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0dCkCQyY-bKq",
"outputId": "5ef69a62-f9d3-43f4-c94e-a034298190bf"
},
"source": [
"# Get probabilities\n",
"dist.probs"
],
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[0.3946, 0.3428, 0.2625],\n",
" [0.3947, 0.3437, 0.2616],\n",
" [0.3986, 0.3406, 0.2607],\n",
" [0.3947, 0.3426, 0.2627],\n",
" [0.3962, 0.3417, 0.2621]], grad_fn=<SoftmaxBackward>)"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jAnfI0Dt-bEo",
"outputId": "141ba422-7b6b-47d4-da6a-0c131bf736ba"
},
"source": [
"# Take samples\n",
"dist.sample()"
],
"execution_count": 24,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([0, 1, 0, 0, 2])"
]
},
"metadata": {
"tags": []
},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f-BFWjGo-a9M",
"outputId": "318612ba-c2e2-4fc5-d25a-e35863b1cc05"
},
"source": [
"# Calculate the KL-Divergence\n",
"dist_1 = Categorical(logits=output[0])\n",
"dist_2 = Categorical(logits=output[1])\n",
"kl_divergence(dist_1, dist_2)"
],
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor(2.5076e-06, grad_fn=<SumBackward1>)"
]
},
"metadata": {
"tags": []
},
"execution_count": 25
}
]
},
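{
"cell_type": "markdown",
"metadata": {},
"source": [
"Distributions also expose `log_prob` and `entropy`, which is what makes them convenient for things like policy-gradient losses; a small follow-on sketch using the `dist` defined above:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Log-probability of sampled actions, and per-row entropy\n",
"samples = dist.sample()\n",
"print('log_prob:', dist.log_prob(samples))\n",
"print('entropy: ', dist.entropy())"
],
"execution_count": null,
"outputs": []
},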
{
"cell_type": "markdown",
"metadata": {
"id": "2qZwgkkjAzz-"
},
"source": [
"# 5. Use `detach()` On Long-Term Metrics"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MN-ZgkpX-dCG",
"outputId": "361ac300-15c1-48fb-d8d5-a12b837b5e5f"
},
"source": [
"# Setup\n",
"example_model = ExampleModel()\n",
"data_batches = [torch.rand((10, 2)) for _ in range(5)]\n",
"criterion = nn.MSELoss(reduce='mean')"
],
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.\n",
" warnings.warn(warning.format(ret))\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "j5tRrcMXKb4i"
},
"source": [
"## Bad Example"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y0WYtsqx-cwX",
"outputId": "297d6d2f-4002-44e4-fa60-254a936709f0"
},
"source": [
"losses = []\n",
"\n",
"# Training loop\n",
"for batch in data_batches:\n",
" output = example_model(batch)\n",
"\n",
" target = torch.rand((10, 3))\n",
" loss = criterion(output, target)\n",
" losses.append(loss)\n",
"\n",
" # Optimization happens here\n",
"\n",
"print(losses)"
],
"execution_count": 28,
"outputs": [
{
"output_type": "stream",
"text": [
"[tensor(0.4718, grad_fn=<MseLossBackward>), tensor(0.5156, grad_fn=<MseLossBackward>), tensor(0.6583, grad_fn=<MseLossBackward>), tensor(0.4429, grad_fn=<MseLossBackward>), tensor(0.4133, grad_fn=<MseLossBackward>)]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1P685bJgJvzn"
},
"source": [
"## Better Example"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f4zbLSpT-dbu",
"outputId": "864b398e-034a-4aff-a4b0-9abad42bc090"
},
"source": [
"losses = []\n",
"\n",
"# Training loop\n",
"for batch in data_batches:\n",
" output = example_model(batch)\n",
"\n",
" target = torch.rand((10, 3))\n",
" loss = criterion(output, target)\n",
" losses.append(loss.item()) # Or `loss.item()`\n",
"\n",
" # Optimization happens here\n",
"\n",
"print(losses)"
],
"execution_count": 31,
"outputs": [
{
"output_type": "stream",
"text": [
"[0.5439911484718323, 0.5461570620536804, 0.6738904118537903, 0.5780249834060669, 0.5130327939987183]\n"
],
"name": "stdout"
}
]
},
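{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to keep the losses as tensors (e.g. to average them on the GPU later), `detach()` drops the computation graph without copying anything to the CPU. A small sketch of that variant, not from the original video:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"losses = []\n",
"\n",
"for batch in data_batches:\n",
"    output = example_model(batch)\n",
"\n",
"    target = torch.rand((10, 3))\n",
"    loss = criterion(output, target)\n",
"    # detach() keeps the value but frees the graph attached to it\n",
"    losses.append(loss.detach())\n",
"\n",
"print(torch.stack(losses).mean())"
],
"execution_count": null,
"outputs": []
},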
{
"cell_type": "markdown",
"metadata": {
"id": "QkSVHK38Az3F"
},
"source": [
"# 6. Trick to Delete a Model from GPU"
]
},
{
"cell_type": "code",
"metadata": {
"id": "CocJHuhl-e_V"
},
"source": [
"import gc"
],
"execution_count": 32,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "mKlBHS-D-e4F"
},
"source": [
"example_model = ExampleModel().cuda()\n",
"\n",
"del example_model\n",
"\n",
"gc.collect()\n",
"# The model will normally stay on the cache until something takes it's place\n",
"torch.cuda.empty_cache()"
],
"execution_count": 33,
"outputs": []
},
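{
"cell_type": "markdown",
"metadata": {},
"source": [
"One way to sanity-check that the memory was actually released is `torch.cuda.memory_allocated()`; a small sketch (not in the original video):"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"example_model = ExampleModel().cuda()\n",
"print('Allocated before delete:', torch.cuda.memory_allocated())\n",
"\n",
"del example_model\n",
"gc.collect()\n",
"torch.cuda.empty_cache()\n",
"\n",
"print('Allocated after delete: ', torch.cuda.memory_allocated())"
],
"execution_count": null,
"outputs": []
},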
{
"cell_type": "markdown",
"metadata": {
"id": "uagavyWeAz6K"
},
"source": [
"# 7. Call `eval()` Before Testing"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vgkGWVcU-fgp",
"outputId": "a9727c7d-194d-4109-fcee-dd664ff83670"
},
"source": [
"example_model = ExampleModel()\n",
"\n",
"# Do training\n",
"\n",
"example_model.eval()\n",
"\n",
"# Do testing\n",
"\n",
"example_model.train()\n",
"\n",
"# Do training again"
],
"execution_count": 34,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"ExampleModel(\n",
" (input_layer): Linear(in_features=2, out_features=16, bias=True)\n",
" (input_activation): ReLU()\n",
" (mid_layer): Linear(in_features=16, out_features=16, bias=True)\n",
" (mid_activation): ReLU()\n",
" (output_layer): Linear(in_features=16, out_features=3, bias=True)\n",
")"
]
},
"metadata": {
"tags": []
},
"execution_count": 34
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XnvexQJY-gyH"
},
"source": [
"### Affects\n",
" - Dropout\n",
" - Batch Normalization\n",
" - RNNs\n",
" - Lazy Variants\n",
"\n",
"source: https://stackoverflow.com/questions/66534762/which-pytorch-modules-are-affected-by-model-eval-and-model-train"
]
},
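{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick demo (not from the original notebook) of why this matters: the same `Dropout` layer zeroes activations in `train()` mode but is a no-op in `eval()` mode."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"dropout = nn.Dropout(p=0.5)\n",
"x = torch.ones(8)\n",
"\n",
"dropout.train()\n",
"print('train():', dropout(x))  # roughly half the entries zeroed, the rest scaled by 2\n",
"\n",
"dropout.eval()\n",
"print('eval(): ', dropout(x))  # identity, all ones"
],
"execution_count": null,
"outputs": []
},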
{
"cell_type": "code",
"metadata": {
"id": "c28JegUPEBdl"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}