@kaczmarj
Created June 12, 2020 19:22
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Welcome To Colaboratory",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "yXdB-l0w7vM5",
"colab_type": "text"
},
"source": [
"# `tf.distribute.MirroredStrategy` and cuDNN GRU\n",
"\n",
"`tf.distribute.MirroredStrategy` prevents use of the cuDNN GRU implementation, even when using a single GPU."
]
},
{
"cell_type": "code",
"metadata": {
"id": "QGjjhSEi8TFK",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"\n",
"if os.environ[\"COLAB_GPU\"] != \"1\":\n",
" raise RuntimeError(\"Use the GPU runtime.\")"
],
"execution_count": 0,
"outputs": []
},
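{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Supplementary cell, not part of the original gist:* a minimal sanity check, using the public `tf.config.list_physical_devices` API, that TensorFlow itself can see a GPU. The environment-variable check above only applies on Colab."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"import tensorflow as tf\n",
"\n",
"# The COLAB_GPU check above only inspects an environment variable;\n",
"# this confirms that TensorFlow can actually see the GPU device.\n",
"gpus = tf.config.list_physical_devices(\"GPU\")\n",
"print(gpus)\n",
"assert len(gpus) >= 1, \"No GPU visible to TensorFlow.\""
],
"execution_count": 0,
"outputs": []
},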
{
"cell_type": "code",
"metadata": {
"id": "Y5IfYDTY7fRy",
"colab_type": "code",
"colab": {}
},
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"\n",
"tfk = tf.keras\n",
"tfkl = tfk.layers\n",
"\n",
"# The message that the cuDNN GRU implementation is used is printed at the debug level.\n",
"tf.get_logger().setLevel(\"DEBUG\")\n",
"\n",
"# Generate data.\n",
"x = np.random.rand(5000, 200, 750).astype(np.float32)\n",
"x += 0.01\n",
"x.clip(min=0, max=1, out=x)\n",
"y = np.random.randint(2, size=(5000, 1), dtype=np.int32)\n",
"\n",
"def gru_cudnn(input_shape=(200, 750), dropout_rate=0.5):\n",
" model = tfk.Sequential()\n",
" model.add(tfkl.InputLayer(input_shape))\n",
" model.add(tfkl.Masking(mask_value=0.0))\n",
" model.add(tfkl.GRU(128))\n",
" model.add(tfkl.Dropout(dropout_rate))\n",
" model.add(tfkl.Dense(1))\n",
" return model"
],
"execution_count": 0,
"outputs": []
},
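{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Supplementary cell, a sketch not part of the original gist:* `gru_cudnn` above relies on the `tf.keras.layers.GRU` defaults. Per the TF 2.x `GRU` documentation, the cuDNN kernel is only eligible when `activation='tanh'`, `recurrent_activation='sigmoid'`, `recurrent_dropout=0`, `unroll=False`, `use_bias=True`, and `reset_after=True` (and, when masking is used, inputs are right-padded). The hypothetical variant below, `gru_cudnn_explicit`, spells these arguments out and reuses the `tfk`/`tfkl` aliases from the cell above."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"def gru_cudnn_explicit(input_shape=(200, 750), dropout_rate=0.5):\n",
"    # Same architecture as gru_cudnn, with the GRU arguments that the\n",
"    # cuDNN kernel requires written out. These are already the Keras\n",
"    # defaults; changing any of them (e.g. recurrent_dropout > 0)\n",
"    # forces the generic, non-cuDNN implementation.\n",
"    model = tfk.Sequential()\n",
"    model.add(tfkl.InputLayer(input_shape))\n",
"    model.add(tfkl.Masking(mask_value=0.0))\n",
"    model.add(tfkl.GRU(\n",
"        128,\n",
"        activation=\"tanh\",\n",
"        recurrent_activation=\"sigmoid\",\n",
"        recurrent_dropout=0,\n",
"        unroll=False,\n",
"        use_bias=True,\n",
"        reset_after=True))\n",
"    model.add(tfkl.Dropout(dropout_rate))\n",
"    model.add(tfkl.Dense(1))\n",
"    return model"
],
"execution_count": 0,
"outputs": []
},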
{
"cell_type": "markdown",
"metadata": {
"id": "C_R39V617sul",
"colab_type": "text"
},
"source": [
"## Train without a strategy\n",
"\n",
"This does use the cuDNN GRU implementation."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Okj7RPfp7e-M",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
},
"outputId": "6cd1aaa1-e0c4-4a61-a4d4-05c4b4fec9f3"
},
"source": [
"model = gru_cudnn()\n",
"model.compile(\n",
" optimizer=tfk.optimizers.Adam(1e-3), \n",
" loss=tfk.losses.BinaryCrossentropy(from_logits=True))\n",
"model.fit(x, y)"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"DEBUG:tensorflow:Layer gru will use cuDNN kernel when run on GPU.\n",
"157/157 [==============================] - 3s 21ms/step - loss: 0.7957\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7f3dc654b668>"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "954XeccX7pqu",
"colab_type": "text"
},
"source": [
"## Train using mirrored strategy (but one gpu)\n",
"\n",
"This does not use the cuDNN GRU implementation."
]
},
{
"cell_type": "code",
"metadata": {
"id": "J0-Srv_m7eNd",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 156
},
"outputId": "73638ade-c6ed-42f4-e5c5-b08856693f68"
},
"source": [
"strategy = tf.distribute.MirroredStrategy(devices=[\"GPU:0\"])\n",
"with strategy.scope():\n",
" model = gru_cudnn()\n",
" model.compile(\n",
" optimizer=tfk.optimizers.Adam(1e-3), \n",
" loss=tfk.losses.BinaryCrossentropy(from_logits=True))\n",
"model.fit(x, y)"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)\n",
"DEBUG:tensorflow:Layer gru_1 will use cuDNN kernel when run on GPU.\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"157/157 [==============================] - 67s 429ms/step - loss: 0.8210\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7f3dbff5f128>"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
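{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Supplementary cell, not part of the original gist:* confirm that the strategy created above wraps exactly one replica, which supports the claim that the slowdown occurs even with a single GPU. `num_replicas_in_sync` is a public attribute of `tf.distribute.Strategy`."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# With a single replica, the slower step time above is not multi-GPU\n",
"# synchronization overhead.\n",
"print(\"Replicas in sync:\", strategy.num_replicas_in_sync)"
],
"execution_count": 0,
"outputs": []
},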
{
"cell_type": "code",
"metadata": {
"id": "KQGjWBN87dk9",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}