@kaczmarj
Created June 12, 2020 19:22
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Welcome To Colaboratory",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "yXdB-l0w7vM5",
"colab_type": "text"
},
"source": [
"# `tf.distribute.MirroredStrategy` and cuDNN GRU\n",
"\n",
"`tf.distribute.MirroredStrategy` prevents use of the cuDNN GRU implementation, even when using a single GPU."
]
},
{
"cell_type": "code",
"metadata": {
"id": "QGjjhSEi8TFK",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"\n",
"if os.environ[\"COLAB_GPU\"] != \"1\":\n",
" raise RuntimeError(\"Use the GPU runtime.\")"
],
"execution_count": 0,
"outputs": []
},
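{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Supplementary cell, not part of the original gist:* a minimal sanity check, using the public `tf.config.list_physical_devices` API, that TensorFlow itself can see a GPU. The environment-variable check above only applies on Colab."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"import tensorflow as tf\n",
"\n",
"# The COLAB_GPU check above only inspects an environment variable;\n",
"# this confirms that TensorFlow can actually see the GPU device.\n",
"gpus = tf.config.list_physical_devices(\"GPU\")\n",
"print(gpus)\n",
"assert len(gpus) >= 1, \"No GPU visible to TensorFlow.\""
],
"execution_count": 0,
"outputs": []
},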
{
"cell_type": "code",
"metadata": {
"id": "Y5IfYDTY7fRy",
"colab_type": "code",
"colab": {}
},
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"\n",
"tfk = tf.keras\n",
"tfkl = tfk.layers\n",
"\n",
"# The message that the cuDNN GRU implementation is used is printed at the debug level.\n",
"tf.get_logger().setLevel(\"DEBUG\")\n",
"\n",
"# Generate data.\n",
"x = np.random.rand(5000, 200, 750).astype(np.float32)\n",
"x += 0.01\n",
"x.clip(min=0, max=1, out=x)\n",
"y = np.random.randint(2, size=(5000, 1), dtype=np.int32)\n",
"\n",
"def gru_cudnn(input_shape=(200, 750), dropout_rate=0.5):\n",
" model = tfk.Sequential()\n",
" model.add(tfkl.InputLayer(input_shape))\n",
" model.add(tfkl.Masking(mask_value=0.0))\n",
" model.add(tfkl.GRU(128))\n",
" model.add(tfkl.Dropout(dropout_rate))\n",
" model.add(tfkl.Dense(1))\n",
" return model"
],
"execution_count": 0,
"outputs": []
},
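{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Supplementary cell, a sketch not part of the original gist:* `gru_cudnn` above relies on the `tf.keras.layers.GRU` defaults. Per the TF 2.x `GRU` documentation, the cuDNN kernel is only eligible when `activation='tanh'`, `recurrent_activation='sigmoid'`, `recurrent_dropout=0`, `unroll=False`, `use_bias=True`, and `reset_after=True` (and, when masking is used, inputs are right-padded). The hypothetical variant below, `gru_cudnn_explicit`, spells these arguments out and reuses the `tfk`/`tfkl` aliases from the cell above."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"def gru_cudnn_explicit(input_shape=(200, 750), dropout_rate=0.5):\n",
"    # Same architecture as gru_cudnn, with the GRU arguments that the\n",
"    # cuDNN kernel requires written out. These are already the Keras\n",
"    # defaults; changing any of them (e.g. recurrent_dropout > 0)\n",
"    # forces the generic, non-cuDNN implementation.\n",
"    model = tfk.Sequential()\n",
"    model.add(tfkl.InputLayer(input_shape))\n",
"    model.add(tfkl.Masking(mask_value=0.0))\n",
"    model.add(tfkl.GRU(\n",
"        128,\n",
"        activation=\"tanh\",\n",
"        recurrent_activation=\"sigmoid\",\n",
"        recurrent_dropout=0,\n",
"        unroll=False,\n",
"        use_bias=True,\n",
"        reset_after=True))\n",
"    model.add(tfkl.Dropout(dropout_rate))\n",
"    model.add(tfkl.Dense(1))\n",
"    return model"
],
"execution_count": 0,
"outputs": []
},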
{
"cell_type": "markdown",
"metadata": {
"id": "C_R39V617sul",
"colab_type": "text"
},
"source": [
"## Train without a strategy\n",
"\n",
"This does use the cuDNN GRU implementation."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Okj7RPfp7e-M",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 69
},
"outputId": "6cd1aaa1-e0c4-4a61-a4d4-05c4b4fec9f3"
},
"source": [
"model = gru_cudnn()\n",
"model.compile(\n",
" optimizer=tfk.optimizers.Adam(1e-3), \n",
" loss=tfk.losses.BinaryCrossentropy(from_logits=True))\n",
"model.fit(x, y)"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"DEBUG:tensorflow:Layer gru will use cuDNN kernel when run on GPU.\n",
"157/157 [==============================] - 3s 21ms/step - loss: 0.7957\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7f3dc654b668>"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "954XeccX7pqu",
"colab_type": "text"
},
"source": [
"## Train using mirrored strategy (but one gpu)\n",
"\n",
"This does not use the cuDNN GRU implementation."
]
},
{
"cell_type": "code",
"metadata": {
"id": "J0-Srv_m7eNd",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 156
},
"outputId": "73638ade-c6ed-42f4-e5c5-b08856693f68"
},
"source": [
"strategy = tf.distribute.MirroredStrategy(devices=[\"GPU:0\"])\n",
"with strategy.scope():\n",
" model = gru_cudnn()\n",
" model.compile(\n",
" optimizer=tfk.optimizers.Adam(1e-3), \n",
" loss=tfk.losses.BinaryCrossentropy(from_logits=True))\n",
"model.fit(x, y)"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)\n",
"DEBUG:tensorflow:Layer gru_1 will use cuDNN kernel when run on GPU.\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n",
"157/157 [==============================] - 67s 429ms/step - loss: 0.8210\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7f3dbff5f128>"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
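{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Supplementary cell, not part of the original gist:* confirm that the strategy created above wraps exactly one replica, which supports the claim that the slowdown occurs even with a single GPU. `num_replicas_in_sync` is a public attribute of `tf.distribute.Strategy`."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# With a single replica, the slower step time above is not multi-GPU\n",
"# synchronization overhead.\n",
"print(\"Replicas in sync:\", strategy.num_replicas_in_sync)"
],
"execution_count": 0,
"outputs": []
},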
{
"cell_type": "code",
"metadata": {
"id": "KQGjWBN87dk9",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}