Last active
April 23, 2021 08:21
-
-
Save Rishit-dagli/5d06c69c69e990f9e15249e15002bb07 to your computer and use it in GitHub Desktop.
See how you can use .cache in TensorFlow to boost your performance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "caching.ipynb", | |
"provenance": [], | |
"collapsed_sections": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "jcp1GPK4QWaE" | |
}, | |
"source": [ | |
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/Rishit-dagli/5d06c69c69e990f9e15249e15002bb07/caching.ipynb)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "PZkuqrB7MUPT" | |
}, | |
"source": [ | |
"💡#TensorFlowTip\r\n", | |
"\r\n", | |
"Use .cache to avoid repeating work such as opening files and reading data on every epoch:\r\n", | |
"- transformations applied before .cache run only during the first epoch\r\n", | |
"- can cache in-memory or on-disk\r\n", | |
"- avoids repeatedly performing expensive CPU ops\r\n", | |
"\r\n", | |
"Try it out for yourself - https://gist.github.com/Rishit-dagli/5d06c69c69e990f9e15249e15002bb07" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EhmOI16Suqae" | |
}, | |
"source": [ | |
"import tensorflow as tf\r\n", | |
"import time" | |
], | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qHHAIT4Lu9jh" | |
}, | |
"source": [ | |
"class SampleDataset(tf.data.Dataset):\r\n", | |
" def _generator(num_samples):\r\n", | |
" # Opening the file\r\n", | |
" time.sleep(0.05)\r\n", | |
"\r\n", | |
" for sample_idx in range(num_samples):\r\n", | |
" # Reading data (line, record) from the file\r\n", | |
" time.sleep(0.025)\r\n", | |
"\r\n", | |
" yield (sample_idx,)\r\n", | |
"\r\n", | |
" def __new__(cls, num_samples=3):\r\n", | |
" return tf.data.Dataset.from_generator(\r\n", | |
" cls._generator,\r\n", | |
" output_types=tf.dtypes.int64,\r\n", | |
" output_shapes=(1,),\r\n", | |
" args=(num_samples,)\r\n", | |
" )\r\n", | |
"\r\n", | |
"def benchmark(dataset, num_epochs=2):\r\n", | |
" start_time = time.perf_counter()\r\n", | |
" for epoch_num in range(num_epochs):\r\n", | |
" for sample in dataset:\r\n", | |
" # Performing a training step\r\n", | |
" time.sleep(0.01)\r\n", | |
" tf.print(\"Execution time:\", time.perf_counter() - start_time)" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xsZjHNwQutCq" | |
}, | |
"source": [ | |
"def mapped_function(s):\r\n", | |
" # Do some pre-processing\r\n", | |
" tf.py_function(lambda: time.sleep(0.05), [], ())\r\n", | |
" return s" | |
], | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "3E75_We1vLqh", | |
"outputId": "805eb42a-f357-476a-816f-3542514dfa2a" | |
}, | |
"source": [ | |
"benchmark(\r\n", | |
" SampleDataset()\r\n", | |
" .map(mapped_function),\r\n", | |
" num_epochs = 5\r\n", | |
")" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Execution time: 1.6306257950000145\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "sdEx8ROTvWS9", | |
"outputId": "290bc79c-84a6-4d70-8a18-3ef7bc88c1b6" | |
}, | |
"source": [ | |
"benchmark(\r\n", | |
" SampleDataset()\r\n", | |
" .map(mapped_function)\r\n", | |
" # .cache(), # in-memory cache\r\n", | |
" .cache(filename = 'tmp.cache'), # on-disk cache\r\n", | |
" num_epochs = 5\r\n", | |
")" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Execution time: 0.18321933699996862\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment