Skip to content

Instantly share code, notes, and snippets.

@ceshine
Created April 24, 2020 06:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ceshine/0549086d8c59efb1d706f6e369b8e136 to your computer and use it in GitHub Desktop.
Save ceshine/0549086d8c59efb1d706f6e369b8e136 to your computer and use it in GitHub Desktop.
TensorFlow Profiler with Custom Training Loop
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"Collapsed": "false"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/data/miniconda3/envs/tf/lib/python3.7/site-packages/tf_helper_bot/bot.py:9: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" from tqdm.autonotebook import tqdm\n"
]
}
],
"source": [
"import os\n",
"import logging\n",
"\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from tensorflow.keras.mixed_precision import experimental as amp\n",
"from tf_helper_bot import BaseBot, MovingAverageStatsTrackerCallback\n",
"#from tf_helper_bot.schedulers import CosineDecayWithWarmup\n",
"from tf_helper_bot.mixup import mixup_loss_fn\n",
"from tf_helper_bot.optimizers import RAdam\n",
"from tensorflow.python.profiler import profiler_v2 as profiler\n",
"\n",
"from cliff.model import get_model\n",
"from cliff.dataset import tfrecord_dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"MIXED_PRECISION = True\n",
"BATCH_SIZE = 24\n",
"ARCH = \"b3\"\n",
"GRAD_ACCU = 1\n",
"TRAIN_FOLDER = \"../data/tfrecords/train/\"\n",
"# VALID_FOLDER = \"../data/tfrecords/valid/\"\n",
"logging.getLogger(\"tensorflow\").setLevel(logging.INFO)\n",
"os.environ[\"TF_GPU_THREAD_MODE\"] = \"gpu_private\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"Collapsed": "false"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compute dtype: float16\n",
"Variable dtype: float32\n"
]
}
],
"source": [
"if MIXED_PRECISION:\n",
" policy = amp.Policy('mixed_float16')\n",
" amp.set_policy(policy)\n",
" print('Compute dtype: %s' % policy.compute_dtype)\n",
" print('Variable dtype: %s' % policy.variable_dtype)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"model = get_model(ARCH, n_classes=6)\n",
"class_weights = np.array([1., 1., 3., 2., 3., 2.])\n",
"strategy = tf.distribute.get_strategy()\n",
"train_dataset, train_steps = tfrecord_dataset(\n",
" tf.io.gfile.glob(TRAIN_FOLDER + \"*\"),\n",
" BATCH_SIZE, is_train=True, strategy=strategy,\n",
" return_sample_weights=False,\n",
" class_weights=class_weights,\n",
" mixup_alpha=-1, # disabled\n",
" cutmix_alpha=-1 # disabled\n",
")\n",
"optimizer = RAdam(learning_rate=2e-4, epsilon=1e-6)\n",
"if MIXED_PRECISION:\n",
" optimizer = amp.LossScaleOptimizer(optimizer, loss_scale='dynamic')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": [
"bot = BaseBot(\n",
" model=model,\n",
" criterion=mixup_loss_fn,\n",
" optimizer=optimizer,\n",
" train_dataset=train_dataset,\n",
" valid_dataset=None,\n",
" steps_per_epoch=train_steps,\n",
" gradient_accumulation_steps=GRAD_ACCU,\n",
" callbacks=(\n",
" MovingAverageStatsTrackerCallback(\n",
" avg_window=3,\n",
" log_interval=2,\n",
" ),\n",
" ),\n",
" metrics=(),\n",
" mixed_precision=MIXED_PRECISION\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"Collapsed": "false"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO][04/24/2020 14:23:12] Step 2 | loss 1.9551 | lr 2.00e-04 | 24.209s per step\n",
"[INFO][04/24/2020 14:23:13] Step 4 | loss 1.8648 | lr 2.00e-04 | 0.340s per step\n",
"[INFO][04/24/2020 14:23:14] Step 6 | loss 1.8709 | lr 2.00e-04 | 0.358s per step\n",
"[INFO][04/24/2020 14:23:14] Step 8 | loss 1.8832 | lr 2.00e-04 | 0.363s per step\n",
"[INFO][04/24/2020 14:23:15] Step 10 | loss 1.8606 | lr 2.00e-04 | 0.342s per step\n"
]
}
],
"source": [
"# Warm-up: train a few steps so the graph is compiled before the profiled runs in the following cells\n",
"bot.train(checkpoint_interval=1000, n_steps=10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"Collapsed": "false"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO][04/24/2020 14:23:19] Step 12 | loss 1.7825 | lr 2.00e-04 | 2.058s per step\n",
"[INFO][04/24/2020 14:23:20] Step 14 | loss 1.7328 | lr 2.00e-04 | 0.459s per step\n",
"[INFO][04/24/2020 14:23:21] Step 16 | loss 1.8026 | lr 2.00e-04 | 0.434s per step\n",
"[INFO][04/24/2020 14:23:22] Step 18 | loss 1.8207 | lr 2.00e-04 | 0.451s per step\n",
"[INFO][04/24/2020 14:23:23] Step 20 | loss 1.8043 | lr 2.00e-04 | 0.412s per step\n"
]
}
],
"source": [
"tf.profiler.experimental.start('../cache/tblogdir')\n",
"bot.train(checkpoint_interval=1000, n_steps=10)\n",
"tf.profiler.experimental.stop()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"Collapsed": "false"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO][04/24/2020 14:23:40] Step 22 | loss 1.8380 | lr 2.00e-04 | 2.191s per step\n",
"[INFO][04/24/2020 14:23:40] Step 24 | loss 1.8116 | lr 2.00e-04 | 0.408s per step\n",
"[INFO][04/24/2020 14:23:41] Step 26 | loss 1.7550 | lr 2.00e-04 | 0.431s per step\n",
"[INFO][04/24/2020 14:23:42] Step 28 | loss 1.7031 | lr 2.00e-04 | 0.437s per step\n",
"[INFO][04/24/2020 14:23:43] Step 30 | loss 1.6449 | lr 2.00e-04 | 0.418s per step\n"
]
}
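],
"source": [
"# Repeat the profiled run, this time calling the `tensorflow.python.profiler.profiler_v2` module (imported as `profiler`) directly, with an explicit warmup() before start().\n",
"# See the Keras callbacks source for reference:\n",
"# https://github.com/tensorflow/tensorflow/blob/e02b78e9df4e74161ae9733e038fd978db75901e/tensorflow/python/keras/callbacks.py#L1706\n",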
"profiler.warmup()\n",
"profiler.start(logdir='../cache/tblogdir')\n",
"bot.train(checkpoint_interval=1000, n_steps=10)\n",
"profiler.stop()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"Collapsed": "false"
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment