Last active
September 24, 2018 01:53
-
-
Save raytroop/14e27ef034c9e849f6e780a7e59a2776 to your computer and use it in GitHub Desktop.
clip operation in TensorFlow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.10.1\n", | |
"1.15.1\n" | |
] | |
} | |
], | |
"source": [ | |
"import tensorflow as tf\n", | |
"import numpy as np\n", | |
"print(tf.__version__)\n", | |
"print(np.__version__)\n", | |
"tf.set_random_seed(42)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 1 tf.clip_by_value\n", | |
"```python\n", | |
"tf.clip_by_value(\n", | |
" t,\n", | |
" clip_value_min,\n", | |
" clip_value_max,\n", | |
" name=None\n", | |
")\n", | |
"```\n", | |
"Clips tensor values to a specified min and max.\n", | |
"<br>\n", | |
"`tf.clip_by_value` is very simple\n", | |
"<br>\n", | |
"Note: parameter `t` is single Tensor" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = tf.random_normal(shape=(3, 3), mean=0.0, stddev=1.0, seed=42)\n", | |
"x_clip = tf.clip_by_value(x, clip_value_min=-0.5, clip_value_max=0.5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[ 1.3148774 -0.15421568 0.9113878 ]\n", | |
" [-0.7991441 -0.10875293 0.28436786]\n", | |
" [ 0.7661625 -0.6211289 0.9974318 ]]\n", | |
"[[ 0.5 -0.15421568 0.5 ]\n", | |
" [-0.5 -0.10875293 0.28436786]\n", | |
" [ 0.5 -0.5 0.5 ]]\n" | |
] | |
} | |
], | |
"source": [ | |
"with tf.Session() as sess:\n", | |
" x_, x_clip_ = sess.run([x, x_clip])\n", | |
"print(x_)\n", | |
"print(x_clip_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"t_ = np.clip(x_, a_min=-0.5, a_max=0.5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.allclose(x_clip_, t_)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 2 tf.clip_by_norm\n", | |
"```python\n", | |
"tf.clip_by_norm(\n", | |
" t,\n", | |
" clip_norm,\n", | |
" axes=None,\n", | |
" name=None\n", | |
")\n", | |
"```\n", | |
"Clips tensor values to a maximum L2-norm.\n", | |
"<br>\n", | |
"\n", | |
" t * clip_norm / max(l2norm(t), clip_norm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"clip_norm = 2.0\n", | |
"x = tf.random_normal(shape=(3, 3), mean=0.0, stddev=1.0, seed=42)\n", | |
"x_clip = tf.clip_by_norm(x, clip_norm=clip_norm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[ 1.3148774 -0.15421568 0.9113878 ]\n", | |
" [-0.7991441 -0.10875293 0.28436786]\n", | |
" [ 0.7661625 -0.6211289 0.9974318 ]]\n", | |
"[[ 1.1442033 -0.13419814 0.7930876 ]\n", | |
" [-0.6954134 -0.09463655 0.24745627]\n", | |
" [ 0.6667129 -0.540505 0.8679629 ]]\n" | |
] | |
} | |
], | |
"source": [ | |
"with tf.Session() as sess:\n", | |
" x_, x_clip_ = sess.run([x, x_clip])\n", | |
"print(x_)\n", | |
"print(x_clip_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"l2norm = np.linalg.norm(x_)\n", | |
"t_ = x_ * clip_norm / np.maximum(l2norm, clip_norm)\n", | |
"np.allclose(x_clip_, t_)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 3 tf.clip_by_average_norm\n", | |
"```python\n", | |
"tf.clip_by_average_norm(\n", | |
" t,\n", | |
" clip_norm,\n", | |
" name=None\n", | |
")\n", | |
"```\n", | |
"Clips tensor values to a **maximum average L2-norm**.\n", | |
"<br>\n", | |
"\n", | |
" t * clip_norm / max(l2norm_avg(t), clip_norm)\n", | |
"\n", | |
"The doc are a bit ambiguous, from test and impl, it(average L2-norm) seems avg_norm is norm/len(vector)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"[https://github.com/tensorflow/tensorflow/blob/4dcfddc5d12018a5a0fdca652b9221ed95e9eb23/tensorflow/python/kernel_tests/clip_ops_test.py#L372-L382](https://github.com/tensorflow/tensorflow/blob/4dcfddc5d12018a5a0fdca652b9221ed95e9eb23/tensorflow/python/kernel_tests/clip_ops_test.py#L372-L382)\n", | |
"```python\n", | |
"def testClipByAverageNormClipped(self):\n", | |
" # Norm clipping when average clip_norm < 0.83333333\n", | |
" with self.test_session(use_gpu=True):\n", | |
" x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])\n", | |
" # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333\n", | |
" np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]\n", | |
" clip_norm = 0.8\n", | |
" ans = clip_ops.clip_by_average_norm(x, clip_norm)\n", | |
" tf_ans = ans.eval()\n", | |
" \n", | |
" self.assertAllClose(np_ans, tf_ans)\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"clip_norm = 0.1\n", | |
"x = tf.random_normal(shape=(3, 3), mean=0.0, stddev=1.0, seed=42)\n", | |
"x_clip = tf.clip_by_average_norm(x, clip_norm=clip_norm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[ 1.3148774 -0.15421568 0.9113878 ]\n", | |
" [-0.7991441 -0.10875293 0.28436786]\n", | |
" [ 0.7661625 -0.6211289 0.9974318 ]]\n", | |
"[[ 0.51489156 -0.06038916 0.35688946]\n", | |
" [-0.31293604 -0.04258645 0.11135533]\n", | |
" [ 0.3000208 -0.24322726 0.39058334]]\n" | |
] | |
} | |
], | |
"source": [ | |
"with tf.Session() as sess:\n", | |
" x_, x_clip_ = sess.run([x, x_clip])\n", | |
"print(x_)\n", | |
"print(x_clip_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"avg_l2norm = np.linalg.norm(x_) / x_.size\n", | |
"t_ = x_ * clip_norm / np.maximum(avg_l2norm, clip_norm)\n", | |
"np.allclose(x_clip_, t_)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 4 tf.clip_by_global_norm\n", | |
"```python\n", | |
"tf.clip_by_global_norm(\n", | |
" t_list,\n", | |
" clip_norm,\n", | |
" use_norm=None,\n", | |
" name=None\n", | |
")\n", | |
"```\n", | |
"Clips values of **multiple tensors** by the ratio of the sum of their norms.\n", | |
"<br>\n", | |
"\n", | |
"Given a tuple or list of tensors t_list, and a clipping ratio `clip_norm`, this operation returns a list of clipped tensors list_clipped and the global norm (`global_norm`) of all tensors in t_list. \n", | |
"<br>\n", | |
"Optionally, if you've already computed the global norm for t_list, you can specify the global norm with `use_norm`.\n", | |
"<br>\n", | |
"\n", | |
"To perform the clipping, the values t_list[i] are set to:\n", | |
"\n", | |
" t_list[i] * clip_norm / max(global_norm, clip_norm)\n", | |
"where:\n", | |
"\n", | |
" global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"clip_norm = 2.0\n", | |
"x = tf.random_normal(shape=(3, 3), mean=0.0, stddev=1.0, seed=42)\n", | |
"y = tf.random_normal(shape=(5, 5), mean=0.0, stddev=2.0, seed=42)\n", | |
"(x_clip, y_clip), global_norm = tf.clip_by_global_norm([x, y], clip_norm=clip_norm)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10.661552\n" | |
] | |
} | |
], | |
"source": [ | |
"with tf.Session() as sess:\n", | |
" x_, y_, x_clip_, y_clip_, global_norm_ = sess.run([x, y, x_clip, y_clip, global_norm])\n", | |
"print(global_norm_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gnorm = np.linalg.norm([np.linalg.norm(t) for t in [x_, y_]])\n", | |
"t_ = x_ * clip_norm / np.maximum(gnorm, clip_norm)\n", | |
"np.allclose(x_clip_, t_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.allclose(global_norm_, gnorm)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Application\n", | |
"clip operation is typically used to clip gradients before applying them with an optimizer.\n", | |
"```python\n", | |
"grads = tf.gradients(loss, tf.trainable_variables())\n", | |
"grads, _ = tf.clip_by_global_norm(grads, 50) # gradient clipping\n", | |
"grads_and_vars = list(zip(grads, tf.trainable_variables()))\n", | |
"train_op = optimizer.apply_gradients(grads_and_vars)\n", | |
"```\n", | |
"[link](https://medium.com/@dubovikov.kirill/actually-we-can-work-with-gradients-directly-in-tensorflow-via-optimizers-compute-gradients-and-fc2b5612665a)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment